util.py
3719 lines
| 117.9 KiB
| text/x-python
|
PythonLexer
/ mercurial / util.py
timeless@mozdev.org
|
r17515 | # util.py - Mercurial utility functions and platform specific implementations | ||
Martin Geisler
|
r8226 | # | ||
# Copyright 2005 K. Thananchayan <thananck@yahoo.com> | ||||
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com> | ||||
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com> | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
Matt Mackall
|
r10263 | # GNU General Public License version 2 or any later version. | ||
mpm@selenic.com
|
r1082 | |||
timeless@mozdev.org
|
r17515 | """Mercurial utility functions and platform specific implementations. | ||
mpm@selenic.com
|
r1082 | |||
Martin Geisler
|
r8227 | This contains helper routines that are independent of the SCM core and | ||
hide platform-specific details from the core. | ||||
mpm@selenic.com
|
r1082 | """ | ||
mpm@selenic.com
|
r419 | |||
Gregory Szorc
|
r27358 | from __future__ import absolute_import | ||
import bz2 | ||||
import calendar | ||||
Yuya Nishihara
|
r31453 | import codecs | ||
Gregory Szorc
|
r27358 | import collections | ||
import datetime | ||||
import errno | ||||
import gc | ||||
import hashlib | ||||
import imp | ||||
import os | ||||
Jun Wu
|
r30418 | import platform as pyplatform | ||
Siddharth Agarwal
|
r21907 | import re as remod | ||
Gregory Szorc
|
r27358 | import shutil | ||
import signal | ||||
import socket | ||||
Jun Wu
|
r30418 | import stat | ||
Augie Fackler
|
r30054 | import string | ||
Gregory Szorc
|
r27358 | import subprocess | ||
import sys | ||||
import tempfile | ||||
import textwrap | ||||
import time | ||||
import traceback | ||||
Pierre-Yves David
|
r26266 | import zlib | ||
Matt Mackall
|
r3769 | |||
Gregory Szorc
|
r27358 | from . import ( | ||
encoding, | ||||
error, | ||||
i18n, | ||||
osutil, | ||||
parsers, | ||||
timeless
|
r28818 | pycompat, | ||
Gregory Szorc
|
r27358 | ) | ||
Matt Mackall
|
r3769 | |||
Yuya Nishihara
|
r30471 | empty = pycompat.empty | ||
httplib = pycompat.httplib | ||||
httpserver = pycompat.httpserver | ||||
pickle = pycompat.pickle | ||||
queue = pycompat.queue | ||||
socketserver = pycompat.socketserver | ||||
Yuya Nishihara
|
r30472 | stderr = pycompat.stderr | ||
stdin = pycompat.stdin | ||||
stdout = pycompat.stdout | ||||
Yuya Nishihara
|
r30471 | stringio = pycompat.stringio | ||
urlerr = pycompat.urlerr | ||||
timeless
|
r28883 | urlreq = pycompat.urlreq | ||
Yuya Nishihara
|
r30471 | xmlrpclib = pycompat.xmlrpclib | ||
timeless
|
r28883 | |||
Simon Farnsworth
|
def isatty(fp):
    """Report whether ``fp`` is connected to a terminal.

    Objects that do not provide an ``isatty`` method are reported as not
    being a terminal.
    """
    try:
        answer = fp.isatty()
    except AttributeError:
        answer = False
    return answer
# glibc determines buffering on first write to stdout - if we replace a TTY | ||||
# destined stdout with a pipe destined stdout (e.g. pager), we want line | ||||
# buffering | ||||
if isatty(stdout): | ||||
Pulkit Goyal
|
r30988 | stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1) | ||
Simon Farnsworth
|
r30876 | |||
Pulkit Goyal
|
r30639 | if pycompat.osname == 'nt': | ||
Gregory Szorc
|
r27358 | from . import windows as platform | ||
Simon Farnsworth
|
r30876 | stdout = platform.winstdout(stdout) | ||
Adrian Buehlmann
|
r14912 | else: | ||
Gregory Szorc
|
r27358 | from . import posix as platform | ||
Adrian Buehlmann
|
r14926 | |||
Gregory Szorc
|
r27358 | _ = i18n._ | ||
Adrian Buehlmann
|
r14926 | |||
Yuya Nishihara
|
r29530 | bindunixsocket = platform.bindunixsocket | ||
Idan Kamara
|
r14927 | cachestat = platform.cachestat | ||
Adrian Buehlmann
|
r14926 | checkexec = platform.checkexec | ||
checklink = platform.checklink | ||||
Adrian Buehlmann
|
r15011 | copymode = platform.copymode | ||
Adrian Buehlmann
|
r14926 | executablepath = platform.executablepath | ||
expandglobs = platform.expandglobs | ||||
explainexit = platform.explainexit | ||||
findexe = platform.findexe | ||||
gethgcmd = platform.gethgcmd | ||||
getuser = platform.getuser | ||||
timeless
|
r28027 | getpid = os.getpid | ||
Adrian Buehlmann
|
r14926 | groupmembers = platform.groupmembers | ||
groupname = platform.groupname | ||||
hidewindow = platform.hidewindow | ||||
isexec = platform.isexec | ||||
isowner = platform.isowner | ||||
localpath = platform.localpath | ||||
lookupreg = platform.lookupreg | ||||
makedir = platform.makedir | ||||
nlinks = platform.nlinks | ||||
normpath = platform.normpath | ||||
Matt Mackall
|
r15488 | normcase = platform.normcase | ||
Siddharth Agarwal
|
r24605 | normcasespec = platform.normcasespec | ||
normcasefallback = platform.normcasefallback | ||||
Adrian Buehlmann
|
r14926 | openhardlinks = platform.openhardlinks | ||
oslink = platform.oslink | ||||
parsepatchoutput = platform.parsepatchoutput | ||||
pconvert = platform.pconvert | ||||
Pierre-Yves David
|
r25420 | poll = platform.poll | ||
Adrian Buehlmann
|
r14926 | popen = platform.popen | ||
posixfile = platform.posixfile | ||||
quotecommand = platform.quotecommand | ||||
Gregory Szorc
|
r22245 | readpipe = platform.readpipe | ||
Adrian Buehlmann
|
r14926 | rename = platform.rename | ||
FUJIWARA Katsunori
|
r24692 | removedirs = platform.removedirs | ||
Adrian Buehlmann
|
r14926 | samedevice = platform.samedevice | ||
samefile = platform.samefile | ||||
samestat = platform.samestat | ||||
setbinary = platform.setbinary | ||||
setflags = platform.setflags | ||||
setsignalhandler = platform.setsignalhandler | ||||
shellquote = platform.shellquote | ||||
spawndetached = platform.spawndetached | ||||
Bryan O'Sullivan
|
r17560 | split = platform.split | ||
Adrian Buehlmann
|
r14926 | sshargs = platform.sshargs | ||
Bryan O'Sullivan
|
r18026 | statfiles = getattr(osutil, 'statfiles', platform.statfiles) | ||
Bryan O'Sullivan
|
r18868 | statisexec = platform.statisexec | ||
statislink = platform.statislink | ||||
Adrian Buehlmann
|
r14926 | testpid = platform.testpid | ||
umask = platform.umask | ||||
unlink = platform.unlink | ||||
username = platform.username | ||||
Adrian Buehlmann
|
r14912 | |||
Dirkjan Ochtman
|
r6470 | # Python compatibility | ||
Matt Mackall
|
r3769 | |||
Matt Mackall
|
r15656 | _notset = object() | ||
Matt Mackall
|
r27015 | # disable Python's problematic floating point timestamps (issue4836) | ||
# (Python hypocritically says you shouldn't change this behavior in | ||||
# libraries, and sure enough Mercurial is not a library.) | ||||
os.stat_float_times(False) | ||||
Matt Mackall
|
def safehasattr(thing, attr):
    """Return True if ``thing`` exposes an attribute named ``attr``.

    The lookup goes through getattr() with the module-level ``_notset``
    sentinel as default, so an attribute whose value is None (or any other
    falsy value) still counts as present.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
Remi Chaintron
|
def bitsfrom(container):
    """Return the bitwise OR of every item in ``container``.

    An empty container yields 0.
    """
    mask = 0
    for flag in container:
        mask |= flag
    return mask
Mike Hommey
|
r22962 | DIGESTS = { | ||
Augie Fackler
|
r29342 | 'md5': hashlib.md5, | ||
'sha1': hashlib.sha1, | ||||
'sha512': hashlib.sha512, | ||||
Mike Hommey
|
r22962 | } | ||
# List of digest types from strongest to weakest | ||||
Gregory Szorc
|
r27357 | DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5'] | ||
Mike Hommey
|
r22962 | |||
for k in DIGESTS_BY_STRENGTH: | ||||
assert k in DIGESTS | ||||
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        ``s``, when non-empty, is fed to every hash immediately.
        Raises Abort if a name is not a key of DIGESTS.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every tracked hash."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type ``key``.

        Raises Abort if ``key`` is not a key of DIGESTS.
        """
        if key not in DIGESTS:
            # Report the requested key; the previous code referenced an
            # undefined name here and died with NameError instead of Abort.
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        """Iterate over the names of the tracked digest types."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
Mike Hommey
|
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        # number of bytes handed out by read() so far
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, hashing and counting the data."""
        data = self._fh.read(length)
        self._got += len(data)
        self._digester.update(data)
        return data

    def validate(self):
        """Raise Abort if the size or any digest does not match.

        The size is checked first, then each expected digest in turn.
        """
        if self._got != self._size:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for name, expected in self._digests.items():
            actual = self._digester[name]
            if expected == actual:
                continue
            # i18n: first parameter is a digest name
            raise Abort(_('%s mismatch: expected %s, got %s') %
                        (name, expected, actual))
Renato Cunha
|
# Keep the builtin buffer() when the interpreter provides one; otherwise
# install a slicing-based stand-in with the same calling convention.
try:
    buffer = buffer
except NameError:
    if pycompat.ispy3:
        def buffer(sliceable, offset=0, length=None):
            # memoryview gives a view on the data instead of copying it
            view = memoryview(sliceable)
            if length is None:
                return view[offset:]
            return view[offset:offset + length]
    else:
        def buffer(sliceable, offset=0, length=None):
            if length is None:
                return sliceable[offset:]
            return sliceable[offset:offset + length]
Ronny Pfannschmidt
|
r10756 | |||
Pulkit Goyal
|
r30639 | closefds = pycompat.osname == 'posix' | ||
Patrick Mezard
|
r10197 | |||
Pierre-Yves David
|
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.

    All data is read with os.read() and therefore handled as bytes; the
    bytes literals below are plain str on Python 2.
    """

    def __init__(self, input):
        self._input = input
        # chunks read from the pipe and not yet returned to the caller
        self._buffer = []
        self._eof = False
        # total length of the chunks held in self._buffer
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """return at most 'size' bytes, reading more until EOF if needed"""
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        """return data up to and including the next newline

        At end of file, whatever remains in the buffer is returned (possibly
        an empty string). Extra arguments are accepted and ignored."""
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [b''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find(b'\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                # earlier chunks are known to hold no newline, so only the
                # most recent chunk needs to be searched
                lfi = self._buffer[-1].find(b'\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return b''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = b''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
Patrick Mezard
|
def popen2(cmd, env=None, newlines=False):
    """Run ``cmd`` through the shell and return its (stdin, stdout) pipes.

    ``env`` and ``newlines`` are forwarded to subprocess.Popen as ``env``
    and ``universal_newlines``.
    """
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=pipe, stdout=pipe,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
Patrick Mezard
|
r10197 | |||
Patrick Mezard
|
def popen3(cmd, env=None, newlines=False):
    """Like popen4(), but return only the (stdin, stdout, stderr) pipes."""
    # popen4 returns (stdin, stdout, stderr, process); drop the process
    return popen4(cmd, env, newlines)[:3]
Pierre-Yves David
|
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run ``cmd`` through the shell with all three standard streams piped.

    Returns a ``(stdin, stdout, stderr, process)`` tuple where ``process``
    is the subprocess.Popen object.
    """
    pipe = subprocess.PIPE
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=pipe, stdout=pipe, stderr=pipe,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
Dirkjan Ochtman
|
r7106 | |||
Matt Mackall
|
def version():
    """Return version information if available."""
    try:
        from . import __version__
        # NOTE(review): the attribute access stays inside the try block,
        # as in the original, in case the import is only resolved lazily
        # on first use -- confirm before moving it out.
        current = __version__.version
    except ImportError:
        current = 'unknown'
    return current
Gregory Szorc
|
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    Any other value of ``n`` makes the function return None.
    """
    if not v:
        v = version()
    # Split the numeric part from the "extra" part at the first '+' or '-'.
    # Raw string: '\+' is not a valid escape in a plain string literal.
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
Chris Mason
|
r2609 | # used by parsedate | ||
Matt Mackall
|
r3808 | defaultdateformats = ( | ||
Matt Mackall
|
r29638 | '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601 | ||
'%Y-%m-%dT%H:%M', # without seconds | ||||
'%Y-%m-%dT%H%M%S', # another awful but legal variant without : | ||||
'%Y-%m-%dT%H%M', # without seconds | ||||
'%Y-%m-%d %H:%M:%S', # our common legal variant | ||||
'%Y-%m-%d %H:%M', # without seconds | ||||
'%Y-%m-%d %H%M%S', # without : | ||||
'%Y-%m-%d %H%M', # without seconds | ||||
Matt Mackall
|
r3808 | '%Y-%m-%d %I:%M:%S%p', | ||
'%Y-%m-%d %H:%M', | ||||
'%Y-%m-%d %I:%M%p', | ||||
'%Y-%m-%d', | ||||
'%m-%d', | ||||
'%m/%d', | ||||
'%m/%d/%y', | ||||
'%m/%d/%Y', | ||||
'%a %b %d %H:%M:%S %Y', | ||||
'%a %b %d %I:%M:%S%p %Y', | ||||
Markus F.X.J. Oberhumer
|
r4708 | '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822" | ||
Matt Mackall
|
r3808 | '%b %d %H:%M:%S %Y', | ||
Matt Mackall
|
r3812 | '%b %d %I:%M:%S%p %Y', | ||
'%b %d %H:%M:%S', | ||||
Matt Mackall
|
r3808 | '%b %d %I:%M:%S%p', | ||
'%b %d %H:%M', | ||||
'%b %d %I:%M%p', | ||||
'%b %d %Y', | ||||
'%b %d', | ||||
'%H:%M:%S', | ||||
Carey Evans
|
r9383 | '%I:%M:%S%p', | ||
Matt Mackall
|
r3808 | '%H:%M', | ||
'%I:%M%p', | ||||
) | ||||
Chris Mason
|
r2609 | |||
Matt Mackall
|
r3812 | extendeddateformats = defaultdateformats + ( | ||
"%Y", | ||||
"%Y-%m", | ||||
"%b", | ||||
"%b %Y", | ||||
) | ||||
Chris Mason
|
r2609 | |||
Brendan Cully
|
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount

    if argcount == 0:
        # a one-slot list is enough to remember the single possible result
        slot = []
        def f():
            if not slot:
                slot.append(func())
            return slot[0]
        return f

    results = {}
    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in results:
                results[arg] = func(arg)
            return results[arg]
    else:
        def f(*args):
            if args not in results:
                results[args] = func(*args)
            return results[args]

    return f
Angel Ezquerra
|
class sortdict(dict):
    '''a simple sorted dictionary

    Keys are kept in the order they were last set: assigning to an existing
    key moves it to the end of the ordering.
    '''

    def __init__(self, data=None):
        # keys in iteration order
        self._list = []
        if data:
            self.update(data)

    def copy(self):
        return sortdict(self)

    def __setitem__(self, key, val):
        if key in self:
            self._list.remove(key)
        self._list.append(key)
        dict.__setitem__(self, key, val)

    def __iter__(self):
        return self._list.__iter__()

    def update(self, src):
        """Set every (key, value) pair from ``src``, a dict or iterable of
        pairs."""
        if isinstance(src, dict):
            # Plain dicts have no iteritems() on Python 3; fall back to
            # items() there. Python 2 behavior is unchanged.
            iteritems = getattr(src, 'iteritems', None)
            if iteritems is None:
                iteritems = src.items
            src = iteritems()
        for k, v in src:
            self[k] = v

    def clear(self):
        dict.clear(self)
        self._list = []

    def items(self):
        return [(k, self[k]) for k in self._list]

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self._list.remove(key)

    def pop(self, key, *args, **kwargs):
        """Remove ``key`` and return its value, like dict.pop().

        Raises KeyError when the key is missing and no default is given.
        """
        value = dict.pop(self, key, *args, **kwargs)
        try:
            self._list.remove(key)
        except ValueError:
            # key was absent and a default was supplied
            pass
        return value

    def keys(self):
        return self._list[:]

    def iterkeys(self):
        return self._list.__iter__()

    def iteritems(self):
        for k in self._list:
            yield k, self[k]

    def insert(self, index, key, val):
        """Set ``key`` to ``val`` and place it at position ``index``.

        If the key already exists it is moved rather than duplicated;
        ``index`` then refers to the ordering with the old entry removed.
        """
        if key in self:
            self._list.remove(key)
        self._list.insert(index, key)
        dict.__setitem__(self, key, val)

    def __repr__(self):
        if not self:
            return '%s()' % self.__class__.__name__
        return '%s(%r)' % (self.__class__.__name__, self.items())
Angel Ezquerra
|
r21813 | |||
Gregory Szorc
|
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # a fresh node is unlinked and carries no entry; the _notset
        # sentinel in ``key`` is what marks it as empty
        self.key = _notset
        self.value = None
        self.prev = None
        self.next = None

    def markempty(self):
        """Mark the node as emptied."""
        # only the key is reset; ``value`` is left as it was
        self.key = _notset
Siddharth Agarwal
|
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Empty nodes (key is ``_notset``) always sit at the old end of the list,
    i.e. they are reached first when walking backwards from head.prev.
    """

    def __init__(self, max):
        self._cache = {}

        # The ring starts out as a single empty node linked to itself.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # number of nodes in the ring (empty ones included)
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k`` or ``default``.

        Unlike item access, this does not mark the entry as recently used.
        """
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        # Live nodes are contiguous starting at head; stop at the first
        # empty node (or once we have wrapped around to the emptied head).
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        """Return a new lrucachedict with the same capacity, entries and
        recency ordering."""
        result = lrucachedict(self._capacity)

        # Find the oldest occupied node. head.prev is an empty node whenever
        # the cache is not full or entries have been deleted; copying from
        # it would insert the _notset sentinel as a key and drop the newest
        # entries.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # A.prev = N
        node.next.prev = node
        # E.next = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
Siddharth Agarwal
|
r19710 | |||
Matt Mackall
|
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    results = {}
    # argument keys, least recently used first
    recency = collections.deque()

    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                recency.remove(arg)
            else:
                if len(results) > 20:
                    # evict the least recently used entry
                    del results[recency.popleft()]
                results[arg] = func(arg)
            recency.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                recency.remove(args)
            else:
                if len(results) > 20:
                    # evict the least recently used entry
                    del results[recency.popleft()]
                results[args] = func(*args)
            recency.append(args)
            return results[args]

    return f
Matt Mackall
|
class propertycache(object):
    """Descriptor that computes an attribute once and stores it on the
    instance.

    On access the wrapped function is called with the instance and the
    result is written into the instance ``__dict__`` under the function's
    name.
    """

    def __init__(self, func):
        self.name = func.__name__
        self.func = func

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
Pierre-Yves David
|
r18013 | |||
Bryan O'Sullivan
|
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # communicate() returns (stdout, stderr); stderr is not piped here
    return proc.communicate(s)[0]
mpm@selenic.com
|
r419 | |||
Bryan O'Sullivan
|
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.

    Returns the content of the output file. Raises Abort if the command
    exits with a failure status. Both temporary files are removed on the
    way out, whether or not the command succeeded.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        try:
            fp.write(s)
        finally:
            # close the input file even if the write fails
            fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command writes the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # on OpenVMS a status with the low bit set is treated as success
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup: a file that cannot be removed is ignored
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
Bryan O'Sullivan
|
r1293 | |||
filtertable = { | ||||
'tempfile:': tempfilter, | ||||
'pipe:': pipefilter, | ||||
} | ||||
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, handler in filtertable.iteritems():
        if not cmd.startswith(prefix):
            continue
        # strip the "<name>:" prefix and any whitespace following it
        return handler(s, cmd[len(prefix):].lstrip())
    # no recognized prefix: default to piping through the command
    return pipefilter(s, cmd)
mpm@selenic.com
|
def binary(s):
    """return true if a string is binary data"""
    # empty or missing data is never considered binary
    if not s:
        return False
    # a NUL character anywhere marks the data as binary
    return '\0' in s
Matt Mackall
|
r6762 | |||
Brendan Cully
|
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    # NOTE: the ``min`` and ``max`` parameters shadow the builtins in here.
    def log2(x):
        # position of the highest set bit; 0 for x == 0
        if not x:
            return 0
        shifts = 0
        while x:
            x >>= 1
            shifts += 1
        return shifts - 1

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            continue
        if min < max:
            # grow the threshold: at least double it, and at least up to
            # the largest power of two not above what was just collected
            min = min << 1
            floor = 1 << log2(pendinglen)
            if floor > min:
                min = floor
            if min > max:
                min = max
        yield ''.join(pending)
        pendinglen = 0
        pending = []
    if pending:
        # flush whatever is left once the source is exhausted
        yield ''.join(pending)
Matt Mackall
|
r7947 | Abort = error.Abort | ||
mpm@selenic.com
|
r508 | |||
Matt Mackall
|
r10282 | def always(fn): | ||
return True | ||||
def never(fn):
    """Predicate that ignores its argument and returns False."""
    return False
Bryan O'Sullivan
|
r724 | |||
Pierre-Yves David
|
def nogc(func):
    """Decorator that runs func with the garbage collector disabled.

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking
    has no effect on when GCs are triggered, only on what objects the GC
    looks into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, so on Python 2.7
    and later func is returned unwrapped.
    """
    if sys.version_info >= (2, 7):
        return func
    def wrapper(*args, **kwargs):
        # remember the collector state so that a collector the caller had
        # already disabled is not switched back on afterwards
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()
    return wrapper
Alexis S. L. Carvalho
|
def pathto(root, n1, n2):
    '''Return the relative path leading from n1 to n2.

    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        rootdrive = os.path.splitdrive(root)[0]
        n1drive = os.path.splitdrive(n1)[0]
        if rootdrive != n1drive:
            # different drives: no relative path exists between them
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src = splitpath(n1)
    dst = n2.split('/')
    # count the leading components both paths share
    common = 0
    limit = min(len(src), len(dst))
    while common < limit and src[common] == dst[common]:
        common += 1
    # climb out of what remains of n1, then descend into the rest of n2
    climb = ['..'] * (len(src) - common)
    return pycompat.ossep.join(climb + dst[common:]) or '.'
Bryan O'Sullivan
|
r884 | |||
Adrian Buehlmann
|
def mainfrozen():
    """Return a true value if we are running as a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    # "frozen" is set by new py2exe, "importers" by old py2exe
    for marker in ("frozen", "importers"):
        present = safehasattr(sys, marker)
        if present:
            return present
    return imp.is_frozen(u"__main__") # tools/freeze
"Paul Moore "
|
r6499 | |||
Mads Kiilerich
|
# the location of data files matching the source code
if not mainfrozen() or getattr(sys, 'frozen', None) == 'macosx_app':
    datapath = os.path.dirname(pycompat.fsencode(__file__))
else:
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)

i18n.setdatapath(datapath)
Mads Kiilerich
|
# cached result of hgexecutable(); None until it has been determined
_hgexecutable = None

def hgexecutable():
    """Return the location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path. The location is worked out
    on the first call and remembered for subsequent calls.
    """
    if _hgexecutable is not None:
        return _hgexecutable
    hg = encoding.environ.get('HG')
    mainmod = sys.modules[pycompat.sysstr('__main__')]
    if hg:
        path = hg
    elif mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            path = encoding.environ['EXECUTABLEPATH']
        else:
            path = pycompat.sysexecutable
    elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
        # the running main script is itself named 'hg'
        path = pycompat.fsencode(mainmod.__file__)
    else:
        path = findexe('hg') or os.path.basename(sys.argv[0])
    _sethgexecutable(path)
    return _hgexecutable
Thomas Arendsen Hein
|
r4686 | |||
Adrian Buehlmann
|
def _sethgexecutable(path):
    """Record path as the location of the 'hg' executable."""
    global _hgexecutable
    _hgexecutable = path
Thomas Arendsen Hein
|
r4686 | |||
Yuya Nishihara
|
def _isstdout(f):
    """Return True if the file-like object f refers to the process's stdout.

    Objects without a fileno attribute, and objects whose fileno() is
    unsupported (in-memory streams such as io.BytesIO raise
    io.UnsupportedOperation), are reported as not being stdout instead of
    letting the error escape to the caller.
    """
    import io
    fileno = getattr(f, 'fileno', None)
    if not fileno:
        return False
    try:
        return fileno() == sys.__stdout__.fileno()
    except io.UnsupportedOperation:
        return False
Jun Wu
|
def shellenviron(environ=None):
    """Return a copy of the process environment for shelling out.

    Entries of the optional environ mapping override the inherited ones
    after their values are converted to strings, and HG is always set to
    the location reported by hgexecutable().
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is True:
            return '1'
        if val is None or val is False:
            return '0'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        for key, value in environ.iteritems():
            env[key] = py2shell(value)
    env['HG'] = hgexecutable()
    return env
Yuya Nishihara
|
def system(cmd, environ=None, cwd=None, out=None):
    '''Run cmd through the shell and return its exit code.

    The command runs with the environment from shellenviron(environ) and,
    if cwd is given, in that directory.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    oldpython = sys.version_info[0] == 2 and sys.version_info[1] < 7
    if pycompat.sysplatform == 'plan9' and oldpython:
        # subprocess kludge to work around issues in half-baked Python
        # ports, notably bichued/python:
        if cwd is not None:
            os.chdir(cwd)
        rc = os.system(cmd)
    else:
        env = shellenviron(environ)
        if out is None or _isstdout(out):
            # the child can write to our stdout/stderr directly
            rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                                 env=env, cwd=cwd)
        else:
            # relay the child's combined output to out, line by line
            proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                    env=env, cwd=cwd, stdout=subprocess.PIPE,
                                    stderr=subprocess.STDOUT)
            while True:
                line = proc.stdout.readline()
                if line == '':
                    break
                out.write(line)
            proc.wait()
            rc = proc.returncode
        if pycompat.sysplatform == 'OpenVMS' and rc & 1:
            rc = 0
    return rc
Vadim Gelfer
|
r1880 | |||
Matt Mackall
|
def checksignature(func):
    '''Wrap func so that calling it with bad arguments is reported.

    A TypeError whose traceback is exactly one frame deep was raised by
    the call itself (the arguments did not fit func's signature) and is
    turned into error.SignatureError. A TypeError raised deeper inside
    func is re-raised untouched.
    '''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError
    return check
Jun Wu
|
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = set([
    'btrfs', 'ext2', 'ext3', 'ext4',
    'hfs', 'jfs', 'reiserfs', 'tmpfs',
    'ufs', 'xfs', 'zfs',
])
FUJIWARA Katsunori
|
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    An existing dest is removed first. If hardlink is true and dest lives
    on a whitelisted filesystem, a hardlink is attempted before falling
    back to a real copy. A symlink src is recreated as a symlink.

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.

    Raises Abort if shutil reports an error while copying.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the stat of the file being replaced so that an
            # ambiguous timestamp can be detected after the copy
            oldstat = filestat(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copystat is ignored for symlinks, but in general copystat isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        # (index by ST_MTIME to get an integer: st_mtime
                        # may be a float, which cannot be masked with '&')
                        advanced = (
                            oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
Matt Mackall
|
r3629 | |||
Augie Fackler
|
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    With hardlink left as None, linking is attempted when source and
    destination are on the same device. progress is called with a topic
    and the number of files handled so far, and finally with a position
    of None.

    Returns a (hardlink, num) tuple: whether hardlinking was still in use
    at the end, and the number of files copied or linked.
    """
    num = 0

    def gettopic():
        return hardlink and _('linking') or _('copying')

    def nprog(t, pos):
        # shift positions reported from a subdirectory by the number of
        # files already handled at this level
        if pos is not None:
            return progress(t, pos + num)

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in osutil.listdir(src):
            hardlink, copied = copyfiles(os.path.join(src, name),
                                         os.path.join(dst, name),
                                         hardlink, progress=nprog)
            num += copied
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        linked = False
        if hardlink:
            try:
                oslink(src, dst)
                linked = True
            except (IOError, OSError):
                # stop trying to link for the rest of the tree
                hardlink = False
        if not linked:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
Adrian Buehlmann
|
r11254 | |||
Adrian Buehlmann
|
# base names (compared in lower case) that checkwinfilename rejects
_winreservednames = [
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
]
# characters that checkwinfilename rejects anywhere in a path component
_winreservedchars = ':*?"<>|'
Adrian Buehlmann
|
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename("just/a/normal/path")
    >>> checkwinfilename("foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/xml.con")
    >>> checkwinfilename("foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename("foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename("foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename("../bar")
    >>> checkwinfilename("foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename("foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # examine every non-empty component, treating '\' and '/' alike
    components = [n for n in path.replace('\\', '/').split('/') if n]
    for n in components:
        for c in pycompat.bytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains %r, which is invalid "
                         "on Windows") % c
        # only the part before the first dot is matched against the
        # reserved names, so "con.xml" is rejected but "xml.con" is not
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        last = n[-1]
        # the substring test lets the components '.' and '..' through
        if last in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
Pulkit Goyal
|
# pick the filename checker and the default timer for this platform
if pycompat.osname != 'nt':
    checkosfilename = platform.checkosfilename
    timer = time.time
else:
    checkosfilename = checkwinfilename
    timer = time.clock

# on any platform, use time.perf_counter instead when it is available
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
Matt Mackall
|
r7890 | |||
def makelock(info, pathname):
    """Create the lock pathname holding the string info.

    The lock is created as a symlink whose target is info when possible.
    If symlinks are unavailable or the attempt fails for a reason other
    than the lock already existing, an exclusively-created regular file
    containing info is used instead.

    Raises OSError (errno EEXIST) if the lock already exists.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    try:
        os.write(ld, info)
    finally:
        # do not leak the descriptor if the write fails
        os.close(ld)
Matt Mackall
|
def readlock(pathname):
    """Return the info stored in the lock pathname.

    The target of the symlink is returned when pathname is one. If it is
    not a symlink (EINVAL), or symlinks are unsupported (ENOSYS, or no
    os.readlink at all), the content of the file is returned instead.
    Other OSErrors are propagated.
    """
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        # close the file even if reading fails
        fp.close()
Thomas Arendsen Hein
|
r704 | |||
Vadim Gelfer
|
def fstat(fp):
    '''Stat a file object, which may lack a fileno method.

    Falls back to stat-ing fp.name when fileno is not available.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        return os.stat(fp.name)
    return os.fstat(fd)
Matt Mackall
|
r3784 | # File system features | ||
Martin von Zweigbergk
|
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.

    The final component is looked up again with its case changed; only
    when that lookup succeeds with an identical lstat result is the
    filesystem reported as case-insensitive.
    """
    st = os.lstat(path)
    parent, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
        if folded == base:
            return True # no evidence against case sensitivity
    variant = os.path.join(parent, folded)
    try:
        other = os.lstat(variant)
    except OSError:
        return True
    return not (other == st)
Bryan O'Sullivan
|
# _re2 records the state of the optional re2 module: None means it was
# imported but has not been checked yet, False means it cannot be used
try:
    import re2
except ImportError:
    _re2 = False
else:
    _re2 = None
Siddharth Agarwal
|
class _re(object):
    '''Regular expression front end that uses re2 when it is usable and
    falls back to the standard re module otherwise.'''

    def _checkre2(self):
        '''Set the module-level _re2 flag by trying an actual match.'''
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        re2flags = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~re2flags):
            # the flags are passed to re2 as inline modifiers in the pattern
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        if _re2 is None:
            self._checkre2()
        if not _re2:
            return remod.escape
        return re2.escape

re = _re()
Bryan O'Sullivan
|
r16943 | |||
Paul Moore
|
# maps a directory to a {normcase-ed entry name: name as stored} dict
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Each component of name is looked up in a cached listing of its
    directory; components that cannot be found are kept as given, and
    separators are copied through unchanged.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # (str.replace returns a new string, so the result must be assigned;
    # otherwise a backslash separator ends up escaping the next character
    # of the character class instead of being matched itself)
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    curdir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if curdir not in _fspathcache:
            _fspathcache[curdir] = _makefspathcacheentry(curdir)
        contents = _fspathcache[curdir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[curdir] = contents = _makefspathcacheentry(curdir)
            found = contents.get(part)

        result.append(found or part)
        curdir = os.path.join(curdir, part)

    return ''.join(result)
Jun Wu
|
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    def unknown(path):
        # stand-in used when osutil provides no getfstype
        return None
    impl = getattr(osutil, 'getfstype', unknown)
    return impl(dirpath)
Adrian Buehlmann
|
def checknlink(testfile):
    '''Check whether hardlink count reporting works properly.

    Two scratch files named after testfile are created, hardlinked to
    each other, and removed again. Returns True only if the link count
    reported for the linked file is greater than one.
    '''
    def tryremove(path):
        try:
            os.unlink(path)
        except OSError:
            pass

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1 = testfile + ".hgtmp1"
    if os.path.lexists(f1):
        return False
    try:
        posixfile(f1, 'w').close()
    except IOError:
        tryremove(f1)
        return False

    f2 = testfile + ".hgtmp2"
    fd = None
    try:
        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fd = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fd is not None:
            fd.close()
        tryremove(f1)
        tryremove(f2)
Adrian Buehlmann
|
r12938 | |||
Shun-ichi GOTO
|
def endswithsep(path):
    '''Check whether path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    # when there is no alternative separator this yields that falsy value
    return pycompat.osaltsep and path.endswith(pycompat.osaltsep)
Shun-ichi GOTO
|
r5843 | |||
Shun-ichi GOTO
|
def splitpath(path):
    '''Split path by os.sep and return the list of components.

    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    separator = pycompat.ossep
    return path.split(separator)
Shun-ichi GOTO
|
r5844 | |||
Matt Mackall
|
def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform != 'darwin':
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    if getattr(osutil, 'isgui', None):
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
Matt Mackall
|
r6007 | |||
Alexis S. L. Carvalho
|
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The temporary file is created in the same directory as name.

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file. If name does not exist, the
    temporary file is left empty. If copying fails, the temporary file is
    removed and the error is re-raised.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # close both files even when the copy fails, so that no handles
        # are leaked and the cleanup below is not unlinking an open file
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
Vadim Gelfer
|
r2176 | |||
FUJIWARA Katsunori
|
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, path):
        try:
            self.stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            self.stat = None

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # either side has no stat (missing file) or is not a filestat
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'.
        """
        # index by ST_MTIME to get an integer number of seconds: the
        # st_mtime attribute may be a float, which cannot be masked with '&'
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return
            raise

    def __ne__(self, other):
        return not self == other
Benoit Boissinot
|
r8778 | class atomictempfile(object): | ||
Mads Kiilerich
|
r17424 | '''writable file object that atomically updates a file | ||
Alexis S. L. Carvalho
|
r4827 | |||
Greg Ward
|
r14008 | All writes will go to a temporary copy of the original file. Call | ||
Greg Ward
|
r15057 | close() when you are done writing, and atomictempfile will rename | ||
the temporary copy to the original name, making the changes | ||||
visible. If the object is destroyed without being closed, all your | ||||
writes are discarded. | ||||
FUJIWARA Katsunori
|
r29367 | |||
checkambig argument of constructor is used with filestat, and is | ||||
useful only if target file is guarded by any lock (e.g. repo.lock | ||||
or repo.wlock). | ||||
Greg Ward
|
r14008 | ''' | ||
FUJIWARA Katsunori
|
r29201 | def __init__(self, name, mode='w+b', createmode=None, checkambig=False): | ||
Greg Ward
|
r14007 | self.__name = name # permanent name | ||
self._tempname = mktempcopy(name, emptyok=('w' in mode), | ||||
createmode=createmode) | ||||
self._fp = posixfile(self._tempname, mode) | ||||
FUJIWARA Katsunori
|
r29201 | self._checkambig = checkambig | ||
Bryan O'Sullivan
|
r8327 | |||
Greg Ward
|
r14007 | # delegated methods | ||
Martijn Pieters
|
r29393 | self.read = self._fp.read | ||
Greg Ward
|
r14007 | self.write = self._fp.write | ||
Bryan O'Sullivan
|
r17237 | self.seek = self._fp.seek | ||
self.tell = self._fp.tell | ||||
Greg Ward
|
r14007 | self.fileno = self._fp.fileno | ||
Alexis S. L. Carvalho
|
r4827 | |||
Greg Ward
|
r15057 | def close(self): | ||
Benoit Boissinot
|
r8785 | if not self._fp.closed: | ||
Bryan O'Sullivan
|
r8327 | self._fp.close() | ||
FUJIWARA Katsunori
|
r29201 | filename = localpath(self.__name) | ||
oldstat = self._checkambig and filestat(filename) | ||||
if oldstat and oldstat.stat: | ||||
rename(self._tempname, filename) | ||||
newstat = filestat(filename) | ||||
if newstat.isambig(oldstat): | ||||
# stat of changed file is ambiguous to original one | ||||
advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff | ||||
os.utime(filename, (advanced, advanced)) | ||||
else: | ||||
rename(self._tempname, filename) | ||||
Alexis S. L. Carvalho
|
r4827 | |||
Greg Ward
|
r15057 | def discard(self): | ||
Benoit Boissinot
|
r8785 | if not self._fp.closed: | ||
Alexis S. L. Carvalho
|
r4827 | try: | ||
Greg Ward
|
r14007 | os.unlink(self._tempname) | ||
except OSError: | ||||
pass | ||||
Bryan O'Sullivan
|
r8327 | self._fp.close() | ||
Alexis S. L. Carvalho
|
r4827 | |||
Dan Villiom Podlaski Christiansen
|
r13098 | def __del__(self): | ||
Augie Fackler
|
r14968 | if safehasattr(self, '_fp'): # constructor actually did something | ||
Greg Ward
|
r15057 | self.discard() | ||
Dan Villiom Podlaski Christiansen
|
r13098 | |||
Martijn Pieters
|
r29394 | def __enter__(self): | ||
return self | ||||
def __exit__(self, exctype, excvalue, traceback):
    """Context manager exit.

    Call ``close()`` when the ``with`` body finished without an
    exception, ``discard()`` otherwise.  Nothing is returned, so an
    exception from the body keeps propagating.
    """
    if exctype is None:
        self.close()
    else:
        self.discard()
Ryan McElroy
|
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty

    With ``ignoremissing`` the removal goes through ``tryunlink()``
    instead of ``unlink()``.  Afterwards ``removedirs()`` is attempted on
    the containing directory; an OSError from that step is ignored.
    """
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # try removing directories that might now be empty
    parent = os.path.dirname(f)
    try:
        removedirs(parent)
    except OSError:
        pass
Ryan McElroy
|
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors.

    Any other OSError raised by ``unlink()`` is propagated unchanged.
    """
    try:
        unlink(f)
    except OSError as err:
        if err.errno == errno.ENOENT:
            return
        raise
Angel Ezquerra
|
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.

    ``mode``, when not None, is applied with ``os.chmod`` to every
    directory this call creates (it is passed down to the recursive
    calls for missing parents).  A directory that already exists is
    left untouched: the function returns before the chmod.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        # already there: nothing to create, and no chmod either
        if err.errno == errno.EEXIST:
            return
        # only a missing parent (ENOENT) is recoverable; an empty name
        # cannot have a parent
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        # reached a path that is its own parent: give up
        if parent == name:
            raise
        # create the missing ancestors first, then retry this level
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
Bryan O'Sullivan
|
r18668 | |||
Dan Villiom Podlaski Christiansen
|
def readfile(path):
    """Return the whole content of ``path``, read in binary mode."""
    with open(path, 'rb') as fh:
        data = fh.read()
    return data
Dan Villiom Podlaski Christiansen
|
r14099 | |||
Dan Villiom Podlaski Christiansen
|
def writefile(path, text):
    """Write ``text`` to ``path`` in binary mode, replacing any content."""
    with open(path, 'wb') as fh:
        fh.write(text)
def appendfile(path, text):
    """Append ``text`` to the end of ``path``, opened in binary mode."""
    with open(path, 'ab') as fh:
        fh.write(text)
Eric Hopper
|
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks.

        Chunks larger than 1 MiB (2**20) are re-sliced into pieces of at
        most 256 KiB (2**18) before they are buffered."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks pulled from self.iter but not yet fully handed out
        self._queue = collections.deque()
        # number of leading bytes of self._queue[0] already returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            queue = self._queue
            if not queue:
                return ''.join(self.iter)
            # An earlier sized read may have left chunks in the queue
            # (the first one possibly partly consumed); they come before
            # whatever the iterator still has to offer.
            buf = [queue.popleft()[self._chunkoffset:]]
            self._chunkoffset = 0
            buf.extend(queue)
            queue.clear()
            buf.extend(self.iter)
            return ''.join(buf)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # left < chunkremaining here, so this goes negative and
                # terminates the loop
                left -= chunkremaining

        return ''.join(buf)
Matt Mackall
|
r11758 | |||
Mads Kiilerich
|
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data).  Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    remaining = limit
    while True:
        want = size if remaining is None else min(remaining, size)
        # a zero-sized request (size 0 or limit used up) ends the stream
        # without touching the file
        data = want and f.read(want)
        if not data:
            return
        if remaining:
            remaining -= len(data)
        yield data
Bryan O'Sullivan
|
r1320 | |||
Bryan O'Sullivan
|
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.

    The offset is the UTC wall clock minus the local wall clock for that
    instant, in seconds.  A negative timestamp raises Abort.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    utcclock = datetime.datetime.utcfromtimestamp(timestamp)
    localclock = datetime.datetime.fromtimestamp(timestamp)
    delta = utcclock - localclock
    return timestamp, delta.days * 86400 + delta.seconds
Bryan O'Sullivan
|
r1329 | |||
Matt Mackall
|
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    Besides the usual strftime codes, ``%1`` expands to the signed hour
    part and ``%2`` to the minute part of the offset; ``%z`` is a
    shorthand for ``%1%2``.  A positive offset is rendered with a "-"
    sign.  When ``date`` is omitted, ``makedate()`` supplies it.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if any(code in format for code in ("%1", "%2", "%z")):
        sign = "-" if tz > 0 else "+"
        hours, mins = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, hours))
        format = format.replace("%2", "%02d" % mins)
    # clamp the local time to the signed 32-bit range
    d = t - tz
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    epoch = datetime.datetime(1970, 1, 1)
    when = epoch + datetime.timedelta(seconds=d)
    return encoding.strtolocal(when.strftime(encoding.strfromlocal(format)))
Vadim Gelfer
|
r1829 | |||
Thomas Arendsen Hein
|
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into iso 8601 date.

    The result is ``datestr()`` output for the format '%Y-%m-%d'."""
    return datestr(date, format='%Y-%m-%d')
Thomas Arendsen Hein
|
r6134 | |||
Matt Mackall
|
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair

    Recognized suffixes are "GMT"/"UTC", "[+-]hhmm", a "Z" directly
    after a digit, and "[+-]hh:mm".  The offset is in seconds with the
    sign inverted relative to the written one ("+0100" gives -3600).
    When nothing is recognized the result is (None, s)."""
    def tooffset(signchar, hh, mm):
        seconds = (int(hh) * 60 + int(mm)) * 60
        return -seconds if signchar == "+" else seconds

    if s.endswith(("GMT", "UTC")):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        return tooffset(s[-5], s[-4:-2], s[-2:]), s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        return tooffset(s[-6], s[-5:-3], s[-2:]), s[:-6]

    return None, s
Yuya Nishihara
|
r26126 | |||
Gregory Szorc
|
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised.

    ``defaults`` maps each group of strptime codes ("S", "M", "HI", "d",
    "mb", "yY") to a pair of strings; the pair supplies the value for a
    group that ``format`` does not mention.  The first element is used
    until a more specific group has been found in ``format``, the second
    one afterwards."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        if any(("%" + code) in format for code in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(date, format)
    localunixtime = int(calendar.timegm(timetuple))
    if offset is not None:
        return localunixtime + offset, offset

    # local timezone
    unixtime = int(time.mktime(timetuple))
    return unixtime, unixtime - localunixtime
Jose M. Prieto
|
r2522 | |||
Siddharth Agarwal
|
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    An empty/false ``date`` yields (0, 0).  The words 'now', 'today' and
    'yesterday' (or their translations) are understood as well.
    ``formats`` defaults to ``defaultdateformats``.  ``bias`` maps groups
    of strptime codes to the string to assume for elements missing from
    the input; unlisted groups default to "00" for hours, minutes and
    seconds and to "0" otherwise.  Abort is raised when no format
    matches or when the result is out of range.

    >>> parsedate(' today ') == parsedate(\
                                  datetime.date.today().strftime('%b %d'))
    True
    >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
                                               datetime.timedelta(days=1)\
                                              ).strftime('%b %d'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate('now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime('%b %d')
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime('%b %d')

    try:
        # fast path: "unixtime offset" given as two integers
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0])

            defaults[part] = (b, n)

        # first format that parses wins; the for/else raises when none does
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise Abort(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise Abort(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise Abort(_('impossible time zone offset: %d') % offset)
    return when, offset
Jose M. Prieto
|
r2522 | |||
Matt Mackall
|
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    '-{days}' on or after the current time minus the given number of days

    '{date} to {date}' a date range, inclusive

    The returned function takes a unix timestamp and returns a boolean.

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # earliest timestamp matching ``date``: missing month/day become 1
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # latest timestamp matching ``date``: missing elements are set to
        # their maximum
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        # try the longest month lengths first; parsedate() aborts on a day
        # that does not exist in the month
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
Matt Harbison
|
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    # everything else is a literal; only an explicit prefix is stripped
    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
Matt Harbison
|
r26481 | |||
Vadim Gelfer
|
def shortuser(user):
    """Return a short representation of a user name or email address.

    In order: drop everything from the first '@', keep what follows the
    first '<', then cut at the first space and at the first '.'."""
    user = user.split('@', 1)[0]
    if '<' in user:
        user = user.split('<', 1)[1]
    user = user.split(' ', 1)[0]
    user = user.split('.', 1)[0]
    return user
Vadim Gelfer
|
r1920 | |||
Matteo Capobianco
|
def emailuser(user):
    """Return the user portion of an email address.

    Everything from the first '@' on is dropped, then everything up to
    and including the first '<' of what remains."""
    user = user.split('@', 1)[0]
    if '<' in user:
        user = user.split('<', 1)[1]
    return user
Matt Mackall
|
def email(author):
    '''get email of author.

    Returns the text between the first '<' and the first '>'; a missing
    '<' means "from the start", a missing '>' means "to the end".'''
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
Matt Mackall
|
r5975 | |||
Thomas Arendsen Hein
|
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    Delegates to ``encoding.trim`` with '...' as the ellipsis marker."""
    return encoding.trim(text, maxlength, ellipsis='...')
Thomas Arendsen Hein
|
r3767 | |||
Bryan O'Sullivan
|
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    Each entry of ``unittable`` is a (multiplier, divisor, format) triple.
    The first entry whose ``multiplier * divisor`` does not exceed the
    count is used: the count is divided by ``divisor`` (as a float) and
    rendered with ``format``.  If no entry applies, the format of the
    last entry is applied to the raw count.'''
    def go(count):
        for multiplier, divisor, fmt in unittable:
            threshold = multiplier * divisor
            if count >= threshold:
                return fmt % (count / float(divisor))
        lastfmt = unittable[-1][2]
        return lastfmt % count

    return go
Denis Laxalde
|
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    # the ordering check comes first, so an inverted range is reported
    # as such even when fromline is also out of bounds
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    start = fromline - 1
    return start, toline
Bryan O'Sullivan
|
# Render a byte count with a unit suffix.  Entries are tried top to
# bottom by unitcountfn(): values of at least 100 units are shown without
# decimals, at least 10 units with one decimal, at least 1 unit with two
# decimals; anything below 1 KB (1 << 10) is shown as whole bytes.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
Yuya Nishihara
|
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    """Return the bytes ``s`` with every EOL replaced by a single LF.

    A CR that is not followed (through further CRs) by a LF is kept."""
    # the replacement must be bytes like the pattern: mixing a bytes
    # pattern with a native str replacement raises TypeError on Python 3
    return _eolre.sub(b'\n', s)

def tocrlf(s):
    """Return the bytes ``s`` with every EOL replaced by a single CRLF.

    A CR that is not followed (through further CRs) by a LF is kept."""
    return _eolre.sub(b'\r\n', s)
Yuya Nishihara
|
# tonativeeol() converts LF-terminated text to the platform's line ending
# and fromnativeeol() converts it back.  Both are the identity function
# unless the platform line separator is CRLF.
if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
Yuya Nishihara
|
def escapestr(s):
    """Return ``s`` with backslash escapes applied, as done by the
    'string_escape' codec."""
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    result = codecs.escape_encode(s)
    # escape_encode returns a pair; only its first element is the data
    return result[0]
Yuya Nishihara
|
r31451 | |||
Yuya Nishihara
|
def unescapestr(s):
    """Return ``s`` with backslash escapes interpreted; the counterpart
    of escapestr()."""
    result = codecs.escape_decode(s)
    # escape_decode returns a pair; only its first element is the data
    return result[0]
Yuya Nishihara
|
r31484 | |||
Patrick Mezard
|
def uirepr(s):
    """Return repr(s) with each doubled backslash collapsed to one."""
    # Avoid double backslash in Windows path repr()
    text = repr(s)
    return text.replace('\\\\', '\\')
Alexander Solovyov
|
r7547 | |||
Matt Mackall
|
r13316 | # delay import of textwrap | ||
def MBTextWrapper(**kwargs):
    # The wrapper class is defined on first call only: the function
    # replaces itself in the module namespace with the class, so later
    # calls of ``MBTextWrapper(...)`` instantiate the class directly.
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # split ucstr before the first character whose accumulated
            # column width would exceed space_left
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == ''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
FUJIWARA Katsunori
|
r11297 | |||
Matt Mackall
|
def wrap(line, width, initindent='', hangindent=''):
    """Wrap ``line`` to ``width`` display columns.

    ``initindent`` prefixes the first output line and ``hangindent`` the
    following ones.  The three strings are decoded with the configured
    encoding and error mode, filled by MBTextWrapper and the result is
    encoded back.  A ``width`` that leaves no room beyond the longer
    indent is replaced by max(78, longest indent + 1).
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    enc = pycompat.sysstr(encoding.encoding)
    errmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, errmode)
    initindent = initindent.decode(enc, errmode)
    hangindent = hangindent.decode(enc, errmode)

    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    filled = wrapper.fill(line)
    return filled.encode(enc)
Martin Geisler
|
r8938 | |||
Jun Wu
|
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            # two-argument iter(): call fp.readline until it returns ''
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            # data read so far that does not yet end with '\n'
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # yield every piece ending in '\n'; keep a trailing
                    # piece without one for the next round
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                # an empty read means end of file
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        # Only genuine ``file`` objects are inspected; any other object is
        # returned unchanged.  Regular on-disk files keep the fast native
        # iteration, other ``file`` objects get the EINTR-safe iterator.
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
Jun Wu
|
r30395 | |||
Alexander Solovyov <piranha at piranha.org.ua>
|
def iterlines(iterator):
    """Yield every line contained in the chunks produced by ``iterator``.

    Each chunk is split with ``splitlines()``, so the yielded lines carry no
    line terminators and are never joined across chunk boundaries.
    """
    for piece in iterator:
        pieces = piece.splitlines()
        for text in pieces:
            yield text
Alexander Solovyov
|
r9610 | |||
def expandpath(path):
    """Expand environment variables, then a leading ``~``, in ``path``."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
Patrick Mezard
|
r10239 | |||
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    if getattr(sys, 'frozen', None) != 'macosx_app':
        return [pycompat.sysexecutable]
    # Env variable set by py2app
    return [encoding.environ['EXECUTABLEPATH']]
Patrick Mezard
|
r10344 | |||
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # os.wait() returns a (pid, status) pair; keep only the pid so the
        # "pid in terminated" test below can actually match.
        terminated.add(os.wait()[0])
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # Re-check condfn() after noticing the child is gone: it may
            # have satisfied the condition just before exiting.
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # prevhandler is only set when SIGCHLD exists on this platform.
        if prevhandler is not None:
            signal.signal(SIGCHLD, prevhandler)
Steve Losh
|
r10438 | |||
Roman Sokolov
|
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The mapping passed in by the caller is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            # drop the regular-expression backslash to get the literal char
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Add the "doubled prefix" entry to a private copy so the caller's
        # mapping does not silently gain an extra key.
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    # x.group() is prefix + key; strip the one-character prefix for lookup
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
Brodie Rao
|
def getport(port):
    """Return the port for a given network service.

    An integer (or a string holding one) is returned as an int. Any other
    string is resolved with socket.getservbyname(); when no service matches,
    Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        pass
    else:
        return number

    try:
        return socket.getservbyname(port)
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)
Augie Fackler
|
r12087 | |||
Augie Fackler
|
# Lower-case spellings accepted by parsebool() and the value each one means.
_booleans = {
    '1': True,
    'yes': True,
    'true': True,
    'on': True,
    'always': True,
    '0': False,
    'no': False,
    'false': False,
    'off': False,
    'never': False,
}

def parsebool(s):
    """Parse s into a boolean.

    The comparison is case-insensitive. Returns None when s is not one of
    the recognized spellings.
    """
    return _booleans.get(s.lower())
Brodie Rao
|
r14076 | |||
Brodie Rao
|
# Lookup table from every two-character hex string (any mix of upper and
# lower case digits) to the character with that code.
_hextochr = dict((pair, chr(int(pair, 16)))
                 for pair in (hi + lo
                              for hi in string.hexdigits
                              for lo in string.hexdigits))
Brodie Rao
|
r14077 | |||
Brodie Rao
|
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url('')
    <url path: ''>
    >>> url('#a')
    <url path: '', fragment: 'a'>
    >>> url('http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url('http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url('http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in the user and password components
    _safechars = "!~*'()+"
    # characters left unquoted in the path and fragment components
    _safepchars = "/!~*'()+:\\"
    # matches a leading "<scheme>:" made of letters, digits, '+', '.' and '-'
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        """Split ``path`` into its URL components.

        ``parsequery`` and ``parsefragment`` control whether '?' and '#'
        introduce a separate query / fragment or stay part of the path.

        Raises Abort for a file:// URL whose host is not a local address.
        """
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # stays True unless a scheme other than bundle: is recognized
        self._localpath = True
        # "host:port" text as written, kept for localpath()
        self._hostport = ''
        # the untouched input, returned by localpath() for other schemes
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                # normalize empty pieces to None
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            # credentials precede the last '@' of the authority part
            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def __repr__(self):
        """Return '<url ...>' listing every component that is not None."""
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __str__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        return encoding.strfromlocal(self.__bytes__())

    def __bytes__(self):
        """Rebuild the URL string from the parsed components."""
        if self._localpath:
            # plain paths and bundle: URLs are emitted without quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            # a drive-letter path needs its own leading slash after '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6-style) hosts are emitted verbatim
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    def authinfo(self):
        """Return (url-without-credentials, authentication tuple or None).

        The second element is None when the URL has no user; otherwise it
        is (None, (url, host), user, passwd), with passwd defaulting to ''.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily blank the credentials to render a clean URL
            self.user, self.passwd = None, None
            s = str(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True for remote URLs and for absolute local paths."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(r'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return a filesystem path for file: and bundle: URLs.

        For any other scheme (or none) the original input string is
        returned unchanged.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
Brodie Rao
|
def hasscheme(path):
    """Return True when ``path`` parses as a URL with a non-empty scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
def hasdriveletter(path):
    """Tell whether ``path`` starts with a drive letter such as ``c:``.

    An empty or None ``path`` is returned as is (a falsy value) rather than
    being converted to False.
    """
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
Brodie Rao
|
r14076 | |||
Mads Kiilerich
|
def urllocalpath(path):
    """Return the local path for ``path``, treating '?' and '#' as part of
    the path instead of as query / fragment separators."""
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
def hidepassword(u):
    '''return the url string u with any password replaced by "***"'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return str(parsed)
def removeauth(u):
    '''return the url string u with user name and password stripped'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return str(parsed)
Idan Kamara
|
r14515 | |||
Bryan O'Sullivan
|
# Formatter for time spans, built by unitcountfn from the rows below.
# NOTE(review): each row presumably reads (multiplier, divisor, format) with
# the input expressed in seconds -- confirm against unitcountfn, which is
# defined outside this section. If that reading is right, the first row
# divides by 1e3 while still labelling the result "s"; verify it is intended.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
# Current indentation (in spaces) of timed() reports; kept in a list so the
# nested wrapper can update it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass

    Reports of nested timed calls are indented by two spaces per level.
    '''
    def wrapper(*args, **kwargs):
        begin = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            spent = timer() - begin
            # restore the nesting level before writing, so the report is
            # indented at the caller's depth
            _timenesting[0] -= step
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(spent)))
    return wrapper
Bryan O'Sullivan
|
r19194 | |||
# (suffix, bytes per unit) pairs, tried in this order by sizetoint().
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    The specifier is a number optionally followed by a unit suffix; case
    and surrounding whitespace are ignored. Raises error.ParseError when
    the numeric part cannot be parsed.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, factor in _sizeunits:
            if not spec.endswith(suffix):
                continue
            return int(float(spec[:-len(suffix)]) * factor)
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
Bryan O'Sullivan
|
r19211 | |||
class hooks(object):
    '''An ordered set of hook functions extending a function's behavior.

    Hooks are invoked in the sort order of the source names they were
    registered under.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        '''register hook under the name source'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''call every hook with args and return the list of their results'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for source, hook in self._hooks]
Mads Kiilerich
|
r20244 | |||
Mads Kiilerich
|
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.

    The frame of this function and the 'skip' frames closest to it are
    dropped; of what remains, the last 'depth' entries are kept (a depth
    of 0 keeps them all).

    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    frames = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in frames:
        entries.append((fileline % (fn, ln), func))
    entries = entries[-depth:]
    if not entries:
        return
    # width of the widest location, used to align the function names
    fnmax = max(len(fnln) for fnln, func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
Mads Kiilerich
|
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.

    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    otherf (stdout by default) is flushed first; pass a false value to
    skip that.

    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 also hides this function's own frame
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
Mads Kiilerich
|
r20244 | |||
Drew Gottlieb
|
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        '''count the directories of every path in map

        When skip is given and map has an iteritems method, entries whose
        value starts with skip (s[0] == skip) are left out.'''
        self._dirs = {}
        record = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for name, state in map.iteritems():
                if state[0] != skip:
                    record(name)
        else:
            for name in map:
                record(name)

    def addpath(self, path):
        '''add one reference for the directories leading to path'''
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # already known: bump it and stop walking up
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        '''drop one reference for the directories leading to path'''
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # still referenced: decrement and stop walking up
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
# Use the dirs implementation from the parsers module when it provides one,
# replacing the Python class defined above.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
def finddirs(path):
    '''yield each ancestor directory of path, from the deepest upwards

    Ancestors are the prefixes of path that end just before a '/'; a path
    without '/' yields nothing.'''
    cut = path.rfind('/')
    while cut >= 0:
        yield path[:cut]
        cut = path.rfind('/', 0, cut)
Bryan O'Sullivan
|
class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once.  This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers.  These will be invoked when enter() is called.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor. Returns the list of values produced by
        their __enter__ methods.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            # registered right away, so a failure in a later __enter__
            # still lets __exit__ unwind the managers already entered
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits.

        Exit functions run in reverse registration order, so a function
        added after enter() runs before the __exit__ of the managers that
        enter() created. Returns func.'''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.

        An exception raised by an exit function replaces the current one
        for the remaining exit functions. It is re-raised at the end
        unless one of them suppressed it.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    # the active exception (possibly one raised by an
                    # earlier exit function) has been handled, so there is
                    # nothing left to re-raise
                    pending = None
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
Gregory Szorc
|
r30350 | # compression code | ||
Gregory Szorc
|
# Roles accepted by compressormanager.supportedwireengines().
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Record describing wire protocol support of a compression engine: its
# wire identifier plus the server-side and client-side priorities.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
Gregory Szorc
|
class compressormanager(object):
    """Registry of the available compression engines.

    It hides the differences between compression engines behind a common
    lookup interface so that new formats, possibly provided by extensions,
    can be added easily.

    Engines are added to the global instance through its ``register()``
    method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def _loadable(self, engine):
        """Return ``engine``, aborting first if it is not available."""
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance. Aborts when
        the engine name, or any bundle, wire protocol or revlog identifier
        it declares, is already taken.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        enginename = engine.name()
        if enginename in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              enginename)

        bundledecl = engine.bundletype()
        if bundledecl:
            humanname, internalid = bundledecl
            if humanname in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  humanname)
            if internalid in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (internalid, self._bundletypes[internalid]))
            # Only engines declaring an external facing name get one.
            if humanname:
                self._bundlenames[humanname] = enginename
            self._bundletypes[internalid] = enginename

        wiredecl = engine.wireprotosupport()
        if wiredecl:
            wireid = wiredecl.name
            if wireid in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wireid, self._wiretypes[wireid]))
            self._wiretypes[wireid] = enginename

        header = engine.revlogheader()
        if header:
            if header in self._revlogheaders:
                raise error.Abort(_('revlog header %s already registered by %s') %
                                  (header, self._revlogheaders[header]))
            self._revlogheaders[header] = enginename

        self._engines[enginename] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._bundlenames[bundlename]
        return self._loadable(self._engines[enginename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._bundletypes[bundletype]
        return self._loadable(self._engines[enginename])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)
        if role == SERVERROLE:
            field = 'serverpriority'
        else:
            field = 'clientpriority'

        candidates = [self._engines[n] for n in self._wiretypes.values()]
        if onlyavailable:
            candidates = [c for c in candidates if c.available()]

        def sortkey(candidate):
            # Highest priority first; ties are broken alphabetically by
            # wire name. The tie-break is arbitrary but keeps the output
            # stable.
            support = candidate.wireprotosupport()
            return -1 * getattr(support, field), support.name

        candidates.sort(key=sortkey)
        return candidates

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol type.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        enginename = self._wiretypes[wiretype]
        return self._loadable(self._engines[enginename])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        enginename = self._revlogheaders[header]
        return self._engines[enginename]

compengines = compressormanager()
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of chunks of bytes representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
Gregory Szorc
|
class _zlibengine(compressionengine):
    """Compression engine backed by the standard library ``zlib`` module."""

    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', -1)
        compressor = zlib.compressobj(level)
        for chunk in it:
            out = compressor.compress(chunk)
            # compress() may buffer and return nothing; skipping the empty
            # result here is cheaper than pushing it through the generator.
            if out:
                yield out
        yield compressor.flush()

    def decompressorreader(self, fh):
        def inflated():
            decompressor = zlib.decompressobj()
            for pending in filechunkiter(fh):
                while pending:
                    # Cap the size of each output piece to bound memory use;
                    # input not yet consumed is fed back in on the next pass.
                    yield decompressor.decompress(pending, 2 ** 18)
                    pending = decompressor.unconsumed_tail

        return chunkbuffer(inflated())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return zlib-compressed ``data``, or None if that isn't smaller."""
            size = len(data)
            # Caller handles empty input case.
            assert size > 0

            if size < 44:
                return None

            if size <= 1000000:
                packed = zlib.compress(data)
                if len(packed) < size:
                    return packed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So large inputs are fed to a
            # streaming compressor one slice at a time.
            compressor = zlib.compressobj()
            pieces = [compressor.compress(data[start:start + 2 ** 20])
                      for start in range(0, size, 2 ** 20)]
            pieces.append(compressor.flush())

            if sum(len(piece) for piece in pieces) < size:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            """Return decompressed ``data``; zlib errors become RevlogError."""
            try:
                return zlib.decompress(data)
            except zlib.error as err:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(err))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
class _bz2engine(compressionengine):
    """Compression engine backed by the standard library ``bz2`` module."""

    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # A wire protocol name is declared, but with both priorities at 0 the
    # engine is not advertised by default because it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        level = (opts or {}).get('level', 9)
        compressor = bz2.BZ2Compressor(level)
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out
        yield compressor.flush()

    def decompressorreader(self, fh):
        def inflated():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(inflated())

compengines.register(_bz2engine())
class _truncatedbz2engine(compressionengine):
    """Decompress-only engine for bz2 streams missing their 'BZ' header."""

    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # First element is None, so no user-facing bundle spec name exists.
        return None, '_truncatedBZ'

    # compressstream is intentionally not implemented here: it is hackily
    # handled elsewhere.

    def decompressorreader(self, fh):
        def inflated():
            decompressor = bz2.BZ2Decompressor()
            # The input stream lacks the leading 'BZ' magic, so prime the
            # decompressor with it before feeding the real data.
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(inflated())

compengines.register(_truncatedbz2engine())
Gregory Szorc
|
r30265 | |||
Gregory Szorc
|
class _noopengine(compressionengine):
    """Engine that passes data through without compressing it."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't, because
    # uncompressed payloads can easily saturate the network pipe unless the
    # network is fast.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # revlogheader is not implemented because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Pass the caller's iterator straight back.
        return it

    def decompressorreader(self, fh):
        # Pass the caller's file object straight back.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Always reports that no compressed form is available.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
Pierre-Yves David
|
r26266 | |||
Gregory Szorc
|
class _zstdengine(compressionengine):
    """Compression engine backed by the optional ``zstd`` module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So the import is
        # deferred until first access.
        try:
            from . import zstd
            # Touch an attribute to force the delayed import to happen now.
            zstd.__version__
        except ImportError:
            return None
        return zstd

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get('level', 3)

        compressor = self._module.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out
        yield compressor.flush()

    def decompressorreader(self, fh):
        decompressor = self._module.ZstdDecompressor()
        return chunkbuffer(decompressor.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return zstd-compressed ``data``, or None if that isn't smaller."""
            size = len(data)
            # Caller handles empty input case.
            assert size > 0

            if size < 50:
                return None

            if size <= 1000000:
                packed = self._cctx.compress(data)
                if len(packed) < size:
                    return packed
                return None

            # Larger inputs go through a streaming compressor, one
            # recommended-size slice at a time.
            compressor = self._cctx.compressobj()
            pieces = []
            start = 0
            while start < size:
                stop = start + self._compinsize
                piece = compressor.compress(data[start:stop])
                if piece:
                    pieces.append(piece)
                start = stop
            pieces.append(compressor.flush())

            if sum(len(piece) for piece in pieces) < size:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            """Return decompressed ``data``; any failure becomes RevlogError."""
            size = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                decompressor = self._dctx.decompressobj()
                pieces = []
                start = 0
                while start < size:
                    stop = start + self._decompinsize
                    piece = decompressor.decompress(data[start:stop])
                    if piece:
                        pieces.append(piece)
                    start = stop
                # Frame should be exhausted, so no finish() API.

                return ''.join(pieces)
            except Exception as err:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(err))

    def revlogcompressor(self, opts=None):
        level = (opts or {}).get('level', 3)
        return self.zstdrevlogcompressor(self._module, level=level)

compengines.register(_zstdengine())
Gregory Szorc
|
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help.

    Returns a dict mapping each user-facing bundle compression name to an
    object whose ``__doc__`` holds the formatted help text. Engines that are
    unavailable, or that expose no user-facing bundle name, are left out.
    """
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    topics = {}

    # The docstring needs formatting, so it is stored on a throwaway object
    # instead of mutating the engine's own method.
    class docobject(object):
        pass

    for enginename in compengines:
        engine = compengines[enginename]
        if not engine.available():
            continue

        spec = engine.bundletype()
        if not (spec and spec[0]):
            continue

        holder = docobject()
        holder.__doc__ = pycompat.sysstr('``%s``\n %s') % (
            spec[0], engine.bundletype.__doc__)
        topics[spec[0]] = holder

    return topics
Mads Kiilerich
|
# Convenient short alias for debugstacktrace.
dst = debugstacktrace