##// END OF EJS Templates
rust-revlog: don't create an in-memory nodemap for filelogs from Python...
rust-revlog: don't create an in-memory nodemap for filelogs from Python

Explanations inline. Benchmarks show that this change positively affects the only repo where this was a problem:

```
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.cat
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.files = all-root
# benchmark.variants.output = plain
# benchmark.variants.rev = tip
default: 62.848869 ~~~~~
before-this-patch: 58.113051 (-7.54%, -4.74)
this-patch: 57.407533 (-8.66%, -5.44)
### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm
# benchmark.name = hg.command.log
# bin-env-vars.hg.flavor = rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.limit-rev = 10
# benchmark.variants.patch = yes
# benchmark.variants.rev = none
default: 3.173532 ~~~~~
before-this-patch: 3.543591 (+11.66%, +0.37)
this-patch: 3.297235 (+3.90%, +0.12)
```

File last commit:

r52755:607e94e0 default
r53071:bcd4962e default
Show More
charencode.py
87 lines | 2.4 KiB | text/x-python | PythonLexer
# charencode.py - miscellaneous character encoding
#
# Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import annotations
import array
from .. import pycompat
def isasciistr(s: bytes) -> bool:
    """Report whether ``s`` consists solely of ASCII bytes.

    An empty byte string counts as ASCII.
    """
    # bytes.isascii() gives the same answer as attempting an 'ascii'
    # decode, without building a throwaway str or raising internally.
    return s.isascii()
def asciilower(s: bytes) -> bytes:
    """Convert a byte string to lowercase if it is pure ASCII.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # The decode is performed only for validation; its result is discarded
    # and any UnicodeDecodeError is left to propagate to the caller.
    s.decode('ascii')
    return s.lower()
def asciiupper(s: bytes) -> bytes:
    """Convert a byte string to uppercase if it is pure ASCII.

    Raises UnicodeDecodeError if non-ASCII characters are found.
    """
    # The decode is performed only for validation; its result is discarded
    # and any UnicodeDecodeError is left to propagate to the caller.
    s.decode('ascii')
    return s.upper()
# JSON escape table indexed by byte value.
# Control characters 0x00-0x1f start out in the generic \uXXXX form.
_jsonmap = [b"\\u%04x" % x for x in range(32)]
# 0x20-0x7e map to whatever pycompat.bytechr() returns for that code
# (presumably the one-byte string itself; verify against pycompat).
_jsonmap += [pycompat.bytechr(x) for x in range(32, 127)]
_jsonmap += [b'\\u007f']  # DEL
# Overrides, in code order: short escapes for common control characters,
# plus the double quote and the backslash.
_jsonmap[0x08] = b'\\b'
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'
# The paranoid table is copied while the base table still has exactly 128
# entries, so indexing it with any value >= 128 raises IndexError.
_paranoidjsonmap = list(_jsonmap)
_paranoidjsonmap[0x3C] = b'\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e'  # '>'
# Only the non-paranoid table gains entries for byte values 128-255.
_jsonmap += [pycompat.bytechr(x) for x in range(128, 256)]
def jsonescapeu8fast(u8chars: bytes, paranoid: bool) -> bytes:
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Every byte is translated through a lookup table: ``_paranoidjsonmap``
    when ``paranoid`` is true, ``_jsonmap`` otherwise.

    Raises ValueError if a byte has no entry in the selected table, i.e.
    if non-ASCII characters have to be escaped.
    """
    jm = _paranoidjsonmap if paranoid else _jsonmap
    try:
        # Iterating a bytes object already yields integer byte values, so
        # no intermediate bytearray copy is needed.
        return b''.join(jm[x] for x in u8chars)
    except IndexError:
        # The byte value lies beyond the end of the selected table.
        raise ValueError
# Error handler name passed to both the decode and the encode call in
# jsonescapeu8fallback().
_utf8strict = r'surrogatepass'


def jsonescapeu8fallback(u8chars: bytes, paranoid: bool) -> bytes:
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    Escapes all non-ASCII characters no matter if paranoid is False.
    Code units below 128 are translated through ``_paranoidjsonmap`` when
    ``paranoid`` is true and through ``_jsonmap`` otherwise; every other
    UTF-16 code unit is emitted as a ``\\uXXXX`` escape.
    """
    jm = _paranoidjsonmap if paranoid else _jsonmap
    # non-BMP char is represented as UTF-16 surrogate pair
    u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
    # The plain 'utf-16' codec writes a BOM followed by native-endian code
    # units, which matches how array type code 'H' reads them back.
    u16codes = array.array('H', u16b)
    u16codes.pop(0)  # drop BOM
    return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes)