##// END OF EJS Templates
revlog: add glue to use a pure-Rust VFS...
revlog: add glue to use a pure-Rust VFS This will save us a lot of calling back into Python, which is always horribly expensive. We are now faster in all benchmarked cases except for `log --patch` specifically on mozilla-try. Fixing this will happen in a later patch. ``` ### data-env-vars.name = mercurial-devel-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 1.760765 ~~~~~ 5559d7e63ec3: 1.555513 (-11.66%, -0.21) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 62.848869 ~~~~~ 5559d7e63ec3: 58.113051 (-7.54%, -4.74) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 3.173532 ~~~~~ 5559d7e63ec3: 3.543591 (+11.66%, +0.37) ### data-env-vars.name = mozilla-try-2024-03-26-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 1.214698 ~~~~~ 5559d7e63ec3: 1.192478 (-1.83%, -0.02) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 56.205474 ~~~~~ 5559d7e63ec3: 51.520074 (-8.34%, -4.69) ### data-env-vars.name = 
mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 2.105419 ~~~~~ 5559d7e63ec3: 2.051849 (-2.54%, -0.05) ### data-env-vars.name = mozilla-unified-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 0.309960 ~~~~~ 5559d7e63ec3: 0.299035 (-3.52%, -0.01) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.cat # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.files = all-root # benchmark.variants.output = plain # benchmark.variants.rev = tip e679697a6ca4: 1.849832 ~~~~~ 5559d7e63ec3: 1.805076 (-2.42%, -0.04) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 10 # benchmark.variants.patch = yes # benchmark.variants.rev = none e679697a6ca4: 0.289521 ~~~~~ 5559d7e63ec3: 0.279889 (-3.33%, -0.01) ### data-env-vars.name = tryton-public-2024-03-22-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.patch = no # benchmark.variants.rev = none e679697a6ca4: 0.332270 ~~~~~ 5559d7e63ec3: 0.323324 (-2.69%, -0.01) ```

File last commit:

r52756:f4733654 default
r53069:72bc29f0 default
Show More
stringutil.py
1007 lines | 29.8 KiB | text/x-python | PythonLexer
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 # stringutil.py - utility for generic string formatting, parsing, etc.
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
typing: add `from __future__ import annotations` to most files...
r52756 from __future__ import annotations
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Yuya Nishihara
wireproto: convert python literal to object without using unsafe eval()...
r37494 import ast
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 import codecs
import re as remod
import textwrap
Gregory Szorc
stringutil: teach pprint() to recognize generators...
r39332 import types
Matt Harbison
typing: induce pytype to use the standard `attr` instead of the vendored copy...
r52622 import typing
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Matt Harbison
typing: add basic type hints to stringutil.py
r50470 from typing import (
Optional,
overload,
)
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 from ..i18n import _
Connor Sheehan
templatefuncs: add mailmap template function...
r37227 from ..thirdparty import attr
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Matt Harbison
typing: induce pytype to use the standard `attr` instead of the vendored copy...
r52622 # Force pytype to use the non-vendored package
if typing.TYPE_CHECKING:
# noinspection PyPackageRequirements
import attr
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 from .. import (
encoding,
error,
pycompat,
)
Augie Fackler
stringutil: add a new function to do minimal regex escaping...
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')

# Table for ``str.translate()``: code point of each special character
# mapped to that character preceded by a backslash (as text).
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}

# Same escapes, keyed by each element yielded when iterating ``_respecial``
# and mapped to ``b'\\'`` + that element.
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
Augie Fackler
stringutil: add a new function to do minimal regex escaping...
r38493
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
@overload
def reescape(pat: bytes) -> bytes:
    ...


@overload
def reescape(pat: str) -> str:
    ...


def reescape(pat):
    """Drop-in replacement for re.escape.

    Accepts either ``bytes`` or ``str`` and returns the same type, with every
    character listed in ``_regexescapemap`` prefixed by a backslash.
    """
    # NOTE: it is intentional that this works on unicodes and not
    # bytes, as it's only possible to do the escaping with
    # unicode.translate, not bytes.translate. Sigh.
    isbytes = isinstance(pat, bytes)
    if isbytes:
        # latin1 maps every byte to the code point of the same value, so the
        # decode/encode round trip below is lossless.
        pat = pat.decode('latin1')
    pat = pat.translate(_regexescapemap)
    if isbytes:
        return pat.encode('latin1')
    return pat
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
    """Pretty print an object.

    Joins every atom produced by ``pprintgen()`` into a single bytestring.
    ``bprefix``, ``indent`` and ``level`` are forwarded unchanged; see
    ``pprintgen()`` for their meaning.
    """
    return b''.join(pprintgen(o, bprefix=bprefix, indent=indent, level=level))
Gregory Szorc
stringutil: teach pprint() to indent...
r39414
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def _pprintitems(first, rest, bprefix, indent, level, pairs=False):
    """Yield the atoms that sit between a collection's opening and closing
    tokens.

    ``first`` is the first element and ``rest`` an iterator over the remaining
    ones (the caller has already established the collection is non-empty).
    When ``pairs`` is true every element is a ``(key, value)`` 2-tuple that is
    rendered as ``key: value``.

    ``indent`` and ``level`` have the same meaning as in ``pprintgen()``;
    ``level`` is the level of the *enclosing* collection.
    """
    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    current = first
    last = False
    while not last:
        # Fetch the following element *before* emitting the current one so we
        # know whether a separator is needed. Doing the look-ahead first keeps
        # the consumption order generators have always seen.
        upcoming = None
        try:
            upcoming = next(rest)
        except StopIteration:
            last = True

        if pairs:
            key, value = current
            yield from pprintgen(
                key, bprefix=bprefix, indent=indent, level=level
            )
            yield b': '
            yield from pprintgen(
                value, bprefix=bprefix, indent=indent, level=level
            )
        else:
            yield from pprintgen(
                current, bprefix=bprefix, indent=indent, level=level
            )

        if not last:
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '
        current = upcoming

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)


def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are rendered recursively; dict
    items and set members are emitted in sorted order. Any other object falls
    back to ``pycompat.byterepr()``.
    """

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        if not o:
            yield b'[]'
            return

        yield b'['
        items = iter(o)
        yield from _pprintitems(next(items), items, bprefix, indent, level)
        yield b']'
    elif isinstance(o, dict):
        if not o:
            yield b'{}'
            return

        yield b'{'
        items = iter(sorted(o.items()))
        yield from _pprintitems(
            next(items), items, bprefix, indent, level, pairs=True
        )
        yield b'}'
    elif isinstance(o, set):
        if not o:
            yield b'set([])'
            return

        yield b'set(['
        items = iter(sorted(o))
        yield from _pprintitems(next(items), items, bprefix, indent, level)
        yield b'])'
    elif isinstance(o, tuple):
        if not o:
            yield b'()'
            return

        yield b'('
        items = iter(o)
        yield from _pprintitems(next(items), items, bprefix, indent, level)
        yield b')'
    elif isinstance(o, types.GeneratorType):
        # Special case of empty generator. A generator cannot be tested for
        # emptiness without pulling its first item.
        try:
            first = next(o)
        except StopIteration:
            yield b'gen[]'
            return

        yield b'gen['
        yield from _pprintitems(first, o, bprefix, indent, level)
        yield b']'
    else:
        yield pycompat.byterepr(o)
Gregory Szorc
stringutil: add function to pretty print an object...
r37316
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def prettyrepr(o) -> bytes:
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is split in front of every ``<`` (or in front of
    the `` field=`` that introduces it), and each resulting piece is put on
    its own line, indented according to how many ``<`` are still unclosed at
    the point where the piece starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        q1 = rs.find(b'<', p1 + 1)
        if q1 < 0:
            # no further '<': the final piece runs to the end of the repr
            q1 = len(rs)
        # pytype: disable=wrong-arg-count
        # TODO: figure out why pytype doesn't recognize the optional start
        #  arg
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
            # pytype: enable=wrong-arg-count
            # backtrack for ' field=<'
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
        if q0 < 0:
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = '<' opened minus '>' closed before this piece
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    # NOTE(review): the per-level indent unit is reproduced exactly as it
    # appears in the captured source; whitespace runs were collapsed in that
    # capture, so confirm the unit's width against the canonical file.
    return b'\n'.join(b' ' * l + s for l, s in lines)
Yuya Nishihara
stringutil: promote smartset.prettyformat() to utility function...
r38280
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def buildrepr(r) -> bytes:
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty bytestring. A tuple is treated as a format
    string followed by its arguments, a callable is invoked with no
    arguments, and anything else is handed to ``pprint()``.
    """
    if r is None:
        return b''
    elif isinstance(r, tuple):
        return r[0] % pycompat.rapply(pycompat.maybebytestr, r[1:])
    elif isinstance(r, bytes):
        return r
    elif callable(r):
        return r()
    else:
        return pprint(r)
Yuya Nishihara
stringutil: move _formatsetrepr() from smartset...
r38595
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def binary(s: bytes) -> bool:
    """return true if a string is binary data

    "Binary" here means the value is non-empty and contains a NUL byte.
    """
    # ``s and ...`` short-circuits on empty (falsy) input before the
    # containment test is attempted.
    return bool(s and b'\0' in s)
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def _splitpattern(pattern: bytes):
    """Split a matcher pattern into ``(kind, pattern)``.

    A leading ``re:`` selects kind ``b're'`` and a leading ``literal:``
    selects ``b'literal'``; in both cases the prefix is removed. A pattern
    with neither prefix is returned untouched as a ``b'literal'``.
    """
    if pattern.startswith(b're:'):
        return b're', pattern[3:]
    elif pattern.startswith(b'literal:'):
        return b'literal', pattern[8:]
    return b'literal', pattern
Matt Harbison
typing: add basic type hints to stringutil.py
def stringmatcher(pattern: bytes, casesensitive: bool = True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    Raises ``error.ParseError`` if a ``re:`` pattern fails to compile.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        flags = 0
        if not casesensitive:
            flags = remod.I
        # Only the compile call can raise ``remod.error``.
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            match = pattern.__eq__
        else:
            # Lower the pattern once; each candidate is lowered per call.
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def substringregexp(pattern: bytes, flags: int = 0):
    """Build a regexp object from a string pattern possibly starting with
    're:' or 'literal:' prefix.

    ``flags`` is passed straight to ``re.compile()``. A literal pattern is
    escaped first so that it only matches itself. Raises
    ``error.ParseError`` if a ``re:`` pattern fails to compile.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     regexp = substringregexp(pattern)
    ...     return [bool(regexp.search(t)) for t in tests]
    >>> def itest(pattern, *tests):
    ...     regexp = substringregexp(pattern, remod.I)
    ...     return [bool(regexp.search(t)) for t in tests]

    substring matching (no prefix):
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
    [False, False, True]

    substring pattern should be escaped:
    >>> substringregexp(b'.bc').pattern
    '\\\\.bc'
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
    [False, False, False]

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    [False, False, True]

    force substring matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    [False, True]

    case insensitive literal matches
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
    [False, False, True]

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    [False, False, True]
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        try:
            return remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
    elif kind == b'literal':
        return remod.compile(remod.escape(pattern), flags)

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
Matt Harbison
typing: add basic type hints to stringutil.py
def shortuser(user: bytes) -> bytes:
    """Return a short representation of a user name or email address.

    The value is trimmed in four successive steps, each applied to the result
    of the previous one: drop everything from the first ``@``, drop
    everything up to and including the first ``<``, drop everything from the
    first space, and drop everything from the first ``.``.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    f = user.find(b' ')
    if f >= 0:
        user = user[:f]
    f = user.find(b'.')
    if f >= 0:
        user = user[:f]
    return user
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def emailuser(user: bytes) -> bytes:
    """Return the user portion of an email address.

    Everything from the first ``@`` onwards is dropped, then everything up to
    and including the first ``<`` of what remains.
    """
    f = user.find(b'@')
    if f >= 0:
        user = user[:f]
    f = user.find(b'<')
    if f >= 0:
        user = user[f + 1 :]
    return user
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def email(author: bytes) -> bytes:
    """get email of author.

    Returns the text after the first ``<`` and before the first ``>``. When
    there is no ``<`` the slice starts at the beginning; when there is no
    ``>`` it runs to the end.
    """
    r = author.find(b'>')
    if r == -1:
        # ``None`` as the slice end means "through the end of the string";
        # leaving -1 in place would chop off the last byte instead.
        r = None
    # ``find()`` returns -1 when '<' is absent, so ``+ 1`` yields index 0.
    return author[author.find(b'<') + 1 : r]
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Matt Harbison
typing: add basic type hints to stringutil.py
def person(author: bytes) -> bytes:
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if b'@' not in author:
        # no address at all: the whole value is taken to be the name
        return author
    f = author.find(b'<')
    if f != -1:
        # 'Name <addr>': strip surrounding spaces/quotes from the name and
        # turn backslash-escaped quotes back into plain quotes
        return author[:f].strip(b' "').replace(b'\\"', b'"')
    # bare 'local.part@domain': derive a name from the local part
    f = author.find(b'@')
    return author[:f].replace(b'.', b' ')
Connor Sheehan
stringutil: move person function from templatefilters...
r37173
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
templatefuncs: add mailmap template function...
@attr.s(hash=True)
class mailmapping:
    """Represents a username/email key or value in
    a mailmap file"""

    # email address of the entry (required)
    email = attr.ib()
    # display name of the entry; defaults to None when not given
    name = attr.ib(default=None)
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
stringutil: improve check for failed mailmap line parsing...
def _ismailmaplineinvalid(names, emails):
    """Returns True if the parsed names and emails
    in a mailmap entry are invalid.

    >>> # No names or emails fails
    >>> names, emails = [], []
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # Only one email fails
    >>> emails = [b'email@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    True
    >>> # One email and one name passes
    >>> names = [b'Test Name']
    >>> _ismailmaplineinvalid(names, emails)
    False
    >>> # No names but two emails passes
    >>> names = []
    >>> emails = [b'proper@email.com', b'commit@email.com']
    >>> _ismailmaplineinvalid(names, emails)
    False
    """
    # An entry needs at least one email, and then either a name or a
    # second email.  The parentheses only spell out the operator precedence.
    return not emails or (not names and len(emails) < 2)
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
templatefuncs: add mailmap template function...
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict whose keys are the ``mailmapping`` entries to look up
    (the commit identity) and whose values are the ``mailmapping`` entries
    to substitute (the proper identity).  ``None`` input yields an empty
    dict.

    >>> mmdata = b"\\n".join([
    ... b'# Comment',
    ... b'Name <commit1@email.xx>',
    ... b'<name@email.xx> <commit2@email.xx>',
    ... b'Name <proper@email.xx> <commit3@email.xx>',
    ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Don't bother checking the line if it is a comment or
        # is an improperly formed author field
        if line.lstrip().startswith(b'#'):
            continue

        # names, emails hold the parsed emails and names for each line
        # namebuilder holds the words in a person's name
        names, emails = [], []
        namebuilder = []

        for element in line.split():
            if element.startswith(b'#'):
                # If we reach a comment in the mailmap file, move on
                break

            elif element.startswith(b'<') and element.endswith(b'>'):
                # We have found an email.
                # Parse it, and finalize any names from earlier
                emails.append(element[1:-1])  # Slice off the "<>"

                if namebuilder:
                    names.append(b' '.join(namebuilder))
                    namebuilder = []

                # Break if we have found a second email, any other
                # data does not fit the spec for .mailmap
                if len(emails) > 1:
                    break

            else:
                # We have found another word in the committers name
                namebuilder.append(element)

        # Check to see if we have parsed the line into a valid form
        # We require at least one email, and either at least one
        # name or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The last email on the line is the one to match; a commit name is
        # only part of the key when the line carries two names.
        mailmapkey = mailmapping(
            email=emails[-1],
            name=names[-1] if len(names) == 2 else None,
        )

        # The first email (and first name, if any) is the replacement.
        mailmap[mailmapkey] = mailmapping(
            email=emails[0],
            name=names[0] if names else None,
        )

    return mailmap
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def mapname(mailmap, author: bytes) -> bytes:
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a mapping as produced by ``parsemailmap()``.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the author field coming in isn't in the correct format,
    # or the mailmap is empty just return the original author field
    if not isauthorwellformed(author) or not mailmap:
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]

    except KeyError:
        # If the lookup fails, use just the email as the key instead
        # We call this commit2 as not to erase original commit fields
        commit2 = mailmapping(email=commit.email)
        proper = mailmap.get(commit2, mailmapping(None, None))

    # Return the author field with proper values filled in; any field the
    # mailmap entry leaves empty falls back to the commit's own value
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
cleanup: drop redundant character escapes outside of `[]`...
# "Name <local@domain>": a non-empty name without '<', one whitespace
# character, then a bracketed address containing '@'
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author: bytes) -> bool:
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    return _correctauthorformat.match(author) is not None
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def firstline(text: bytes) -> bytes:
    """Return the first line of the input

    An empty input yields ``b''``.
    """
    # Try to avoid running splitlines() on the whole string
    i = text.find(b'\n')
    if i != -1:
        text = text[:i]
    # splitlines() is still applied to the (now short) prefix so that any
    # other line boundary it recognizes is honoured as well
    try:
        return text.splitlines()[0]
    except IndexError:
        # nothing before the first line break, or empty input
        return b''
Matt Harbison
typing: add basic type hints to stringutil.py
def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
    """Trim string to at most maxlength (default: 400) columns in display."""
    # delegates to encoding.trim(), asking it to mark the cut with b'...'
    return encoding.trim(text, maxlength, ellipsis=b'...')
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def escapestr(s: bytes) -> bytes:
    """Return ``s`` with special bytes backslash-escaped.

    ``bytearray`` and ``memoryview`` arguments are accepted and converted
    to ``bytes`` first.
    """
    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview
    if isinstance(s, (memoryview, bytearray)):
        s = bytes(s)
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    # pytype: disable=bad-return-type
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
    # pytype: enable=bad-return-type
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def unescapestr(s: bytes) -> bytes:
    """Return ``s`` with backslash escape sequences decoded.

    Counterpart of ``escapestr()``.
    """
    # pytype: disable=bad-return-type
    return codecs.escape_decode(s)[0]  # pytype: disable=module-attr
    # pytype: enable=bad-return-type
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def uirepr(s: bytes) -> bytes:
    """Return a repr()-style rendering of ``s`` with doubled backslashes
    collapsed into single ones."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
# delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Build a column-width-aware text wrapper.

    The wrapper class is only created on the first call; this function then
    rebinds the module-level name ``_MBTextWrapper`` to that class, so later
    calls instantiate the class directly.  ``kwargs`` are passed to the
    ``textwrap.TextWrapper`` constructor.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` so that the first part fits in ``space_left``
            columns; returns ``(head, tail)``."""
            l = 0
            colwidth = encoding.ucolwidth
            for i, ch in enumerate(ucstr):
                l += colwidth(ch)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits.  The remainder must be text, not bytes: it is
            # put back on the chunk stack and may later be joined with the
            # other (unicode) chunks of a line.
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # always leave room for at least one column so progress is made
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            colwidth = encoding.ucolwidth
            lines = []

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:
                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == r''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # Replace this factory with the class itself for subsequent callers.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
def wrap(
    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
) -> bytes:
    """Wrap ``line`` to ``width`` and return the filled text.

    ``initindent`` prefixes the first output line and ``hangindent`` every
    following one.  All three byte strings are decoded with the configured
    encoding and error mode, wrapped, and the result is encoded again.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    # look the codec settings up once instead of once per decode call
    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)

    # keep the decoded text under separate names so the bytes-annotated
    # parameters are not rebound to str
    uline = line.decode(codec, errors)
    uinit = initindent.decode(codec, errors)
    uhang = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=uinit, subsequent_indent=uhang
    )
    return wrapper.fill(uline).encode(codec)
Augie Fackler
formatting: blacken the codebase...
r43346
# lower-cased spellings accepted by parsebool()
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}


def parsebool(s: bytes) -> Optional[bool]:
    """Parse s into a boolean.

    The comparison is case-insensitive.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower(), None)
Gregory Szorc
wireproto: syntax for encoding CBOR into frames...
r37306
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
typing: add basic type hints to stringutil.py
# TODO: make arg mandatory (and fix code below?)
def parselist(value: Optional[bytes]):
    """parse a configuration value as a list of comma/space separated strings

    Double quotes group words containing separators into one item.  A
    non-bytes ``value`` is returned as-is when truthy, else ``[]``.

    >>> parselist(b'this,is "a small" ,test')
    ['this', 'is', 'a small', 'test']
    """

    # The parser is a small state machine.  Each state function receives
    # (parts, s, offset) and returns (next_state, parts, offset); a next
    # state of None ends the parse.

    def _parse_plain(parts, s, offset):
        # state: outside of quotes
        whitespace = False
        while offset < len(s) and (
            s[offset : offset + 1].isspace() or s[offset : offset + 1] == b','
        ):
            whitespace = True
            offset += 1
        if offset >= len(s):
            return None, parts, offset
        if whitespace:
            # separators were skipped: start a new item
            parts.append(b'')
        if s[offset : offset + 1] == b'"' and not parts[-1]:
            # a quote at the very start of an item opens a quoted section
            return _parse_quote, parts, offset + 1
        elif s[offset : offset + 1] == b'"' and parts[-1][-1:] == b'\\':
            # backslash-escaped quote: replace the backslash by the quote
            parts[-1] = parts[-1][:-1] + s[offset : offset + 1]
            return _parse_plain, parts, offset + 1
        parts[-1] += s[offset : offset + 1]
        return _parse_plain, parts, offset + 1

    def _parse_quote(parts, s, offset):
        # state: just after an opening quote
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
            parts.append(b'')
            offset += 1
            while offset < len(s) and (
                s[offset : offset + 1].isspace()
                or s[offset : offset + 1] == b','
            ):
                offset += 1
            return _parse_plain, parts, offset

        # consume up to the closing quote, unescaping \" on the way
        while offset < len(s) and s[offset : offset + 1] != b'"':
            if (
                s[offset : offset + 1] == b'\\'
                and offset + 1 < len(s)
                and s[offset + 1 : offset + 2] == b'"'
            ):
                offset += 1
                parts[-1] += b'"'
            else:
                parts[-1] += s[offset : offset + 1]
            offset += 1

        if offset >= len(s):
            # unterminated quote: re-parse the collected text as plain
            # items and put the literal quote back in front
            real_parts = _configlist(parts[-1])
            if not real_parts:
                parts[-1] = b'"'
            else:
                real_parts[0] = b'"' + real_parts[0]
                parts = parts[:-1]
                parts.extend(real_parts)
            return None, parts, offset

        # skip the closing quote and any separators that follow it
        offset += 1
        while offset < len(s) and s[offset : offset + 1] in [b' ', b',']:
            offset += 1

        if offset < len(s):
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
                # a lone trailing quote is kept as part of the item
                parts[-1] += b'"'
                offset += 1
            else:
                parts.append(b'')
        else:
            return None, parts, offset

        return _parse_plain, parts, offset

    def _configlist(s):
        # drive the state machine over s until a state returns None
        s = s.rstrip(b' ,')
        if not s:
            return []
        parser, parts, offset = _parse_plain, [b''], 0
        while parser:
            parser, parts, offset = parser(parts, s, offset)
        return parts

    if value is not None and isinstance(value, bytes):
        result = _configlist(value.lstrip(b' ,\n'))
    else:
        result = value
    return result or []
Matt Harbison
typing: add basic type hints to stringutil.py
def evalpythonliteral(s: bytes):
    """Evaluate a string containing a Python literal expression"""
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    # latin1 maps every byte to one code point, so decoding cannot fail;
    # ast.literal_eval() only accepts literals, not arbitrary expressions
    return ast.literal_eval(s.decode('latin1'))