stringutil.py
998 lines
| 29.5 KiB
| text/x-python
|
PythonLexer
Yuya Nishihara
|
# stringutil.py - utility for generic string formatting, parsing, etc.
#
#  Copyright 2005 K. Thananchayan <thananck@yahoo.com>
#  Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Yuya Nishihara
|
r37494 | import ast | ||
Yuya Nishihara
|
r37101 | import codecs | ||
import re as remod | ||||
import textwrap | ||||
Gregory Szorc
|
r39332 | import types | ||
Yuya Nishihara
|
r37101 | |||
Matt Harbison
|
r50470 | from typing import ( | ||
Optional, | ||||
overload, | ||||
) | ||||
Yuya Nishihara
|
r37101 | from ..i18n import _ | ||
Connor Sheehan
|
r37227 | from ..thirdparty import attr | ||
Yuya Nishihara
|
r37101 | |||
from .. import ( | ||||
encoding, | ||||
error, | ||||
pycompat, | ||||
) | ||||
Augie Fackler
|
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Table for str.translate(): maps the code point of each special character
# to that character preceded by a backslash. reescape() translates with it.
_regexescapemap = {ord(i): (b'\\' + i).decode('latin1') for i in _respecial}
# Bytes counterpart, keyed by the items produced when iterating _respecial
# (presumably one-byte bytes objects, given ``b'\\' + i`` -- confirm against
# pycompat.bytestr).
regexbytesescapemap = {i: (b'\\' + i) for i in _respecial}
Augie Fackler
|
r38493 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
@overload
def reescape(pat: bytes) -> bytes:
    ...


@overload
def reescape(pat: str) -> str:
    ...


def reescape(pat):
    """Escape regex special characters in ``pat``; stand-in for re.escape.

    Accepts either ``bytes`` or ``str`` and returns the same type it was
    given.
    """
    # The escaping itself always happens on unicode: only str.translate()
    # can map one character to several, bytes.translate() cannot. Bytes are
    # round-tripped through latin1, which maps every byte value to exactly
    # one code point and back.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    escaped = pat.decode('latin1').translate(_regexescapemap)
    return escaped.encode('latin1')
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def pprint(o, bprefix: bool = False, indent: int = 0, level: int = 0) -> bytes:
    """Pretty print an object into a single byte string.

    All arguments are forwarded unchanged to ``pprintgen()``; the atoms it
    yields are concatenated and returned.
    """
    atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
    return b''.join(atoms)
Gregory Szorc
|
r39414 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def _pprintentries(opener, closer, entries, bprefix, indent, level):
    """Yield the atoms of one collection for ``pprintgen()``.

    ``opener`` and ``closer`` are the byte strings that bracket the
    collection. ``entries`` is an iterable of tuples: a 1-tuple for a plain
    element, a ``(key, value)`` 2-tuple for a dict item (its parts are
    joined with ``b': '``). ``bprefix``, ``indent`` and ``level`` have the
    same meaning as in ``pprintgen()``.
    """
    entries = iter(entries)
    # Look one entry ahead so that we know whether the current entry is the
    # last one without needing len(); this is what lets generators share the
    # code path of sized collections.
    try:
        upcoming = next(entries)
    except StopIteration:
        # Empty collection: emit it as one atom, without any newline.
        yield opener + closer
        return

    yield opener

    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    last = False
    while not last:
        current = upcoming
        try:
            upcoming = next(entries)
        except StopIteration:
            last = True

        for position, part in enumerate(current):
            if position:
                yield b': '
            yield from pprintgen(
                part, bprefix=bprefix, indent=indent, level=level
            )

        if not last:
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)

    yield closer


def pprintgen(o, bprefix: bool = False, indent: int = 0, level: int = 0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are preferred with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    bytes, bytearray, list, dict, set, tuple and generator objects get a
    dedicated rendering; dict items and set members are emitted in sorted
    order. Anything else is rendered with ``pycompat.byterepr()``.
    """
    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
    elif isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
    elif isinstance(o, list):
        entries = ((a,) for a in o)
        yield from _pprintentries(b'[', b']', entries, bprefix, indent, level)
    elif isinstance(o, dict):
        entries = sorted(o.items())
        yield from _pprintentries(b'{', b'}', entries, bprefix, indent, level)
    elif isinstance(o, set):
        entries = ((k,) for k in sorted(o))
        yield from _pprintentries(
            b'set([', b'])', entries, bprefix, indent, level
        )
    elif isinstance(o, tuple):
        entries = ((a,) for a in o)
        yield from _pprintentries(b'(', b')', entries, bprefix, indent, level)
    elif isinstance(o, types.GeneratorType):
        # The generator is consumed while printing.
        entries = ((a,) for a in o)
        yield from _pprintentries(
            b'gen[', b']', entries, bprefix, indent, level
        )
    else:
        yield pycompat.byterepr(o)
Gregory Szorc
|
r37316 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def prettyrepr(o) -> bytes:
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is cut into pieces in front of each ``<`` (or in
    front of the `` field=`` label that precedes it). Each piece goes on its
    own line, prefixed according to how many ``<`` are still unclosed at the
    point where the piece starts.
    """
    lines = []
    rs = pycompat.byterepr(o)
    # p0 is where the current piece starts; p1 is the '<' that introduced it.
    p0 = p1 = 0
    while p0 < len(rs):
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      p0    p1        q0    q1
        q0 = -1
        # q1: position of the next '<' after the current one
        q1 = rs.find(b'<', p1 + 1)
        if q1 < 0:
            # no further '<': the current piece runs to the end of the repr
            q1 = len(rs)
        # pytype: disable=wrong-arg-count
        # TODO: figure out why pytype doesn't recognize the optional start
        # arg
        elif q1 > p1 + 1 and rs.startswith(b'=', q1 - 1):
            # pytype: enable=wrong-arg-count
            # backtrack for ' field=<'
            q0 = rs.rfind(b' ', p1 + 1, q1 - 1)
        if q0 < 0:
            # no ' field=' label found: cut right at the '<'
            q0 = q1
        else:
            q0 += 1  # skip ' '
        # nesting depth = brackets opened but not yet closed before p0
        l = rs.count(b'<', 0, p0) - rs.count(b'>', 0, p0)
        assert l >= 0
        lines.append((l, rs[p0:q0].rstrip()))
        p0, p1 = q0, q1
    return b'\n'.join(b' ' * l + s for l, s in lines)
Yuya Nishihara
|
r38280 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def buildrepr(r) -> bytes:
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty byte string.
    """
    if r is None:
        return b''
    if isinstance(r, tuple):
        # First item is the format, the remaining items are its arguments.
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
Yuya Nishihara
|
r38595 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def binary(s: bytes) -> bool:
    """Return True if ``s`` is non-empty and contains a NUL byte."""
    if not s:
        return False
    return b'\0' in s
Yuya Nishihara
|
r37101 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50470 | def _splitpattern(pattern: bytes): | ||
Yuya Nishihara
|
r46314 | if pattern.startswith(b're:'): | ||
return b're', pattern[3:] | ||||
elif pattern.startswith(b'literal:'): | ||||
return b'literal', pattern[8:] | ||||
return b'literal', pattern | ||||
Matt Harbison
|
def stringmatcher(pattern: bytes, casesensitive: bool = True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    Raises error.ParseError if a 're:' pattern is not a valid regular
    expression.

    helper for tests:

    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):

    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)

    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)

    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals

    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches

    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches

    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b're':
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search
    elif kind == b'literal':
        if casesensitive:
            # Compare with ``==`` rather than handing out the bound
            # ``pattern.__eq__``: the bare method returns NotImplemented
            # (which is truthy) when given a non-bytes value, so such a
            # value would wrongly look like a match.
            match = lambda s: pattern == s
        else:
            ipat = encoding.lower(pattern)
            match = lambda s: ipat == encoding.lower(s)
        return kind, pattern, match

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
Yuya Nishihara
|
r37101 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def substringregexp(pattern: bytes, flags: int = 0):
    """Compile ``pattern`` into a regexp object for substring searching.

    The pattern may start with a 're:' or 'literal:' prefix. A literal
    pattern (the default) is escaped before compilation so that it matches
    itself verbatim. ``flags`` is passed on to the regexp compiler.

    Raises error.ParseError if a 're:' pattern is not a valid regular
    expression.

    helper for tests:

    >>> def test(pattern, *tests):
    ...     regexp = substringregexp(pattern)
    ...     return [bool(regexp.search(t)) for t in tests]
    >>> def itest(pattern, *tests):
    ...     regexp = substringregexp(pattern, remod.I)
    ...     return [bool(regexp.search(t)) for t in tests]

    substring matching (no prefix):

    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
    [False, False, True]

    substring pattern should be escaped:

    >>> substringregexp(b'.bc').pattern
    '\\\\.bc'
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
    [False, False, False]

    regex matching ('re:' prefix)

    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    [False, False, True]

    force substring matches ('literal:' prefix)

    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    [False, True]

    case insensitive literal matches

    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
    [False, False, True]

    case insensitive regex matches

    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    [False, False, True]
    """
    kind, pattern = _splitpattern(pattern)
    if kind == b'literal':
        return remod.compile(remod.escape(pattern), flags)
    if kind != b're':
        raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
    try:
        return remod.compile(pattern, flags)
    except remod.error as e:
        raise error.ParseError(
            _(b'invalid regular expression: %s') % forcebytestr(e)
        )
Matt Harbison
|
def shortuser(user: bytes) -> bytes:
    """Return a short representation of a user name or email address.

    The steps are applied in order: drop everything from the first ``@``,
    drop everything up to and including the first ``<``, then keep only
    what precedes the first space and the first dot.
    """
    user = user.partition(b'@')[0]
    _before, bracket, after = user.partition(b'<')
    if bracket:
        user = after
    return user.partition(b' ')[0].partition(b'.')[0]
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def emailuser(user: bytes) -> bytes:
    """Return the user portion of an email address.

    Everything from the first ``@`` on is dropped, then everything up to
    and including the first ``<`` of what remains.
    """
    local = user.partition(b'@')[0]
    _before, bracket, after = local.partition(b'<')
    return after if bracket else local
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def email(author: bytes) -> bytes:
    """Get email of author.

    Returns what lies between the first ``<`` and the first ``>``. A
    missing ``<`` means "from the start", a missing ``>`` means "to the
    end".
    """
    # find() gives -1 when there is no '<', so the +1 lands on index 0.
    start = author.find(b'<') + 1
    end = author.find(b'>')
    if end == -1:
        return author[start:]
    return author[start:end]
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
r37101 | |||
Matt Harbison
|
def person(author: bytes) -> bytes:
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    Without any ``@`` the input is returned unchanged. With a ``<``, the
    text before it is returned with surrounding spaces and double quotes
    removed and ``\\"`` unescaped. Otherwise the text before the ``@`` is
    returned with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if b'@' not in author:
        return author
    name, bracket, _rest = author.partition(b'<')
    if bracket:
        return name.strip(b' "').replace(b'\\"', b'"')
    local = author[: author.find(b'@')]
    return local.replace(b'.', b' ')
Connor Sheehan
|
r37173 | |||
Augie Fackler
|
r43346 | |||
Connor Sheehan
|
@attr.s(hash=True)
class mailmapping:
    """Represents a username/email key or value in
    a mailmap file"""

    # hash=True is requested so that instances can serve as dict keys, which
    # is how parsemailmap() and mapname() use them.

    # Email address; declared without a default.
    email = attr.ib()
    # Name associated with the email; None when absent.
    name = attr.ib(default=None)
Augie Fackler
|
r43346 | |||
Connor Sheehan
|
r37263 | def _ismailmaplineinvalid(names, emails): | ||
Augie Fackler
|
r46554 | """Returns True if the parsed names and emails | ||
Connor Sheehan
|
r37263 | in a mailmap entry are invalid. | ||
>>> # No names or emails fails | ||||
>>> names, emails = [], [] | ||||
>>> _ismailmaplineinvalid(names, emails) | ||||
True | ||||
>>> # Only one email fails | ||||
>>> emails = [b'email@email.com'] | ||||
>>> _ismailmaplineinvalid(names, emails) | ||||
True | ||||
>>> # One email and one name passes | ||||
>>> names = [b'Test Name'] | ||||
>>> _ismailmaplineinvalid(names, emails) | ||||
False | ||||
>>> # No names but two emails passes | ||||
>>> names = [] | ||||
>>> emails = [b'proper@email.com', b'commit@email.com'] | ||||
>>> _ismailmaplineinvalid(names, emails) | ||||
False | ||||
Augie Fackler
|
r46554 | """ | ||
Connor Sheehan
|
r37263 | return not emails or not names and len(emails) < 2 | ||
Augie Fackler
|
r43346 | |||
Connor Sheehan
|
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict that maps the commit identity of each valid line (a
    ``mailmapping``) to the proper identity (also a ``mailmapping``).
    ``None`` input gives an empty dict. Comment lines and lines that do not
    parse into a valid entry are ignored.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    parsed = {}
    if mailmapcontent is None:
        return parsed

    for entry in mailmapcontent.splitlines():
        # Whole-line comments carry no mapping.
        if entry.lstrip().startswith(b'#'):
            continue

        # names/emails collect what this line declares, in order of
        # appearance; words accumulates the pieces of the name being read.
        names, emails = [], []
        words = []
        for token in entry.split():
            if token.startswith(b'#'):
                # A trailing comment ends the useful part of the line.
                break
            if not (token.startswith(b'<') and token.endswith(b'>')):
                # Just another word of the name in progress.
                words.append(token)
                continue

            # An email: keep it without the surrounding "<>" and close the
            # name that led up to it, if any.
            emails.append(token[1:-1])
            if words:
                names.append(b' '.join(words))
                words = []

            # Nothing after a second email fits the .mailmap format.
            if len(emails) > 1:
                break

        # At least one email is required, plus a name or a second email.
        if _ismailmaplineinvalid(names, emails):
            continue

        # The commit identity is the last email, and carries a name only
        # when the line gave two names; the proper identity is the first
        # email together with the first name.
        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None
        key = mailmapping(email=emails[-1], name=commitname)
        parsed[key] = mailmapping(email=emails[0], name=propername)

    return parsed
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def mapname(mailmap, author: bytes) -> bytes:
    """Returns the author field according to the mailmap cache, or
    the original author field.

    The lookup is first attempted with both the name and the email of
    ``author``, then with the email alone. Whatever the matching entry
    leaves unset is taken from ``author`` itself.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # Nothing to do for a malformed author field or an empty mailmap.
    if not isauthorwellformed(author) or not mailmap:
        return author

    commit = mailmapping(name=person(author), email=email(author))
    try:
        # Most specific key first: name and email together.
        proper = mailmap[commit]
    except KeyError:
        # Fall back to an email-only key; when that misses too, an
        # all-None entry makes both fields default to the commit's own.
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    name = proper.name or commit.name
    address = proper.email or commit.email
    return b'%s <%s>' % (name, address)
Augie Fackler
|
r43346 | |||
Matt Harbison
|
# A name not containing '<', one whitespace character, then an
# angle-bracketed address that has an '@' with text on both sides.
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author: bytes) -> bool:
    """Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    found = _correctauthorformat.match(author)
    return bool(found)
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def firstline(text: bytes) -> bytes:
    """Return the first line of the input"""
    # Try to avoid running splitlines() on the whole string: keep only what
    # precedes the first b'\n'. splitlines() is still applied to that head
    # because it also recognizes other line endings.
    head = text.partition(b'\n')[0]
    pieces = head.splitlines()
    return pieces[0] if pieces else b''
Matt Harbison
|
def ellipsis(text: bytes, maxlength: int = 400) -> bytes:
    """Trim string to at most maxlength (default: 400) columns in display.

    The trimming is delegated to ``encoding.trim()`` with ``b'...'`` as the
    ellipsis marker.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis=b'...')
    return trimmed
Yuya Nishihara
|
r37101 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def escapestr(s: bytes) -> bytes:
    """Return ``s`` with special and non-printable bytes backslash-escaped.

    ``s`` may be ``bytes``, ``bytearray`` or ``memoryview``; the result is
    always ``bytes``.
    """
    # "bytes" is also a typing shortcut for bytes, bytearray, and memoryview.
    # codecs.escape_encode() only accepts real bytes objects, so the other
    # two are coerced first.
    if isinstance(s, (memoryview, bytearray)):
        s = bytes(s)
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]  # pytype: disable=module-attr
Yuya Nishihara
|
r37101 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def unescapestr(s: bytes) -> bytes:
    """Return ``s`` with its backslash escape sequences decoded."""
    # escape_decode() returns a (decoded, length) pair; only the decoded
    # bytes are of interest here.
    result = codecs.escape_decode(s)  # pytype: disable=module-attr
    return result[0]
Yuya Nishihara
|
r37101 | |||
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    ``pycompat.bytestr()`` is tried first; if it raises UnicodeEncodeError
    the object is stringified and converted with ``encoding.strtolocal()``
    instead.
    """
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        converted = pycompat.bytestr(localized)
    return converted
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def uirepr(s: bytes) -> bytes:
    """Return the byte repr of ``s`` with doubled backslashes collapsed."""
    quoted = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return quoted.replace(b'\\\\', b'\\')
Augie Fackler
|
r43346 | |||
Yuya Nishihara
|
# Build the width-aware TextWrapper subclass lazily, on first use.
def _MBTextWrapper(**kwargs):
    """Return a terminal-column-aware ``textwrap.TextWrapper`` instance.

    The first call defines the ``tw`` subclass below and then rebinds the
    module-level name ``_MBTextWrapper`` to that class, so every later
    call instantiates the class directly instead of running this function
    again.

    ``kwargs`` are passed unchanged to the ``TextWrapper`` constructor.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires a user decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` at the point where it stops fitting.

            Returns ``(head, tail)`` where ``head`` is the longest prefix
            whose column width does not exceed ``space_left`` and ``tail``
            is the remaining text.
            """
            used = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                used += colwidth(char)
                if space_left < used:
                    return ucstr[:i], ucstr[i:]
            # Everything fits.  The remainder has to be text, like
            # ``ucstr``: it is pushed back on the chunk stack and would
            # otherwise break the ''.join() in _wrap_chunks().
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            """Deal with a chunk that is too wide to fit on any line.

            With ``break_long_words`` set, as much of the chunk as fits in
            the remaining columns is moved to ``cur_line`` and the rest is
            left on ``reversed_chunks``.  Otherwise the whole chunk is
            moved, but only if the current line is still empty.
            """
            space_left = max(width - cur_len, 1)
            if self.break_long_words:
                chunk = reversed_chunks[-1]
                cut, res = self._cutdown(chunk, space_left)
                if not cut and not cur_line:
                    # Even the first character is wider than a whole line
                    # (e.g. a double-width character and a single usable
                    # column).  Emit it anyway: leaving the chunk untouched
                    # on an empty line would make _wrap_chunks() loop
                    # forever.
                    cut, res = chunk[:1], chunk[1:]
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            """Group ``chunks`` into lines of at most ``self.width`` columns.

            ``chunks`` is consumed (reversed in place and popped from).
            Returns the list of wrapped lines, each prefixed with the
            initial or subsequent indent.  Raises ValueError when
            ``self.width`` is not positive.
            """
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError(
                    "invalid width %r (must be > 0)" % self.width
                )

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:
                # Start the list of chunks that will make up the current
                # line.  cur_len is the column width of all the chunks in
                # cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started
                # yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == ''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # Replace this factory with the class itself so the class body above is
    # only ever evaluated once.
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
Augie Fackler
|
r43346 | |||
Matt Harbison
|
def wrap(
    line: bytes, width: int, initindent: bytes = b'', hangindent: bytes = b''
) -> bytes:
    """Wrap ``line`` to ``width`` columns and return the filled text

    ``initindent`` prefixes the first output line and ``hangindent`` every
    following one.  All three byte strings are decoded with
    ``encoding.encoding`` / ``encoding.encodingmode``, wrapped by the
    width-aware text wrapper, and the result is encoded back with
    ``encoding.encoding``.
    """
    widest_indent = max(len(hangindent), len(initindent))
    if width <= widest_indent:
        # adjust for weird terminal size
        width = max(78, widest_indent + 1)

    def tounicode(data):
        return data.decode(
            pycompat.sysstr(encoding.encoding),
            pycompat.sysstr(encoding.encodingmode),
        )

    text = tounicode(line)
    first_prefix = tounicode(initindent)
    later_prefix = tounicode(hangindent)

    wrapper = _MBTextWrapper(
        width=width,
        initial_indent=first_prefix,
        subsequent_indent=later_prefix,
    )
    filled = wrapper.fill(text)
    return filled.encode(pycompat.sysstr(encoding.encoding))
Augie Fackler
|
r43346 | |||
# Lower-case keywords accepted as booleans, mapped to their value.
_booleans = {
    **dict.fromkeys((b'1', b'yes', b'true', b'on', b'always'), True),
    **dict.fromkeys((b'0', b'no', b'false', b'off', b'never'), False),
}


def parsebool(s: bytes) -> Optional[bool]:
    """Interpret ``s`` as a boolean keyword, ignoring case.

    Returns True or False for a recognised keyword and None when ``s`` is
    not a valid boolean.
    """
    keyword = s.lower()
    if keyword in _booleans:
        return _booleans[keyword]
    return None
Gregory Szorc
|
r37306 | |||
Augie Fackler
|
r43346 | |||
Matt Harbison
|
r50470 | # TODO: make arg mandatory (and fix code below?) | ||
def parselist(value: Optional[bytes]):
    """parse a configuration value as a list of comma/space separated strings

    Items are separated by any run of whitespace and/or commas.  A double
    quoted item may contain separators, and a backslash-escaped double
    quote stands for a literal double quote.  An opening quote that is
    never closed is not treated as a quote: it is kept literally and the
    text after it is split as usual.

    A value that is not ``bytes`` (None, or an already parsed list) is
    returned as is, except that any false value becomes an empty list.

    >>> parselist(b'this,is "a small" ,test')
    ['this', 'is', 'a small', 'test']
    """

    # The parser is a small state machine.  Each state function takes
    # (parts, s, offset) and returns (next state or None, parts, offset);
    # ``parts`` is the list of items built so far and ``parts[-1]`` is the
    # item currently being filled.

    def _isseparator(c):
        # Any whitespace byte or a comma separates two items.
        return c.isspace() or c == b','

    def _parse_plain(parts, s, offset):
        # State: outside of any quote.
        whitespace = False
        while offset < len(s) and _isseparator(s[offset : offset + 1]):
            whitespace = True
            offset += 1
        if offset >= len(s):
            return None, parts, offset
        if whitespace:
            # A separator was skipped, so a new item starts here.
            parts.append(b'')
        c = s[offset : offset + 1]
        if c == b'"' and not parts[-1]:
            # A quote only opens a quoted item at the start of an item.
            return _parse_quote, parts, offset + 1
        if c == b'"' and parts[-1][-1:] == b'\\':
            # \" inside a plain item: replace the backslash by the quote.
            parts[-1] = parts[-1][:-1] + c
            return _parse_plain, parts, offset + 1
        parts[-1] += c
        return _parse_plain, parts, offset + 1

    def _parse_quote(parts, s, offset):
        # State: just after an opening quote.
        if offset < len(s) and s[offset : offset + 1] == b'"':  # ""
            parts.append(b'')
            offset += 1
            while offset < len(s) and _isseparator(s[offset : offset + 1]):
                offset += 1
            return _parse_plain, parts, offset

        # Collect everything up to the closing quote, unescaping \".
        while offset < len(s) and s[offset : offset + 1] != b'"':
            if (
                s[offset : offset + 1] == b'\\'
                and offset + 1 < len(s)
                and s[offset + 1 : offset + 2] == b'"'
            ):
                offset += 1
                parts[-1] += b'"'
            else:
                parts[-1] += s[offset : offset + 1]
            offset += 1

        if offset >= len(s):
            # No closing quote: re-split the collected text as plain items
            # and give the literal quote back to the first of them.
            real_parts = _configlist(parts[-1])
            if not real_parts:
                parts[-1] = b'"'
            else:
                real_parts[0] = b'"' + real_parts[0]
                parts = parts[:-1]
                parts.extend(real_parts)
            return None, parts, offset

        # Step over the closing quote and the separators following it.  Any
        # whitespace counts here, exactly as in the other separator loops;
        # skipping only b' ' and b',' made a newline or tab after a quoted
        # item produce a spurious empty item.
        offset += 1
        while offset < len(s) and _isseparator(s[offset : offset + 1]):
            offset += 1

        if offset < len(s):
            if offset + 1 == len(s) and s[offset : offset + 1] == b'"':
                # A lone trailing quote is kept as part of the item.
                parts[-1] += b'"'
                offset += 1
            else:
                parts.append(b'')
        else:
            return None, parts, offset

        return _parse_plain, parts, offset

    def _configlist(s):
        # Run the state machine over ``s`` until a state returns None.
        s = s.rstrip(b' ,')
        if not s:
            return []
        parser, parts, offset = _parse_plain, [b''], 0
        while parser:
            parser, parts, offset = parser(parts, s, offset)
        return parts

    if isinstance(value, bytes):
        result = _configlist(value.lstrip(b' ,\n'))
    else:
        result = value
    return result or []
Matt Harbison
|
def evalpythonliteral(s: bytes):
    """Evaluate bytes holding a Python literal expression and return its value

    The bytes are decoded as latin-1 and the resulting text is handed to
    ``ast.literal_eval``.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1')
    return ast.literal_eval(source)