##// END OF EJS Templates
tests: skip test-git-interop.t on Windows...
tests: skip test-git-interop.t on Windows Casefolding isn't handled in dirstate yet, triggering a bunch of assertions. But while this is more correctly `no-icasefs`, it's more likely to get attention if someone sees it. I'd just rather not have it adding to the noise on Windows for now. Differential Revision: https://phab.mercurial-scm.org/D10312

File last commit:

r47575:d4ba4d51 default
r47656:fe34c75f default
Show More
stringutil.py
876 lines | 25.6 KiB | text/x-python | PythonLexer
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 # stringutil.py - utility for generic string formatting, parsing, etc.
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
Raphaël Gomès
contributor: change mentions of mpm to olivia...
r47575 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
Yuya Nishihara
wireproto: convert python literal to object without using unsafe eval()...
r37494 import ast
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 import codecs
import re as remod
import textwrap
Gregory Szorc
stringutil: teach pprint() to recognize generators...
r39332 import types
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
from ..i18n import _
Connor Sheehan
templatefuncs: add mailmap template function...
r37227 from ..thirdparty import attr
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
from .. import (
encoding,
error,
pycompat,
)
Augie Fackler
stringutil: add a new function to do minimal regex escaping...
# regex special chars pulled from https://bugs.python.org/issue29995
# which was part of Python 3.7.
_respecial = pycompat.bytestr(b'()[]{}?*+-|^$\\.&~# \t\n\r\v\f')
# Translation table for unicode.translate(): code point -> escaped text.
_regexescapemap = {
    ord(char): (b'\\' + char).decode('latin1') for char in _respecial
}
# Same escaping keyed by the special byte itself: byte -> escaped bytes.
regexbytesescapemap = {char: b'\\' + char for char in _respecial}
Augie Fackler
stringutil: add a new function to do minimal regex escaping...
r38493
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
stringutil: add a new function to do minimal regex escaping...
def reescape(pat):
    """Drop-in replacement for re.escape.

    Accepts either bytes or unicode and returns a value of the same kind.
    """
    # NOTE: the escaping is deliberately performed on unicode, because it
    # can only be done with unicode.translate, not bytes.translate. Bytes
    # input therefore takes a round trip through latin1.
    if not isinstance(pat, bytes):
        return pat.translate(_regexescapemap)
    escaped = pat.decode('latin1').translate(_regexescapemap)
    return escaped.encode('latin1')
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: allow to specify initial indent level of pprint()...
def pprint(o, bprefix=False, indent=0, level=0):
    """Pretty print an object.

    All atoms produced by pprintgen() for ``o`` are concatenated into a
    single bytestring; the arguments are forwarded unchanged.
    """
    atoms = pprintgen(o, bprefix=bprefix, indent=indent, level=level)
    return b''.join(atoms)
Gregory Szorc
stringutil: teach pprint() to indent...
r39414
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: allow to specify initial indent level of pprint()...
def _pprintcollection(items, opener, closer, emititem, indent, level):
    """Yield the atoms for one collection on behalf of pprintgen().

    ``items`` is any iterable holding the members in output order.

    ``opener`` and ``closer`` are the bytes surrounding the members, e.g.
    ``b'['`` and ``b']'``. An empty collection is emitted as the single atom
    ``opener + closer``.

    ``emititem(item, level)`` must return an iterable of atoms for a single
    member.

    ``indent`` and ``level`` have the same meaning as in pprintgen().
    """
    members = iter(items)

    # Always keep one member of lookahead: it tells us whether the collection
    # is empty and which member is the last one without relying on len(),
    # which generators do not support.
    try:
        upcoming = next(members)
    except StopIteration:
        yield opener + closer
        return

    yield opener

    if indent:
        level += 1
        yield b'\n'
        yield b' ' * (level * indent)

    last = False
    while not last:
        current = upcoming
        try:
            upcoming = next(members)
        except StopIteration:
            last = True

        for chunk in emititem(current, level):
            yield chunk

        if not last:
            if indent:
                yield b',\n'
                yield b' ' * (level * indent)
            else:
                yield b', '

    if indent:
        level -= 1
        yield b'\n'
        yield b' ' * (level * indent)

    yield closer


def pprintgen(o, bprefix=False, indent=0, level=0):
    """Pretty print an object to a generator of atoms.

    ``bprefix`` is a flag influencing whether bytestrings are prefixed with
    a ``b''`` prefix.

    ``indent`` controls whether collections and nested data structures
    span multiple lines via the indentation amount in spaces. By default,
    no newlines are emitted.

    ``level`` specifies the initial indent level. Used if ``indent > 0``.

    Lists, dicts, sets, tuples and generators are rendered recursively;
    dict items and set members are emitted in sorted order. Any other
    object falls back to ``pycompat.byterepr()``.
    """

    def emitvalue(value, curlevel):
        # atoms for a plain member (list/set/tuple/generator element)
        return pprintgen(
            value, bprefix=bprefix, indent=indent, level=curlevel
        )

    def emitpair(pair, curlevel):
        # atoms for one dict item: key, separator, value
        key, value = pair
        for chunk in emitvalue(key, curlevel):
            yield chunk
        yield b': '
        for chunk in emitvalue(value, curlevel):
            yield chunk

    if isinstance(o, bytes):
        if bprefix:
            yield b"b'%s'" % escapestr(o)
        else:
            yield b"'%s'" % escapestr(o)
        return
    if isinstance(o, bytearray):
        # codecs.escape_encode() can't handle bytearray, so escapestr fails
        # without coercion.
        yield b"bytearray['%s']" % escapestr(bytes(o))
        return

    # The order of the type checks below is significant and mirrors the
    # order in which the individual collection types have always been tested.
    emititem = emitvalue
    if isinstance(o, list):
        items, opener, closer = o, b'[', b']'
    elif isinstance(o, dict):
        items, opener, closer = sorted(o.items()), b'{', b'}'
        emititem = emitpair
    elif isinstance(o, set):
        items, opener, closer = sorted(o), b'set([', b'])'
    elif isinstance(o, tuple):
        items, opener, closer = o, b'(', b')'
    elif isinstance(o, types.GeneratorType):
        items, opener, closer = o, b'gen[', b']'
    else:
        yield pycompat.byterepr(o)
        return

    for chunk in _pprintcollection(
        items, opener, closer, emititem, indent, level
    ):
        yield chunk
Gregory Szorc
stringutil: add function to pretty print an object...
r37316
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: promote smartset.prettyformat() to utility function...
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byte repr of ``o`` is cut in front of every ``<`` (or in front of
    the ``field=`` word directly preceding it). Each piece goes on its own
    line, indented by the number of ``<`` that are still unclosed at the
    point where the piece starts.
    """
    text = pycompat.byterepr(o)
    end = len(text)
    pieces = []
    start = anchor = 0
    while start < end:
        # '... field=<type ... field=<type ...'
        #      ~~~~~~~~~~~~~~~~
        #      start anchor    cut   bracket
        bracket = text.find(b'<', anchor + 1)
        cut = -1
        if bracket < 0:
            bracket = end
        elif bracket > anchor + 1 and text.startswith(b'=', bracket - 1):
            # backtrack for ' field=<'
            cut = text.rfind(b' ', anchor + 1, bracket - 1)
        # without a usable space cut right at the bracket, otherwise just
        # after the space
        cut = bracket if cut < 0 else cut + 1
        depth = text.count(b'<', 0, start) - text.count(b'>', 0, start)
        assert depth >= 0
        # NOTE(review): indent unit reproduced as found here; confirm the
        # intended width per nesting level.
        pieces.append(b' ' * depth + text[start:cut].rstrip())
        start, anchor = cut, bracket
    return b'\n'.join(pieces)
Yuya Nishihara
stringutil: promote smartset.prettyformat() to utility function...
r38280
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move _formatsetrepr() from smartset...
def buildrepr(r):
    """Format an optional printable representation from unexpanded bits

    ========  =================================
    type(r)   example
    ========  =================================
    tuple     ('<not %r>', other)
    bytes     '<branch closed>'
    callable  lambda: '<branch %r>' % sorted(b)
    object    other
    ========  =================================

    ``None`` yields an empty bytestring.
    """
    if r is None:
        return b''
    if isinstance(r, tuple):
        # first element is the format, the remainder its arguments
        fmt, args = r[0], r[1:]
        return fmt % pycompat.rapply(pycompat.maybebytestr, args)
    if isinstance(r, bytes):
        return r
    if callable(r):
        return r()
    return pprint(r)
Yuya Nishihara
stringutil: move _formatsetrepr() from smartset...
r38595
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def binary(s):
    """return true if a string is binary data

    Data counts as binary when it contains a NUL byte; empty or missing
    input is never binary.
    """
    if not s:
        return False
    return b'\0' in s
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: extract helper function that splits stringmatcher() pattern
r46314 def _splitpattern(pattern):
if pattern.startswith(b're:'):
return b're', pattern[3:]
elif pattern.startswith(b'literal:'):
return b'literal', pattern[8:]
return b'literal', pattern
Yuya Nishihara
stringutil: move generic string helpers to new module...
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    kind, pattern = _splitpattern(pattern)

    if kind == b're':
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            # report a broken regexp as a parse error with a bytes message
            raise error.ParseError(
                _(b'invalid regular expression: %s') % forcebytestr(e)
            )
        return kind, pattern, regex.search

    if kind == b'literal':
        if casesensitive:
            return kind, pattern, pattern.__eq__

        # lower-case the pattern once; candidates are lowered per call
        ipat = encoding.lower(pattern)

        def match(s):
            return ipat == encoding.lower(s)

        return kind, pattern, match

    raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: add function to compile stringmatcher pattern into regexp...
def substringregexp(pattern, flags=0):
    """Build a regexp object from a string pattern possibly starting with
    're:' or 'literal:' prefix.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     regexp = substringregexp(pattern)
    ...     return [bool(regexp.search(t)) for t in tests]
    >>> def itest(pattern, *tests):
    ...     regexp = substringregexp(pattern, remod.I)
    ...     return [bool(regexp.search(t)) for t in tests]

    substring matching (no prefix):
    >>> test(b'bcde', b'abc', b'def', b'abcdefg')
    [False, False, True]

    substring pattern should be escaped:
    >>> substringregexp(b'.bc').pattern
    '\\\\.bc'
    >>> test(b'.bc', b'abc', b'def', b'abcdefg')
    [False, False, False]

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    [False, False, True]

    force substring matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    [False, True]

    case insensitive literal matches
    >>> itest(b'BCDE', b'abc', b'def', b'abcdefg')
    [False, False, True]

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    [False, False, True]
    """
    kind, pattern = _splitpattern(pattern)

    if kind == b'literal':
        # literal text is escaped so that it only ever matches itself
        return remod.compile(remod.escape(pattern), flags)
    if kind != b're':
        raise error.ProgrammingError(b'unhandled pattern kind: %s' % kind)

    try:
        return remod.compile(pattern, flags)
    except remod.error as e:
        raise error.ParseError(
            _(b'invalid regular expression: %s') % forcebytestr(e)
        )
Yuya Nishihara
stringutil: move generic string helpers to new module...
def shortuser(user):
    """Return a short representation of a user name or email address.

    Everything from the first ``@`` on is dropped, then everything up to and
    including the first ``<``, and finally everything from the first space
    and from the first ``.`` on.
    """
    user = user.partition(b'@')[0]
    _before, bracket, inner = user.partition(b'<')
    if bracket:
        user = inner
    user = user.partition(b' ')[0]
    return user.partition(b'.')[0]
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def emailuser(user):
    """Return the user portion of an email address.

    This is the text before the first ``@``, minus anything up to and
    including the first ``<`` found in that text.
    """
    local = user.partition(b'@')[0]
    _before, bracket, inner = local.partition(b'<')
    return inner if bracket else local
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def email(author):
    """get email of author.

    Returns the text between the first ``<`` and the first ``>``. A missing
    ``<`` means "from the start", a missing ``>`` means "to the end".
    """
    # find() gives -1 when there is no '<', which the + 1 turns into 0
    start = author.find(b'<') + 1
    stop = author.find(b'>')
    if stop == -1:
        return author[start:]
    return author[start:stop]
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Connor Sheehan
stringutil: move person function from templatefilters...
def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \\"buz\\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    # No email address at all: the whole field is taken as the name.
    if b'@' not in author:
        return author
    f = author.find(b'<')
    if f != -1:
        # "Name <email>" form: drop surrounding blanks and quotes, then
        # unescape any backslash-escaped quotes inside the name.
        return author[:f].strip(b' "').replace(b'\\"', b'"')
    # Bare address: derive a name from the part before the '@', showing
    # dots as spaces.
    f = author.find(b'@')
    return author[:f].replace(b'.', b' ')
Connor Sheehan
stringutil: move person function from templatefilters...
r37173
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
templatefuncs: add mailmap template function...
@attr.s(hash=True)
class mailmapping(object):
    """Represents a username/email key or value in
    a mailmap file"""

    # hash=True is requested because parsemailmap() uses instances as
    # dictionary keys.

    # email address of the entry (no default, so always required)
    email = attr.ib()
    # name of the entry; None when the mailmap line does not provide one
    name = attr.ib(default=None)
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
stringutil: improve check for failed mailmap line parsing...
r37263 def _ismailmaplineinvalid(names, emails):
Augie Fackler
formating: upgrade to black 20.8b1...
r46554 """Returns True if the parsed names and emails
Connor Sheehan
stringutil: improve check for failed mailmap line parsing...
r37263 in a mailmap entry are invalid.
>>> # No names or emails fails
>>> names, emails = [], []
>>> _ismailmaplineinvalid(names, emails)
True
>>> # Only one email fails
>>> emails = [b'email@email.com']
>>> _ismailmaplineinvalid(names, emails)
True
>>> # One email and one name passes
>>> names = [b'Test Name']
>>> _ismailmaplineinvalid(names, emails)
False
>>> # No names but two emails passes
>>> names = []
>>> emails = [b'proper@email.com', b'commit@email.com']
>>> _ismailmaplineinvalid(names, emails)
False
Augie Fackler
formating: upgrade to black 20.8b1...
r46554 """
Connor Sheehan
stringutil: improve check for failed mailmap line parsing...
r37263 return not emails or not names and len(emails) < 2
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
templatefuncs: add mailmap template function...
def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}

    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # Whole-line comments carry no mapping at all
        if line.lstrip().startswith(b'#'):
            continue

        # names/emails collect what has been parsed from this line so far,
        # words collects the parts of the name currently being read
        names = []
        emails = []
        words = []

        for token in line.split():
            if token.startswith(b'#'):
                # A trailing comment ends the entry
                break

            if not (token.startswith(b'<') and token.endswith(b'>')):
                # Just another word of a name
                words.append(token)
                continue

            # An email: strip the "<>" and close the name read before it
            emails.append(token[1:-1])
            if words:
                names.append(b' '.join(words))
                words = []

            # Anything after a second email does not fit the .mailmap
            # format, so stop here
            if len(emails) > 1:
                break

        # At least one email plus either a name or a second email is needed
        if _ismailmaplineinvalid(names, emails):
            continue

        # The last email (and the second name, if any) identify the commit
        # author; the first email and name are what they are mapped to.
        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None
        key = mailmapping(email=emails[-1], name=commitname)
        mailmap[key] = mailmapping(email=emails[0], name=propername)

    return mailmap
Augie Fackler
formatting: blacken the codebase...
r43346
Connor Sheehan
templatefuncs: add mailmap template function...
def mapname(mailmap, author):
    """Returns the author field according to the mailmap cache, or
    the original author field.

    ``mailmap`` is a mapping as built by ``parsemailmap()``: keys are
    ``mailmapping`` objects describing the identity found in a commit and
    values are ``mailmapping`` objects describing the proper identity.
    ``author`` is the raw author field as bytes.

    The lookup is tried first with both the commit name and email, then
    with the email alone. Any part (name or email) that the matching entry
    leaves unset is taken from the original author field.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> m = parsemailmap(mmdata)
    >>> mapname(m, b'Commit <commit1@email.xx>')
    'Name <commit1@email.xx>'
    >>> mapname(m, b'Name <commit2@email.xx>')
    'Name <name@email.xx>'
    >>> mapname(m, b'Commit <commit3@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Commit <commit4@email.xx>')
    'Name <proper@email.xx>'
    >>> mapname(m, b'Unknown Name <unknown@email.com>')
    'Unknown Name <unknown@email.com>'
    """
    # If the mailmap is empty, or the author field coming in isn't in the
    # correct format, just return the original author field. The emptiness
    # test comes first because it is cheap, which spares a regex match per
    # call when no mailmap is in use.
    if not mailmap or not isauthorwellformed(author):
        return author

    # Turn the user name into a mailmapping
    commit = mailmapping(name=person(author), email=email(author))

    try:
        # Try and use both the commit email and name as the key
        proper = mailmap[commit]
    except KeyError:
        # If the lookup fails, use just the email as the key instead.
        # A separate key object is built so the original commit fields
        # stay available for the fallback below.
        emailonly = mailmapping(email=commit.email)
        proper = mailmap.get(emailonly, mailmapping(None, None))

    # Return the author field with proper values filled in
    return b'%s <%s>' % (
        proper.name if proper.name else commit.name,
        proper.email if proper.email else commit.email,
    )
Augie Fackler
formatting: blacken the codebase...
r43346
Matt Harbison
cleanup: drop redundant character escapes outside of `[]`...
# Shape of a well formed author field: some text that contains no '<',
# one whitespace character, then '<...@...>' closing the field.
_correctauthorformat = remod.compile(br'^[^<]+\s<[^<>]+@[^<>]+>$')


def isauthorwellformed(author):
    """Tell whether the author field has the expected
    "Contributor Name <contrib@email.dom>" layout.

    The check is purely syntactic: ``author`` (bytes) is matched against
    the module-level ``_correctauthorformat`` pattern and a bool is
    returned.

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    """
    # A match object is always truthy, so bool() gives True on a match and
    # False when match() returned None.
    return bool(_correctauthorformat.match(author))
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display.

    This delegates to ``encoding.trim()``, passing ``b'...'`` as the
    ellipsis marker.
    """
    marker = b'...'
    return encoding.trim(text, maxlength, ellipsis=marker)
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def escapestr(s):
    """Return a backslash-escaped copy of the byte string ``s``.

    A ``memoryview`` argument is first copied into ``bytes``.
    """
    raw = bytes(s) if isinstance(s, memoryview) else s
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    escaped, _consumed = codecs.escape_encode(raw)
    return escaped
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def unescapestr(s):
    """Decode backslash escape sequences in the byte string ``s``.

    Only the decoded bytes from ``codecs.escape_decode()`` are returned;
    the consumed-length half of its result is discarded.
    """
    unescaped, _consumed = codecs.escape_decode(s)
    return unescaped
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string.

    ``pycompat.bytestr()`` is tried first. If it raises
    ``UnicodeEncodeError``, the object is converted with ``str()`` and
    passed through ``encoding.strtolocal()`` before being wrapped again.
    """
    try:
        converted = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        localized = encoding.strtolocal(str(obj))
        converted = pycompat.bytestr(localized)
    return converted
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
def uirepr(s):
    """Return a byte repr of ``s`` with doubled backslashes collapsed."""
    rendered = pycompat.byterepr(pycompat.bytestr(s))
    # Avoid double backslash in Windows path repr()
    return rendered.replace(b'\\\\', b'\\')
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
stringutil: move generic string helpers to new module...
r37101 # delay import of textwrap
def _MBTextWrapper(**kwargs):
    """Build a display-width-aware ``textwrap.TextWrapper`` instance.

    The wrapper class is defined on the first call only. Before returning,
    this function rebinds the module-level name ``_MBTextWrapper`` to the
    class itself, so subsequent calls instantiate the class directly.

    ``kwargs`` are passed unchanged to the wrapper class constructor.
    """

    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires user decision to determine width of such characters.
        """

        def _cutdown(self, ucstr, space_left):
            """Split ``ucstr`` so the head fits in ``space_left`` columns.

            Returns a ``(head, rest)`` pair of unicode strings.
            """
            l = 0
            colwidth = encoding.ucolwidth
            for i, char in enumerate(ucstr):
                l += colwidth(char)
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            # Everything fits. The remainder must be a native (unicode)
            # string like ``ucstr``: it is put back on the chunk stack and
            # later compared with and joined to other text chunks.
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Always leave room for at least one column so progress is made
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                # Exception messages are native strings, not bytes
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (
                    self.drop_whitespace
                    and cur_line
                    and cur_line[-1].strip() == ''
                ):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + ''.join(cur_line))

            return lines

    # Replace this factory with the class so it is only defined once
    global _MBTextWrapper
    _MBTextWrapper = tw
    return tw(**kwargs)
Augie Fackler
formatting: blacken the codebase...
r43346
Augie Fackler
formatting: byteify all mercurial/ and hgext/ string literals...
def wrap(line, width, initindent=b'', hangindent=b''):
    """Wrap the byte string ``line`` and return the result as bytes.

    ``initindent`` is used as the indent of the first output line and
    ``hangindent`` as the indent of the following ones. If ``width`` is not
    larger than the longer of the two indents, it is replaced by
    ``max(78, longest indent + 1)``.

    All three byte strings are decoded with ``encoding.encoding`` and
    ``encoding.encodingmode``, wrapped as text, then encoded back.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)

    codec = pycompat.sysstr(encoding.encoding)
    errors = pycompat.sysstr(encoding.encodingmode)

    line = line.decode(codec, errors)
    initindent = initindent.decode(codec, errors)
    hangindent = hangindent.decode(codec, errors)

    wrapper = _MBTextWrapper(
        width=width, initial_indent=initindent, subsequent_indent=hangindent
    )
    # Encode with the same error mode used for decoding. Under 'replace',
    # decoding may have inserted U+FFFD, which a strict encode to a
    # non-Unicode charset would reject with UnicodeEncodeError.
    return wrapper.fill(line).encode(codec, errors)
Augie Fackler
formatting: blacken the codebase...
r43346
# Lower-case byte-string spellings recognized by parsebool()
_booleans = {
    b'1': True,
    b'yes': True,
    b'true': True,
    b'on': True,
    b'always': True,
    b'0': False,
    b'no': False,
    b'false': False,
    b'off': False,
    b'never': False,
}


def parsebool(s):
    """Parse s into a boolean.

    The lookup is case-insensitive: ``s`` is lower-cased before being
    looked up in ``_booleans``.

    If s is not a valid boolean, returns None.
    """
    try:
        return _booleans[s.lower()]
    except KeyError:
        return None
Gregory Szorc
wireproto: syntax for encoding CBOR into frames...
r37306
Augie Fackler
formatting: blacken the codebase...
r43346
Yuya Nishihara
wireproto: convert python literal to object without using unsafe eval()...
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    On Python 3 the input is decoded as latin1 first, since
    ``ast.literal_eval()`` is given text there; otherwise ``s`` is passed
    through unchanged.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)