upstream/mercurial-mirror Files · mercurial/utils/stringutil.py

remove: use progress helper...

remove: use progress helper Differential Revision: https://phab.mercurial-scm.org/D3767

Yuya Nishihara - - Load All Authors

File last commit:

r38283:fbb2edde default


                r38366:89db59e5

default

Download file

             stringutil.py
        
                    541 lines
            
             | 18.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / mercurial / utils / stringutil.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Yuya Nishihara
    
stringutil: move generic string helpers to new module...

              r37101
            
      # stringutil.py - utility for generic string formatting, parsing, etc.

      #

      #  Copyright 2005 K. Thananchayan <thananck@yahoo.com>

      #  Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

      #  Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import

        Yuya Nishihara
    
wireproto: convert python literal to object without using unsafe eval()...

              r37494
            
      import ast

        Yuya Nishihara
    
stringutil: move generic string helpers to new module...

              r37101
            
      import codecs

      import re as remod

      import textwrap

      from ..i18n import _

        Connor Sheehan
    
templatefuncs: add mailmap template function...

              r37227
            
      from ..thirdparty import attr

        Yuya Nishihara
    
stringutil: move generic string helpers to new module...

              r37101
            
      from .. import (

          encoding,

          error,

          pycompat,

      )

        Yuya Nishihara
    
stringutil: flip the default of pprint() to bprefix=False...

              r37961
            
      def pprint(o, bprefix=False):
          """Render ``o`` as a printable, literal-like string.

          bytes values are quoted and escaped with escapestr(), with a ``b``
          prefix when ``bprefix`` is true. bytearray values are escaped the
          same way inside ``bytearray['...']``. Lists, dicts and tuples are
          rendered member by member through recursive calls; dict items are
          emitted in sorted order. Any other object is handed to
          pycompat.byterepr().
          """
          if isinstance(o, bytes):
              template = "b'%s'" if bprefix else "'%s'"
              return template % escapestr(o)

          if isinstance(o, bytearray):
              # escapestr() goes through codecs.escape_encode(), which can't
              # handle bytearray, so coerce to bytes first.
              return "bytearray['%s']" % escapestr(bytes(o))

          if isinstance(o, list):
              members = [pprint(item, bprefix=bprefix) for item in o]
              return '[%s]' % (b', '.join(members))

          if isinstance(o, dict):
              pairs = [
                  '%s: %s' % (pprint(key, bprefix=bprefix),
                              pprint(value, bprefix=bprefix))
                  for key, value in sorted(o.items())
              ]
              return '{%s}' % (b', '.join(pairs))

          if isinstance(o, tuple):
              members = [pprint(item, bprefix=bprefix) for item in o]
              return '(%s)' % (b', '.join(members))

          # Nothing special to do for this type; use its repr.
          return pycompat.byterepr(o)

        Gregory Szorc
    
stringutil: add function to pretty print an object...

              r37316
            
        Yuya Nishihara
    
stringutil: promote smartset.prettyformat() to utility function...

              r38280
            
      def prettyrepr(o):
          """Return the byterepr of a possibly-nested object, one piece per line

          The repr is cut in front of each '<' (or in front of the ' field='
          word that leads to it), and every piece is indented by two spaces
          for each '<...>' level that is still open where the piece starts.
          """
          rs = pycompat.byterepr(o)
          end = len(rs)
          pieces = []
          # start: where the current piece begins
          # prevlt: position of the '<' the current piece was cut for
          start = prevlt = 0
          while start < end:
              # '... field=<type ... field=<type ...'
              #      ^     ^         ^     ^
              #      start prevlt    cut   nextlt
              nextlt = rs.find('<', prevlt + 1)
              if nextlt < 0:
                  # no further '<': the last piece runs to the end
                  nextlt = cut = end
              else:
                  cut = nextlt
                  if nextlt > prevlt + 1 and rs.startswith('=', nextlt - 1):
                      # backtrack for ' field=<' so the field name is not
                      # left behind on the previous line
                      space = rs.rfind(' ', prevlt + 1, nextlt - 1)
                      if space >= 0:
                          cut = space + 1  # skip ' '
              depth = rs.count('<', 0, start) - rs.count('>', 0, start)
              assert depth >= 0
              pieces.append('  ' * depth + rs[start:cut].rstrip())
              start, prevlt = cut, nextlt
          return '\n'.join(pieces)

        Yuya Nishihara
    
stringutil: move generic string helpers to new module...

              r37101
            
      def binary(s):
          """Report whether ``s`` is binary data, i.e. is non-empty and
          contains a NUL character"""
          if not s:
              return False
          return '\0' in s

      def stringmatcher(pattern, casesensitive=True):
          """
          Turn a pattern string into a (kind, pattern, matcher) triple.

          A 're:' prefix selects regular expression search and a 'literal:'
          prefix selects exact comparison; the prefix is removed from the
          returned pattern. A pattern with no prefix, or with an unknown one,
          is compared literally as a whole. With casesensitive=False, regexes
          are compiled with the ignore-case flag and literals are compared
          after lowercasing both sides with encoding.lower().

          An invalid regular expression raises error.ParseError.

          helper for tests:
          >>> def test(pattern, *tests):
          ...     kind, pattern, matcher = stringmatcher(pattern)
          ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
          >>> def itest(pattern, *tests):
          ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
          ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

          exact matching (no prefix):
          >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
          ('literal', 'abcdefg', [False, False, True])

          regex matching ('re:' prefix)
          >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
          ('re', 'a.+b', [False, False, True])

          force exact matches ('literal:' prefix)
          >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
          ('literal', 're:foobar', [False, True])

          unknown prefixes are ignored and treated as literals
          >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
          ('literal', 'foo:bar', [False, False, True])

          case insensitive regex matches
          >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
          ('re', 'A.+b', [False, False, True])

          case insensitive literal matches
          >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
          ('literal', 'ABCDEFG', [False, False, True])
          """
          if pattern.startswith('re:'):
              regexsource = pattern[3:]
              reflags = 0 if casesensitive else remod.I
              try:
                  compiled = remod.compile(regexsource, reflags)
              except remod.error as e:
                  raise error.ParseError(_('invalid regular expression: %s')
                                         % e)
              return 're', regexsource, compiled.search

          if pattern.startswith('literal:'):
              pattern = pattern[8:]

          if casesensitive:
              return 'literal', pattern, pattern.__eq__

          # fold the pattern once; each candidate is folded when it is tested
          ipat = encoding.lower(pattern)
          return 'literal', pattern, lambda s: ipat == encoding.lower(s)

      def shortuser(user):
          """Return a short representation of a user name or email address.

          Everything from the first '@' on is dropped, then everything up to
          and including the first '<', and finally everything from the first
          space and from the first '.' on.
          """
          # drop the domain part
          user = user.partition('@')[0]
          # drop a leading 'Name <'
          _head, bracket, tail = user.partition('<')
          if bracket:
              user = tail
          # keep only the first word, then only its first dotted component
          user = user.partition(' ')[0]
          user = user.partition('.')[0]
          return user

      def emailuser(user):
          """Return the user portion of an email address.

          That is the text before the first '@', with anything up to and
          including the first '<' removed.
          """
          local = user.partition('@')[0]
          _head, bracket, tail = local.partition('<')
          if bracket:
              return tail
          return local

      def email(author):
          '''Extract the email address from an author string.

          The result is the text after the first '<' (or from the start of
          the string if there is none) up to the first '>' (or to the end of
          the string if there is none).
          '''
          start = author.find('<') + 1
          end = author.find('>')
          if end == -1:
              return author[start:]
          return author[start:end]

        Connor Sheehan
    
stringutil: move person function from templatefilters...

              r37173
            
      def person(author):
          """Returns the name before an email address,
          interpreting it as per RFC 5322

          A string without '@' is returned unchanged. If there is a '<', the
          text before it is returned with surrounding spaces and double
          quotes stripped and backslash-escaped double quotes unescaped.
          Otherwise the text before the '@' is returned with '.' replaced by
          spaces.

          >>> person(b'foo@bar')
          'foo'
          >>> person(b'Foo Bar <foo@bar>')
          'Foo Bar'
          >>> person(b'"Foo Bar" <foo@bar>')
          'Foo Bar'
          >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
          'Foo "buz" Bar'
          >>> # The following are invalid, but do exist in real-life
          ...
          >>> person(b'Foo "buz" Bar <foo@bar>')
          'Foo "buz" Bar'
          >>> person(b'"Foo Bar <foo@bar>')
          'Foo Bar'
          """
          if '@' not in author:
              return author

          bracket = author.find('<')
          if bracket == -1:
              # bare address: derive a name from the part before the '@'
              localpart = author[:author.find('@')]
              return localpart.replace('.', ' ')

          displayname = author[:bracket].strip(' "')
          return displayname.replace('\\"', '"')

        Connor Sheehan
    
templatefuncs: add mailmap template function...

              r37227
            
      @attr.s(hash=True)
      class mailmapping(object):
          '''Represents a username/email key or value in
          a mailmap file

          parsemailmap() uses instances of this class both as the keys and
          as the values of the dict it builds, so they need to be hashable.
          '''
          # email address of the entry; no default, so it must be supplied
          email = attr.ib()
          # name of the entry; stays None when only an email is given
          name = attr.ib(default=None)

        Connor Sheehan
    
stringutil: improve check for failed mailmap line parsing...

              r37263
            
      def _ismailmaplineinvalid(names, emails):
          '''Tell whether the names and emails parsed from a mailmap
          entry fail to form a usable mapping.

          An entry is usable if it has at least one email and, in addition,
          either a name or a second email.

          >>> # No names or emails fails
          >>> names, emails = [], []
          >>> _ismailmaplineinvalid(names, emails)
          True
          >>> # Only one email fails
          >>> emails = [b'email@email.com']
          >>> _ismailmaplineinvalid(names, emails)
          True
          >>> # One email and one name passes
          >>> names = [b'Test Name']
          >>> _ismailmaplineinvalid(names, emails)
          False
          >>> # No names but two emails passes
          >>> names = []
          >>> emails = [b'proper@email.com', b'commit@email.com']
          >>> _ismailmaplineinvalid(names, emails)
          False
          '''
          if not emails:
              # nothing to map from or to
              return True
          if names:
              return False
          # without a name, a second email is required
          return len(emails) < 2

        Connor Sheehan
    
templatefuncs: add mailmap template function...

              r37227
            
      def parsemailmap(mailmapcontent):
          """Build a lookup table from data in the .mailmap format

          Each usable line adds one entry to the returned dict. Its key is
          the mailmapping to look a commit up by: the last email on the line,
          plus the second name when the line has two names. Its value is the
          mailmapping to substitute: the first email on the line and the
          first name, if any. Comment lines and lines rejected by
          _ismailmaplineinvalid() are ignored, and ``None`` content gives an
          empty dict.

          >>> mmdata = b"\\n".join([
          ... b'# Comment',
          ... b'Name <commit1@email.xx>',
          ... b'<name@email.xx> <commit2@email.xx>',
          ... b'Name <proper@email.xx> <commit3@email.xx>',
          ... b'Name <proper@email.xx> Commit <commit4@email.xx>',
          ... ])
          >>> mm = parsemailmap(mmdata)
          >>> for key in sorted(mm.keys()):
          ...     print(key)
          mailmapping(email='commit1@email.xx', name=None)
          mailmapping(email='commit2@email.xx', name=None)
          mailmapping(email='commit3@email.xx', name=None)
          mailmapping(email='commit4@email.xx', name='Commit')
          >>> for val in sorted(mm.values()):
          ...     print(val)
          mailmapping(email='commit1@email.xx', name='Name')
          mailmapping(email='name@email.xx', name=None)
          mailmapping(email='proper@email.xx', name='Name')
          mailmapping(email='proper@email.xx', name='Name')
          """
          mailmap = {}
          if mailmapcontent is None:
              return mailmap

          for line in mailmapcontent.splitlines():
              # Whole-line comments carry no mapping
              if line.lstrip().startswith('#'):
                  continue

              # names and emails hold what has been parsed from this line;
              # pendingwords collects the words of the name being read
              names = []
              emails = []
              pendingwords = []

              for word in line.split():
                  if word.startswith('#'):
                      # A comment ends the entry
                      break

                  if not (word.startswith('<') and word.endswith('>')):
                      # Just another word of the current name
                      pendingwords.append(word)
                      continue

                  # An email: store it without the "<>" and close the name
                  # that was being read, if there was one
                  emails.append(word[1:-1])
                  if pendingwords:
                      names.append(' '.join(pendingwords))
                      pendingwords = []

                  # Stop at the second email, any other data does not fit
                  # the spec for .mailmap
                  if len(emails) > 1:
                      break

              # A usable line has at least one email, and either at least
              # one name or a second email
              if _ismailmaplineinvalid(names, emails):
                  continue

              commitname = names[-1] if len(names) == 2 else None
              propername = names[0] if names else None
              mailmapkey = mailmapping(email=emails[-1], name=commitname)
              mailmap[mailmapkey] = mailmapping(email=emails[0],
                                                name=propername)

          return mailmap

      def mapname(mailmap, author):
          """Rewrite an author field using a parsed mailmap.

          The mailmap is searched first for the author's name and email
          together and then for the email alone. Whatever the matching entry
          provides replaces the corresponding part of the author; parts it
          does not provide are kept. The author is returned unchanged when it
          is not well formed or when the mailmap is empty.

          >>> mmdata = b"\\n".join([
          ...     b'# Comment',
          ...     b'Name <commit1@email.xx>',
          ...     b'<name@email.xx> <commit2@email.xx>',
          ...     b'Name <proper@email.xx> <commit3@email.xx>',
          ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
          ... ])
          >>> m = parsemailmap(mmdata)
          >>> mapname(m, b'Commit <commit1@email.xx>')
          'Name <commit1@email.xx>'
          >>> mapname(m, b'Name <commit2@email.xx>')
          'Name <name@email.xx>'
          >>> mapname(m, b'Commit <commit3@email.xx>')
          'Name <proper@email.xx>'
          >>> mapname(m, b'Commit <commit4@email.xx>')
          'Name <proper@email.xx>'
          >>> mapname(m, b'Unknown Name <unknown@email.com>')
          'Unknown Name <unknown@email.com>'
          """
          # Nothing can be looked up for a malformed author field or in an
          # empty mailmap
          if not (isauthorwellformed(author) and mailmap):
              return author

          # Turn the user name into a mailmapping
          commit = mailmapping(name=person(author), email=email(author))

          try:
              # Most specific key first: commit name and email together
              proper = mailmap[commit]
          except KeyError:
              # Fall back to a key made of the email alone; an all-None
              # mapping stands in when that is unknown too
              emailonly = mailmapping(email=commit.email)
              proper = mailmap.get(emailonly, mailmapping(None, None))

          # Fields missing from the mapped entry come from the commit itself
          mappedname = proper.name or commit.name
          mappedemail = proper.email or commit.email
          return '%s <%s>' % (mappedname, mappedemail)

        Connor Sheehan
    
stringutil: add isauthorwellformed function...

              r37172
            
      # "Name <local@domain>": a name without '<', one whitespace character,
      # then an address in angle brackets that contains an '@'
      _correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

      def isauthorwellformed(author):
          '''Tell whether the author field has the expected
          "Contributor Name <contrib@email.dom>" shape

          >>> isauthorwellformed(b'Good Author <good@author.com>')
          True
          >>> isauthorwellformed(b'Author <good@author.com>')
          True
          >>> isauthorwellformed(b'Bad Author')
          False
          >>> isauthorwellformed(b'Bad Author <author@author.com')
          False
          >>> isauthorwellformed(b'Bad Author author@author.com')
          False
          >>> isauthorwellformed(b'<author@author.com>')
          False
          >>> isauthorwellformed(b'Bad Author <author>')
          False
          '''
          found = _correctauthorformat.match(author)
          return found is not None

        Yuya Nishihara
    
stringutil: move generic string helpers to new module...

              r37101
            
      def ellipsis(text, maxlength=400):
          """Trim string to at most maxlength (default: 400) columns in display.

          The trimming itself is done by encoding.trim(), which is asked to
          use '...' as the ellipsis marker.
          """
          return encoding.trim(text, maxlength, ellipsis='...')

      def escapestr(s):
          """Return ``s`` with backslash escapes applied by
          codecs.escape_encode()"""
          # s.encode('string_escape') is not available on Python 3, so use
          # the function behind it; it returns an (output, length) pair
          escaped, _length = codecs.escape_encode(s)
          return escaped

      def unescapestr(s):
          """Return ``s`` with its backslash escapes decoded by
          codecs.escape_decode()"""
          # escape_decode() returns an (output, length) pair
          unescaped, _length = codecs.escape_decode(s)
          return unescaped

      def forcebytestr(obj):
          """Portably format an arbitrary object (e.g. exception) into a byte
          string.

          pycompat.bytestr(obj) is tried first. If that raises
          UnicodeEncodeError, str(obj) is converted with
          encoding.strtolocal() instead, which may be lossy.
          """
          try:
              return pycompat.bytestr(obj)
          except UnicodeEncodeError:
              # non-ascii string, may be lossy
              return pycompat.bytestr(encoding.strtolocal(str(obj)))

      def uirepr(s):
          """Return the byterepr of ``s`` with doubled backslashes
          collapsed into single ones"""
          rendered = pycompat.byterepr(pycompat.bytestr(s))
          # Avoid double backslash in Windows path repr()
          return rendered.replace(b'\\\\', b'\\')

      # delay import of textwrap

      def _MBTextWrapper(**kwargs):
          """Create a display-width-aware textwrap.TextWrapper.

          The wrapper class is defined when this function runs. The
          module-level name ``_MBTextWrapper`` is then rebound to that class,
          so later calls instantiate the class directly instead of running
          this function again. ``kwargs`` are passed to the class constructor
          unchanged.
          """
          class tw(textwrap.TextWrapper):
              """
              Extend TextWrapper for width-awareness.

              Neither number of 'bytes' in any encoding nor 'characters' is
              appropriate to calculate terminal columns for specified string.

              Original TextWrapper implementation uses built-in 'len()' directly,
              so overriding is needed to use width information of each character.

              In addition, characters classified into 'ambiguous' width are
              treated as wide in East Asian area, but as narrow in other.

              This requires user decision to determine width of such characters.
              """
              def _cutdown(self, ucstr, space_left):
                  """Split ucstr into (head, tail) where head is the longest
                  prefix that fits in space_left display columns"""
                  l = 0
                  colwidth = encoding.ucolwidth
                  for i, ch in enumerate(ucstr):
                      l += colwidth(ch)
                      if space_left < l:
                          return (ucstr[:i], ucstr[i:])
                  return ucstr, ''

              # overriding of base class
              def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
                  # always leave room for at least one column
                  space_left = max(width - cur_len, 1)

                  if self.break_long_words:
                      # put as much of the word as fits on this line and leave
                      # the rest on the stack for the following lines
                      cut, res = self._cutdown(reversed_chunks[-1], space_left)
                      cur_line.append(cut)
                      reversed_chunks[-1] = res
                  elif not cur_line:
                      # the word may not be broken: give it a line of its own
                      cur_line.append(reversed_chunks.pop())

              # this overriding code is imported from TextWrapper of Python 2.6
              # to calculate columns of string by 'encoding.ucolwidth()'
              def _wrap_chunks(self, chunks):
                  colwidth = encoding.ucolwidth

                  lines = []
                  if self.width <= 0:
                      raise ValueError("invalid width %r (must be > 0)" % self.width)

                  # Arrange in reverse order so items can be efficiently popped
                  # from a stack of chunks.
                  chunks.reverse()

                  while chunks:

                      # Start the list of chunks that will make up the current line.
                      # cur_len is just the length of all the chunks in cur_line.
                      cur_line = []
                      cur_len = 0

                      # Figure out which static string will prefix this line.
                      if lines:
                          indent = self.subsequent_indent
                      else:
                          indent = self.initial_indent

                      # Maximum width for this line.
                      width = self.width - len(indent)

                      # First chunk on line is whitespace -- drop it, unless this
                      # is the very beginning of the text (i.e. no lines started yet).
                      if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                          del chunks[-1]

                      while chunks:
                          l = colwidth(chunks[-1])

                          # Can at least squeeze this chunk onto the current line.
                          if cur_len + l <= width:
                              cur_line.append(chunks.pop())
                              cur_len += l

                          # Nope, this line is full.
                          else:
                              break

                      # The current line is full, and the next chunk is too big to
                      # fit on *any* line (not just this one).
                      if chunks and colwidth(chunks[-1]) > width:
                          self._handle_long_word(chunks, cur_line, cur_len, width)

                      # If the last chunk on this line is all whitespace, drop it.
                      if (self.drop_whitespace and
                          cur_line and cur_line[-1].strip() == r''):
                          del cur_line[-1]

                      # Convert current line back to a string and store it in list
                      # of all lines (return value).
                      if cur_line:
                          lines.append(indent + r''.join(cur_line))

                  return lines

          # Replace this factory with the class it has just built.
          global _MBTextWrapper
          _MBTextWrapper = tw
          return tw(**kwargs)

      def wrap(line, width, initindent='', hangindent=''):
          """Wrap ``line`` to ``width`` display columns.

          ``initindent`` prefixes the first output line and ``hangindent``
          the following ones. All three strings are decoded using
          encoding.encoding and encoding.encodingmode, filled with
          _MBTextWrapper, and the result is encoded back with
          encoding.encoding.
          """
          widestindent = max(len(hangindent), len(initindent))
          if width <= widestindent:
              # adjust for weird terminal size
              width = max(78, widestindent + 1)

          codec = pycompat.sysstr(encoding.encoding)
          errors = pycompat.sysstr(encoding.encodingmode)
          line = line.decode(codec, errors)
          initindent = initindent.decode(codec, errors)
          hangindent = hangindent.decode(codec, errors)

          wrapper = _MBTextWrapper(width=width,
                                   initial_indent=initindent,
                                   subsequent_indent=hangindent)
          filled = wrapper.fill(line)
          return filled.encode(codec)

      # spellings accepted by parsebool(), keyed in lower case
      _booleans = {
          '1': True,
          'yes': True,
          'true': True,
          'on': True,
          'always': True,
          '0': False,
          'no': False,
          'false': False,
          'off': False,
          'never': False,
      }

      def parsebool(s):
          """Parse s into a boolean, ignoring case.

          If s is not a valid boolean, returns None.
          """
          return _booleans.get(s.lower())

        Gregory Szorc
    
wireproto: syntax for encoding CBOR into frames...

              r37306
            
        Yuya Nishihara
    
wireproto: convert python literal to object without using unsafe eval()...

              r37494
            
      def evalpythonliteral(s):
          """Evaluate a string containing a Python literal expression

          The evaluation is done by ast.literal_eval(). On Python 3 the input
          is decoded as latin1 first.
          """
          # We could backport our tokenizer hack to rewrite '' to u'' if we want
          source = s.decode('latin1') if pycompat.ispy3 else s
          return ast.literal_eval(source)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Yuya Nishihara stringutil: move generic string helpers to new module...	r37101	# stringutil.py - utility for generic string formatting, parsing, etc.
		#
		# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
		# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
		# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

		from __future__ import absolute_import

Yuya Nishihara wireproto: convert python literal to object without using unsafe eval()...	r37494	import ast
Yuya Nishihara stringutil: move generic string helpers to new module...	r37101	import codecs
		import re as remod
		import textwrap

		from ..i18n import _
Connor Sheehan templatefuncs: add mailmap template function...	r37227	from ..thirdparty import attr
Yuya Nishihara stringutil: move generic string helpers to new module...	r37101
		from .. import (
		encoding,
		error,
		pycompat,
		)

Yuya Nishihara stringutil: flip the default of pprint() to bprefix=False...	r37961	def pprint(o, bprefix=False):
Gregory Szorc stringutil: add function to pretty print an object...	r37316	"""Pretty print an object."""
Gregory Szorc stringutil: support more types with pprint()...	r37637	if isinstance(o, bytes):
Augie Fackler stringutil: make b prefixes on string output optional...	r37768	if bprefix:
		return "b'%s'" % escapestr(o)
		return "'%s'" % escapestr(o)
Gregory Szorc stringutil: support more types with pprint()...	r37637	elif isinstance(o, bytearray):
		# codecs.escape_encode() can't handle bytearray, so escapestr fails
		# without coercion.
		return "bytearray['%s']" % escapestr(bytes(o))
Gregory Szorc stringutil: add function to pretty print an object...	r37316	elif isinstance(o, list):
Augie Fackler stringutil: make b prefixes on string output optional...	r37768	return '[%s]' % (b', '.join(pprint(a, bprefix=bprefix) for a in o))
Gregory Szorc stringutil: add function to pretty print an object...	r37316	elif isinstance(o, dict):
		return '{%s}' % (b', '.join(
Augie Fackler stringutil: make b prefixes on string output optional...	r37768	'%s: %s' % (pprint(k, bprefix=bprefix),
		pprint(v, bprefix=bprefix))
		for k, v in sorted(o.items())))
Augie Fackler stringutil: teach pprint about tuples...	r37951	elif isinstance(o, tuple):
		return '(%s)' % (b', '.join(pprint(a, bprefix=bprefix) for a in o))
Gregory Szorc stringutil: add function to pretty print an object...	r37316	else:
Yuya Nishihara stringutil: make pprint() forward uninteresting object to b'%r'...	r37960	return pycompat.byterepr(o)
Gregory Szorc stringutil: add function to pretty print an object...	r37316
def prettyrepr(o):
    """Pretty print a representation of a possibly-nested object

    The byterepr of ``o`` is cut in front of every '<' (or in front of the
    ' field=' word leading to it, so that 'field=<...' stays on one line).
    Each resulting piece is put on its own line, indented in proportion to
    the number of '<' still unclosed at the point where the piece starts.
    """
    text = pycompat.byterepr(o)
    end = len(text)
    pieces = []
    # start: where the current piece begins
    # scanned: position of the '<' that opened the current piece
    start = scanned = 0
    while start < end:
        # look for the next '<' after the one already consumed
        nextopen = text.find('<', scanned + 1)
        cut = -1
        if nextopen < 0:
            nextopen = end
        elif nextopen > scanned + 1 and text.startswith('=', nextopen - 1):
            # backtrack for ' field=<'
            cut = text.rfind(' ', scanned + 1, nextopen - 1)
        if cut < 0:
            # no ' field=' to keep attached: cut right at the '<'
            cut = nextopen
        else:
            cut += 1  # skip ' '
        depth = text.count('<', 0, start) - text.count('>', 0, start)
        assert depth >= 0
        pieces.append((depth, text[start:cut].rstrip()))
        start, scanned = cut, nextopen
    return '\n'.join(' ' * depth + chunk for depth, chunk in pieces)

def binary(s):
    """Return True if ``s`` is non-empty and contains a NUL character.

    Empty strings and None are never reported as binary.
    """
    if not s:
        return False
    return '\0' in s

		def stringmatcher(pattern, casesensitive=True):
		    """
		    build a matcher from a string with an optional 're:' or 'literal:'
		    prefix.

		    returns a (kind, pattern, matcher) tuple: kind is 're' or 'literal',
		    pattern is the input with the recognized prefix removed, and matcher
		    is a callable taking the string to test. a pattern without a known
		    prefix is matched literally. an invalid regular expression raises
		    error.ParseError.

		    helper for tests:
		    >>> def test(pattern, *tests):
		    ...     kind, pattern, matcher = stringmatcher(pattern)
		    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
		    >>> def itest(pattern, *tests):
		    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
		    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

		    exact matching (no prefix):
		    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
		    ('literal', 'abcdefg', [False, False, True])

		    regex matching ('re:' prefix)
		    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
		    ('re', 'a.+b', [False, False, True])

		    force exact matches ('literal:' prefix)
		    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
		    ('literal', 're:foobar', [False, True])

		    unknown prefixes are ignored and treated as literals
		    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
		    ('literal', 'foo:bar', [False, False, True])

		    case insensitive regex matches
		    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
		    ('re', 'A.+b', [False, False, True])

		    case insensitive literal matches
		    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
		    ('literal', 'ABCDEFG', [False, False, True])
		    """
		    if pattern.startswith('re:'):
		        regexsrc = pattern[3:]
		        flags = 0 if casesensitive else remod.I
		        try:
		            compiled = remod.compile(regexsrc, flags)
		        except remod.error as e:
		            raise error.ParseError(_('invalid regular expression: %s')
		                                   % e)
		        return 're', regexsrc, compiled.search

		    if pattern.startswith('literal:'):
		        pattern = pattern[8:]

		    if casesensitive:
		        return 'literal', pattern, pattern.__eq__

		    # compare the case-folded forms of both sides
		    folded = encoding.lower(pattern)
		    return 'literal', pattern, lambda s: folded == encoding.lower(s)

		def shortuser(user):
		    """Return a short representation of a user name or email address.

		    Successively keeps: the text before the first '@', the text after
		    the first '<', the text before the first ' ', and finally the text
		    before the first '.'.
		    """
		    user = user.split('@', 1)[0]
		    if '<' in user:
		        user = user.split('<', 1)[1]
		    user = user.split(' ', 1)[0]
		    user = user.split('.', 1)[0]
		    return user

		def emailuser(user):
		    """Return the user portion of an email address.

		    That is the text before the first '@', with anything up to and
		    including a '<' stripped from its front.
		    """
		    localpart = user.split('@', 1)[0]
		    if '<' in localpart:
		        localpart = localpart.split('<', 1)[1]
		    return localpart

		def email(author):
		    '''Return the email part of an author string.

		    This is the text following the first '<' (or the start of the string
		    when there is none) up to the first '>' (or the end of the string when
		    there is none).
		    '''
		    # find() yields -1 when '<' is absent, so this starts at offset 0
		    start = author.find('<') + 1
		    end = author.find('>')
		    if end < 0:
		        return author[start:]
		    return author[start:end]

def person(author):
    """Returns the name before an email address,
    interpreting it as per RFC 5322

    A string without '@' is returned unchanged. When a '<' is present, the
    text before it is used, with surrounding spaces and double quotes
    removed and backslash-escaped quotes unescaped. Otherwise the text
    before '@' is used, with dots turned into spaces.

    >>> person(b'foo@bar')
    'foo'
    >>> person(b'Foo Bar <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo Bar" <foo@bar>')
    'Foo Bar'
    >>> person(b'"Foo \"buz\" Bar" <foo@bar>')
    'Foo "buz" Bar'
    >>> # The following are invalid, but do exist in real-life
    ...
    >>> person(b'Foo "buz" Bar <foo@bar>')
    'Foo "buz" Bar'
    >>> person(b'"Foo Bar <foo@bar>')
    'Foo Bar'
    """
    if '@' not in author:
        return author
    if '<' in author:
        displayname = author.split('<', 1)[0]
        return displayname.strip(' "').replace('\\"', '"')
    return author.split('@', 1)[0].replace('.', ' ')

@attr.s(hash=True)
class mailmapping(object):
    """A username/email pair used as a key or a value of a parsed mailmap.

    ``email`` must always be supplied; ``name`` defaults to None.
    """
    email = attr.ib()
    name = attr.ib(default=None)

Connor Sheehan stringutil: improve check for failed mailmap line parsing...	r37263	def _ismailmaplineinvalid(names, emails):
		'''Returns True if the parsed names and emails
		in a mailmap entry are invalid.

		>>> # No names or emails fails
		>>> names, emails = [], []
		>>> _ismailmaplineinvalid(names, emails)
		True
		>>> # Only one email fails
		>>> emails = [b'email@email.com']
		>>> _ismailmaplineinvalid(names, emails)
		True
		>>> # One email and one name passes
		>>> names = [b'Test Name']
		>>> _ismailmaplineinvalid(names, emails)
		False
		>>> # No names but two emails passes
		>>> names = []
		>>> emails = [b'proper@email.com', b'commit@email.com']
		>>> _ismailmaplineinvalid(names, emails)
		False
		'''
		return not emails or not names and len(emails) < 2

def parsemailmap(mailmapcontent):
    """Parses data in the .mailmap format

    Returns a dict mapping the commit identity of each entry (a mailmapping)
    to its proper identity (also a mailmapping). None input yields an empty
    dict; comment lines and lines rejected by _ismailmaplineinvalid() are
    skipped.

    >>> mmdata = b"\\n".join([
    ...     b'# Comment',
    ...     b'Name <commit1@email.xx>',
    ...     b'<name@email.xx> <commit2@email.xx>',
    ...     b'Name <proper@email.xx> <commit3@email.xx>',
    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
    ... ])
    >>> mm = parsemailmap(mmdata)
    >>> for key in sorted(mm.keys()):
    ...     print(key)
    mailmapping(email='commit1@email.xx', name=None)
    mailmapping(email='commit2@email.xx', name=None)
    mailmapping(email='commit3@email.xx', name=None)
    mailmapping(email='commit4@email.xx', name='Commit')
    >>> for val in sorted(mm.values()):
    ...     print(val)
    mailmapping(email='commit1@email.xx', name='Name')
    mailmapping(email='name@email.xx', name=None)
    mailmapping(email='proper@email.xx', name='Name')
    mailmapping(email='proper@email.xx', name='Name')
    """
    mailmap = {}
    if mailmapcontent is None:
        return mailmap

    for line in mailmapcontent.splitlines():
        # A line that is entirely a comment carries no mapping
        if line.lstrip().startswith('#'):
            continue

        # names/emails collect what was parsed from this line;
        # pendingwords accumulates the words of the name being read
        names = []
        emails = []
        pendingwords = []

        for token in line.split():
            if token.startswith('#'):
                # Trailing comment: the rest of the line is ignored
                break

            if not (token.startswith('<') and token.endswith('>')):
                # Just another word of the current name
                pendingwords.append(token)
                continue

            # An email closes whatever name was being accumulated
            emails.append(token[1:-1])  # drop the surrounding "<>"
            if pendingwords:
                names.append(' '.join(pendingwords))
                pendingwords = []

            # Nothing after a second email fits the .mailmap format
            if len(emails) > 1:
                break

        # At least one email is required, along with either a name
        # or a second email
        if _ismailmaplineinvalid(names, emails):
            continue

        # The commit name only exists when two names were given
        commitname = names[-1] if len(names) == 2 else None
        propername = names[0] if names else None
        key = mailmapping(email=emails[-1], name=commitname)
        mailmap[key] = mailmapping(email=emails[0], name=propername)

    return mailmap

		def mapname(mailmap, author):
		    """Returns the author field according to the mailmap cache, or
		    the original author field.

		    The lookup is first attempted with both the commit name and email,
		    then with the email alone. Fields missing from the matched entry are
		    taken from the original author.

		    >>> mmdata = b"\\n".join([
		    ...     b'# Comment',
		    ...     b'Name <commit1@email.xx>',
		    ...     b'<name@email.xx> <commit2@email.xx>',
		    ...     b'Name <proper@email.xx> <commit3@email.xx>',
		    ...     b'Name <proper@email.xx> Commit <commit4@email.xx>',
		    ... ])
		    >>> m = parsemailmap(mmdata)
		    >>> mapname(m, b'Commit <commit1@email.xx>')
		    'Name <commit1@email.xx>'
		    >>> mapname(m, b'Name <commit2@email.xx>')
		    'Name <name@email.xx>'
		    >>> mapname(m, b'Commit <commit3@email.xx>')
		    'Name <proper@email.xx>'
		    >>> mapname(m, b'Commit <commit4@email.xx>')
		    'Name <proper@email.xx>'
		    >>> mapname(m, b'Unknown Name <unknown@email.com>')
		    'Unknown Name <unknown@email.com>'
		    """
		    # A malformed author field or an empty mailmap leaves nothing to map
		    if not (isauthorwellformed(author) and mailmap):
		        return author

		    # Express the author as a mailmapping so it can serve as a key
		    commit = mailmapping(name=person(author), email=email(author))

		    try:
		        # Most specific key first: commit name and email together
		        proper = mailmap[commit]
		    except KeyError:
		        # Fall back to an email-only key; a separate object is built so
		        # that the original commit fields stay available below
		        emailonly = mailmapping(email=commit.email)
		        proper = mailmap.get(emailonly, mailmapping(None, None))

		    # Fill in from the commit whatever the mapping does not provide
		    name = proper.name or commit.name
		    address = proper.email or commit.email
		    return '%s <%s>' % (name, address)

# "Some Name <local@domain>": a non-empty name free of '<', one whitespace
# character, then a bracketed address containing '@'
_correctauthorformat = remod.compile(br'^[^<]+\s\<[^<>]+@[^<>]+\>$')

def isauthorwellformed(author):
    '''Return True if the author field is well formed
    (ie "Contributor Name <contrib@email.dom>")

    >>> isauthorwellformed(b'Good Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Author <good@author.com>')
    True
    >>> isauthorwellformed(b'Bad Author')
    False
    >>> isauthorwellformed(b'Bad Author <author@author.com')
    False
    >>> isauthorwellformed(b'Bad Author author@author.com')
    False
    >>> isauthorwellformed(b'<author@author.com>')
    False
    >>> isauthorwellformed(b'Bad Author <author>')
    False
    '''
    matched = _correctauthorformat.match(author)
    return matched is not None

def ellipsis(text, maxlength=400):
    """Trim ``text`` to at most ``maxlength`` display columns (400 by default).

    The work is delegated to encoding.trim() with '...' as the ellipsis.
    """
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed

		def escapestr(s):
		    """Return ``s`` escaped by codecs.escape_encode()."""
		    # s.encode('string_escape') is not portable to Python 3, so the
		    # function backing that codec is called directly; it returns a
		    # (result, length) pair of which only the result is wanted
		    escaped, _consumed = codecs.escape_encode(s)
		    return escaped

		def unescapestr(s):
		    """Return ``s`` unescaped by codecs.escape_decode()."""
		    # escape_decode() returns a (result, length) pair
		    decoded, _consumed = codecs.escape_decode(s)
		    return decoded

		def forcebytestr(obj):
		    """Portably format an arbitrary object (e.g. exception) into a byte
		    string.

		    pycompat.bytestr() is tried first; on UnicodeEncodeError the object is
		    converted with str() and encoding.strtolocal() instead.
		    """
		    try:
		        result = pycompat.bytestr(obj)
		    except UnicodeEncodeError:
		        # non-ascii string, may be lossy
		        result = pycompat.bytestr(encoding.strtolocal(str(obj)))
		    return result

		def uirepr(s):
		    """Return the byterepr of ``s`` with doubled backslashes collapsed."""
		    quoted = pycompat.byterepr(pycompat.bytestr(s))
		    # Avoid double backslash in Windows path repr()
		    return quoted.replace(b'\\\\', b'\\')

		# The wrapper class is only built on the first call; that call then
		# rebinds the module-level name to the class itself, so later calls
		# instantiate it directly.
		def _MBTextWrapper(**kwargs):
		    class tw(textwrap.TextWrapper):
		        """
		        TextWrapper variant that measures text in terminal columns.

		        Neither the number of 'bytes' in any encoding nor the number of
		        'characters' is appropriate to calculate the terminal columns a
		        string occupies.

		        The original TextWrapper implementation uses the built-in 'len()'
		        directly, so overriding is needed to use the width information of
		        each character.

		        In addition, characters classified as 'ambiguous' width are
		        treated as wide in East Asian areas, but as narrow elsewhere.

		        This requires a user decision to determine the width of such
		        characters.
		        """
		        def _cutdown(self, ucstr, space_left):
		            # Split ucstr in front of the first character that would push
		            # the accumulated column width beyond space_left
		            used = 0
		            colwidth = encoding.ucolwidth
		            for pos, char in enumerate(ucstr):
		                used += colwidth(char)
		                if used > space_left:
		                    return (ucstr[:pos], ucstr[pos:])
		            return ucstr, ''

		        # overriding of base class
		        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
		            space_left = max(width - cur_len, 1)

		            if self.break_long_words:
		                head, tail = self._cutdown(reversed_chunks[-1], space_left)
		                cur_line.append(head)
		                reversed_chunks[-1] = tail
		            elif not cur_line:
		                cur_line.append(reversed_chunks.pop())

		        # this overriding code is imported from TextWrapper of Python 2.6
		        # to calculate columns of string by 'encoding.ucolwidth()'
		        def _wrap_chunks(self, chunks):
		            colwidth = encoding.ucolwidth

		            lines = []
		            if self.width <= 0:
		                raise ValueError("invalid width %r (must be > 0)" % self.width)

		            # Reverse the list so that chunks can be popped off the end
		            # cheaply, treating it as a stack.
		            chunks.reverse()

		            while chunks:

		                # cur_line gathers the chunks of the line being built and
		                # cur_len is their combined column width.
		                cur_line = []
		                cur_len = 0

		                # Only the very first line gets the initial indent.
		                if not lines:
		                    indent = self.initial_indent
		                else:
		                    indent = self.subsequent_indent

		                # Columns left for text once the indent is accounted for.
		                width = self.width - len(indent)

		                # A whitespace chunk at the start of a line is dropped,
		                # except at the very beginning of the text.
		                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
		                    del chunks[-1]

		                while chunks:
		                    chunkwidth = colwidth(chunks[-1])

		                    # Stop as soon as the next chunk no longer fits.
		                    if cur_len + chunkwidth > width:
		                        break

		                    cur_line.append(chunks.pop())
		                    cur_len += chunkwidth

		                # The next chunk is too wide for any line, not just
		                # this one.
		                if chunks and colwidth(chunks[-1]) > width:
		                    self._handle_long_word(chunks, cur_line, cur_len, width)

		                # Drop a trailing chunk made only of whitespace.
		                if (self.drop_whitespace and
		                    cur_line and cur_line[-1].strip() == r''):
		                    del cur_line[-1]

		                # Join the chunks into the finished line and record it.
		                if cur_line:
		                    lines.append(indent + r''.join(cur_line))

		            return lines

		    global _MBTextWrapper
		    _MBTextWrapper = tw
		    return tw(**kwargs)

		def wrap(line, width, initindent='', hangindent=''):
		    """Wrap ``line`` to ``width`` columns using _MBTextWrapper.

		    ``initindent`` prefixes the first output line and ``hangindent`` the
		    following ones. All three strings are decoded with encoding.encoding
		    and encoding.encodingmode, and the wrapped text is encoded back with
		    encoding.encoding before being returned.
		    """
		    maxindent = max(len(hangindent), len(initindent))
		    if width <= maxindent:
		        # adjust for weird terminal size
		        width = max(78, maxindent + 1)

		    codec = pycompat.sysstr(encoding.encoding)
		    errors = pycompat.sysstr(encoding.encodingmode)
		    uline = line.decode(codec, errors)
		    uinit = initindent.decode(codec, errors)
		    uhang = hangindent.decode(codec, errors)

		    wrapper = _MBTextWrapper(width=width,
		                             initial_indent=uinit,
		                             subsequent_indent=uhang)
		    return wrapper.fill(uline).encode(codec)

		# Lower-case spellings recognized by parsebool()
		_booleans = {
		    '1': True,
		    'yes': True,
		    'true': True,
		    'on': True,
		    'always': True,
		    '0': False,
		    'no': False,
		    'false': False,
		    'off': False,
		    'never': False,
		}

		def parsebool(s):
		    """Parse s into a boolean.

		    The comparison is case-insensitive. If s is not a valid boolean,
		    returns None.
		    """
		    return _booleans.get(s.lower())
Gregory Szorc wireproto: syntax for encoding CBOR into frames...	r37306
def evalpythonliteral(s):
    """Evaluate a string containing a Python literal expression

    The evaluation is done by ast.literal_eval(); on Python 3 the input is
    first decoded as latin1.
    """
    # We could backport our tokenizer hack to rewrite '' to u'' if we want
    source = s.decode('latin1') if pycompat.ispy3 else s
    return ast.literal_eval(source)