upstream/mercurial-mirror Files · contrib/testparseutil.py

tests: use items() in test-remotefilelog-datapack.py...

tests: use items() in test-remotefilelog-datapack.py Performance doesn't matter in tests. iteritems() doesn't exist in Python 3. Differential Revision: https://phab.mercurial-scm.org/D5757

FUJIWARA Katsunori - - Load All Authors

File last commit:

r40129:726cfc47 default


                r41613:6309ce86

default

Download file

             testparseutil.py
        
                    630 lines
            
             | 19.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / contrib / testparseutil.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        FUJIWARA Katsunori
    
contrib: add an utility module to parse test scripts...

              r40129
            
      # testparseutil.py - utilities to parse test script for check tools

      #

      #  Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others

      #

      # This software may be used and distributed according to the terms of the

      # GNU General Public License version 2 or any later version.

      from __future__ import absolute_import, print_function

      import abc

      import re

      import sys

      ####################

      # for Python3 compatibility (almost comes from mercurial/pycompat.py)

      ispy3 = (sys.version_info[0] >= 3)

      def identity(a):

          return a

      def _rapply(f, xs):

          if xs is None:

              # assume None means non-value of optional data

              return xs

          if isinstance(xs, (list, set, tuple)):

              return type(xs)(_rapply(f, x) for x in xs)

          if isinstance(xs, dict):

              return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())

          return f(xs)

      def rapply(f, xs):

          if f is identity:

              # fast path mainly for py2

              return xs

          return _rapply(f, xs)

      if ispy3:

          import builtins

          # TODO: .buffer might not exist if std streams were replaced; we'll need

          # a silly wrapper to make a bytes stream backed by a unicode one.

          stdin = sys.stdin.buffer

          stdout = sys.stdout.buffer

          stderr = sys.stderr.buffer

          def bytestr(s):

              # tiny version of pycompat.bytestr

              return s.encode('latin1')

          def sysstr(s):

              if isinstance(s, builtins.str):

                  return s

              return s.decode(u'latin-1')

          def opentext(f):

              return open(f, 'rb')

      else:

          stdin = sys.stdin

          stdout = sys.stdout

          stderr = sys.stderr

          bytestr = str

          sysstr = identity

          opentext = open

      def b2s(x):

          # convert BYTES elements in "x" to SYSSTR recursively

          return rapply(sysstr, x)

      def writeout(data):

          # write "data" in BYTES into stdout

          stdout.write(data)

      def writeerr(data):

          # write "data" in BYTES into stderr

          stderr.write(data)

      ####################

      class embeddedmatcher(object):

          """Base class to detect embedded code fragments in *.t test script

          """

          __metaclass__ = abc.ABCMeta

          def __init__(self, desc):

              self.desc = desc

          @abc.abstractmethod

          def startsat(self, line):

              """Examine whether embedded code starts at line

              This can return arbitrary object, and it is used as 'ctx' for

              subsequent method invocations.

              """

          @abc.abstractmethod

          def endsat(self, ctx, line):

              """Examine whether embedded code ends at line"""

          @abc.abstractmethod

          def isinside(self, ctx, line):

              """Examine whether line is inside embedded code, if not yet endsat

              """

          @abc.abstractmethod

          def ignores(self, ctx):

              """Examine whether detected embedded code should be ignored"""

          @abc.abstractmethod

          def filename(self, ctx):

              """Return filename of embedded code

              If filename isn't specified for embedded code explicitly, this

              returns None.

              """

          @abc.abstractmethod

          def codeatstart(self, ctx, line):

              """Return actual code at the start line of embedded code

              This might return None, if the start line doesn't contain

              actual code.

              """

          @abc.abstractmethod

          def codeatend(self, ctx, line):

              """Return actual code at the end line of embedded code

              This might return None, if the end line doesn't contain actual

              code.

              """

          @abc.abstractmethod

          def codeinside(self, ctx, line):

              """Return actual code at line inside embedded code"""

      def embedded(basefile, lines, errors, matchers):

          """pick embedded code fragments up from given lines

          This is common parsing logic, which examines specified matchers on

          given lines.

          :basefile: a name of a file, from which lines to be parsed come.

          :lines: to be parsed (might be a value returned by "open(basefile)")

          :errors: an array, into which messages for detected error are stored

          :matchers: an array of embeddedmatcher objects

          This function yields '(filename, starts, ends, code)' tuple.

          :filename: a name of embedded code, if it is explicitly specified

                     (e.g.  "foobar" of "cat >> foobar <<EOF").

                     Otherwise, this is None

          :starts: line number (1-origin), at which embedded code starts (inclusive)

          :ends: line number (1-origin), at which embedded code ends (exclusive)

          :code: extracted embedded code, which is single-stringified

          >>> class ambigmatcher(object):

          ...     # mock matcher class to examine implementation of

          ...     # "ambiguous matching" corner case

          ...     def __init__(self, desc, matchfunc):

          ...         self.desc = desc

          ...         self.matchfunc = matchfunc

          ...     def startsat(self, line):

          ...         return self.matchfunc(line)

          >>> ambig1 = ambigmatcher(b'ambiguous #1',

          ...                       lambda l: l.startswith(b'  $ cat '))

          >>> ambig2 = ambigmatcher(b'ambiguous #2',

          ...                       lambda l: l.endswith(b'<< EOF\\n'))

          >>> lines = [b'  $ cat > foo.py << EOF\\n']

          >>> errors = []

          >>> matchers = [ambig1, ambig2]

          >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))

          []

          >>> b2s(errors)

          ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

          """

          matcher = None

          ctx = filename = code = startline = None # for pyflakes

          for lineno, line in enumerate(lines, 1):

              if not line.endswith(b'\n'):

                  line += b'\n' # to normalize EOF line

              if matcher: # now, inside embedded code

                  if matcher.endsat(ctx, line):

                      codeatend = matcher.codeatend(ctx, line)

                      if codeatend is not None:

                          code.append(codeatend)

                      if not matcher.ignores(ctx):

                          yield (filename, startline, lineno, b''.join(code))

                      matcher = None

                      # DO NOT "continue", because line might start next fragment

                  elif not matcher.isinside(ctx, line):

                      # this is an error of basefile

                      # (if matchers are implemented correctly)

                      errors.append(b'%s:%d: unexpected line for "%s"'

                                    % (basefile, lineno, matcher.desc))

                      # stop extracting embedded code by current 'matcher',

                      # because appearance of unexpected line might mean

                      # that expected end-of-embedded-code line might never

                      # appear

                      matcher = None

                      # DO NOT "continue", because line might start next fragment

                  else:

                      code.append(matcher.codeinside(ctx, line))

                      continue

              # examine whether current line starts embedded code or not

              assert not matcher

              matched = []

              for m in matchers:

                  ctx = m.startsat(line)

                  if ctx:

                      matched.append((m, ctx))

              if matched:

                  if len(matched) > 1:

                      # this is an error of matchers, maybe

                      errors.append(b'%s:%d: ambiguous line for %s' %

                                    (basefile, lineno,

                                     b', '.join([b'"%s"' % m.desc

                                                 for m, c in matched])))

                      # omit extracting embedded code, because choosing

                      # arbitrary matcher from matched ones might fail to

                      # detect the end of embedded code as expected.

                      continue

                  matcher, ctx = matched[0]

                  filename = matcher.filename(ctx)

                  code = []

                  codeatstart = matcher.codeatstart(ctx, line)

                  if codeatstart is not None:

                      code.append(codeatstart)

                      startline = lineno

                  else:

                      startline = lineno + 1

          if matcher:

              # examine whether EOF ends embedded code, because embedded

              # code isn't yet ended explicitly

              if matcher.endsat(ctx, b'\n'):

                  codeatend = matcher.codeatend(ctx, b'\n')

                  if codeatend is not None:

                      code.append(codeatend)

                  if not matcher.ignores(ctx):

                      yield (filename, startline, lineno + 1, b''.join(code))

              else:

                  # this is an error of basefile

                  # (if matchers are implemented correctly)

                  errors.append(b'%s:%d: unexpected end of file for "%s"'

                                % (basefile, lineno, matcher.desc))

      # heredoc limit mark to ignore embedded code at check-code.py or so

      heredocignorelimit = b'NO_CHECK_EOF'

      # the pattern to match against cases below, and to return a limit mark

      # string as 'lname' group

      #

      # - << LIMITMARK

      # - << "LIMITMARK"

      # - << 'LIMITMARK'

      heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'

      class fileheredocmatcher(embeddedmatcher):

          """Detect "cat > FILE << LIMIT" style embedded code

          >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py')

          >>> b2s(matcher.startsat(b'  $ cat > file.py << EOF\\n'))

          ('file.py', '  > EOF\\n')

          >>> b2s(matcher.startsat(b'  $ cat   >>file.py   <<EOF\\n'))

          ('file.py', '  > EOF\\n')

          >>> b2s(matcher.startsat(b'  $ cat>  \\x27any file.py\\x27<<  "EOF"\\n'))

          ('any file.py', '  > EOF\\n')

          >>> b2s(matcher.startsat(b"  $ cat > file.py << 'ANYLIMIT'\\n"))

          ('file.py', '  > ANYLIMIT\\n')

          >>> b2s(matcher.startsat(b'  $ cat<<ANYLIMIT>"file.py"\\n'))

          ('file.py', '  > ANYLIMIT\\n')

          >>> start = b'  $ cat > file.py << EOF\\n'

          >>> ctx = matcher.startsat(start)

          >>> matcher.codeatstart(ctx, start)

          >>> b2s(matcher.filename(ctx))

          'file.py'

          >>> matcher.ignores(ctx)

          False

          >>> inside = b'  > foo = 1\\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          'foo = 1\\n'

          >>> end = b'  > EOF\\n'

          >>> matcher.endsat(ctx, end)

          True

          >>> matcher.codeatend(ctx, end)

          >>> matcher.endsat(ctx, b'  > EOFEOF\\n')

          False

          >>> ctx = matcher.startsat(b'  $ cat > file.py << NO_CHECK_EOF\\n')

          >>> matcher.ignores(ctx)

          True

          """

          _prefix = b'  > '

          def __init__(self, desc, namepat):

              super(fileheredocmatcher, self).__init__(desc)

              # build the pattern to match against cases below (and ">>"

              # variants), and to return a target filename string as 'name'

              # group

              #

              # - > NAMEPAT

              # - > "NAMEPAT"

              # - > 'NAMEPAT'

              namepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'

                         % namepat)

              self._fileres = [

                  # "cat > NAME << LIMIT" case

                  re.compile(br'  \$ \s*cat' + namepat + heredoclimitpat),

                  # "cat << LIMIT > NAME" case

                  re.compile(br'  \$ \s*cat' + heredoclimitpat + namepat),

              ]

          def startsat(self, line):

              # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple

              for filere in self._fileres:

                  matched = filere.match(line)

                  if matched:

                      return (matched.group('name'),

                              b'  > %s\n' % matched.group('limit'))

          def endsat(self, ctx, line):

              return ctx[1] == line

          def isinside(self, ctx, line):

              return line.startswith(self._prefix)

          def ignores(self, ctx):

              return b'  > %s\n' % heredocignorelimit == ctx[1]

          def filename(self, ctx):

              return ctx[0]

          def codeatstart(self, ctx, line):

              return None # no embedded code at start line

          def codeatend(self, ctx, line):

              return None # no embedded code at end line

          def codeinside(self, ctx, line):

              return line[len(self._prefix):] # strip prefix

      ####

      # for embedded python script

      class pydoctestmatcher(embeddedmatcher):

          """Detect ">>> code" style embedded python code

          >>> matcher = pydoctestmatcher()

          >>> startline = b'  >>> foo = 1\\n'

          >>> matcher.startsat(startline)

          True

          >>> matcher.startsat(b'  ... foo = 1\\n')

          False

          >>> ctx = matcher.startsat(startline)

          >>> matcher.filename(ctx)

          >>> matcher.ignores(ctx)

          False

          >>> b2s(matcher.codeatstart(ctx, startline))

          'foo = 1\\n'

          >>> inside = b'  >>> foo = 1\\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          'foo = 1\\n'

          >>> inside = b'  ... foo = 1\\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          'foo = 1\\n'

          >>> inside = b'  expected output\\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          '\\n'

          >>> inside = b'  \\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          '\\n'

          >>> end = b'  $ foo bar\\n'

          >>> matcher.endsat(ctx, end)

          True

          >>> matcher.codeatend(ctx, end)

          >>> end = b'\\n'

          >>> matcher.endsat(ctx, end)

          True

          >>> matcher.codeatend(ctx, end)

          """

          _prefix = b'  >>> '

          _prefixre = re.compile(br'  (>>>|\.\.\.) ')

          # If a line matches against not _prefixre but _outputre, that line

          # is "an expected output line" (= not a part of code fragment).

          #

          # Strictly speaking, a line matching against "(#if|#else|#endif)"

          # is also treated similarly in "inline python code" semantics by

          # run-tests.py. But "directive line inside inline python code"

          # should be rejected by Mercurial reviewers. Therefore, this

          # regexp does not matche against such directive lines.

          _outputre = re.compile(br'  $|  [^$]')

          def __init__(self):

              super(pydoctestmatcher, self).__init__(b"doctest style python code")

          def startsat(self, line):

              # ctx is "True"

              return line.startswith(self._prefix)

          def endsat(self, ctx, line):

              return not (self._prefixre.match(line) or self._outputre.match(line))

          def isinside(self, ctx, line):

              return True # always true, if not yet ended

          def ignores(self, ctx):

              return False # should be checked always

          def filename(self, ctx):

              return None # no filename

          def codeatstart(self, ctx, line):

              return line[len(self._prefix):] # strip prefix '  >>> '/'  ... '

          def codeatend(self, ctx, line):

              return None # no embedded code at end line

          def codeinside(self, ctx, line):

              if self._prefixre.match(line):

                  return line[len(self._prefix):] # strip prefix '  >>> '/'  ... '

              return b'\n' # an expected output line is treated as an empty line

      class pyheredocmatcher(embeddedmatcher):

          """Detect "python << LIMIT" style embedded python code

          >>> matcher = pyheredocmatcher()

          >>> b2s(matcher.startsat(b'  $ python << EOF\\n'))

          '  > EOF\\n'

          >>> b2s(matcher.startsat(b'  $ $PYTHON   <<EOF\\n'))

          '  > EOF\\n'

          >>> b2s(matcher.startsat(b'  $ "$PYTHON"<<  "EOF"\\n'))

          '  > EOF\\n'

          >>> b2s(matcher.startsat(b"  $ $PYTHON << 'ANYLIMIT'\\n"))

          '  > ANYLIMIT\\n'

          >>> matcher.startsat(b'  $ "$PYTHON" < EOF\\n')

          >>> start = b'  $ python << EOF\\n'

          >>> ctx = matcher.startsat(start)

          >>> matcher.codeatstart(ctx, start)

          >>> matcher.filename(ctx)

          >>> matcher.ignores(ctx)

          False

          >>> inside = b'  > foo = 1\\n'

          >>> matcher.endsat(ctx, inside)

          False

          >>> matcher.isinside(ctx, inside)

          True

          >>> b2s(matcher.codeinside(ctx, inside))

          'foo = 1\\n'

          >>> end = b'  > EOF\\n'

          >>> matcher.endsat(ctx, end)

          True

          >>> matcher.codeatend(ctx, end)

          >>> matcher.endsat(ctx, b'  > EOFEOF\\n')

          False

          >>> ctx = matcher.startsat(b'  $ python << NO_CHECK_EOF\\n')

          >>> matcher.ignores(ctx)

          True

          """

          _prefix = b'  > '

          _startre = re.compile(br'  \$ (\$PYTHON|"\$PYTHON"|python).*' +

                                heredoclimitpat)

          def __init__(self):

              super(pyheredocmatcher, self).__init__(b"heredoc python invocation")

          def startsat(self, line):

              # ctx is END-LINE-OF-EMBEDDED-CODE

              matched = self._startre.match(line)

              if matched:

                  return b'  > %s\n' % matched.group('limit')

          def endsat(self, ctx, line):

              return ctx == line

          def isinside(self, ctx, line):

              return line.startswith(self._prefix)

          def ignores(self, ctx):

              return b'  > %s\n' % heredocignorelimit == ctx

          def filename(self, ctx):

              return None # no filename

          def codeatstart(self, ctx, line):

              return None # no embedded code at start line

          def codeatend(self, ctx, line):

              return None # no embedded code at end line

          def codeinside(self, ctx, line):

              return line[len(self._prefix):] # strip prefix

      _pymatchers = [

          pydoctestmatcher(),

          pyheredocmatcher(),

          # use '[^<]+' instead of '\S+', in order to match against

          # paths including whitespaces

          fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),

      ]

      def pyembedded(basefile, lines, errors):

          return embedded(basefile, lines, errors, _pymatchers)

      ####

      # for embedded shell script

      _shmatchers = [

          # use '[^<]+' instead of '\S+', in order to match against

          # paths including whitespaces

          fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),

      ]

      def shembedded(basefile, lines, errors):

          return embedded(basefile, lines, errors, _shmatchers)

      ####

      # for embedded hgrc configuration

      _hgrcmatchers = [

          # use '[^<]+' instead of '\S+', in order to match against

          # paths including whitespaces

          fileheredocmatcher(b'heredoc hgrc file',

                             br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),

      ]

      def hgrcembedded(basefile, lines, errors):

          return embedded(basefile, lines, errors, _hgrcmatchers)

      ####

      if __name__ == "__main__":

          import optparse

          import sys

          def showembedded(basefile, lines, embeddedfunc, opts):

              errors = []

              for name, starts, ends, code in embeddedfunc(basefile, lines, errors):

                  if not name:

                      name = b'<anonymous>'

                  writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))

                  if opts.verbose and code:

                      writeout(b"  |%s\n" %

                               b"\n  |".join(l for l in code.splitlines()))

                  writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))

              for e in errors:

                  writeerr(b"%s\n" % e)

              return len(errors)

          def applyembedded(args, embeddedfunc, opts):

              ret = 0

              if args:

                  for f in args:

                      with opentext(f) as fp:

                          if showembedded(bytestr(f), fp, embeddedfunc, opts):

                              ret = 1

              else:

                  lines = [l for l in stdin.readlines()]

                  if showembedded(b'<stdin>', lines, embeddedfunc, opts):

                      ret = 1

              return ret

          commands = {}

          def command(name, desc):

              def wrap(func):

                  commands[name] = (desc, func)

              return wrap

          @command("pyembedded", "detect embedded python script")

          def pyembeddedcmd(args, opts):

              return applyembedded(args, pyembedded, opts)

          @command("shembedded", "detect embedded shell script")

          def shembeddedcmd(args, opts):

              return applyembedded(args, shembedded, opts)

          @command("hgrcembedded", "detect embedded hgrc configuration")

          def hgrcembeddedcmd(args, opts):

              return applyembedded(args, hgrcembedded, opts)

          availablecommands = "\n".join(["  - %s: %s" % (key, value[0])

                                         for key, value in commands.items()])

          parser = optparse.OptionParser("""%prog COMMAND [file ...]

      Pick up embedded code fragments from given file(s) or stdin, and list

      up start/end lines of them in standard compiler format

      ("FILENAME:LINENO:").

      Available commands are:

      """ + availablecommands + """

      """)

          parser.add_option("-v", "--verbose",

                            help="enable additional output (e.g. actual code)",

                            action="store_true")

          (opts, args) = parser.parse_args()

          if not args or args[0] not in commands:

              parser.print_help()

              sys.exit(255)

          sys.exit(commands[args[0]][1](args[1:], opts))

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

FUJIWARA Katsunori contrib: add an utility module to parse test scripts...	r40129	# testparseutil.py - utilities to parse test script for check tools
		#
		# Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
		#
		# This software may be used and distributed according to the terms of the
		# GNU General Public License version 2 or any later version.

		from __future__ import absolute_import, print_function

		import abc
		import re
		import sys

		####################
		# for Python3 compatibility (almost comes from mercurial/pycompat.py)

		ispy3 = (sys.version_info[0] >= 3)

		def identity(a):
		return a

		def _rapply(f, xs):
		if xs is None:
		# assume None means non-value of optional data
		return xs
		if isinstance(xs, (list, set, tuple)):
		return type(xs)(_rapply(f, x) for x in xs)
		if isinstance(xs, dict):
		return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
		return f(xs)

		def rapply(f, xs):
		if f is identity:
		# fast path mainly for py2
		return xs
		return _rapply(f, xs)

		if ispy3:
		import builtins

		# TODO: .buffer might not exist if std streams were replaced; we'll need
		# a silly wrapper to make a bytes stream backed by a unicode one.
		stdin = sys.stdin.buffer
		stdout = sys.stdout.buffer
		stderr = sys.stderr.buffer

		def bytestr(s):
		# tiny version of pycompat.bytestr
		return s.encode('latin1')

		def sysstr(s):
		if isinstance(s, builtins.str):
		return s
		return s.decode(u'latin-1')

		def opentext(f):
		return open(f, 'rb')
		else:
		stdin = sys.stdin
		stdout = sys.stdout
		stderr = sys.stderr

		bytestr = str
		sysstr = identity

		opentext = open

		def b2s(x):
		# convert BYTES elements in "x" to SYSSTR recursively
		return rapply(sysstr, x)

		def writeout(data):
		# write "data" in BYTES into stdout
		stdout.write(data)

		def writeerr(data):
		# write "data" in BYTES into stderr
		stderr.write(data)

		####################

		class embeddedmatcher(object):
		"""Base class to detect embedded code fragments in *.t test script
		"""
		__metaclass__ = abc.ABCMeta

		def __init__(self, desc):
		self.desc = desc

		@abc.abstractmethod
		def startsat(self, line):
		"""Examine whether embedded code starts at line

		This can return arbitrary object, and it is used as 'ctx' for
		subsequent method invocations.
		"""

		@abc.abstractmethod
		def endsat(self, ctx, line):
		"""Examine whether embedded code ends at line"""

		@abc.abstractmethod
		def isinside(self, ctx, line):
		"""Examine whether line is inside embedded code, if not yet endsat
		"""

		@abc.abstractmethod
		def ignores(self, ctx):
		"""Examine whether detected embedded code should be ignored"""

		@abc.abstractmethod
		def filename(self, ctx):
		"""Return filename of embedded code

		If filename isn't specified for embedded code explicitly, this
		returns None.
		"""

		@abc.abstractmethod
		def codeatstart(self, ctx, line):
		"""Return actual code at the start line of embedded code

		This might return None, if the start line doesn't contain
		actual code.
		"""

		@abc.abstractmethod
		def codeatend(self, ctx, line):
		"""Return actual code at the end line of embedded code

		This might return None, if the end line doesn't contain actual
		code.
		"""

		@abc.abstractmethod
		def codeinside(self, ctx, line):
		"""Return actual code at line inside embedded code"""

		def embedded(basefile, lines, errors, matchers):
		"""pick embedded code fragments up from given lines

		This is common parsing logic, which examines specified matchers on
		given lines.

		:basefile: a name of a file, from which lines to be parsed come.
		:lines: to be parsed (might be a value returned by "open(basefile)")
		:errors: an array, into which messages for detected error are stored
		:matchers: an array of embeddedmatcher objects

		This function yields '(filename, starts, ends, code)' tuple.

		:filename: a name of embedded code, if it is explicitly specified
		(e.g. "foobar" of "cat >> foobar <<EOF").
		Otherwise, this is None
		:starts: line number (1-origin), at which embedded code starts (inclusive)
		:ends: line number (1-origin), at which embedded code ends (exclusive)
		:code: extracted embedded code, which is single-stringified

		>>> class ambigmatcher(object):
		... # mock matcher class to examine implementation of
		... # "ambiguous matching" corner case
		... def __init__(self, desc, matchfunc):
		... self.desc = desc
		... self.matchfunc = matchfunc
		... def startsat(self, line):
		... return self.matchfunc(line)
		>>> ambig1 = ambigmatcher(b'ambiguous #1',
		... lambda l: l.startswith(b' $ cat '))
		>>> ambig2 = ambigmatcher(b'ambiguous #2',
		... lambda l: l.endswith(b'<< EOF\\n'))
		>>> lines = [b' $ cat > foo.py << EOF\\n']
		>>> errors = []
		>>> matchers = [ambig1, ambig2]
		>>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
		[]
		>>> b2s(errors)
		['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

		"""
		matcher = None
		ctx = filename = code = startline = None # for pyflakes

		for lineno, line in enumerate(lines, 1):
		if not line.endswith(b'\n'):
		line += b'\n' # to normalize EOF line
		if matcher: # now, inside embedded code
		if matcher.endsat(ctx, line):
		codeatend = matcher.codeatend(ctx, line)
		if codeatend is not None:
		code.append(codeatend)
		if not matcher.ignores(ctx):
		yield (filename, startline, lineno, b''.join(code))
		matcher = None
		# DO NOT "continue", because line might start next fragment
		elif not matcher.isinside(ctx, line):
		# this is an error of basefile
		# (if matchers are implemented correctly)
		errors.append(b'%s:%d: unexpected line for "%s"'
		% (basefile, lineno, matcher.desc))
		# stop extracting embedded code by current 'matcher',
		# because appearance of unexpected line might mean
		# that expected end-of-embedded-code line might never
		# appear
		matcher = None
		# DO NOT "continue", because line might start next fragment
		else:
		code.append(matcher.codeinside(ctx, line))
		continue

		# examine whether current line starts embedded code or not
		assert not matcher

		matched = []
		for m in matchers:
		ctx = m.startsat(line)
		if ctx:
		matched.append((m, ctx))
		if matched:
		if len(matched) > 1:
		# this is an error of matchers, maybe
		errors.append(b'%s:%d: ambiguous line for %s' %
		(basefile, lineno,
		b', '.join([b'"%s"' % m.desc
		for m, c in matched])))
		# omit extracting embedded code, because choosing
		# arbitrary matcher from matched ones might fail to
		# detect the end of embedded code as expected.
		continue
		matcher, ctx = matched[0]
		filename = matcher.filename(ctx)
		code = []
		codeatstart = matcher.codeatstart(ctx, line)
		if codeatstart is not None:
		code.append(codeatstart)
		startline = lineno
		else:
		startline = lineno + 1

		if matcher:
		# examine whether EOF ends embedded code, because embedded
		# code isn't yet ended explicitly
		if matcher.endsat(ctx, b'\n'):
		codeatend = matcher.codeatend(ctx, b'\n')
		if codeatend is not None:
		code.append(codeatend)
		if not matcher.ignores(ctx):
		yield (filename, startline, lineno + 1, b''.join(code))
		else:
		# this is an error of basefile
		# (if matchers are implemented correctly)
		errors.append(b'%s:%d: unexpected end of file for "%s"'
		% (basefile, lineno, matcher.desc))

		# heredoc limit mark to ignore embedded code at check-code.py or so
		heredocignorelimit = b'NO_CHECK_EOF'

		# the pattern to match against cases below, and to return a limit mark
		# string as 'lname' group
		#
		# - << LIMITMARK
		# - << "LIMITMARK"
		# - << 'LIMITMARK'
		heredoclimitpat = br'\s<<\s(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'

		class fileheredocmatcher(embeddedmatcher):
		"""Detect "cat > FILE << LIMIT" style embedded code

		>>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py')
		>>> b2s(matcher.startsat(b' $ cat > file.py << EOF\\n'))
		('file.py', ' > EOF\\n')
		>>> b2s(matcher.startsat(b' $ cat >>file.py <<EOF\\n'))
		('file.py', ' > EOF\\n')
		>>> b2s(matcher.startsat(b' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
		('any file.py', ' > EOF\\n')
		>>> b2s(matcher.startsat(b" $ cat > file.py << 'ANYLIMIT'\\n"))
		('file.py', ' > ANYLIMIT\\n')
		>>> b2s(matcher.startsat(b' $ cat<<ANYLIMIT>"file.py"\\n'))
		('file.py', ' > ANYLIMIT\\n')
		>>> start = b' $ cat > file.py << EOF\\n'
		>>> ctx = matcher.startsat(start)
		>>> matcher.codeatstart(ctx, start)
		>>> b2s(matcher.filename(ctx))
		'file.py'
		>>> matcher.ignores(ctx)
		False
		>>> inside = b' > foo = 1\\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'foo = 1\\n'
		>>> end = b' > EOF\\n'
		>>> matcher.endsat(ctx, end)
		True
		>>> matcher.codeatend(ctx, end)
		>>> matcher.endsat(ctx, b' > EOFEOF\\n')
		False
		>>> ctx = matcher.startsat(b' $ cat > file.py << NO_CHECK_EOF\\n')
		>>> matcher.ignores(ctx)
		True
		"""
		_prefix = b' > '

		def __init__(self, desc, namepat):
		super(fileheredocmatcher, self).__init__(desc)

		# build the pattern to match against cases below (and ">>"
		# variants), and to return a target filename string as 'name'
		# group
		#
		# - > NAMEPAT
		# - > "NAMEPAT"
		# - > 'NAMEPAT'
		namepat = (br'\s>>?\s(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
		% namepat)
		self._fileres = [
		# "cat > NAME << LIMIT" case
		re.compile(br' \$ \s*cat' + namepat + heredoclimitpat),
		# "cat << LIMIT > NAME" case
		re.compile(br' \$ \s*cat' + heredoclimitpat + namepat),
		]

		def startsat(self, line):
		# ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
		for filere in self._fileres:
		matched = filere.match(line)
		if matched:
		return (matched.group('name'),
		b' > %s\n' % matched.group('limit'))

		def endsat(self, ctx, line):
		return ctx[1] == line

		def isinside(self, ctx, line):
		return line.startswith(self._prefix)

		def ignores(self, ctx):
		return b' > %s\n' % heredocignorelimit == ctx[1]

		def filename(self, ctx):
		return ctx[0]

		def codeatstart(self, ctx, line):
		return None # no embedded code at start line

		def codeatend(self, ctx, line):
		return None # no embedded code at end line

		def codeinside(self, ctx, line):
		return line[len(self._prefix):] # strip prefix

		####
		# for embedded python script

		class pydoctestmatcher(embeddedmatcher):
		"""Detect ">>> code" style embedded python code

		>>> matcher = pydoctestmatcher()
		>>> startline = b' >>> foo = 1\\n'
		>>> matcher.startsat(startline)
		True
		>>> matcher.startsat(b' ... foo = 1\\n')
		False
		>>> ctx = matcher.startsat(startline)
		>>> matcher.filename(ctx)
		>>> matcher.ignores(ctx)
		False
		>>> b2s(matcher.codeatstart(ctx, startline))
		'foo = 1\\n'
		>>> inside = b' >>> foo = 1\\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'foo = 1\\n'
		>>> inside = b' ... foo = 1\\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'foo = 1\\n'
		>>> inside = b' expected output\\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'\\n'
		>>> inside = b' \\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'\\n'
		>>> end = b' $ foo bar\\n'
		>>> matcher.endsat(ctx, end)
		True
		>>> matcher.codeatend(ctx, end)
		>>> end = b'\\n'
		>>> matcher.endsat(ctx, end)
		True
		>>> matcher.codeatend(ctx, end)
		"""
		_prefix = b' >>> '
		_prefixre = re.compile(br' (>>>\|\.\.\.) ')

		# If a line matches against not _prefixre but _outputre, that line
		# is "an expected output line" (= not a part of code fragment).
		#
		# Strictly speaking, a line matching against "(#if\|#else\|#endif)"
		# is also treated similarly in "inline python code" semantics by
		# run-tests.py. But "directive line inside inline python code"
		# should be rejected by Mercurial reviewers. Therefore, this
		# regexp does not matche against such directive lines.
		_outputre = re.compile(br' $\| [^$]')

		def __init__(self):
		super(pydoctestmatcher, self).__init__(b"doctest style python code")

		def startsat(self, line):
		# ctx is "True"
		return line.startswith(self._prefix)

		def endsat(self, ctx, line):
		return not (self._prefixre.match(line) or self._outputre.match(line))

		def isinside(self, ctx, line):
		return True # always true, if not yet ended

		def ignores(self, ctx):
		return False # should be checked always

		def filename(self, ctx):
		return None # no filename

		def codeatstart(self, ctx, line):
		return line[len(self._prefix):] # strip prefix ' >>> '/' ... '

		def codeatend(self, ctx, line):
		return None # no embedded code at end line

		def codeinside(self, ctx, line):
		if self._prefixre.match(line):
		return line[len(self._prefix):] # strip prefix ' >>> '/' ... '
		return b'\n' # an expected output line is treated as an empty line

		class pyheredocmatcher(embeddedmatcher):
		"""Detect "python << LIMIT" style embedded python code

		>>> matcher = pyheredocmatcher()
		>>> b2s(matcher.startsat(b' $ python << EOF\\n'))
		' > EOF\\n'
		>>> b2s(matcher.startsat(b' $ $PYTHON <<EOF\\n'))
		' > EOF\\n'
		>>> b2s(matcher.startsat(b' $ "$PYTHON"<< "EOF"\\n'))
		' > EOF\\n'
		>>> b2s(matcher.startsat(b" $ $PYTHON << 'ANYLIMIT'\\n"))
		' > ANYLIMIT\\n'
		>>> matcher.startsat(b' $ "$PYTHON" < EOF\\n')
		>>> start = b' $ python << EOF\\n'
		>>> ctx = matcher.startsat(start)
		>>> matcher.codeatstart(ctx, start)
		>>> matcher.filename(ctx)
		>>> matcher.ignores(ctx)
		False
		>>> inside = b' > foo = 1\\n'
		>>> matcher.endsat(ctx, inside)
		False
		>>> matcher.isinside(ctx, inside)
		True
		>>> b2s(matcher.codeinside(ctx, inside))
		'foo = 1\\n'
		>>> end = b' > EOF\\n'
		>>> matcher.endsat(ctx, end)
		True
		>>> matcher.codeatend(ctx, end)
		>>> matcher.endsat(ctx, b' > EOFEOF\\n')
		False
		>>> ctx = matcher.startsat(b' $ python << NO_CHECK_EOF\\n')
		>>> matcher.ignores(ctx)
		True
		"""
		_prefix = b' > '

		_startre = re.compile(br' \$ (\$PYTHON\|"\$PYTHON"\|python).*' +
		heredoclimitpat)

		def __init__(self):
		super(pyheredocmatcher, self).__init__(b"heredoc python invocation")

		def startsat(self, line):
		# ctx is END-LINE-OF-EMBEDDED-CODE
		matched = self._startre.match(line)
		if matched:
		return b' > %s\n' % matched.group('limit')

		def endsat(self, ctx, line):
		return ctx == line

		def isinside(self, ctx, line):
		return line.startswith(self._prefix)

		def ignores(self, ctx):
		return b' > %s\n' % heredocignorelimit == ctx

		def filename(self, ctx):
		return None # no filename

		def codeatstart(self, ctx, line):
		return None # no embedded code at start line

		def codeatend(self, ctx, line):
		return None # no embedded code at end line

		def codeinside(self, ctx, line):
		return line[len(self._prefix):] # strip prefix

		_pymatchers = [
		pydoctestmatcher(),
		pyheredocmatcher(),
		# use '[^<]+' instead of '\S+', in order to match against
		# paths including whitespaces
		fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
		]

		def pyembedded(basefile, lines, errors):
		return embedded(basefile, lines, errors, _pymatchers)

		####
		# for embedded shell script

		_shmatchers = [
		# use '[^<]+' instead of '\S+', in order to match against
		# paths including whitespaces
		fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
		]

		def shembedded(basefile, lines, errors):
		return embedded(basefile, lines, errors, _shmatchers)

		####
		# for embedded hgrc configuration

		_hgrcmatchers = [
		# use '[^<]+' instead of '\S+', in order to match against
		# paths including whitespaces
		fileheredocmatcher(b'heredoc hgrc file',
		br'(([^/<]+/)+hgrc\|\$HGRCPATH\|\${HGRCPATH})'),
		]

		def hgrcembedded(basefile, lines, errors):
		return embedded(basefile, lines, errors, _hgrcmatchers)

		####

		if __name__ == "__main__":
		import optparse
		import sys

		def showembedded(basefile, lines, embeddedfunc, opts):
		errors = []
		for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
		if not name:
		name = b'<anonymous>'
		writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
		if opts.verbose and code:
		writeout(b" \|%s\n" %
		b"\n \|".join(l for l in code.splitlines()))
		writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
		for e in errors:
		writeerr(b"%s\n" % e)
		return len(errors)

		def applyembedded(args, embeddedfunc, opts):
		ret = 0
		if args:
		for f in args:
		with opentext(f) as fp:
		if showembedded(bytestr(f), fp, embeddedfunc, opts):
		ret = 1
		else:
		lines = [l for l in stdin.readlines()]
		if showembedded(b'<stdin>', lines, embeddedfunc, opts):
		ret = 1
		return ret

		commands = {}
		def command(name, desc):
		def wrap(func):
		commands[name] = (desc, func)
		return wrap

		@command("pyembedded", "detect embedded python script")
		def pyembeddedcmd(args, opts):
		return applyembedded(args, pyembedded, opts)

		@command("shembedded", "detect embedded shell script")
		def shembeddedcmd(args, opts):
		return applyembedded(args, shembedded, opts)

		@command("hgrcembedded", "detect embedded hgrc configuration")
		def hgrcembeddedcmd(args, opts):
		return applyembedded(args, hgrcembedded, opts)

		availablecommands = "\n".join([" - %s: %s" % (key, value[0])
		for key, value in commands.items()])

		parser = optparse.OptionParser("""%prog COMMAND [file ...]

		Pick up embedded code fragments from given file(s) or stdin, and list
		up start/end lines of them in standard compiler format
		("FILENAME:LINENO:").

		Available commands are:
		""" + availablecommands + """
		""")
		parser.add_option("-v", "--verbose",
		help="enable additional output (e.g. actual code)",
		action="store_true")
		(opts, args) = parser.parse_args()

		if not args or args[0] not in commands:
		parser.print_help()
		sys.exit(255)

		sys.exit(commands[args[0]][1](args[1:], opts))