testparseutil.py
670 lines
| 19.7 KiB
| text/x-python
|
PythonLexer
/ contrib / testparseutil.py
FUJIWARA Katsunori
|
r40129 | # testparseutil.py - utilities to parse test script for check tools | ||
# | ||||
# Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others | ||||
# | ||||
# This software may be used and distributed according to the terms of the | ||||
# GNU General Public License version 2 or any later version. | ||||
from __future__ import absolute_import, print_function | ||||
import abc | ||||
import re | ||||
import sys | ||||
#################### | ||||
# for Python3 compatibility (almost comes from mercurial/pycompat.py) | ||||
Augie Fackler
|
# True when running on Python 3 or later; the "if ispy3:" branch below
# picks the matching set of string/file helpers
ispy3 = sys.version_info[0] >= 3
FUJIWARA Katsunori
|
r40129 | |||
def identity(a):
    """Return "a" unchanged

    rapply() compares its function argument against this object to
    take a fast path.
    """
    return a
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r40129 | def _rapply(f, xs): | ||
if xs is None: | ||||
# assume None means non-value of optional data | ||||
return xs | ||||
if isinstance(xs, (list, set, tuple)): | ||||
return type(xs)(_rapply(f, x) for x in xs) | ||||
if isinstance(xs, dict): | ||||
return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) | ||||
return f(xs) | ||||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
def rapply(f, xs):
    """Apply "f" recursively to "xs" (see _rapply() for the traversal)

    When "f" is the identity function, "xs" itself is returned without
    being walked; this shortcut is mainly for py2.
    """
    return xs if f is identity else _rapply(f, xs)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
if not ispy3:
    # on Python 2, the native types already do the job
    bytestr = str
    sysstr = identity
    opentext = open
else:
    import builtins

    def bytestr(s):
        """Encode "s" as latin1 (tiny version of pycompat.bytestr)"""
        return s.encode('latin1')

    def sysstr(s):
        """Return "s" as a native str, decoding it as latin-1 if needed"""
        return s if isinstance(s, builtins.str) else s.decode('latin-1')

    def opentext(f):
        """Open file "f" for reading in text mode"""
        return open(f, 'r')
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
def b2s(x):
    """Convert BYTES elements in "x" to SYSSTR recursively

    This hands "x" to rapply() together with sysstr, and returns
    whatever rapply() returns.
    """
    return rapply(sysstr, x)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
def writeout(data):
    # write "data" into stdout; it is passed to sys.stdout.write() as is
    #
    # NOTE(review): this comment used to say "data" is in BYTES, but
    # sys.stdout is the text stream (which expects str on Python 3) --
    # confirm what callers are expected to pass
    sys.stdout.write(data)
FUJIWARA Katsunori
|
r40129 | |||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
def writeerr(data):
    # write "data" into stderr; it is passed to sys.stderr.write() as is
    #
    # NOTE(review): this comment used to say "data" is in BYTES, but
    # sys.stderr is the text stream (which expects str on Python 3) --
    # confirm what callers are expected to pass
    sys.stderr.write(data)
FUJIWARA Katsunori
|
r40129 | |||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r40129 | #################### | ||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
class embeddedmatcher(abc.ABCMeta('_embeddedmatcherbase', (object,), {})):
    """Base class to detect embedded code fragments in *.t test script

    A concrete matcher must implement every method declared below;
    instantiating a subclass that misses any of them raises TypeError.

    :desc: a description of this matcher, kept as the "desc" attribute
    """

    # The intermediate base class above is created by calling
    # abc.ABCMeta directly. A "__metaclass__" class attribute is honored
    # only by Python 2 (Python 3 silently ignores it, which leaves
    # @abc.abstractmethod unenforced), and "class X(metaclass=...)" is a
    # syntax error on Python 2. This spelling works on both.

    def __init__(self, desc):
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
def embedded(basefile, lines, errors, matchers):
    """pick embedded code fragments up from given lines

    This is common parsing logic, which examines specified matchers on
    given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher('ambiguous #1',
    ...                       lambda l: l.startswith(' $ cat '))
    >>> ambig2 = ambigmatcher('ambiguous #2',
    ...                       lambda l: l.endswith('<< EOF\\n'))
    >>> lines = [' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
    """

    def finish(m, state, chunks, endline):
        # pick up code on the end line (if any) into "chunks", and tell
        # whether the completed fragment should be reported to the caller
        tail = m.codeatend(state, endline)
        if tail is not None:
            chunks.append(tail)
        return not m.ignores(state)

    active = None  # the matcher of the fragment being extracted, if any
    state = fragname = chunks = firstline = None  # for pyflakes

    for lineno, rawline in enumerate(lines, 1):
        # the last line of a file might lack the trailing newline
        line = rawline if rawline.endswith('\n') else rawline + '\n'

        if active:  # now, inside embedded code
            if active.endsat(state, line):
                if finish(active, state, chunks, line):
                    yield (fragname, firstline, lineno, ''.join(chunks))
                active = None
                # fall through, because line might start next fragment
            elif active.isinside(state, line):
                chunks.append(active.codeinside(state, line))
                continue
            else:
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(
                    '%s:%d: unexpected line for "%s"'
                    % (basefile, lineno, active.desc)
                )
                # give up the current fragment, because appearance of
                # unexpected line might mean that expected
                # end-of-embedded-code line might never appear
                active = None
                # fall through, because line might start next fragment

        # examine whether current line starts embedded code or not
        assert not active

        candidates = []
        for m in matchers:
            found = m.startsat(line)
            if found:
                candidates.append((m, found))

        if not candidates:
            continue

        if len(candidates) > 1:
            # this is an error of matchers, maybe
            descs = ', '.join(['"%s"' % m.desc for m, c in candidates])
            errors.append(
                '%s:%d: ambiguous line for %s' % (basefile, lineno, descs)
            )
            # omit extracting embedded code, because choosing
            # arbitrary matcher from matched ones might fail to
            # detect the end of embedded code as expected.
            continue

        active, state = candidates[0]
        fragname = active.filename(state)
        chunks = []
        head = active.codeatstart(state, line)
        if head is None:
            # the start line holds no code: the fragment begins just after
            firstline = lineno + 1
        else:
            chunks.append(head)
            firstline = lineno

    if active:
        # examine whether EOF ends embedded code, because embedded
        # code isn't yet ended explicitly
        if active.endsat(state, '\n'):
            if finish(active, state, chunks, '\n'):
                yield (fragname, firstline, lineno + 1, ''.join(chunks))
        else:
            # this is an error of basefile
            # (if matchers are implemented correctly)
            errors.append(
                '%s:%d: unexpected end of file for "%s"'
                % (basefile, lineno, active.desc)
            )
FUJIWARA Katsunori
|
r40129 | |||
# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = 'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group (the optional quote character is captured as
# 'lquote' group, and must be closed by the same character)
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
FUJIWARA Katsunori
|
r40129 | |||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    The 'ctx' returned by startsat() is a (filename, end line) tuple,
    where the end line is the heredoc limit mark prefixed like a
    continuation line.

    >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
    >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')

    >>> start = ' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    # every line of heredoc content starts with this
    _prefix = ' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # "redirect" matches against cases below (and ">>" variants),
        # and returns a target filename string as 'name' group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        redirect = (
            r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
        )
        catcmd = r' \$ \s*cat'

        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(catcmd + redirect + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(catcmd + heredoclimitpat + redirect),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for filere in self._fileres:
            found = filere.match(line)
            if found is None:
                continue
            endline = ' > %s\n' % found.group('limit')
            return (found.group('name'), endline)
        return None

    def endsat(self, ctx, line):
        unused, endline = ctx
        return endline == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        ignoredend = ' > %s\n' % heredocignorelimit
        return ignoredend == ctx[1]

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        # the start line is a shell command, not embedded code
        return None

    def codeatend(self, ctx, line):
        # the end line is the limit mark, not embedded code
        return None

    def codeinside(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # drop the continuation prefix
FUJIWARA Katsunori
|
r40129 | |||
#### | ||||
# for embedded python script | ||||
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    The 'ctx' returned by startsat() is just True.

    >>> matcher = pydoctestmatcher()
    >>> startline = ' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = ' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = ' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = ' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = ' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = '\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """

    _prefix = ' >>> '
    _prefixre = re.compile(r' (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(r' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__("doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # a fragment goes on while lines are code or expected output
        if self._prefixre.match(line):
            return False
        if self._outputre.match(line):
            return False
        return True

    def isinside(self, ctx, line):
        # whatever doesn't end the fragment belongs to it
        return True

    def ignores(self, ctx):
        # doctest style code is never exempted from checking
        return False

    def filename(self, ctx):
        # doctest style code has no filename
        return None

    def codeatstart(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # drop the leading ' >>> '

    def codeatend(self, ctx, line):
        # the end line is outside of the fragment
        return None

    def codeinside(self, ctx, line):
        if not self._prefixre.match(line):
            # an expected output line is treated as an empty line
            return '\n'
        # ' >>> ' and ' ... ' have the same length
        return line[len(self._prefix) :]
FUJIWARA Katsunori
|
r40129 | |||
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    The 'ctx' returned by startsat() is the end line of the heredoc
    (the limit mark prefixed like a continuation line).

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
    >>> start = ' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = ' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = ' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, ' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """

    # every line of heredoc content starts with this
    _prefix = ' > '

    _startre = re.compile(
        r' \$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
    )

    def __init__(self):
        super(pyheredocmatcher, self).__init__("heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        found = self._startre.match(line)
        if found is None:
            return None
        return ' > %s\n' % found.group('limit')

    def endsat(self, ctx, line):
        return line == ctx

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        ignoredend = ' > %s\n' % heredocignorelimit
        return ignoredend == ctx

    def filename(self, ctx):
        # code fed to the interpreter directly has no filename
        return None

    def codeatstart(self, ctx, line):
        # the start line is a shell command, not embedded code
        return None

    def codeatend(self, ctx, line):
        # the end line is the limit mark, not embedded code
        return None

    def codeinside(self, ctx, line):
        skip = len(self._prefix)
        return line[skip:]  # drop the continuation prefix
FUJIWARA Katsunori
|
r40129 | |||
# matchers examined by pyembedded(), in this order
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
]


def pyembedded(basefile, lines, errors):
    """Pick embedded python code fragments up from given lines

    This is embedded() with the matchers listed in _pymatchers; the
    arguments and the returned value are the ones of embedded().
    """
    return embedded(basefile, lines, errors, _pymatchers)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r40129 | #### | ||
# for embedded shell script | ||||
# matchers examined by shembedded()
_shmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
]


def shembedded(basefile, lines, errors):
    """Pick embedded shell script fragments up from given lines

    This is embedded() with the matchers listed in _shmatchers; the
    arguments and the returned value are the ones of embedded().
    """
    return embedded(basefile, lines, errors, _shmatchers)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r40129 | #### | ||
# for embedded hgrc configuration | ||||
# matchers examined by hgrcembedded()
_hgrcmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(
        'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
    ),
]


def hgrcembedded(basefile, lines, errors):
    """Pick embedded hgrc configuration fragments up from given lines

    This is embedded() with the matchers listed in _hgrcmatchers; the
    arguments and the returned value are the ones of embedded().
    """
    return embedded(basefile, lines, errors, _hgrcmatchers)
Augie Fackler
|
r43346 | |||
FUJIWARA Katsunori
|
r40129 | #### | ||
if __name__ == "__main__":
    # "sys" is already imported at the top of this file
    import optparse

    def showembedded(basefile, lines, embeddedfunc, opts):
        """List fragments that "embeddedfunc" finds in "lines"

        Start/end of each fragment go to stdout (with the code itself,
        if "opts.verbose" is set), and detected errors go to stderr.
        This returns the number of errors.
        """
        errors = []
        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
            if not name:
                name = '<anonymous>'
            writeout("%s:%d: %s starts\n" % (basefile, starts, name))
            if opts.verbose and code:
                writeout(" |%s\n" % "\n |".join(code.splitlines()))
            writeout("%s:%d: %s ends\n" % (basefile, ends, name))
        for e in errors:
            writeerr("%s\n" % e)
        return len(errors)

    def applyembedded(args, embeddedfunc, opts):
        """Run showembedded() on each file in "args", or on stdin

        This returns 1 if any error is detected, or 0 otherwise.
        """
        ret = 0
        if args:
            for f in args:
                with opentext(f) as fp:
                    if showembedded(f, fp, embeddedfunc, opts):
                        ret = 1
        else:
            # read whole stdin before showing anything
            lines = sys.stdin.readlines()
            if showembedded('<stdin>', lines, embeddedfunc, opts):
                ret = 1
        return ret

    # command name => (description, function) table
    commands = {}

    def command(name, desc):
        """Decorator to register a function as command "name" """

        def wrap(func):
            commands[name] = (desc, func)
            # hand the function back; otherwise, the decorated name
            # would be bound to None
            return func

        return wrap

    @command("pyembedded", "detect embedded python script")
    def pyembeddedcmd(args, opts):
        return applyembedded(args, pyembedded, opts)

    @command("shembedded", "detect embedded shell script")
    def shembeddedcmd(args, opts):
        return applyembedded(args, shembedded, opts)

    @command("hgrcembedded", "detect embedded hgrc configuration")
    def hgrcembeddedcmd(args, opts):
        return applyembedded(args, hgrcembedded, opts)

    # sort by command name, to keep the help text stable regardless of
    # dict ordering
    availablecommands = "\n".join(
        [" - %s: %s" % (key, value[0]) for key, value in sorted(commands.items())]
    )

    parser = optparse.OptionParser(
        """%prog COMMAND [file ...]

Pick up embedded code fragments from given file(s) or stdin, and list
up start/end lines of them in standard compiler format
("FILENAME:LINENO:").
Available commands are:
"""
        + availablecommands
        + """
"""
    )
    parser.add_option(
        "-v",
        "--verbose",
        help="enable additional output (e.g. actual code)",
        action="store_true",
    )
    (opts, args) = parser.parse_args()

    # the first argument selects the command; the rest are its files
    if not args or args[0] not in commands:
        parser.print_help()
        sys.exit(255)

    sys.exit(commands[args[0]][1](args[1:], opts))