# HG changeset patch # User FUJIWARA Katsunori # Date 2018-08-23 03:25:54 # Node ID 7288838bec1f7e8829f7f8a544d6c784cfc81eaf # Parent ff47ba7a29030401e1fe7ccb734f2c9693ad4df7 import-checker: use testparseutil.embedded() to centralize detection logic This patch fixes issues of embedded() in import-checker.py below, too. - overlook (or mis-detect) the end of inline script in doctest style - overlook inline script in doctest style at the end of file (and ignore invalid un-closed heredoc at the end of file, too) - overlook code fragment in styles below - "python <<EOF" (heredoc should be "cat > file <<EOF" style) - "cat > foobar.py << ANYLIMIT" (limit mark should be "EOF") - "cat << EOF > foobar.py" (filename should be placed before limit mark) - "cat >> foobar.py << EOF" (appending is ignored) diff --git a/contrib/import-checker.py b/contrib/import-checker.py --- a/contrib/import-checker.py +++ b/contrib/import-checker.py @@ -5,7 +5,6 @@ from __future__ import absolute_import, import ast import collections import os -import re import sys # Import a minimal set of stdlib modules needed for list_stdlib_modules() @@ -18,6 +17,8 @@ if True: # disable lexical sorting check basehttpserver = None import zlib +import testparseutil + # Whitelist of modules that symbols can be directly imported from. allowsymbolimports = ( '__future__', @@ -659,61 +660,21 @@ def embedded(f, modname, src): ... b' > EOF', ... ] >>> test(b"example.t", lines) - example[2] doctest.py 2 - "from __future__ import print_function\\n' multiline\\nstring'\\n" - example[7] foo.py 7 + example[2] doctest.py 1 + "from __future__ import print_function\\n' multiline\\nstring'\\n\\n" + example[8] foo.py 7 'from __future__ import print_function\\n' """ - inlinepython = 0 - shpython = 0 - script = [] - prefix = 6 - t = '' - n = 0 - for l in src: - n += 1 - if not l.endswith(b'\n'): - l += b'\n' - if l.startswith(b' >>> '): # python inlines - if shpython: - print("%s:%d: Parse Error" % (f, n)) - if not inlinepython: - # We've just entered a Python block. 
- inlinepython = n - t = b'doctest.py' - script.append(l[prefix:]) - continue - if l.startswith(b' ... '): # python inlines - script.append(l[prefix:]) - continue - cat = re.search(br"\$ \s*cat\s*>\s*(\S+\.py)\s*<<\s*EOF", l) - if cat: - if inlinepython: - yield b''.join(script), (b"%s[%d]" % - (modname, inlinepython)), t, inlinepython - script = [] - inlinepython = 0 - shpython = n - t = cat.group(1) - continue - if shpython and l.startswith(b' > '): # sh continuation - if l == b' > EOF\n': - yield b''.join(script), (b"%s[%d]" % - (modname, shpython)), t, shpython - script = [] - shpython = 0 - else: - script.append(l[4:]) - continue - # If we have an empty line or a command for sh, we end the - # inline script. - if inlinepython and (l == b' \n' - or l.startswith(b' $ ')): - yield b''.join(script), (b"%s[%d]" % - (modname, inlinepython)), t, inlinepython - script = [] - inlinepython = 0 - continue + errors = [] + for name, starts, ends, code in testparseutil.pyembedded(f, src, errors): + if not name: + # use 'doctest.py', in order to make already existing + # doctest above pass instantly + name = 'doctest.py' + # "starts" is "line number" (1-origin), but embedded() is + # expected to return "line offset" (0-origin). Therefore, this + # yields "starts - 1". + yield code, "%s[%d]" % (modname, starts), name, starts - 1 def sources(f, modname): """Yields possibly multiple sources from a filepath