##// END OF EJS Templates
commitctx: document _filecommit input and output...
commitctx: document _filecommit input and output. This is the beginning of a larger refactoring/cleanup of the commitctx code to clarify and augment the logic gathering metadata useful for copy tracing. The current code is a tad too long and entangled to make such updates easy. We start with easy and small cleanups. Differential Revision: https://phab.mercurial-scm.org/D8699

File last commit:

r44897:4cabeea6 default
r45610:74c59008 default
Show More
import-checker.py
821 lines | 28.4 KiB | text/x-python | PythonLexer
Yuya Nishihara
import-checker: make it executable for convenience
r26954 #!/usr/bin/env python
timeless
py3: use print_function in import-checker
r28703 from __future__ import absolute_import, print_function
timeless
py3: use absolute_import in import-checker
r28702
Augie Fackler
contrib: add an import checker...
r20036 import ast
Yuya Nishihara
import-checker: reset context to verify convention in function scope...
r26965 import collections
Gregory Szorc
import-checker: open all source files as utf-8...
r43733 import io
Augie Fackler
contrib: add an import checker...
r20036 import os
import sys
Chris Jerdonek
import-checker: make test-module-imports.t work using virtualenv (issue4129)...
# Import a minimal set of stdlib modules needed for list_stdlib_modules()
# to work when run from a virtualenv.  The modules were chosen empirically
# so that the return value matches the return value without virtualenv.
if True:  # disable lexical sorting checks
    try:
        # Only importable on Python 2; fall back to None when it is missing
        # so callers can test for it explicitly.
        import BaseHTTPServer as basehttpserver
    except ImportError:
        basehttpserver = None
    import zlib
Chris Jerdonek
import-checker: make test-module-imports.t work using virtualenv (issue4129)...
r20198
FUJIWARA Katsunori
import-checker: use testparseutil.embedded() to centralize detection logic...
r40131 import testparseutil
Gregory Szorc
import-checker: establish modern import convention...
# Whitelist of modules that symbols can be directly imported from.
allowsymbolimports = (
    '__future__',
    'bzrlib',
    'hgclient',
    'mercurial',
    'mercurial.hgweb.common',
    'mercurial.hgweb.request',
    'mercurial.i18n',
    'mercurial.interfaces',
    'mercurial.node',
    'mercurial.pycompat',
    # for revlog to re-export constant to extensions
    'mercurial.revlogutils.constants',
    'mercurial.revlogutils.flagutil',
    # for cffi modules to re-export pure functions
    'mercurial.pure.base85',
    'mercurial.pure.bdiff',
    'mercurial.pure.mpatch',
    'mercurial.pure.osutil',
    'mercurial.pure.parsers',
    # third-party imports should be directly imported
    'mercurial.thirdparty',
    'mercurial.thirdparty.attr',
    'mercurial.thirdparty.zope',
    'mercurial.thirdparty.zope.interface',
)
Siddharth Agarwal
import-checker: add a way to directly import certain symbols...
# Whitelist of symbols that can be directly imported.
directsymbols = ('demandimport',)
Siddharth Agarwal
import-checker: add a way to directly import certain symbols...
r32419
Gregory Szorc
import-checker: establish modern import convention...
# Modules that must be aliased because they are commonly confused with
# common variables and can create aliasing and readability issues.
# Maps the module name to the alias it must be imported under.
requirealias = {
    'ui': 'uimod',
}
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Gregory Szorc
import-checker: establish modern import convention...
def usingabsolute(root):
    """Whether absolute imports are being used.

    `root` is a parsed AST. On Python 3 this is always True; otherwise
    the tree is searched for ``from __future__ import absolute_import``.
    """
    if sys.version_info[0] >= 3:
        return True

    for node in ast.walk(root):
        if isinstance(node, ast.ImportFrom):
            if node.module == '__future__':
                for n in node.names:
                    if n.name == 'absolute_import':
                        return True

    return False
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Yuya Nishihara
import-checker: reset context to verify convention in function scope...
def walklocal(root):
    """Recursively yield all descendant nodes but not in a different scope

    Yields ``(node, newscope)`` pairs, starting with ``(root, False)``.
    Nodes are visited breadth-first. A node that is an `ast.FunctionDef`
    is yielded with ``newscope`` set to True and its children are not
    visited, so the caller can decide how to handle the nested scope.
    """
    todo = collections.deque(ast.iter_child_nodes(root))
    yield root, False
    while todo:
        node = todo.popleft()
        newscope = isinstance(node, ast.FunctionDef)
        if not newscope:
            # Only descend into nodes that stay within the current scope.
            todo.extend(ast.iter_child_nodes(node))
        yield node, newscope
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Yuya Nishihara
import-checker: drop workaround for pure modules
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Everything after the first '.' in the last path component is dropped,
    and a trailing 'module' on that component is stripped.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlibmodule.so')
    'zlib'
    """
    parts = path.replace(os.sep, '/').split('/')
    parts[-1] = parts[-1].split('.', 1)[0]  # remove .py and .so and .ARCH.so
    if parts[-1].endswith('module'):
        parts[-1] = parts[-1][: -len('module')]
    return '.'.join(parts)
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
FUJIWARA Katsunori
import-checker: add utility to examine what module is imported easily...
def fromlocalfunc(modulename, localmods):
    """Get a function to examine which locally defined module the
    target source imports via a specified name.

    `modulename` is a `dotted_name_of_path()`-ed source file path,
    which may have `.__init__` at the end of it, of the target source.

    `localmods` is a set of absolute `dotted_name_of_path()`-ed source file
    paths of locally defined (= Mercurial specific) modules.

    This function assumes that module names not existing in
    `localmods` are from the Python standard library.

    This function returns the function, which takes `name` argument,
    and returns `(absname, dottedpath, hassubmod)` tuple if `name`
    matches against locally defined module. Otherwise, it returns
    False.

    It is assumed that `name` doesn't have `.__init__`.

    `absname` is an absolute module name of specified `name`
    (e.g. "hgext.convert"). This can be used to compose prefix for sub
    modules or so.

    `dottedpath` is a `dotted_name_of_path()`-ed source file path
    (e.g. "hgext.convert.__init__") of `name`. This is used to look
    module up in `localmods` again.

    `hassubmod` is whether it may have sub modules under it (for
    convenience, even though this is also equivalent to "absname !=
    dottedpath")

    >>> localmods = {'foo.__init__', 'foo.foo1',
    ...              'foo.bar.__init__', 'foo.bar.bar1',
    ...              'baz.__init__', 'baz.baz1'}
    >>> fromlocal = fromlocalfunc('foo.xxx', localmods)
    >>> # relative
    >>> fromlocal('foo1')
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal('bar')
    ('foo.bar', 'foo.bar.__init__', True)
    >>> fromlocal('bar.bar1')
    ('foo.bar.bar1', 'foo.bar.bar1', False)
    >>> # absolute
    >>> fromlocal('baz')
    ('baz', 'baz.__init__', True)
    >>> fromlocal('baz.baz1')
    ('baz.baz1', 'baz.baz1', False)
    >>> # unknown = maybe standard library
    >>> fromlocal('os')
    False
    >>> fromlocal(None, 1)
    ('foo', 'foo.__init__', True)
    >>> fromlocal('foo1', 1)
    ('foo.foo1', 'foo.foo1', False)
    >>> fromlocal2 = fromlocalfunc('foo.xxx.yyy', localmods)
    >>> fromlocal2(None, 2)
    ('foo', 'foo.__init__', True)
    >>> fromlocal2('bar2', 1)
    False
    >>> fromlocal2('bar', 2)
    ('foo.bar', 'foo.bar.__init__', True)
    """
    if not isinstance(modulename, str):
        modulename = modulename.decode('ascii')
    # Dotted name of the package containing `modulename`, with a trailing
    # '.' when non-empty so it can be prepended to a relative name.
    prefix = '.'.join(modulename.split('.')[:-1])
    if prefix:
        prefix += '.'

    def fromlocal(name, level=0):
        # name is false value when relative imports are used.
        if not name:
            # If relative imports are used, level must not be absolute.
            assert level > 0
            candidates = ['.'.join(modulename.split('.')[:-level])]
        else:
            if not level:
                # Check relative name first.
                candidates = [prefix + name, name]
            else:
                candidates = [
                    '.'.join(modulename.split('.')[:-level]) + '.' + name
                ]

        for n in candidates:
            if n in localmods:
                return (n, n, False)
            dottedpath = n + '.__init__'
            if dottedpath in localmods:
                return (n, dottedpath, True)
        return False

    return fromlocal
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Yuya Nishihara
import-checker: guess names of C extension modules...
def populateextmods(localmods):
    """Populate C extension modules based on pure modules

    Returns a new set containing everything in `localmods` plus, for each
    ``mercurial.pure.X`` entry, the guessed names ``mercurial.cext.X`` and
    ``mercurial.cffi._X``. `localmods` itself is not modified.
    """
    newlocalmods = set(localmods)
    for n in localmods:
        if n.startswith('mercurial.pure.'):
            m = n[len('mercurial.pure.') :]
            newlocalmods.add('mercurial.cext.' + m)
            newlocalmods.add('mercurial.cffi._' + m)
    return newlocalmods
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
def list_stdlib_modules():
    """List the modules present in the stdlib.

    This is a generator. It first yields the interpreter's built-in
    module names and a fixed list of names that are always treated as
    stdlib, then walks the `sys.path` entries located under a stdlib
    prefix and yields the dotted name of every module file found there.

    >>> py3 = sys.version_info[0] >= 3
    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods or py3
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods or py3
    True

    >>> 'cffi' in mods
    True
    """
    for m in sys.builtin_module_names:
        yield m
    # These modules only exist on windows, but we should always
    # consider them stdlib.
    for m in ['msvcrt', '_winreg']:
        yield m
    yield '__builtin__'
    yield 'builtins'  # python3 only
    yield 'importlib.abc'  # python3 only
    yield 'importlib.machinery'  # python3 only
    yield 'importlib.util'  # python3 only
    for m in 'fcntl', 'grp', 'pwd', 'termios':  # Unix only
        yield m
    for m in 'cPickle', 'datetime':  # in Python (not C) on PyPy
        yield m
    for m in ['cffi']:
        yield m
    stdlib_prefixes = {sys.prefix, sys.exec_prefix}
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for mod in (basehttpserver, zlib):
        if mod is None:
            continue
        try:
            # Not all module objects have a __file__ attribute.
            filename = mod.__file__
        except AttributeError:
            continue
        dirname = os.path.dirname(filename)
        for prefix in stdlib_prefixes:
            if dirname.startswith(prefix):
                # Then this directory is redundant.
                break
        else:
            # No known prefix covers this directory; the set is only
            # mutated here, after the iteration over it has finished.
            stdlib_prefixes.add(dirname)
    sourceroot = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with something in
        # stdlib_prefixes, but not directories from the hg sources.
        if os.path.abspath(libpath).startswith(sourceroot) or not any(
            libpath.startswith(p) for p in stdlib_prefixes
        ):
            continue
        for top, dirs, files in os.walk(libpath):
            # Prune `dirs` in place (iterating backwards so deletions do not
            # shift pending indexes): os.walk only recurses into what is left.
            for i, d in reversed(list(enumerate(dirs))):
                if (
                    not os.path.exists(os.path.join(top, d, '__init__.py'))
                    or top == libpath
                    and d in ('hgdemandimport', 'hgext', 'mercurial')
                ):
                    del dirs[i]
            for name in files:
                if not name.endswith(('.py', '.so', '.pyc', '.pyo', '.pyd')):
                    continue
                if name.startswith('__init__.py'):
                    # A package is reported under its directory name.
                    full_path = top
                else:
                    full_path = os.path.join(top, name)
                rel_path = full_path[len(libpath) + 1 :]
                mod = dotted_name_of_path(rel_path)
                yield mod
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
# Names yielded by list_stdlib_modules(), collected once at import time.
stdlib_modules = set(list_stdlib_modules())
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
timeless
import-checker: track filenames for SyntaxErrors
def imported_modules(source, modulename, f, localmods, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      modulename: of specified python source (may have `__init__`)
      f: filename passed to `ast.parse()` for the given source.
      localmods: set of locally defined module names (may have `__init__`)
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Yields:
      Absolute module names (as found in `localmods`) imported by the
      given source. Names that do not resolve to a local module are
      skipped.

    >>> f = 'foo/xxx.py'
    >>> modulename = 'foo.xxx'
    >>> localmods = {'foo.__init__': True,
    ...              'foo.foo1': True, 'foo.foo2': True,
    ...              'foo.bar.__init__': True, 'foo.bar.bar1': True,
    ...              'baz.__init__': True, 'baz.baz1': True }
    >>> # standard library (= not locally defined ones)
    >>> sorted(imported_modules(
    ...        'from stdlib1 import foo, bar; import stdlib2',
    ...        modulename, f, localmods))
    []
    >>> # relative importing
    >>> sorted(imported_modules(
    ...        'import foo1; from bar import bar1',
    ...        modulename, f, localmods))
    ['foo.bar.bar1', 'foo.foo1']
    >>> sorted(imported_modules(
    ...        'from bar.bar1 import name1, name2, name3',
    ...        modulename, f, localmods))
    ['foo.bar.bar1']
    >>> # absolute importing
    >>> sorted(imported_modules(
    ...        'from baz import baz1, name1',
    ...        modulename, f, localmods))
    ['baz.__init__', 'baz.baz1']
    >>> # mixed importing, even though it shouldn't be recommended
    >>> sorted(imported_modules(
    ...        'import stdlib, foo1, baz',
    ...        modulename, f, localmods))
    ['baz.__init__', 'foo.foo1']
    >>> # ignore_nested
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods))
    ['foo.__init__', 'foo.bar.__init__']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', modulename, f, localmods, ignore_nested=True))
    ['foo.__init__']
    """
    fromlocal = fromlocalfunc(modulename, localmods)
    for node in ast.walk(ast.parse(source, f)):
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                found = fromlocal(n.name)
                if not found:
                    # this should import standard library
                    continue
                yield found[1]
        elif isinstance(node, ast.ImportFrom):
            found = fromlocal(node.module, node.level)
            if not found:
                # this should import standard library
                continue

            absname, dottedpath, hassubmod = found
            if not hassubmod:
                # "dottedpath" is not a package; must be imported
                yield dottedpath
                # examination of "node.names" should be redundant
                # e.g.: from mercurial.node import nullid, nullrev
                continue

            modnotfound = False
            prefix = absname + '.'
            for n in node.names:
                found = fromlocal(prefix + n.name)
                if not found:
                    # this should be a function or a property of "node.module"
                    modnotfound = True
                    continue
                yield found[1]
            if modnotfound and dottedpath != modulename:
                # "dottedpath" is a package, but imported because of non-module
                # lookup
                # specifically allow "from . import foo" from __init__.py
                yield dottedpath
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Yuya Nishihara
import-checker: allow absolute imports of sub modules from local packages...
def verify_import_convention(module, source, localmods):
    """Verify imports match our established coding convention.

    We have 2 conventions: legacy and modern. The modern convention is in
    effect when using absolute imports.

    The legacy convention only looks for mixed imports. The modern convention
    is much more thorough.

    `source` is parsed here; the result of the selected verifier is
    returned unchanged.
    """
    root = ast.parse(source)
    absolute = usingabsolute(root)

    if absolute:
        return verify_modern_convention(module, root, localmods)
    else:
        return verify_stdlib_on_own_line(root)
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Yuya Nishihara
import-checker: allow absolute imports of sub modules from local packages...
def verify_modern_convention(module, root, localmods, root_col_offset=0):
    """Verify a file conforms to the modern import convention rules.

    The rules of the modern convention are:

    * Ordering is stdlib followed by local imports. Each group is lexically
      sorted.
    * Importing multiple modules via "import X, Y" is not allowed: use
      separate import statements.
    * Importing multiple modules via "from X import ..." is allowed if using
      parenthesis and one entry per line.
    * Only 1 relative import statement per import level ("from .", "from ..")
      is allowed.
    * Relative imports from higher levels must occur before lower levels. e.g.
      "from .." must be before "from .".
    * Imports from peer packages should use relative import (e.g. do not
      "import mercurial.foo" from a "mercurial.*" module).
    * Symbols can only be imported from specific modules (see
      `allowsymbolimports`). For other modules, first import the module then
      assign the symbol to a module-level variable. In addition, these imports
      must be performed before other local imports. This rule only
      applies to import statements outside of any blocks.
    * Relative imports from the standard library are not allowed, unless that
      library is also a local module.
    * Certain modules must be aliased to alternate names to avoid aliasing
      and readability problems. See `requirealias`.

    ``module`` is the dotted name of the module being checked (a non-str
    value is decoded as ASCII). ``root`` is the AST node whose imports are
    examined via ``walklocal``. ``localmods`` is the collection of local
    module names (it is membership-tested here and handed to
    ``fromlocalfunc``). ``root_col_offset`` is the column offset at which an
    import statement counts as being outside of any block for this scope.

    This is a generator: it yields one ``(message, lineno)`` tuple per
    violation found.
    """
    if not isinstance(module, str):
        module = module.decode('ascii')
    # First dotted component of the checked module, used to detect absolute
    # imports of sibling modules.
    topmodule = module.split('.')[0]
    fromlocal = fromlocalfunc(module, localmods)

    # Whether a local/non-stdlib import has been performed.
    # (Holds the name of the most recent one, so it can be reported.)
    seenlocal = None
    # Whether a local/non-stdlib, non-symbol import has been seen.
    seennonsymbollocal = False
    # The last name to be imported (for sorting).
    lastname = None
    # Whether that last name was a stdlib module; the sort check only fires
    # when the previous and current import are in the same group.
    laststdlib = None
    # Relative import levels encountered so far.
    seenlevels = set()

    for node, newscope in walklocal(root):

        # Builds a (message, lineno) result; `node` is looked up when msg()
        # is called, so it always refers to the node of the current iteration.
        def msg(fmt, *args):
            return (fmt % args, node.lineno)

        if newscope:
            # Check for local imports in function
            # The nested scope is verified with fresh state; its imports are
            # expected 4 columns deeper than the node that opens the scope.
            for r in verify_modern_convention(
                module, node, localmods, node.col_offset + 4
            ):
                yield r
        elif isinstance(node, ast.Import):
            # Disallow "import foo, bar" and require separate imports
            # for each module.
            if len(node.names) > 1:
                yield msg(
                    'multiple imported names: %s',
                    ', '.join(n.name for n in node.names),
                )

            # Only the first imported name takes part in the checks below.
            name = node.names[0].name
            asname = node.names[0].asname

            stdlib = name in stdlib_modules

            # Ignore sorting rules on imports inside blocks.
            if node.col_offset == root_col_offset:
                if lastname and name < lastname and laststdlib == stdlib:
                    yield msg(
                        'imports not lexically sorted: %s < %s', name, lastname
                    )

            lastname = name
            laststdlib = stdlib

            # stdlib imports should be before local imports.
            if stdlib and seenlocal and node.col_offset == root_col_offset:
                yield msg(
                    'stdlib import "%s" follows local import: %s',
                    name,
                    seenlocal,
                )

            if not stdlib:
                seenlocal = name

            # Import of sibling modules should use relative imports.
            topname = name.split('.')[0]
            if topname == topmodule:
                yield msg('import should be relative: %s', name)

            if name in requirealias and asname != requirealias[name]:
                yield msg(
                    '%s module must be "as" aliased to %s',
                    name,
                    requirealias[name],
                )

        elif isinstance(node, ast.ImportFrom):
            # Resolve the full imported module name.
            if node.level > 0:
                # Relative import: drop `level` trailing components from the
                # checked module's name, then append the named module if any.
                fullname = '.'.join(module.split('.')[: -node.level])
                if node.module:
                    fullname += '.%s' % node.module
            else:
                assert node.module
                fullname = node.module

                # An absolute "from X import" of a sibling should have been
                # written as a relative import.
                topname = fullname.split('.')[0]
                if topname == topmodule:
                    yield msg('import should be relative: %s', fullname)

            # __future__ is special since it needs to come first and use
            # symbol import.
            if fullname != '__future__':
                # NOTE(review): a name found in stdlib_modules presumably
                # never starts with '.', which would make the parenthesised
                # clause always false -- confirm this is the intended way of
                # allowing absolute stdlib imports.
                if not fullname or (
                    fullname in stdlib_modules
                    # allow standard 'from typing import ...' style
                    and fullname.startswith('.')
                    and fullname not in localmods
                    and fullname + '.__init__' not in localmods
                ):
                    yield msg('relative import of stdlib module')
                else:
                    seenlocal = fullname

            # Direct symbol import is only allowed from certain modules and
            # must occur before non-symbol imports.
            found = fromlocal(node.module, node.level)
            if found and found[2]:  # node.module is a package
                # Names that resolve to local modules below the package are
                # module imports, not symbol imports, so they are filtered out.
                # (found[0] is presumably the absolute package name -- see
                # fromlocalfunc.)
                prefix = found[0] + '.'
                symbols = (
                    n.name for n in node.names if not fromlocal(prefix + n.name)
                )
            else:
                symbols = (n.name for n in node.names)
            # Names listed in `directsymbols` are exempt from the symbol rules.
            symbols = [sym for sym in symbols if sym not in directsymbols]
            if node.module and node.col_offset == root_col_offset:
                if symbols and fullname not in allowsymbolimports:
                    yield msg(
                        'direct symbol import %s from %s',
                        ', '.join(symbols),
                        fullname,
                    )

                if symbols and seennonsymbollocal:
                    yield msg(
                        'symbol import follows non-symbol import: %s', fullname
                    )
            if not symbols and fullname not in stdlib_modules:
                seennonsymbollocal = True

            if not node.module:
                # Bare relative import ("from . import x", "from .. import x").
                assert node.level

                # Only allow 1 group per level.
                if (
                    node.level in seenlevels
                    and node.col_offset == root_col_offset
                ):
                    yield msg(
                        'multiple "from %s import" statements', '.' * node.level
                    )

                # Higher-level groups come before lower-level groups.
                if any(node.level > l for l in seenlevels):
                    yield msg(
                        'higher-level import should come first: %s', fullname
                    )

                seenlevels.add(node.level)

            # Entries in "from .X import ( ... )" lists must be lexically
            # sorted.
            lastentryname = None

            for n in node.names:
                if lastentryname and n.name < lastentryname:
                    yield msg(
                        'imports from %s not lexically sorted: %s < %s',
                        fullname,
                        n.name,
                        lastentryname,
                    )

                lastentryname = n.name

                if n.name in requirealias and n.asname != requirealias[n.name]:
                    yield msg(
                        '%s from %s must be "as" aliased to %s',
                        n.name,
                        fullname,
                        requirealias[n.name],
                    )
Gregory Szorc
import-checker: establish new function for verifying import conventions...
r25702
def verify_stdlib_on_own_line(root):
    """Report ``import`` statements that mix stdlib and non-stdlib modules.

    Every node below ``root`` is visited. For each plain ``import a, b, ...``
    statement the imported names are split according to membership in
    ``stdlib_modules``; when both groups are non-empty a
    ``(message, lineno)`` tuple is yielded.

    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, foo')))
    [('mixed imports\\n stdlib: sys\\n relative: foo', 1)]
    >>> list(verify_stdlib_on_own_line(ast.parse('import sys, os')))
    []
    >>> list(verify_stdlib_on_own_line(ast.parse('import foo, bar')))
    []
    """
    for stmt in ast.walk(root):
        if not isinstance(stmt, ast.Import):
            continue

        stdlib_names = []
        other_names = []
        for alias in stmt.names:
            if alias.name in stdlib_modules:
                stdlib_names.append(alias.name)
            else:
                other_names.append(alias.name)

        # Only a statement containing both kinds is a violation.
        if not (stdlib_names and other_names):
            continue

        details = (
            ', '.join(sorted(stdlib_names)),
            ', '.join(sorted(other_names)),
        )
        yield (
            'mixed imports\n stdlib: %s\n relative: %s' % details,
            stmt.lineno,
        )
Augie Fackler
contrib: add an import checker...
r20036
class CircularImport(Exception):
    """Signals that an import path leads back to the module it started from.

    ``checkmod`` raises it with the offending path (a list of module names)
    as its only argument, which ``find_cycles`` reads back from ``args[0]``.
    """
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Matt Mackall
import-checker: make search algorithm non-recursive breadth-first...
def checkmod(mod, imports):
    """Search the import graph breadth-first for a cycle through ``mod``.

    ``imports`` maps a module name to an iterable of the module names it
    imports; modules missing from the mapping are treated as importing
    nothing.

    Returns None when no import path starting at ``mod`` leads back to
    ``mod``. Raises ``CircularImport(path)`` otherwise, where ``path`` is the
    list of module names from ``mod`` up to the module that imports ``mod``
    again. Cycles that do not pass through ``mod`` are ignored.
    """
    # shortest[name] is the length of the shortest path seen so far whose
    # last module imports `name`; a path is only extended through `name` when
    # it improves on that, which keeps the search finite on cyclic graphs.
    shortest = {}
    # Sentinel for "not reached yet": larger than any real path length, so
    # arbitrarily deep import chains are still explored.
    unreached = float('inf')
    # deque gives O(1) pops from the front of the FIFO work queue.
    visit = collections.deque([[mod]])
    while visit:
        path = visit.popleft()
        for i in sorted(imports.get(path[-1], [])):
            if len(path) < shortest.get(i, unreached):
                shortest[i] = len(path)
                if i in path:
                    if i == path[0]:
                        raise CircularImport(path)
                    # A loop that does not return to the start module is
                    # reported when that module is checked itself.
                    continue
                visit.append(path + [i])
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
import-checker: try a little harder to show fewer cycles...
def rotatecycle(cycle):
    """Rotate a cycle so it starts at its lexicographically smallest module.

    The smallest entry is repeated at the end so the result reads as a
    closed loop.

    >>> rotatecycle(['foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    start = min(cycle)
    pivot = cycle.index(start)
    before, after = cycle[:pivot], cycle[pivot:]
    return after + before + [start]
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    All module names recorded in `imports` should be absolute one.

    Each module is checked in sorted order with ``checkmod``; every cycle
    found is normalised with ``rotatecycle`` and returned as a
    " -> "-joined string, so the same loop reached from different starting
    modules is only reported once. The result is a set of such strings.

    >>> from __future__ import print_function
    >>> imports = {'top.foo': ['top.bar', 'os.path', 'top.qux'],
    ...            'top.bar': ['top.baz', 'sys'],
    ...            'top.baz': ['top.foo'],
    ...            'top.qux': ['top.foo']}
    >>> print('\\n'.join(sorted(find_cycles(imports))))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    found = set()
    for name in sorted(imports):
        try:
            checkmod(name, imports)
        except CircularImport as exc:
            # The exception carries the looping path as its first argument.
            canonical = rotatecycle(exc.args[0])
            found.add(" -> ".join(canonical))
    return found
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
r20036 def _cycle_sortkey(c):
return len(c), c
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
timeless
import-checker: parse python code from .t files
def embedded(f, modname, src):
    """Extract embedded python code

    Python snippets are located by ``testparseutil.pyembedded(f, src, ...)``.
    For each snippet this yields a 4-tuple of the snippet's code, a module
    name of the form ``"<modname>[<first line>]"``, the snippet's file name
    (``'doctest.py'`` when it has none) and its 0-origin line offset.

    >>> def _forcestr(thing):
    ...     if not isinstance(thing, str):
    ...         return thing.decode('ascii')
    ...     return thing
    >>> def test(fn, lines):
    ...     for s, m, f, l in embedded(fn, b"example", lines):
    ...         print("%s %s %d" % (_forcestr(m), _forcestr(f), l))
    ...         print(repr(_forcestr(s)))
    >>> lines = [
    ...   'comment',
    ...   ' >>> from __future__ import print_function',
    ...   " >>> ' multiline",
    ...   " ... string'",
    ...   ' ',
    ...   'comment',
    ...   ' $ cat > foo.py <<EOF',
    ...   ' > from __future__ import print_function',
    ...   ' > EOF',
    ... ]
    >>> test(b"example.t", lines)
    example[2] doctest.py 1
    "from __future__ import print_function\\n' multiline\\nstring'\\n\\n"
    example[8] foo.py 7
    'from __future__ import print_function\\n'
    """
    # Filled in by pyembedded(); not inspected by this function.
    parse_errors = []
    snippets = testparseutil.pyembedded(f, src, parse_errors)
    for name, starts, _ends, code in snippets:
        # use 'doctest.py', in order to make already existing
        # doctest above pass instantly
        filename = name if name else 'doctest.py'
        if not isinstance(modname, str):
            modname = modname.decode('utf8')
        label = "%s[%d]" % (modname, starts)
        # "starts" is "line number" (1-origin), but embedded() is
        # expected to return "line offset" (0-origin). Therefore, this
        # yields "starts - 1".
        yield code, label, filename, starts - 1
timeless
import-checker: parse python code from .t files
r28922
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
timeless
import-checker: refactor source reading...
def sources(f, modname):
    """Yields possibly multiple sources from a filepath

    input: filepath, modulename
    yields: script(string), modulename, filepath, linenumber

    A file whose name does not end in '.t' is first yielded whole, as the
    bytes read from disk, with the given modulename and a linenumber of 0.
    Every file is then scanned by embedded() and each embedded script is
    yielded with its modulename encoded as UTF-8.

    For embedded scripts, the modulename and filepath will be different
    from the function arguments. linenumber is an offset relative to
    the input file.
    """
    if not f.endswith('.t'):
        with open(f, 'rb') as raw:
            yield raw.read(), modname, f, 0

    # Strictly speaking we should sniff for the magic header that denotes
    # Python source file encoding. But in reality we don't use anything
    # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
    # simplicity is fine.
    with io.open(f, 'r', encoding='utf-8') as text:
        for script, embeddedname, path, offset in embedded(f, modname, text):
            yield script, embeddedname.encode('utf8'), path, offset
timeless
import-checker: refactor source reading...
r28919
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
def main(argv):
    """Check import conventions and import cycles for the given files.

    ``argv`` is the full argument vector: ``argv[0]`` is the program name,
    followed either by file paths or by a single ``-`` meaning "read the file
    paths from stdin, one per line".

    Every convention violation, syntax error and import cycle is printed to
    stdout. Returns 1 on a usage error; otherwise a true value when a
    convention violation or an import cycle was found and a false value when
    none was.
    """
    if len(argv) < 2 or (argv[1] == '-' and len(argv) > 2):
        print('Usage: %s {-|file [file] [file] ...}' % argv[0])
        return 1
    if argv[1] == '-':
        # xargs-like mode: the slice makes a copy, so the caller's list is
        # left untouched.
        argv = argv[:1]
        argv.extend(l.rstrip() for l in sys.stdin.readlines())
    localmodpaths = {}
    used_imports = {}
    any_errors = False
    for source_path in argv[1:]:
        modname = dotted_name_of_path(source_path)
        localmodpaths[modname] = source_path
    localmods = populateextmods(localmodpaths)
    for localmodname, source_path in sorted(localmodpaths.items()):
        if not isinstance(localmodname, bytes):
            # This is only safe because all hg's files are ascii
            localmodname = localmodname.encode('ascii')
        for src, modname, name, line in sources(source_path, localmodname):
            try:
                used_imports[modname] = sorted(
                    imported_modules(
                        src, modname, name, localmods, ignore_nested=True
                    )
                )
                for error, lineno in verify_import_convention(
                    modname, src, localmods
                ):
                    any_errors = True
                    print('%s:%d: %s' % (source_path, lineno + line, error))
            except SyntaxError as e:
                # A SyntaxError may carry no line number; fall back to the
                # snippet's own offset instead of failing on None + int.
                # Syntax errors are reported but, as before, do not by
                # themselves change the return value.
                print(
                    '%s:%d: SyntaxError: %s'
                    % (source_path, (e.lineno or 0) + line, e)
                )
    cycles = find_cycles(used_imports)
    if cycles:
        firstmods = set()
        for c in sorted(cycles, key=_cycle_sortkey):
            first = c.split()[0]
            # As a rough cut, ignore any cycle that starts with the
            # same module as some other cycle. Otherwise we see lots
            # of cycles that are effectively duplicates.
            if first in firstmods:
                continue
            print('Import cycle:', c)
            firstmods.add(first)
        any_errors = True
    return any_errors
Augie Fackler
contrib: add an import checker...
r20036
Augie Fackler
style: run a patched black on a subset of mercurial...
r43345
Augie Fackler
contrib: add an import checker...
# Script entry point: run the checker on the command-line arguments and use
# main()'s result, converted to int, as the process exit status.
if __name__ == '__main__':
    sys.exit(int(main(sys.argv)))