##// END OF EJS Templates
check-code: do not skip entire file, skip only one match instead...
Simon Heimberg -
r20238:81e90579 default
parent child Browse files
Show More
@@ -1,545 +1,545 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 #
2 #
3 # check-code - a style and portability checker for Mercurial
3 # check-code - a style and portability checker for Mercurial
4 #
4 #
5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 import re, glob, os, sys
10 import re, glob, os, sys
11 import keyword
11 import keyword
12 import optparse
12 import optparse
13 try:
13 try:
14 import re2
14 import re2
15 except ImportError:
15 except ImportError:
16 re2 = None
16 re2 = None
17
17
def compilere(pat, multiline=False):
    """Compile the regular expression ``pat`` and return the pattern object.

    :pat: regular expression source
    :multiline: when true, '(?m)' is prepended to ``pat`` before compiling

    If the optional re2 module was imported, it is tried first; when it
    rejects the expression (re2.error) the standard re module is used.
    """
    if multiline:
        pat = '(?m)' + pat
    if not re2:
        return re.compile(pat)
    try:
        return re2.compile(pat)
    except re2.error:
        # re2 could not handle this expression, use the stdlib engine
        return re.compile(pat)
27
27
def repquote(m):
    """Replace the body of a quoted string by a summary of its characters.

    ``m`` is a match object with the named groups 'quote' and 'text'.
    Each character of 'text' is translated as follows:

    - space and newline are kept
    - '.' becomes 'p' and ':' becomes 'q'
    - letters become 'x', digits become 'n'
    - every other character becomes 'o'

    The translated text is returned between the original quote characters.
    """
    # characters with a fixed replacement; looked up before the generic
    # letter/digit classification
    fixedmap = {' ': ' ', '\n': '\n', '.': 'p', ':': 'q'}
    def encodechr(i):
        c = chr(i)
        if c in fixedmap:
            return fixedmap[c]
        if c.isalpha():
            return 'x'
        if c.isdigit():
            return 'n'
        # An explicit fallback is required here: indexing the replacement
        # string with the result of str.find() silently picked its last
        # character for unknown input, because find() returns -1 instead
        # of raising.
        return 'o'
    t = m.group('text')
    # 256 entry translation table, indexed by character code
    tt = ''.join(encodechr(i) for i in range(256))
    t = t.translate(tt)
    return m.group('quote') + t + m.group('quote')
49
49
def reppython(m):
    """Filter callback for Python sources: mask a comment or a string.

    When the 'comment' group of ``m`` is non-empty, the comment is replaced
    by '#' characters up to its last non-whitespace character and any
    trailing whitespace is kept as is. Otherwise the match is handed to
    repquote().
    """
    comment = m.group('comment')
    if not comment:
        return repquote(m)
    # length of the comment without its trailing whitespace
    kept = len(comment.rstrip())
    return "#" * kept + comment[kept:]
56
56
def repcomment(m):
    """Mask a comment: keep group 1 of ``m`` and replace group 2 by '#'.

    The result has the same length as the matched text; every character of
    group 2 is replaced by one '#'.
    """
    lead, comment = m.group(1, 2)
    return lead + "#" * len(comment)
59
59
def repccomment(m):
    """Mask the body of a C comment.

    Group 1 of ``m`` (the comment opener) is kept. In group 2 every
    non-whitespace character, and every space that directly follows a
    newline, is replaced by 'x'. The closing '*/' is appended again.
    """
    opener = m.group(1)
    body = m.group(2)
    masked = re.compile(r"((?<=\n) )|\S").sub("x", body)
    return "".join([opener, masked, "*/"])
63
63
def repcallspaces(m):
    """Remove the indentation of continuation lines inside parentheses.

    Group 1 of ``m`` is kept; in group 2 every newline followed by
    whitespace is reduced to the bare newline.
    """
    head, rest = m.group(1, 2)
    return head + re.sub(r"\n\s+", "\n", rest)
67
67
def repinclude(m):
    """Replace the name of an included header by a placeholder.

    Returns group 1 of ``m`` followed by the literal text '<foo>'.
    """
    prefix = m.group(1)
    return prefix + "<foo>"
70
70
def rephere(m):
    """Mask the content of a here-document.

    Returns group 1 of ``m`` followed by group 2 with every non-whitespace
    character replaced by 'x' (whitespace, including newlines, is kept).
    """
    marker = m.group(1)
    masked = re.sub(r"\S", "x", m.group(2))
    return marker + masked
74
74
75
75
# Checks for shell test scripts.
#
# The outer list has two members: patterns reported as errors, then patterns
# reported as warnings. Every entry is a tuple (regex, message); a third,
# optional member is a regex that suppresses the report when it matches the
# offending line.
testpats = [
  [
    (r'pushd|popd', "don't use 'pushd' or 'popd', use 'cd'"),
    (r'\W\$?\(\([^\)\n]*\)\)', "don't use (()) or $(()), use 'expr'"),
    (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
    (r'(?<!hg )grep.*-a', "don't use 'grep -a', use in-line python"),
    (r'sed.*-i', "don't use 'sed -i', use a temporary file"),
    (r'\becho\b.*\\n', "don't use 'echo \\n', use printf"),
    (r'echo -n', "don't use 'echo -n', use printf"),
    (r'(^| )wc[^|]*$\n(?!.*\(re\))', "filter wc output"),
    (r'head -c', "don't use 'head -c', use 'dd'"),
    (r'tail -n', "don't use the '-n' option to tail, just use '-<num>'"),
    (r'sha1sum', "don't use sha1sum, use $TESTDIR/md5sum.py"),
    (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
    # the backslash in the message is escaped explicitly: a bare '\N' is
    # not a valid escape sequence in a non-raw string literal
    (r'printf.*[^\\]\\([1-9]|0\d)', "don't use 'printf \\NNN', use Python"),
    (r'printf.*[^\\]\\x', "don't use printf \\x, use Python"),
    (r'\$\(.*\)', "don't use $(expr), use `expr`"),
    (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
    (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
     "use egrep for extended grep syntax"),
    (r'/bin/', "don't use explicit paths for tools"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'export.*=', "don't export and assign at once"),
    (r'^source\b', "don't use 'source', use '.'"),
    (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
    (r'ls +[^|\n-]+ +-', "options to 'ls' must come before filenames"),
    (r'[^>\n]>\s*\$HGRCPATH', "don't overwrite $HGRCPATH, append to it"),
    (r'^stop\(\)', "don't use 'stop' as a shell function name"),
    (r'(\[|\btest\b).*-e ', "don't use 'test -e', use 'test -f'"),
    (r'^alias\b.*=', "don't use alias, use a function"),
    (r'if\s*!', "don't use '!' to negate exit status"),
    (r'/dev/u?random', "don't use entropy, use /dev/zero"),
    (r'do\s*true;\s*done', "don't use true as loop body, use sleep 0"),
    (r'^( *)\t', "don't use tabs to indent"),
    (r'sed (-e )?\'(\d+|/[^/]*/)i(?!\\\n)',
     "put a backslash-escaped newline after sed 'i' command"),
  ],
  # warnings
  [
    (r'^function', "don't use 'function', use old style"),
    (r'^diff.*-\w*N', "don't use 'diff -N'"),
    (r'\$PWD|\${PWD}', "don't use $PWD, use `pwd`"),
    (r'^([^"\'\n]|("[^"\n]*")|(\'[^\'\n]*\'))*\^', "^ must be quoted"),
    (r'kill (`|\$\()', "don't use kill, use killdaemons.py")
  ]
]
122
122
# Filters for shell test scripts. Each entry is (regex, replacement
# function); the filters are applied in order with re.sub() before the
# patterns are searched, so that comments and here-documents are masked.
testfilters = [
    (r"( *)(#([^\n]*\S)?)", repcomment),
    (r"<<(\S+)((.|\n)*?\n\1)", rephere),
]
127
127
# message shared by all patterns that look for unglobbed paths in output
winglobmsg = "use (glob) to match Windows paths too"
# regex prefix matching the start of a command line in a unified test
# NOTE(review): confirm the amount of whitespace after '^' here and in the
# output patterns below against the .t file format
uprefix = r"^ \$ "
# Checks for unified tests (.t files): [errors, warnings]. Every entry is
# (regex, message) with an optional third member, a regex that suppresses
# the report when it matches the offending line.
utestpats = [
  [
    (r'^(\S.*|| [$>] .*)[ \t]\n', "trailing whitespace on non-output"),
    (uprefix + r'.*\|\s*sed[^|>\n]*\n',
     "use regex test output patterns instead of sed"),
    (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
    (uprefix + r'.*(?<!\[)\$\?', "explicit exit code checks unnecessary"),
    (uprefix + r'.*\|\| echo.*(fail|error)',
     "explicit exit code checks unnecessary"),
    (uprefix + r'set -e', "don't use set -e"),
    (uprefix + r'(\s|fi\b|done\b)', "use > for continued lines"),
    (r'^ saved backup bundle to \$TESTTMP.*\.hg$', winglobmsg),
    (r'^ changeset .* references (corrupted|missing) \$TESTTMP/.*[^)]$',
     winglobmsg),
    # the ignore expression is a regex too, so it is a raw string like the
    # patterns themselves
    (r'^ pulling from \$TESTTMP/.*[^)]$', winglobmsg,
     r'\$TESTTMP/unix-repo$'), # in test-issue1802.t which skipped on windows
    (r'^ reverting .*/.*[^)]$', winglobmsg),
    (r'^ cloning subrepo \S+/.*[^)]$', winglobmsg),
    (r'^ pushing to \$TESTTMP/.*[^)]$', winglobmsg),
    (r'^ pushing subrepo \S+/\S+ to.*[^)]$', winglobmsg),
    (r'^ moving \S+/.*[^)]$', winglobmsg),
    (r'^ no changes made to subrepo since.*/.*[^)]$', winglobmsg),
    (r'^ .*: largefile \S+ not available from file:.*/.*[^)]$', winglobmsg),
  ],
  # warnings
  [
    (r'^ [^*?/\n]* \(glob\)$',
     "glob match with no glob character (?*/)"),
  ]
]
160
160
# Every shell test pattern is also applied to the command lines of unified
# tests: each pattern is re-anchored behind the '$' or '>' line prefix and
# appended to the error (0) or warning (1) list of utestpats.
for i in [0, 1]:
    for p, m in testpats[i]:
        if p.startswith(r'^'):
            # pattern was anchored at the start of the line: it now has to
            # match directly after the prefix
            p = r"^ [$>] (%s)" % p[1:]
        else:
            # unanchored pattern: it may match anywhere after the prefix
            p = r"^ [$>] .*(%s)" % p
        utestpats[i].append((p, m))
168
168
# Filters for unified tests, as (regex, replacement function) pairs applied
# in order with re.sub(): here-documents are masked first, then comments.
utestfilters = [
    (r"<<(\S+)((.|\n)*?\n > \1)", rephere),
    (r"( *)(#([^\n]*\S)?)", repcomment),
]
173
173
# Checks for Python sources: [errors, warnings]. Every entry is
# (regex, message) with an optional third member, a regex that suppresses
# the report when it matches the offending line.
#
# The patterns are searched in the filtered text, in which comments are
# replaced by '#' and string contents by the character classes produced by
# repquote(); that is why some patterns look for 'x', 'o', 'p' or 'q'.
pypats = [
  [
    (r'^\s*def\s*\w+\s*\(.*,\s*\(',
     "tuple parameter unpacking not available in Python 3+"),
    (r'lambda\s*\(.*,.*\)',
     "tuple parameter unpacking not available in Python 3+"),
    (r'import (.+,[^.]+\.[^.]+|[^.]+\.[^.]+,)',
     '2to3 can\'t always rewrite "import qux, foo.bar", '
     'use "import foo.bar" on its own line instead.'),
    (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
    (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
    (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
    (r'\s<>\s', '<> operator is not available in Python 3+, use !='),
    (r'^\s*\t', "don't use tabs"),
    (r'\S;\s*\n', "semicolon"),
    (r'[^_]_\("[^"]+"\s*%', "don't use % inside _()"),
    (r"[^_]_\('[^']+'\s*%", "don't use % inside _()"),
    (r'(\w|\)),\w', "missing whitespace after ,"),
    (r'(\w|\))[+/*\-<>]\w', "missing whitespace in expression"),
    (r'^\s+(\w|\.)+=\w[^,()\n]*$', "missing whitespace in assignment"),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)+?)\1except.*?:\n'
     r'((?:\n|\1\s.*\n)+?)\1finally:', 'no try/except/finally in Python 2.4'),
    (r'(?<!def)(\s+|^|\()next\(.+\)',
     'no next(foo) in Python 2.4 and 2.5, use foo.next() instead'),
    (r'(\s+)try:\n((?:\n|\1\s.*\n)*?)\1\s*yield\b.*?'
     r'((?:\n|\1\s.*\n)+?)\1finally:',
     'no yield inside try/finally in Python 2.4'),
    (r'.{81}', "line too long"),
    (r' x+[xo][\'"]\n\s+[\'"]x', 'string join across lines with no space'),
    (r'[^\n]\Z', "no trailing newline"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
#    (r'^\s+[^_ \n][^_. \n]+_[^_\n]+\s*=',
#     "don't use underbars in identifiers"),
    # first character class is [A-Za-z]: the range 'A-z' would also accept
    # the punctuation characters between 'Z' and 'a' (e.g. '_')
    (r'^\s+(self\.)?[A-Za-z][a-z0-9]+[A-Z]\w* = ',
     "don't use camelcase in identifiers"),
    # NOTE(review): the tail '[^\\n]#\s]+' reads as a character class
    # followed by a literal '#', whitespace and ']'; confirm this is the
    # intended expression
    (r'^\s*(if|while|def|class|except|try)\s[^[\n]*:\s*[^\\n]#\s]+',
     "linebreak after :"),
    (r'class\s[^( \n]+:', "old-style class, use class foo(object)"),
    (r'class\s[^( \n]+\(\):',
     "class foo() not available in Python 2.4, use class foo(object)"),
    (r'\b(%s)\(' % '|'.join(keyword.kwlist),
     "Python keyword is not a function"),
    (r',]', "unneeded trailing ',' in list"),
#    (r'class\s[A-Z][^\(]*\((?!Exception)',
#     "don't capitalize non-exception classes"),
#    (r'in range\(', "use xrange"),
#    (r'^\s*print\s+', "avoid using print in core and extensions"),
    (r'[\x80-\xff]', "non-ASCII character literal"),
    # either kind of closing quote may precede .format(
    (r'("|\')\.format\(', "str.format() not available in Python 2.4"),
    (r'^\s*with\s+', "with not available in Python 2.4"),
    (r'\.isdisjoint\(', "set.isdisjoint not available in Python 2.4"),
    (r'^\s*except.* as .*:', "except as not available in Python 2.4"),
    (r'^\s*os\.path\.relpath', "relpath not available in Python 2.4"),
    (r'(?<!def)\s+(any|all|format)\(',
     "any/all/format not available in Python 2.4", 'no-py24'),
    (r'(?<!def)\s+(callable)\(',
     "callable not available in Python 3, use getattr(f, '__call__', None)"),
    (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
    (r'^\s*(%s)\s\s' % '|'.join(keyword.kwlist),
     "gratuitous whitespace after Python keyword"),
    (r'([\(\[][ \t]\S)|(\S[ \t][\)\]])', "gratuitous whitespace in () or []"),
#    (r'\s\s=', "gratuitous whitespace before ="),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\s',
     "missing whitespace around operator"),
    (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=|%=)\S',
     "missing whitespace around operator"),
    (r'[^^+=*/!<>&| %-](\s=|=\s)[^= ]',
     "wrong whitespace around ="),
    (r'\([^()]*( =[^=]|[^<>!=]= )',
     "no whitespace around = for named parameters"),
    (r'raise Exception', "don't raise generic exceptions"),
    (r'raise [^,(]+, (\([^\)]+\)|[^,\(\)]+)$',
     "don't use old-style two-argument raise, use Exception(message)"),
    (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
    (r' [=!]=\s+(True|False|None)',
     "comparison with singleton, use 'is' or 'is not' instead"),
    (r'^\s*(while|if) [01]:',
     "use True/False for constant Boolean expression"),
    (r'(?:(?<!def)\s+|\()hasattr',
     'hasattr(foo, bar) is broken, use util.safehasattr(foo, bar) instead'),
    (r'opener\([^)]*\).read\(',
     "use opener.read() instead"),
    (r'BaseException', 'not in Python 2.4, use Exception'),
    (r'os\.path\.relpath', 'os.path.relpath is not in Python 2.5'),
    (r'opener\([^)]*\).write\(',
     "use opener.write() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.read\(',
     "use util.readfile() instead"),
    (r'[\s\(](open|file)\([^)]*\)\.write\(',
     "use util.writefile() instead"),
    (r'^[\s\(]*(open(er)?|file)\([^)]*\)',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'[\s\(](open|file)\([^)]*\)\.',
     "always assign an opened file to a variable, and close it afterwards"),
    (r'(?i)descendent', "the proper spelling is descendAnt"),
    (r'\.debug\(\_', "don't mark debug messages for translation"),
    (r'\.strip\(\)\.split\(\)', "no need to strip before splitting"),
    (r'^\s*except\s*:', "naked except clause", r'#.*re-raises'),
    # whole groups of four spaces are skipped first, so that only a
    # remainder of one to three spaces is reported
    (r':\n(    )*( ){1,3}[^ ]', "must indent 4 spaces"),
    (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
     "missing _() in ui message (use () to hide false-positives)"),
    (r'release\(.*wlock, .*lock\)', "wrong lock release order"),
  ],
  # warnings
  [
    (r'(^| )pp +xxxxqq[ \n][^\n]', "add two newlines after '.. note::'"),
  ]
]
284
284
# Filter for Python sources, as a (regex, replacement function) pair. The
# verbose expression matches either a comment (group 'comment') or a quoted
# string (groups 'quote' and 'text'); reppython masks whichever matched.
pyfilters = [
    (r"""(?msx)(?P<comment>\#.*?$)|
         ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
          (?P<text>(([^\\]|\\.)*?))
          (?P=quote))""", reppython),
]
291
291
# plain text files are checked unfiltered
txtfilters = []

# Checks for plain text files: [errors, warnings], entries are
# (regex, message) tuples.
txtpats = [
  [
    (r'\s$', 'trailing whitespace'),
  ],
  []
]
300
300
# Checks for C sources and headers: [errors, warnings]. Every entry is a
# (regex, message) tuple; the warning list is empty.
cpats = [
  [
    (r'//', "don't use //-style comments"),
    (r'^ ', "don't use spaces to indent"),
    (r'\S\t', "don't use tabs except for indent"),
    (r'(\S[ \t]+|^[ \t]+)\n', "trailing whitespace"),
    (r'.{81}', "line too long"),
    (r'(while|if|do|for)\(', "use space after while/if/do/for"),
    (r'return\(', "return is not a function"),
    (r' ;', "no space before ;"),
    (r'[)][{]', "space between ) and {"),
    (r'\w+\* \w+', "use int *foo, not int* foo"),
    (r'\W\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
    (r'\w+ (\+\+|--)', "use foo++, not foo ++"),
    (r'\w,\w', "missing whitespace after ,"),
    (r'^[^#]\w[+/*]\w', "missing whitespace in expression"),
    (r'^#\s+\w', "use #foo, not # foo"),
    (r'[^\n]\Z', "no trailing newline"),
    (r'^\s*#import\b', "use only #include in standard C code"),
  ],
  # warnings
  []
]
324
324
# Filters for C sources, as (regex, replacement function) pairs applied in
# order with re.sub(): block comments, double-quoted strings, the names of
# <...> includes and the whitespace of wrapped parenthesized expressions
# are masked or normalized before the patterns are searched.
cfilters = [
    (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
    (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
    (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
    (r'(\()([^)]+\))', repcallspaces),
]
331
331
# Layering check used for mercurial/util.py (see ``checks``):
# [errors, warnings] with (regex, message) entries.
inutilpats = [
  [
    (r'\bui\.', "don't use ui in util"),
  ],
  # warnings
  []
]

# Layering check used for mercurial/revlog.py (see ``checks``):
# [errors, warnings] with (regex, message) entries.
inrevlogpats = [
  [
    (r'\brepo\.', "don't use repo in revlog"),
  ],
  # warnings
  []
]
347
347
# All known checks. Every entry is a tuple
#   (name, filename regex, filters, patterns)
# A check is run on a file when its filename regex matches the file path
# with re.match(); several checks may apply to the same file. ``filters``
# is a list of (regex, replacement function) pairs and ``patterns`` a
# two member list [errors, warnings].
checks = [
    ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
    ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
    ('c', r'.*\.[ch]$', cfilters, cpats),
    ('unified test', r'.*\.t$', utestfilters, utestpats),
    ('layering violation repo in revlog', r'mercurial/revlog\.py', pyfilters,
     inrevlogpats),
    ('layering violation ui in util', r'mercurial/util\.py', pyfilters,
     inutilpats),
    ('txt', r'.*\.txt$', txtfilters, txtpats),
]
359
359
def _preparepats():
    """Compile the patterns and filters of all entries of ``checks`` in place.

    In every pattern list the regex source of each entry is adjusted for
    searching in a whole file at once and replaced by a pattern object
    compiled with re.MULTILINE; the remaining members of the entry are
    kept. The regex of every filter is compiled as well.
    """
    for check in checks:
        for group in check[-1]:
            for idx, entry in enumerate(group):
                source = entry[0]
                # an unescaped \s must not match a newline
                source = re.sub(r'(?<!\\)\\s', r'[ \\t]', source)
                # a negated character class must not match a newline either
                source = re.sub(r'(?<!\\)\[\^', r'[^\\n', source)

                compiled = re.compile(source, re.MULTILINE)
                group[idx] = (compiled,) + entry[1:]
        fltlist = check[2]
        for idx, flt in enumerate(fltlist):
            fltlist[idx] = (re.compile(flt[0]), flt[1])
_preparepats()
377
377
378 class norepeatlogger(object):
378 class norepeatlogger(object):
379 def __init__(self):
379 def __init__(self):
380 self._lastseen = None
380 self._lastseen = None
381
381
382 def log(self, fname, lineno, line, msg, blame):
382 def log(self, fname, lineno, line, msg, blame):
383 """print error related a to given line of a given file.
383 """print error related a to given line of a given file.
384
384
385 The faulty line will also be printed but only once in the case
385 The faulty line will also be printed but only once in the case
386 of multiple errors.
386 of multiple errors.
387
387
388 :fname: filename
388 :fname: filename
389 :lineno: line number
389 :lineno: line number
390 :line: actual content of the line
390 :line: actual content of the line
391 :msg: error message
391 :msg: error message
392 """
392 """
393 msgid = fname, lineno, line
393 msgid = fname, lineno, line
394 if msgid != self._lastseen:
394 if msgid != self._lastseen:
395 if blame:
395 if blame:
396 print "%s:%d (%s):" % (fname, lineno, blame)
396 print "%s:%d (%s):" % (fname, lineno, blame)
397 else:
397 else:
398 print "%s:%d:" % (fname, lineno)
398 print "%s:%d:" % (fname, lineno)
399 print " > %s" % line
399 print " > %s" % line
400 self._lastseen = msgid
400 self._lastseen = msgid
401 print " " + msg
401 print " " + msg
402
402
403 _defaultlogger = norepeatlogger()
403 _defaultlogger = norepeatlogger()
404
404
def getblame(f):
    """Return the annotate information of file ``f``.

    Runs 'hg annotate -un' on ``f`` and returns a list with one
    (line, user, rev) tuple per line of output, in output order.

    Raises ValueError when a line of output does not have the form
    '<user> <rev>:<content>'.
    """
    lines = []
    # NOTE(review): f is inserted into the shell command unquoted; file
    # names with spaces or shell metacharacters are not handled
    pipe = os.popen('hg annotate -un %s' % f)
    try:
        for l in pipe:
            start, line = l.split(':', 1)
            user, rev = start.split()
            # drop the first and the last character of the content
            # (presumably the space after ':' and the newline)
            lines.append((line[1:-1], user, rev))
    finally:
        # do not leave the pipe open, even if the output cannot be parsed
        pipe.close()
    return lines
412
412
413 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
413 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
414 blame=False, debug=False, lineno=True):
414 blame=False, debug=False, lineno=True):
415 """checks style and portability of a given file
415 """checks style and portability of a given file
416
416
417 :f: filepath
417 :f: filepath
418 :logfunc: function used to report error
418 :logfunc: function used to report error
419 logfunc(filename, linenumber, linecontent, errormessage)
419 logfunc(filename, linenumber, linecontent, errormessage)
420 :maxerr: number of error to display before aborting.
420 :maxerr: number of error to display before aborting.
421 Set to false (default) to report all errors
421 Set to false (default) to report all errors
422
422
423 return True if no error is found, False otherwise.
423 return True if no error is found, False otherwise.
424 """
424 """
425 blamecache = None
425 blamecache = None
426 result = True
426 result = True
427 for name, match, filters, pats in checks:
427 for name, match, filters, pats in checks:
428 if debug:
428 if debug:
429 print name, f
429 print name, f
430 fc = 0
430 fc = 0
431 if not re.match(match, f):
431 if not re.match(match, f):
432 if debug:
432 if debug:
433 print "Skipping %s for %s it doesn't match %s" % (
433 print "Skipping %s for %s it doesn't match %s" % (
434 name, match, f)
434 name, match, f)
435 continue
435 continue
436 try:
436 try:
437 fp = open(f)
437 fp = open(f)
438 except IOError, e:
438 except IOError, e:
439 print "Skipping %s, %s" % (f, str(e).split(':', 1)[0])
439 print "Skipping %s, %s" % (f, str(e).split(':', 1)[0])
440 continue
440 continue
441 pre = post = fp.read()
441 pre = post = fp.read()
442 fp.close()
442 fp.close()
443 if "no-" "check-code" in pre:
443 if "no-" "check-code" in pre:
444 if debug:
444 if debug:
445 print "Skipping %s for %s it has no-" "check-code" % (
445 print "Skipping %s for %s it has no-" "check-code" % (
446 name, f)
446 name, f)
447 break
447 break
448 for p, r in filters:
448 for p, r in filters:
449 post = re.sub(p, r, post)
449 post = re.sub(p, r, post)
450 nerrs = len(pats[0]) # nerr elements are errors
450 nerrs = len(pats[0]) # nerr elements are errors
451 if warnings:
451 if warnings:
452 pats = pats[0] + pats[1]
452 pats = pats[0] + pats[1]
453 else:
453 else:
454 pats = pats[0]
454 pats = pats[0]
455 # print post # uncomment to show filtered version
455 # print post # uncomment to show filtered version
456
456
457 if debug:
457 if debug:
458 print "Checking %s for %s" % (name, f)
458 print "Checking %s for %s" % (name, f)
459
459
460 prelines = None
460 prelines = None
461 errors = []
461 errors = []
462 for i, pat in enumerate(pats):
462 for i, pat in enumerate(pats):
463 if len(pat) == 3:
463 if len(pat) == 3:
464 p, msg, ignore = pat
464 p, msg, ignore = pat
465 else:
465 else:
466 p, msg = pat
466 p, msg = pat
467 ignore = None
467 ignore = None
468 if i >= nerrs:
468 if i >= nerrs:
469 msg = "warning: " + msg
469 msg = "warning: " + msg
470
470
471 pos = 0
471 pos = 0
472 n = 0
472 n = 0
473 for m in p.finditer(post):
473 for m in p.finditer(post):
474 if prelines is None:
474 if prelines is None:
475 prelines = pre.splitlines()
475 prelines = pre.splitlines()
476 postlines = post.splitlines(True)
476 postlines = post.splitlines(True)
477
477
478 start = m.start()
478 start = m.start()
479 while n < len(postlines):
479 while n < len(postlines):
480 step = len(postlines[n])
480 step = len(postlines[n])
481 if pos + step > start:
481 if pos + step > start:
482 break
482 break
483 pos += step
483 pos += step
484 n += 1
484 n += 1
485 l = prelines[n]
485 l = prelines[n]
486
486
487 if "check-code" "-ignore" in l:
487 if "check-code" "-ignore" in l:
488 if debug:
488 if debug:
489 print "Skipping %s for %s:%s (check-code" "-ignore)" % (
489 print "Skipping %s for %s:%s (check-code" "-ignore)" % (
490 name, f, n)
490 name, f, n)
491 continue
491 continue
492 elif ignore and re.search(ignore, l, re.MULTILINE):
492 elif ignore and re.search(ignore, l, re.MULTILINE):
493 continue
493 continue
494 bd = ""
494 bd = ""
495 if blame:
495 if blame:
496 bd = 'working directory'
496 bd = 'working directory'
497 if not blamecache:
497 if not blamecache:
498 blamecache = getblame(f)
498 blamecache = getblame(f)
499 if n < len(blamecache):
499 if n < len(blamecache):
500 bl, bu, br = blamecache[n]
500 bl, bu, br = blamecache[n]
501 if bl == l:
501 if bl == l:
502 bd = '%s@%s' % (bu, br)
502 bd = '%s@%s' % (bu, br)
503
503
504 errors.append((f, lineno and n + 1, l, msg, bd))
504 errors.append((f, lineno and n + 1, l, msg, bd))
505 result = False
505 result = False
506
506
507 errors.sort()
507 errors.sort()
508 for e in errors:
508 for e in errors:
509 logfunc(*e)
509 logfunc(*e)
510 fc += 1
510 fc += 1
511 if maxerr and fc >= maxerr:
511 if maxerr and fc >= maxerr:
512 print " (too many errors, giving up)"
512 print " (too many errors, giving up)"
513 break
513 break
514
514
515 return result
515 return result
516
516
if __name__ == "__main__":
    # Command line front end: check every file given (or everything in the
    # current directory) and exit with 1 if any of them has a problem.
    parser = optparse.OptionParser("%prog [options] [files]")
    parser.add_option("-w", "--warnings", action="store_true",
                      help="include warning-level checks")
    parser.add_option("-p", "--per-file", type="int",
                      help="max warnings per file")
    parser.add_option("-b", "--blame", action="store_true",
                      help="use annotate to generate blame info")
    parser.add_option("", "--debug", action="store_true",
                      help="show debug information")
    parser.add_option("", "--nolineno", action="store_false",
                      dest='lineno', help="don't show line numbers")

    parser.set_defaults(per_file=15, warnings=False, blame=False, debug=False,
                        lineno=True)
    (options, args) = parser.parse_args()

    # without explicit arguments, check everything in the current directory
    targets = args or glob.glob("*")

    settings = dict(maxerr=options.per_file, warnings=options.warnings,
                    blame=options.blame, debug=options.debug,
                    lineno=options.lineno)
    exitcode = 0
    for target in targets:
        clean = checkfile(target, **settings)
        if not clean:
            # keep going: every file is checked even after a failure
            exitcode = 1
    sys.exit(exitcode)
@@ -1,244 +1,244 b''
1 import ast
1 import ast
2 import os
2 import os
3 import sys
3 import sys
4
4
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
5 # Import a minimal set of stdlib modules needed for list_stdlib_modules()
6 # to work when run from a virtualenv. The modules were chosen empirically
6 # to work when run from a virtualenv. The modules were chosen empirically
7 # so that the return value matches the return value without virtualenv.
7 # so that the return value matches the return value without virtualenv.
8 import BaseHTTPServer
8 import BaseHTTPServer
9 import zlib
9 import zlib
10
10
def dotted_name_of_path(path):
    """Given a relative path to a source file, return its dotted module name.

    Both '/' and the platform's own separator are accepted, and everything
    after the first '.' of the file name is treated as the extension.

    >>> dotted_name_of_path('mercurial/error.py')
    'mercurial.error'
    >>> dotted_name_of_path('zlib.so')
    'zlib'
    """
    # os.walk() based callers hand in os.sep separated paths
    parts = path.replace(os.sep, '/').split('/')
    # drop the extension, whatever its length (.py, .so, .ARCH.so, none)
    parts[-1] = parts[-1].split('.', 1)[0]
    return '.'.join(parts)
21
21
22
22
def list_stdlib_modules():
    """Yield the dotted names of the modules found in the stdlib.

    >>> mods = set(list_stdlib_modules())
    >>> 'BaseHTTPServer' in mods
    True

    os.path isn't really a module, so it's missing:

    >>> 'os.path' in mods
    False

    sys requires special treatment, because it's baked into the
    interpreter, but it should still appear:

    >>> 'sys' in mods
    True

    >>> 'collections' in mods
    True

    >>> 'cStringIO' in mods
    True
    """
    for builtin in sys.builtin_module_names:
        yield builtin
    # msvcrt and _winreg only exist on windows, but we should always
    # consider them stdlib; ctypes and email get missed by the walk below;
    # builtins is python3 only.
    for extra in ('msvcrt', '_winreg', 'ctypes', 'email', 'builtins'):
        yield extra
    roots = set([sys.prefix, sys.exec_prefix])
    # We need to supplement the list of prefixes for the search to work
    # when run from within a virtualenv.
    for probe in (BaseHTTPServer, zlib):
        # Not all module objects have a __file__ attribute.
        if not hasattr(probe, '__file__'):
            continue
        location = os.path.dirname(probe.__file__)
        covering = [r for r in roots if location.startswith(r)]
        if not covering:
            # no known prefix contains this directory yet
            roots.add(location)
    for libpath in sys.path:
        # We want to walk everything in sys.path that starts with
        # something in the collected prefixes. check-code suppressed
        # because the ast module used by this script implies the
        # availability of any().
        if not any(libpath.startswith(r) for r in roots): # no-py24
            continue
        if 'site-packages' in libpath:
            continue
        # length of "libpath" plus the separator following it
        skip = len(libpath) + 1
        for top, dirs, files in os.walk(libpath):
            for name in files:
                if name == '__init__.py':
                    continue
                if not name.endswith('.py') and not name.endswith('.so'):
                    continue
                full_path = os.path.join(top, name)
                if 'site-packages' in full_path:
                    continue
                yield dotted_name_of_path(full_path[skip:])
94
94
# Computed once when the module is loaded; the checks below only use it for
# membership tests to tell stdlib imports from local ones.
stdlib_modules = set(list_stdlib_modules())
96
96
def imported_modules(source, ignore_nested=False):
    """Given the source of a file as a string, yield the names
    imported by that file.

    Args:
      source: The python source to examine as a string.
      ignore_nested: If true, import statements that do not start in
                     column zero will be ignored.

    Returns:
      A generator of the module names imported by the given source.
      "import a.b" yields 'a.b', "from a import b" yields 'a.b' and
      "from . import b" yields 'b'.

    >>> sorted(imported_modules(
    ...         'import foo ; from baz import bar; import foo.qux'))
    ['baz.bar', 'foo', 'foo.qux']
    >>> sorted(imported_modules(
    ... '''import foo
    ... def wat():
    ...     import bar
    ... ''', ignore_nested=True))
    ['foo']
    >>> sorted(imported_modules('from . import foo'))
    ['foo']
    """
    for node in ast.walk(ast.parse(source)):
        # nodes without position information count as column zero
        if ignore_nested and getattr(node, 'col_offset', 0) > 0:
            continue
        if isinstance(node, ast.Import):
            for n in node.names:
                yield n.name
        elif isinstance(node, ast.ImportFrom):
            # node.module is None for "from . import x"; there is no
            # module prefix to prepend in that case
            prefix = ''
            if node.module:
                prefix = node.module + '.'
            for n in node.names:
                yield prefix + n.name
129
129
def verify_stdlib_on_own_line(source):
    """Given some python source, verify that stdlib imports are done
    in separate statements from relative local module imports.

    One message is yielded for every "import" statement that names both
    stdlib and non-stdlib modules.

    Observing this limitation is important as it works around an
    annoying lib2to3 bug in relative import rewrites:
    http://bugs.python.org/issue19510.

    >>> list(verify_stdlib_on_own_line('import sys, foo'))
    ['mixed stdlib and relative imports:\\n foo, sys']
    >>> list(verify_stdlib_on_own_line('import sys, os'))
    []
    >>> list(verify_stdlib_on_own_line('import foo, bar'))
    []
    """
    for node in ast.walk(ast.parse(source)):
        if not isinstance(node, ast.Import):
            continue
        # a name listed twice in one statement only counts once
        names = set([alias.name for alias in node.names])
        std = [name for name in names if name in stdlib_modules]
        # fine when either none or all of the names are stdlib
        if std and len(std) != len(names):
            yield ('mixed stdlib and relative imports:\n %s' %
                   ', '.join(sorted(names)))
154
154
class CircularImport(Exception):
    """Raised when an import cycle is found.

    The only argument is the cycle, as a list of module names.
    """
157
157
158
158
def cyclekey(names):
    """Return a hashable key for a cycle that ignores order and repeats."""
    unique = list(set(names))
    unique.sort()
    return tuple(unique)
161
161
def check_one_mod(mod, imports, path=None, ignore=None):
    """Follow the imports of mod depth first, looking for a cycle.

    mod: dotted name of the module to start from
    imports: dict mapping a module name to the names it imports
    path: modules already visited on the way to mod
    ignore: container of cycle keys (see cyclekey) not to report again

    Raises CircularImport with the cycle as its argument for the first
    cycle whose key is not in ignore.
    """
    if ignore is None:
        ignore = []
    if path is None:
        trail = [mod]
    else:
        trail = path + [mod]
    # names that are not stdlib are looked up next to mod
    package = mod.rsplit('.', 1)[0]
    for name in sorted(imports.get(mod, [])):
        if name in stdlib_modules:
            target = name
        else:
            target = package + '.' + name
        if target not in trail:
            check_one_mod(target, imports, path=trail, ignore=ignore)
            continue
        # target was already visited: the trail from there on is a cycle
        loop = trail[trail.index(target):] + [target]
        if cyclekey(loop) not in ignore:
            raise CircularImport(loop)
178
178
def rotatecycle(cycle):
    """arrange a cycle so that the lexicographically first module listed first

    The cycle may be closed (its first module repeated at the end) or
    open; the result is always closed.

    >>> rotatecycle(['foo', 'bar', 'foo'])
    ['bar', 'foo', 'bar']
    >>> rotatecycle(['bar', 'foo', 'bar'])
    ['bar', 'foo', 'bar']
    """
    lowest = min(cycle)
    # Rotate the open form: keeping the closing repeat would duplicate it
    # whenever the lowest module is already listed first.
    if len(cycle) > 1 and cycle[0] == cycle[-1]:
        members = cycle[:-1]
    else:
        members = cycle
    idx = members.index(lowest)
    return members[idx:] + members[:idx] + [lowest]

def find_cycles(imports):
    """Find cycles in an already-loaded import graph.

    imports maps a dotted module name to the names it imports. The
    return value holds one ' -> ' joined string per distinct cycle.

    >>> imports = {'top.foo': ['bar', 'os.path', 'qux'],
    ...            'top.bar': ['baz', 'sys'],
    ...            'top.baz': ['foo'],
    ...            'top.qux': ['foo']}
    >>> print '\\n'.join(sorted(find_cycles(imports)))
    top.bar -> top.baz -> top.foo -> top.bar
    top.foo -> top.qux -> top.foo
    """
    cycles = {}
    for mod in sorted(imports):
        try:
            # cycles already recorded are passed in so that they are not
            # raised again
            check_one_mod(mod, imports, ignore=cycles)
        except CircularImport:
            # sys.exc_info() is valid on every python version, unlike
            # either spelling of "except ... e"
            cycle = sys.exc_info()[1].args[0]
            cycles[cyclekey(cycle)] = ' -> '.join(rotatecycle(cycle))
    return cycles.values()
208
208
209 def _cycle_sortkey(c):
209 def _cycle_sortkey(c):
210 return len(c), c
210 return len(c), c
211
211
212 def main(argv):
212 def main(argv):
213 if len(argv) < 2:
213 if len(argv) < 2:
214 print 'Usage: %s file [file] [file] ...'
214 print 'Usage: %s file [file] [file] ...'
215 return 1
215 return 1
216 used_imports = {}
216 used_imports = {}
217 any_errors = False
217 any_errors = False
218 for source_path in argv[1:]:
218 for source_path in argv[1:]:
219 f = open(source_path)
219 f = open(source_path)
220 modname = dotted_name_of_path(source_path)
220 modname = dotted_name_of_path(source_path)
221 src = f.read()
221 src = f.read()
222 used_imports[modname] = sorted(
222 used_imports[modname] = sorted(
223 imported_modules(src, ignore_nested=True))
223 imported_modules(src, ignore_nested=True))
224 for error in verify_stdlib_on_own_line(src):
224 for error in verify_stdlib_on_own_line(src):
225 any_errors = True
225 any_errors = True
226 print source_path, error
226 print source_path, error
227 f.close()
227 f.close()
228 cycles = find_cycles(used_imports)
228 cycles = find_cycles(used_imports)
229 if cycles:
229 if cycles:
230 firstmods = set()
230 firstmods = set()
231 for c in sorted(cycles, key=_cycle_sortkey):
231 for c in sorted(cycles, key=_cycle_sortkey):
232 first = c.split()[0]
232 first = c.split()[0]
233 # As a rough cut, ignore any cycle that starts with the
233 # As a rough cut, ignore any cycle that starts with the
234 # same module as some other cycle. Otherwise we see lots
234 # same module as some other cycle. Otherwise we see lots
235 # of cycles that are effectively duplicates.
235 # of cycles that are effectively duplicates.
236 if first in firstmods:
236 if first in firstmods:
237 continue
237 continue
238 print 'Import cycle:', c
238 print 'Import cycle:', c
239 firstmods.add(first)
239 firstmods.add(first)
240 any_errors = True
240 any_errors = True
241 return not any_errors
241 return not any_errors
242
242
if __name__ == '__main__':
    # main() may return a bool; sys.exit() wants an integer status
    status = int(main(sys.argv))
    sys.exit(status)
General Comments 0
You need to be logged in to leave comments. Login now