##// END OF EJS Templates
check-code: catch Python 'is' comparing number or string literals...
Adrian Buehlmann -
r13026:53391819 default
parent child Browse files
Show More
@@ -1,300 +1,301 b''
1 1 #!/usr/bin/env python
2 2 #
3 3 # check-code - a style and portability checker for Mercurial
4 4 #
5 5 # Copyright 2010 Matt Mackall <mpm@selenic.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import re, glob, os, sys
11 11 import optparse
12 12
13 13 def repquote(m):
14 14 t = re.sub(r"\w", "x", m.group('text'))
15 15 t = re.sub(r"[^\sx]", "o", t)
16 16 return m.group('quote') + t + m.group('quote')
17 17
18 18 def reppython(m):
19 19 comment = m.group('comment')
20 20 if comment:
21 21 return "#" * len(comment)
22 22 return repquote(m)
23 23
24 24 def repcomment(m):
25 25 return m.group(1) + "#" * len(m.group(2))
26 26
27 27 def repccomment(m):
28 28 t = re.sub(r"((?<=\n) )|\S", "x", m.group(2))
29 29 return m.group(1) + t + "*/"
30 30
31 31 def repcallspaces(m):
32 32 t = re.sub(r"\n\s+", "\n", m.group(2))
33 33 return m.group(1) + t
34 34
35 35 def repinclude(m):
36 36 return m.group(1) + "<foo>"
37 37
38 38 def rephere(m):
39 39 t = re.sub(r"\S", "x", m.group(2))
40 40 return m.group(1) + t
41 41
42 42
43 43 testpats = [
44 44 (r'(pushd|popd)', "don't use 'pushd' or 'popd', use 'cd'"),
45 45 (r'\W\$?\(\([^\)]*\)\)', "don't use (()) or $(()), use 'expr'"),
46 46 (r'^function', "don't use 'function', use old style"),
47 47 (r'grep.*-q', "don't use 'grep -q', redirect to /dev/null"),
48 48 (r'echo.*\\n', "don't use 'echo \\n', use printf"),
49 49 (r'echo -n', "don't use 'echo -n', use printf"),
50 50 (r'^diff.*-\w*N', "don't use 'diff -N'"),
51 51 (r'(^| )wc[^|]*$', "filter wc output"),
52 52 (r'head -c', "don't use 'head -c', use 'dd'"),
53 53 (r'ls.*-\w*R', "don't use 'ls -R', use 'find'"),
54 54 (r'printf.*\\\d\d\d', "don't use 'printf \NNN', use Python"),
55 55 (r'printf.*\\x', "don't use printf \\x, use Python"),
56 56 (r'\$\(.*\)', "don't use $(expr), use `expr`"),
57 57 (r'rm -rf \*', "don't use naked rm -rf, target a directory"),
58 58 (r'(^|\|\s*)grep (-\w\s+)*[^|]*[(|]\w',
59 59 "use egrep for extended grep syntax"),
60 60 (r'/bin/', "don't use explicit paths for tools"),
61 61 (r'\$PWD', "don't use $PWD, use `pwd`"),
62 62 (r'[^\n]\Z', "no trailing newline"),
63 63 (r'export.*=', "don't export and assign at once"),
64 64 ('^([^"\']|("[^"]*")|(\'[^\']*\'))*\\^', "^ must be quoted"),
65 65 (r'^source\b', "don't use 'source', use '.'"),
66 66 (r'touch -d', "don't use 'touch -d', use 'touch -t' instead"),
67 67 (r'ls\s+[^-]+\s+-', "options to 'ls' must come before filenames"),
68 68 ]
69 69
70 70 testfilters = [
71 71 (r"( *)(#([^\n]*\S)?)", repcomment),
72 72 (r"<<(\S+)((.|\n)*?\n\1)", rephere),
73 73 ]
74 74
75 75 uprefix = r"^ \$ "
76 76 uprefixc = r"^ > "
77 77 utestpats = [
78 78 (r'^(\S| $ ).*(\S\s+|^\s+)\n', "trailing whitespace on non-output"),
79 79 (uprefix + r'.*\|\s*sed', "use regex test output patterns instead of sed"),
80 80 (uprefix + r'(true|exit 0)', "explicit zero exit unnecessary"),
81 81 (uprefix + r'.*\$\?', "explicit exit code checks unnecessary"),
82 82 (uprefix + r'.*\|\| echo.*(fail|error)',
83 83 "explicit exit code checks unnecessary"),
84 84 (uprefix + r'set -e', "don't use set -e"),
85 85 (uprefixc + r'( *)\t', "don't use tabs to indent"),
86 86 ]
87 87
88 88 for p, m in testpats:
89 89 if p.startswith('^'):
90 90 p = uprefix + p[1:]
91 91 else:
92 92 p = uprefix + p
93 93 utestpats.append((p, m))
94 94
95 95 utestfilters = [
96 96 (r"( *)(#([^\n]*\S)?)", repcomment),
97 97 ]
98 98
99 99 pypats = [
100 100 (r'^\s*def\s*\w+\s*\(.*,\s*\(',
101 101 "tuple parameter unpacking not available in Python 3+"),
102 102 (r'lambda\s*\(.*,.*\)',
103 103 "tuple parameter unpacking not available in Python 3+"),
104 104 (r'(?<!def)\s+(cmp)\(', "cmp is not available in Python 3+"),
105 105 (r'\breduce\s*\(.*', "reduce is not available in Python 3+"),
106 106 (r'\.has_key\b', "dict.has_key is not available in Python 3+"),
107 107 (r'^\s*\t', "don't use tabs"),
108 108 (r'\S;\s*\n', "semicolon"),
109 109 (r'\w,\w', "missing whitespace after ,"),
110 110 (r'\w[+/*\-<>]\w', "missing whitespace in expression"),
111 111 (r'^\s+\w+=\w+[^,)]$', "missing whitespace in assignment"),
112 112 (r'.{85}', "line too long"),
113 113 (r'.{81}', "warning: line over 80 characters"),
114 114 (r'[^\n]\Z', "no trailing newline"),
115 115 (r'(\S\s+|^\s+)\n', "trailing whitespace"),
116 116 # (r'^\s+[^_ ][^_. ]+_[^_]+\s*=', "don't use underbars in identifiers"),
117 117 # (r'\w*[a-z][A-Z]\w*\s*=', "don't use camelcase in identifiers"),
118 118 (r'^\s*(if|while|def|class|except|try)\s[^[]*:\s*[^\]#\s]+',
119 119 "linebreak after :"),
120 120 (r'class\s[^(]:', "old-style class, use class foo(object)"),
121 121 (r'^\s+del\(', "del isn't a function"),
122 122 (r'\band\(', "and isn't a function"),
123 123 (r'\bor\(', "or isn't a function"),
124 124 (r'\bnot\(', "not isn't a function"),
125 125 (r'^\s+except\(', "except isn't a function"),
126 126 (r',]', "unneeded trailing ',' in list"),
127 127 # (r'class\s[A-Z][^\(]*\((?!Exception)',
128 128 # "don't capitalize non-exception classes"),
129 129 # (r'in range\(', "use xrange"),
130 130 # (r'^\s*print\s+', "avoid using print in core and extensions"),
131 131 (r'[\x80-\xff]', "non-ASCII character literal"),
132 132 (r'("\')\.format\(', "str.format() not available in Python 2.4"),
133 133 (r'^\s*with\s+', "with not available in Python 2.4"),
134 134 (r'(?<!def)\s+(any|all|format)\(',
135 135 "any/all/format not available in Python 2.4"),
136 136 (r'(?<!def)\s+(callable)\(',
137 137 "callable not available in Python 3, use hasattr(f, '__call__')"),
138 138 (r'if\s.*\selse', "if ... else form not available in Python 2.4"),
139 139 (r'([\(\[]\s\S)|(\S\s[\)\]])', "gratuitous whitespace in () or []"),
140 140 # (r'\s\s=', "gratuitous whitespace before ="),
141 141 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\S',
142 142 "missing whitespace around operator"),
143 143 (r'[^>< ](\+=|-=|!=|<>|<=|>=|<<=|>>=)\s',
144 144 "missing whitespace around operator"),
145 145 (r'\s(\+=|-=|!=|<>|<=|>=|<<=|>>=)\S',
146 146 "missing whitespace around operator"),
147 147 (r'[^+=*!<>&| -](\s=|=\s)[^= ]',
148 148 "wrong whitespace around ="),
149 149 (r'raise Exception', "don't raise generic exceptions"),
150 150 (r'ui\.(status|progress|write|note|warn)\([\'\"]x',
151 151 "warning: unwrapped ui message"),
152 (r' is\s+(not\s+)?["\'0-9-]', "object comparison with literal"),
152 153 ]
153 154
154 155 pyfilters = [
155 156 (r"""(?msx)(?P<comment>\#.*?$)|
156 157 ((?P<quote>('''|\"\"\"|(?<!')'(?!')|(?<!")"(?!")))
157 158 (?P<text>(([^\\]|\\.)*?))
158 159 (?P=quote))""", reppython),
159 160 ]
160 161
161 162 cpats = [
162 163 (r'//', "don't use //-style comments"),
163 164 (r'^ ', "don't use spaces to indent"),
164 165 (r'\S\t', "don't use tabs except for indent"),
165 166 (r'(\S\s+|^\s+)\n', "trailing whitespace"),
166 167 (r'.{85}', "line too long"),
167 168 (r'(while|if|do|for)\(', "use space after while/if/do/for"),
168 169 (r'return\(', "return is not a function"),
169 170 (r' ;', "no space before ;"),
170 171 (r'\w+\* \w+', "use int *foo, not int* foo"),
171 172 (r'\([^\)]+\) \w+', "use (int)foo, not (int) foo"),
172 173 (r'\S+ (\+\+|--)', "use foo++, not foo ++"),
173 174 (r'\w,\w', "missing whitespace after ,"),
174 175 (r'\w[+/*]\w', "missing whitespace in expression"),
175 176 (r'^#\s+\w', "use #foo, not # foo"),
176 177 (r'[^\n]\Z', "no trailing newline"),
177 178 ]
178 179
179 180 cfilters = [
180 181 (r'(/\*)(((\*(?!/))|[^*])*)\*/', repccomment),
181 182 (r'''(?P<quote>(?<!")")(?P<text>([^"]|\\")+)"(?!")''', repquote),
182 183 (r'''(#\s*include\s+<)([^>]+)>''', repinclude),
183 184 (r'(\()([^)]+\))', repcallspaces),
184 185 ]
185 186
186 187 checks = [
187 188 ('python', r'.*\.(py|cgi)$', pyfilters, pypats),
188 189 ('test script', r'(.*/)?test-[^.~]*$', testfilters, testpats),
189 190 ('c', r'.*\.c$', cfilters, cpats),
190 191 ('unified test', r'.*\.t$', utestfilters, utestpats),
191 192 ]
192 193
193 194 class norepeatlogger(object):
194 195 def __init__(self):
195 196 self._lastseen = None
196 197
197 198 def log(self, fname, lineno, line, msg, blame):
198 199 """print error related a to given line of a given file.
199 200
200 201 The faulty line will also be printed but only once in the case
201 202 of multiple errors.
202 203
203 204 :fname: filename
204 205 :lineno: line number
205 206 :line: actual content of the line
206 207 :msg: error message
207 208 """
208 209 msgid = fname, lineno, line
209 210 if msgid != self._lastseen:
210 211 if blame:
211 212 print "%s:%d (%s):" % (fname, lineno, blame)
212 213 else:
213 214 print "%s:%d:" % (fname, lineno)
214 215 print " > %s" % line
215 216 self._lastseen = msgid
216 217 print " " + msg
217 218
218 219 _defaultlogger = norepeatlogger()
219 220
220 221 def getblame(f):
221 222 lines = []
222 223 for l in os.popen('hg annotate -un %s' % f):
223 224 start, line = l.split(':', 1)
224 225 user, rev = start.split()
225 226 lines.append((line[1:-1], user, rev))
226 227 return lines
227 228
228 229 def checkfile(f, logfunc=_defaultlogger.log, maxerr=None, warnings=False,
229 230 blame=False):
230 231 """checks style and portability of a given file
231 232
232 233 :f: filepath
233 234 :logfunc: function used to report error
234 235 logfunc(filename, linenumber, linecontent, errormessage)
235 236 :maxerr: number of error to display before arborting.
236 237 Set to None (default) to report all errors
237 238
238 239 return True if no error is found, False otherwise.
239 240 """
240 241 blamecache = None
241 242 result = True
242 243 for name, match, filters, pats in checks:
243 244 fc = 0
244 245 if not re.match(match, f):
245 246 continue
246 247 pre = post = open(f).read()
247 248 if "no-" + "check-code" in pre:
248 249 break
249 250 for p, r in filters:
250 251 post = re.sub(p, r, post)
251 252 # print post # uncomment to show filtered version
252 253 z = enumerate(zip(pre.splitlines(), post.splitlines(True)))
253 254 for n, l in z:
254 255 if "check-code" + "-ignore" in l[0]:
255 256 continue
256 257 for p, msg in pats:
257 258 if not warnings and msg.startswith("warning"):
258 259 continue
259 260 if re.search(p, l[1]):
260 261 bd = ""
261 262 if blame:
262 263 bd = 'working directory'
263 264 if not blamecache:
264 265 blamecache = getblame(f)
265 266 if n < len(blamecache):
266 267 bl, bu, br = blamecache[n]
267 268 if bl == l[0]:
268 269 bd = '%s@%s' % (bu, br)
269 270 logfunc(f, n + 1, l[0], msg, bd)
270 271 fc += 1
271 272 result = False
272 273 if maxerr is not None and fc >= maxerr:
273 274 print " (too many errors, giving up)"
274 275 break
275 276 break
276 277 return result
277 278
278 279 if __name__ == "__main__":
279 280 parser = optparse.OptionParser("%prog [options] [files]")
280 281 parser.add_option("-w", "--warnings", action="store_true",
281 282 help="include warning-level checks")
282 283 parser.add_option("-p", "--per-file", type="int",
283 284 help="max warnings per file")
284 285 parser.add_option("-b", "--blame", action="store_true",
285 286 help="use annotate to generate blame info")
286 287
287 288 parser.set_defaults(per_file=15, warnings=False, blame=False)
288 289 (options, args) = parser.parse_args()
289 290
290 291 if len(args) == 0:
291 292 check = glob.glob("*")
292 293 else:
293 294 check = args
294 295
295 296 for f in check:
296 297 ret = 0
297 298 if not checkfile(f, maxerr=options.per_file, warnings=options.warnings,
298 299 blame=options.blame):
299 300 ret = 1
300 301 sys.exit(ret)
@@ -1,54 +1,95 b''
1 1 $ cat > correct.py <<EOF
2 2 > def toto(arg1, arg2):
3 3 > del arg2
4 4 > return (5 + 6, 9)
5 5 > EOF
6 6 $ cat > wrong.py <<EOF
7 7 > def toto( arg1, arg2):
8 8 > del(arg2)
9 9 > return ( 5+6, 9)
10 10 > EOF
11 11 $ cat > quote.py <<EOF
12 12 > # let's use quote in comments
13 13 > (''' ( 4x5 )
14 14 > but """\\''' and finally''',
15 15 > """let's fool checkpatch""", '1+2',
16 16 > '"""', 42+1, """and
17 17 > ( 4-1 ) """, "( 1+1 )\" and ")
18 18 > a, '\\\\\\\\', "\\\\\\" x-2", "c-1"
19 19 > EOF
20 20 $ cat > non-py24.py <<EOF
21 21 > # Using builtins that does not exist in Python 2.4
22 22 > if any():
23 23 > x = all()
24 24 > y = format(x)
25 25 >
26 26 > # Do not complain about our own definition
27 27 > def any(x):
28 28 > pass
29 29 > EOF
30 30 $ check_code="$TESTDIR"/../contrib/check-code.py
31 31 $ "$check_code" ./wrong.py ./correct.py ./quote.py ./non-py24.py
32 32 ./wrong.py:1:
33 33 > def toto( arg1, arg2):
34 34 gratuitous whitespace in () or []
35 35 ./wrong.py:2:
36 36 > del(arg2)
37 37 del isn't a function
38 38 ./wrong.py:3:
39 39 > return ( 5+6, 9)
40 40 missing whitespace in expression
41 41 gratuitous whitespace in () or []
42 42 ./quote.py:5:
43 43 > '"""', 42+1, """and
44 44 missing whitespace in expression
45 45 ./non-py24.py:2:
46 46 > if any():
47 47 any/all/format not available in Python 2.4
48 48 ./non-py24.py:3:
49 49 > x = all()
50 50 any/all/format not available in Python 2.4
51 51 ./non-py24.py:4:
52 52 > y = format(x)
53 53 any/all/format not available in Python 2.4
54 54 [1]
55
56 $ cat > is-op.py <<EOF
57 > # is-operator comparing number or string literal
58 > x = None
59 > y = x is 'foo'
60 > y = x is "foo"
61 > y = x is 5346
62 > y = x is -6
63 > y = x is not 'foo'
64 > y = x is not "foo"
65 > y = x is not 5346
66 > y = x is not -6
67 > EOF
68
69 $ "$check_code" ./is-op.py
70 ./is-op.py:3:
71 > y = x is 'foo'
72 object comparison with literal
73 ./is-op.py:4:
74 > y = x is "foo"
75 object comparison with literal
76 ./is-op.py:5:
77 > y = x is 5346
78 object comparison with literal
79 ./is-op.py:6:
80 > y = x is -6
81 object comparison with literal
82 ./is-op.py:7:
83 > y = x is not 'foo'
84 object comparison with literal
85 ./is-op.py:8:
86 > y = x is not "foo"
87 object comparison with literal
88 ./is-op.py:9:
89 > y = x is not 5346
90 object comparison with literal
91 ./is-op.py:10:
92 > y = x is not -6
93 object comparison with literal
94 [1]
95
General Comments 0
You need to be logged in to leave comments. Login now