##// END OF EJS Templates
contrib: drop py2 support from testparseutil.py
Matt Harbison -
r50770:cd3b8fd1 default
parent child Browse files
Show More
@@ -1,667 +1,657 b''
1 # testparseutil.py - utilities to parse test script for check tools
1 # testparseutil.py - utilities to parse test script for check tools
2 #
2 #
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
3 # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import abc
9 import abc
10 import builtins
10 import re
11 import re
11 import sys
12
12
13 ####################
13 ####################
14 # for Python3 compatibility (mostly taken from mercurial/pycompat.py)
14 # for Python3 compatibility (mostly taken from mercurial/pycompat.py)
15
15
16 ispy3 = sys.version_info[0] >= 3
17
18
16
19 def identity(a):
17 def identity(a):
20 return a
18 return a
21
19
22
20
23 def _rapply(f, xs):
21 def _rapply(f, xs):
24 if xs is None:
22 if xs is None:
25 # assume None means non-value of optional data
23 # assume None means non-value of optional data
26 return xs
24 return xs
27 if isinstance(xs, (list, set, tuple)):
25 if isinstance(xs, (list, set, tuple)):
28 return type(xs)(_rapply(f, x) for x in xs)
26 return type(xs)(_rapply(f, x) for x in xs)
29 if isinstance(xs, dict):
27 if isinstance(xs, dict):
30 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
28 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
31 return f(xs)
29 return f(xs)
32
30
33
31
34 def rapply(f, xs):
32 def rapply(f, xs):
35 if f is identity:
33 if f is identity:
36 # fast path mainly for py2
34 # fast path mainly for py2
37 return xs
35 return xs
38 return _rapply(f, xs)
36 return _rapply(f, xs)
39
37
40
38
41 if ispy3:
39 def bytestr(s):
42 import builtins
40 # tiny version of pycompat.bytestr
43
41 return s.encode('latin1')
44 def bytestr(s):
45 # tiny version of pycompat.bytestr
46 return s.encode('latin1')
47
48 def sysstr(s):
49 if isinstance(s, builtins.str):
50 return s
51 return s.decode('latin-1')
52
53 def opentext(f):
54 return open(f, 'r')
55
42
56
43
57 else:
44 def sysstr(s):
58 bytestr = str
45 if isinstance(s, builtins.str):
59 sysstr = identity
46 return s
47 return s.decode('latin-1')
60
48
61 opentext = open
49
50 def opentext(f):
51 return open(f, 'r')
62
52
63
53
64 def b2s(x):
54 def b2s(x):
65 # convert BYTES elements in "x" to SYSSTR recursively
55 # convert BYTES elements in "x" to SYSSTR recursively
66 return rapply(sysstr, x)
56 return rapply(sysstr, x)
67
57
68
58
69 def writeout(data):
59 def writeout(data):
70 # write "data" in BYTES into stdout
60 # write "data" in BYTES into stdout
71 sys.stdout.write(data)
61 sys.stdout.write(data)
72
62
73
63
74 def writeerr(data):
64 def writeerr(data):
75 # write "data" in BYTES into stderr
65 # write "data" in BYTES into stderr
76 sys.stderr.write(data)
66 sys.stderr.write(data)
77
67
78
68
79 ####################
69 ####################
80
70
81
71
82 class embeddedmatcher: # pytype: disable=ignored-metaclass
72 class embeddedmatcher: # pytype: disable=ignored-metaclass
83 """Base class to detect embedded code fragments in *.t test script"""
73 """Base class to detect embedded code fragments in *.t test script"""
84
74
85 __metaclass__ = abc.ABCMeta
75 __metaclass__ = abc.ABCMeta
86
76
87 def __init__(self, desc):
77 def __init__(self, desc):
88 self.desc = desc
78 self.desc = desc
89
79
90 @abc.abstractmethod
80 @abc.abstractmethod
91 def startsat(self, line):
81 def startsat(self, line):
92 """Examine whether embedded code starts at line
82 """Examine whether embedded code starts at line
93
83
94 This can return an arbitrary object, which is used as 'ctx' for
84 This can return an arbitrary object, which is used as 'ctx' for
95 subsequent method invocations.
85 subsequent method invocations.
96 """
86 """
97
87
98 @abc.abstractmethod
88 @abc.abstractmethod
99 def endsat(self, ctx, line):
89 def endsat(self, ctx, line):
100 """Examine whether embedded code ends at line"""
90 """Examine whether embedded code ends at line"""
101
91
102 @abc.abstractmethod
92 @abc.abstractmethod
103 def isinside(self, ctx, line):
93 def isinside(self, ctx, line):
104 """Examine whether line is inside embedded code, if not yet endsat"""
94 """Examine whether line is inside embedded code, if not yet endsat"""
105
95
106 @abc.abstractmethod
96 @abc.abstractmethod
107 def ignores(self, ctx):
97 def ignores(self, ctx):
108 """Examine whether detected embedded code should be ignored"""
98 """Examine whether detected embedded code should be ignored"""
109
99
110 @abc.abstractmethod
100 @abc.abstractmethod
111 def filename(self, ctx):
101 def filename(self, ctx):
112 """Return filename of embedded code
102 """Return filename of embedded code
113
103
114 If filename isn't specified for embedded code explicitly, this
104 If filename isn't specified for embedded code explicitly, this
115 returns None.
105 returns None.
116 """
106 """
117
107
118 @abc.abstractmethod
108 @abc.abstractmethod
119 def codeatstart(self, ctx, line):
109 def codeatstart(self, ctx, line):
120 """Return actual code at the start line of embedded code
110 """Return actual code at the start line of embedded code
121
111
122 This might return None, if the start line doesn't contain
112 This might return None, if the start line doesn't contain
123 actual code.
113 actual code.
124 """
114 """
125
115
126 @abc.abstractmethod
116 @abc.abstractmethod
127 def codeatend(self, ctx, line):
117 def codeatend(self, ctx, line):
128 """Return actual code at the end line of embedded code
118 """Return actual code at the end line of embedded code
129
119
130 This might return None, if the end line doesn't contain actual
120 This might return None, if the end line doesn't contain actual
131 code.
121 code.
132 """
122 """
133
123
134 @abc.abstractmethod
124 @abc.abstractmethod
135 def codeinside(self, ctx, line):
125 def codeinside(self, ctx, line):
136 """Return actual code at line inside embedded code"""
126 """Return actual code at line inside embedded code"""
137
127
138
128
139 def embedded(basefile, lines, errors, matchers):
129 def embedded(basefile, lines, errors, matchers):
140 """pick embedded code fragments up from given lines
130 """pick embedded code fragments up from given lines
141
131
142 This is common parsing logic, which examines specified matchers on
132 This is common parsing logic, which examines specified matchers on
143 given lines.
133 given lines.
144
134
145 :basefile: a name of a file, from which lines to be parsed come.
135 :basefile: a name of a file, from which lines to be parsed come.
146 :lines: to be parsed (might be a value returned by "open(basefile)")
136 :lines: to be parsed (might be a value returned by "open(basefile)")
147 :errors: an array, into which messages for detected errors are stored
137 :errors: an array, into which messages for detected errors are stored
148 :matchers: an array of embeddedmatcher objects
138 :matchers: an array of embeddedmatcher objects
149
139
150 This function yields '(filename, starts, ends, code)' tuple.
140 This function yields '(filename, starts, ends, code)' tuple.
151
141
152 :filename: a name of embedded code, if it is explicitly specified
142 :filename: a name of embedded code, if it is explicitly specified
153 (e.g. "foobar" of "cat >> foobar <<EOF").
143 (e.g. "foobar" of "cat >> foobar <<EOF").
154 Otherwise, this is None
144 Otherwise, this is None
155 :starts: line number (1-origin), at which embedded code starts (inclusive)
145 :starts: line number (1-origin), at which embedded code starts (inclusive)
156 :ends: line number (1-origin), at which embedded code ends (exclusive)
146 :ends: line number (1-origin), at which embedded code ends (exclusive)
157 :code: extracted embedded code, which is single-stringified
147 :code: extracted embedded code, which is single-stringified
158
148
159 >>> class ambigmatcher:
149 >>> class ambigmatcher:
160 ... # mock matcher class to examine implementation of
150 ... # mock matcher class to examine implementation of
161 ... # "ambiguous matching" corner case
151 ... # "ambiguous matching" corner case
162 ... def __init__(self, desc, matchfunc):
152 ... def __init__(self, desc, matchfunc):
163 ... self.desc = desc
153 ... self.desc = desc
164 ... self.matchfunc = matchfunc
154 ... self.matchfunc = matchfunc
165 ... def startsat(self, line):
155 ... def startsat(self, line):
166 ... return self.matchfunc(line)
156 ... return self.matchfunc(line)
167 >>> ambig1 = ambigmatcher('ambiguous #1',
157 >>> ambig1 = ambigmatcher('ambiguous #1',
168 ... lambda l: l.startswith(' $ cat '))
158 ... lambda l: l.startswith(' $ cat '))
169 >>> ambig2 = ambigmatcher('ambiguous #2',
159 >>> ambig2 = ambigmatcher('ambiguous #2',
170 ... lambda l: l.endswith('<< EOF\\n'))
160 ... lambda l: l.endswith('<< EOF\\n'))
171 >>> lines = [' $ cat > foo.py << EOF\\n']
161 >>> lines = [' $ cat > foo.py << EOF\\n']
172 >>> errors = []
162 >>> errors = []
173 >>> matchers = [ambig1, ambig2]
163 >>> matchers = [ambig1, ambig2]
174 >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
164 >>> list(t for t in embedded('<dummy>', lines, errors, matchers))
175 []
165 []
176 >>> b2s(errors)
166 >>> b2s(errors)
177 ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
167 ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']
178
168
179 """
169 """
180 matcher = None
170 matcher = None
181 ctx = filename = code = startline = None # for pyflakes
171 ctx = filename = code = startline = None # for pyflakes
182
172
183 for lineno, line in enumerate(lines, 1):
173 for lineno, line in enumerate(lines, 1):
184 if not line.endswith('\n'):
174 if not line.endswith('\n'):
185 line += '\n' # to normalize EOF line
175 line += '\n' # to normalize EOF line
186 if matcher: # now, inside embedded code
176 if matcher: # now, inside embedded code
187 if matcher.endsat(ctx, line):
177 if matcher.endsat(ctx, line):
188 codeatend = matcher.codeatend(ctx, line)
178 codeatend = matcher.codeatend(ctx, line)
189 if codeatend is not None:
179 if codeatend is not None:
190 code.append(codeatend)
180 code.append(codeatend)
191 if not matcher.ignores(ctx):
181 if not matcher.ignores(ctx):
192 yield (filename, startline, lineno, ''.join(code))
182 yield (filename, startline, lineno, ''.join(code))
193 matcher = None
183 matcher = None
194 # DO NOT "continue", because line might start next fragment
184 # DO NOT "continue", because line might start next fragment
195 elif not matcher.isinside(ctx, line):
185 elif not matcher.isinside(ctx, line):
196 # this is an error of basefile
186 # this is an error of basefile
197 # (if matchers are implemented correctly)
187 # (if matchers are implemented correctly)
198 errors.append(
188 errors.append(
199 '%s:%d: unexpected line for "%s"'
189 '%s:%d: unexpected line for "%s"'
200 % (basefile, lineno, matcher.desc)
190 % (basefile, lineno, matcher.desc)
201 )
191 )
202 # stop extracting embedded code by current 'matcher',
192 # stop extracting embedded code by current 'matcher',
203 # because appearance of unexpected line might mean
193 # because appearance of unexpected line might mean
204 # that expected end-of-embedded-code line might never
194 # that expected end-of-embedded-code line might never
205 # appear
195 # appear
206 matcher = None
196 matcher = None
207 # DO NOT "continue", because line might start next fragment
197 # DO NOT "continue", because line might start next fragment
208 else:
198 else:
209 code.append(matcher.codeinside(ctx, line))
199 code.append(matcher.codeinside(ctx, line))
210 continue
200 continue
211
201
212 # examine whether current line starts embedded code or not
202 # examine whether current line starts embedded code or not
213 assert not matcher
203 assert not matcher
214
204
215 matched = []
205 matched = []
216 for m in matchers:
206 for m in matchers:
217 ctx = m.startsat(line)
207 ctx = m.startsat(line)
218 if ctx:
208 if ctx:
219 matched.append((m, ctx))
209 matched.append((m, ctx))
220 if matched:
210 if matched:
221 if len(matched) > 1:
211 if len(matched) > 1:
222 # this is an error of matchers, maybe
212 # this is an error of matchers, maybe
223 errors.append(
213 errors.append(
224 '%s:%d: ambiguous line for %s'
214 '%s:%d: ambiguous line for %s'
225 % (
215 % (
226 basefile,
216 basefile,
227 lineno,
217 lineno,
228 ', '.join(['"%s"' % m.desc for m, c in matched]),
218 ', '.join(['"%s"' % m.desc for m, c in matched]),
229 )
219 )
230 )
220 )
231 # omit extracting embedded code, because choosing
221 # omit extracting embedded code, because choosing
232 # arbitrary matcher from matched ones might fail to
222 # arbitrary matcher from matched ones might fail to
233 # detect the end of embedded code as expected.
223 # detect the end of embedded code as expected.
234 continue
224 continue
235 matcher, ctx = matched[0]
225 matcher, ctx = matched[0]
236 filename = matcher.filename(ctx)
226 filename = matcher.filename(ctx)
237 code = []
227 code = []
238 codeatstart = matcher.codeatstart(ctx, line)
228 codeatstart = matcher.codeatstart(ctx, line)
239 if codeatstart is not None:
229 if codeatstart is not None:
240 code.append(codeatstart)
230 code.append(codeatstart)
241 startline = lineno
231 startline = lineno
242 else:
232 else:
243 startline = lineno + 1
233 startline = lineno + 1
244
234
245 if matcher:
235 if matcher:
246 # examine whether EOF ends embedded code, because embedded
236 # examine whether EOF ends embedded code, because embedded
247 # code isn't yet ended explicitly
237 # code isn't yet ended explicitly
248 if matcher.endsat(ctx, '\n'):
238 if matcher.endsat(ctx, '\n'):
249 codeatend = matcher.codeatend(ctx, '\n')
239 codeatend = matcher.codeatend(ctx, '\n')
250 if codeatend is not None:
240 if codeatend is not None:
251 code.append(codeatend)
241 code.append(codeatend)
252 if not matcher.ignores(ctx):
242 if not matcher.ignores(ctx):
253 yield (filename, startline, lineno + 1, ''.join(code))
243 yield (filename, startline, lineno + 1, ''.join(code))
254 else:
244 else:
255 # this is an error of basefile
245 # this is an error of basefile
256 # (if matchers are implemented correctly)
246 # (if matchers are implemented correctly)
257 errors.append(
247 errors.append(
258 '%s:%d: unexpected end of file for "%s"'
248 '%s:%d: unexpected end of file for "%s"'
259 % (basefile, lineno, matcher.desc)
249 % (basefile, lineno, matcher.desc)
260 )
250 )
261
251
262
252
263 # heredoc limit mark to ignore embedded code at check-code.py or so
253 # heredoc limit mark to ignore embedded code at check-code.py or so
264 heredocignorelimit = 'NO_CHECK_EOF'
254 heredocignorelimit = 'NO_CHECK_EOF'
265
255
266 # the pattern to match against cases below, and to return a limit mark
256 # the pattern to match against cases below, and to return a limit mark
267 # string as 'lname' group
257 # string as 'lname' group
268 #
258 #
269 # - << LIMITMARK
259 # - << LIMITMARK
270 # - << "LIMITMARK"
260 # - << "LIMITMARK"
271 # - << 'LIMITMARK'
261 # - << 'LIMITMARK'
272 heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
262 heredoclimitpat = r'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
273
263
274
264
275 class fileheredocmatcher(embeddedmatcher):
265 class fileheredocmatcher(embeddedmatcher):
276 """Detect "cat > FILE << LIMIT" style embedded code
266 """Detect "cat > FILE << LIMIT" style embedded code
277
267
278 >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
268 >>> matcher = fileheredocmatcher('heredoc .py file', r'[^<]+\\.py')
279 >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
269 >>> b2s(matcher.startsat(' $ cat > file.py << EOF\\n'))
280 ('file.py', ' > EOF\\n')
270 ('file.py', ' > EOF\\n')
281 >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
271 >>> b2s(matcher.startsat(' $ cat >>file.py <<EOF\\n'))
282 ('file.py', ' > EOF\\n')
272 ('file.py', ' > EOF\\n')
283 >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
273 >>> b2s(matcher.startsat(' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
284 ('any file.py', ' > EOF\\n')
274 ('any file.py', ' > EOF\\n')
285 >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
275 >>> b2s(matcher.startsat(" $ cat > file.py << 'ANYLIMIT'\\n"))
286 ('file.py', ' > ANYLIMIT\\n')
276 ('file.py', ' > ANYLIMIT\\n')
287 >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
277 >>> b2s(matcher.startsat(' $ cat<<ANYLIMIT>"file.py"\\n'))
288 ('file.py', ' > ANYLIMIT\\n')
278 ('file.py', ' > ANYLIMIT\\n')
289 >>> start = ' $ cat > file.py << EOF\\n'
279 >>> start = ' $ cat > file.py << EOF\\n'
290 >>> ctx = matcher.startsat(start)
280 >>> ctx = matcher.startsat(start)
291 >>> matcher.codeatstart(ctx, start)
281 >>> matcher.codeatstart(ctx, start)
292 >>> b2s(matcher.filename(ctx))
282 >>> b2s(matcher.filename(ctx))
293 'file.py'
283 'file.py'
294 >>> matcher.ignores(ctx)
284 >>> matcher.ignores(ctx)
295 False
285 False
296 >>> inside = ' > foo = 1\\n'
286 >>> inside = ' > foo = 1\\n'
297 >>> matcher.endsat(ctx, inside)
287 >>> matcher.endsat(ctx, inside)
298 False
288 False
299 >>> matcher.isinside(ctx, inside)
289 >>> matcher.isinside(ctx, inside)
300 True
290 True
301 >>> b2s(matcher.codeinside(ctx, inside))
291 >>> b2s(matcher.codeinside(ctx, inside))
302 'foo = 1\\n'
292 'foo = 1\\n'
303 >>> end = ' > EOF\\n'
293 >>> end = ' > EOF\\n'
304 >>> matcher.endsat(ctx, end)
294 >>> matcher.endsat(ctx, end)
305 True
295 True
306 >>> matcher.codeatend(ctx, end)
296 >>> matcher.codeatend(ctx, end)
307 >>> matcher.endsat(ctx, ' > EOFEOF\\n')
297 >>> matcher.endsat(ctx, ' > EOFEOF\\n')
308 False
298 False
309 >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
299 >>> ctx = matcher.startsat(' $ cat > file.py << NO_CHECK_EOF\\n')
310 >>> matcher.ignores(ctx)
300 >>> matcher.ignores(ctx)
311 True
301 True
312 """
302 """
313
303
314 _prefix = ' > '
304 _prefix = ' > '
315
305
316 def __init__(self, desc, namepat):
306 def __init__(self, desc, namepat):
317 super(fileheredocmatcher, self).__init__(desc)
307 super(fileheredocmatcher, self).__init__(desc)
318
308
319 # build the pattern to match against cases below (and ">>"
309 # build the pattern to match against cases below (and ">>"
320 # variants), and to return a target filename string as 'name'
310 # variants), and to return a target filename string as 'name'
321 # group
311 # group
322 #
312 #
323 # - > NAMEPAT
313 # - > NAMEPAT
324 # - > "NAMEPAT"
314 # - > "NAMEPAT"
325 # - > 'NAMEPAT'
315 # - > 'NAMEPAT'
326 namepat = (
316 namepat = (
327 r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
317 r'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)' % namepat
328 )
318 )
329 self._fileres = [
319 self._fileres = [
330 # "cat > NAME << LIMIT" case
320 # "cat > NAME << LIMIT" case
331 re.compile(r' {2}\$ \s*cat' + namepat + heredoclimitpat),
321 re.compile(r' {2}\$ \s*cat' + namepat + heredoclimitpat),
332 # "cat << LIMIT > NAME" case
322 # "cat << LIMIT > NAME" case
333 re.compile(r' {2}\$ \s*cat' + heredoclimitpat + namepat),
323 re.compile(r' {2}\$ \s*cat' + heredoclimitpat + namepat),
334 ]
324 ]
335
325
336 def startsat(self, line):
326 def startsat(self, line):
337 # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
327 # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
338 for filere in self._fileres:
328 for filere in self._fileres:
339 matched = filere.match(line)
329 matched = filere.match(line)
340 if matched:
330 if matched:
341 return (
331 return (
342 matched.group('name'),
332 matched.group('name'),
343 ' > %s\n' % matched.group('limit'),
333 ' > %s\n' % matched.group('limit'),
344 )
334 )
345
335
346 def endsat(self, ctx, line):
336 def endsat(self, ctx, line):
347 return ctx[1] == line
337 return ctx[1] == line
348
338
349 def isinside(self, ctx, line):
339 def isinside(self, ctx, line):
350 return line.startswith(self._prefix)
340 return line.startswith(self._prefix)
351
341
352 def ignores(self, ctx):
342 def ignores(self, ctx):
353 return ' > %s\n' % heredocignorelimit == ctx[1]
343 return ' > %s\n' % heredocignorelimit == ctx[1]
354
344
355 def filename(self, ctx):
345 def filename(self, ctx):
356 return ctx[0]
346 return ctx[0]
357
347
358 def codeatstart(self, ctx, line):
348 def codeatstart(self, ctx, line):
359 return None # no embedded code at start line
349 return None # no embedded code at start line
360
350
361 def codeatend(self, ctx, line):
351 def codeatend(self, ctx, line):
362 return None # no embedded code at end line
352 return None # no embedded code at end line
363
353
364 def codeinside(self, ctx, line):
354 def codeinside(self, ctx, line):
365 return line[len(self._prefix) :] # strip prefix
355 return line[len(self._prefix) :] # strip prefix
366
356
367
357
368 ####
358 ####
369 # for embedded python script
359 # for embedded python script
370
360
371
361
372 class pydoctestmatcher(embeddedmatcher):
362 class pydoctestmatcher(embeddedmatcher):
373 """Detect ">>> code" style embedded python code
363 """Detect ">>> code" style embedded python code
374
364
375 >>> matcher = pydoctestmatcher()
365 >>> matcher = pydoctestmatcher()
376 >>> startline = ' >>> foo = 1\\n'
366 >>> startline = ' >>> foo = 1\\n'
377 >>> matcher.startsat(startline)
367 >>> matcher.startsat(startline)
378 True
368 True
379 >>> matcher.startsat(' ... foo = 1\\n')
369 >>> matcher.startsat(' ... foo = 1\\n')
380 False
370 False
381 >>> ctx = matcher.startsat(startline)
371 >>> ctx = matcher.startsat(startline)
382 >>> matcher.filename(ctx)
372 >>> matcher.filename(ctx)
383 >>> matcher.ignores(ctx)
373 >>> matcher.ignores(ctx)
384 False
374 False
385 >>> b2s(matcher.codeatstart(ctx, startline))
375 >>> b2s(matcher.codeatstart(ctx, startline))
386 'foo = 1\\n'
376 'foo = 1\\n'
387 >>> inside = ' >>> foo = 1\\n'
377 >>> inside = ' >>> foo = 1\\n'
388 >>> matcher.endsat(ctx, inside)
378 >>> matcher.endsat(ctx, inside)
389 False
379 False
390 >>> matcher.isinside(ctx, inside)
380 >>> matcher.isinside(ctx, inside)
391 True
381 True
392 >>> b2s(matcher.codeinside(ctx, inside))
382 >>> b2s(matcher.codeinside(ctx, inside))
393 'foo = 1\\n'
383 'foo = 1\\n'
394 >>> inside = ' ... foo = 1\\n'
384 >>> inside = ' ... foo = 1\\n'
395 >>> matcher.endsat(ctx, inside)
385 >>> matcher.endsat(ctx, inside)
396 False
386 False
397 >>> matcher.isinside(ctx, inside)
387 >>> matcher.isinside(ctx, inside)
398 True
388 True
399 >>> b2s(matcher.codeinside(ctx, inside))
389 >>> b2s(matcher.codeinside(ctx, inside))
400 'foo = 1\\n'
390 'foo = 1\\n'
401 >>> inside = ' expected output\\n'
391 >>> inside = ' expected output\\n'
402 >>> matcher.endsat(ctx, inside)
392 >>> matcher.endsat(ctx, inside)
403 False
393 False
404 >>> matcher.isinside(ctx, inside)
394 >>> matcher.isinside(ctx, inside)
405 True
395 True
406 >>> b2s(matcher.codeinside(ctx, inside))
396 >>> b2s(matcher.codeinside(ctx, inside))
407 '\\n'
397 '\\n'
408 >>> inside = ' \\n'
398 >>> inside = ' \\n'
409 >>> matcher.endsat(ctx, inside)
399 >>> matcher.endsat(ctx, inside)
410 False
400 False
411 >>> matcher.isinside(ctx, inside)
401 >>> matcher.isinside(ctx, inside)
412 True
402 True
413 >>> b2s(matcher.codeinside(ctx, inside))
403 >>> b2s(matcher.codeinside(ctx, inside))
414 '\\n'
404 '\\n'
415 >>> end = ' $ foo bar\\n'
405 >>> end = ' $ foo bar\\n'
416 >>> matcher.endsat(ctx, end)
406 >>> matcher.endsat(ctx, end)
417 True
407 True
418 >>> matcher.codeatend(ctx, end)
408 >>> matcher.codeatend(ctx, end)
419 >>> end = '\\n'
409 >>> end = '\\n'
420 >>> matcher.endsat(ctx, end)
410 >>> matcher.endsat(ctx, end)
421 True
411 True
422 >>> matcher.codeatend(ctx, end)
412 >>> matcher.codeatend(ctx, end)
423 """
413 """
424
414
425 _prefix = ' >>> '
415 _prefix = ' >>> '
426 _prefixre = re.compile(r' {2}(>>>|\.\.\.) ')
416 _prefixre = re.compile(r' {2}(>>>|\.\.\.) ')
427
417
428 # If a line matches against not _prefixre but _outputre, that line
418 # If a line matches against not _prefixre but _outputre, that line
429 # is "an expected output line" (= not a part of code fragment).
419 # is "an expected output line" (= not a part of code fragment).
430 #
420 #
431 # Strictly speaking, a line matching against "(#if|#else|#endif)"
421 # Strictly speaking, a line matching against "(#if|#else|#endif)"
432 # is also treated similarly in "inline python code" semantics by
422 # is also treated similarly in "inline python code" semantics by
433 # run-tests.py. But "directive line inside inline python code"
423 # run-tests.py. But "directive line inside inline python code"
434 # should be rejected by Mercurial reviewers. Therefore, this
424 # should be rejected by Mercurial reviewers. Therefore, this
435 # regexp does not match against such directive lines.
425 # regexp does not match against such directive lines.
436 _outputre = re.compile(r' {2}$| {2}[^$]')
426 _outputre = re.compile(r' {2}$| {2}[^$]')
437
427
438 def __init__(self):
428 def __init__(self):
439 super(pydoctestmatcher, self).__init__("doctest style python code")
429 super(pydoctestmatcher, self).__init__("doctest style python code")
440
430
441 def startsat(self, line):
431 def startsat(self, line):
442 # ctx is "True"
432 # ctx is "True"
443 return line.startswith(self._prefix)
433 return line.startswith(self._prefix)
444
434
445 def endsat(self, ctx, line):
435 def endsat(self, ctx, line):
446 return not (self._prefixre.match(line) or self._outputre.match(line))
436 return not (self._prefixre.match(line) or self._outputre.match(line))
447
437
448 def isinside(self, ctx, line):
438 def isinside(self, ctx, line):
449 return True # always true, if not yet ended
439 return True # always true, if not yet ended
450
440
451 def ignores(self, ctx):
441 def ignores(self, ctx):
452 return False # should be checked always
442 return False # should be checked always
453
443
454 def filename(self, ctx):
444 def filename(self, ctx):
455 return None # no filename
445 return None # no filename
456
446
457 def codeatstart(self, ctx, line):
447 def codeatstart(self, ctx, line):
458 return line[len(self._prefix) :] # strip prefix ' >>> '/' ... '
448 return line[len(self._prefix) :] # strip prefix ' >>> '/' ... '
459
449
460 def codeatend(self, ctx, line):
450 def codeatend(self, ctx, line):
461 return None # no embedded code at end line
451 return None # no embedded code at end line
462
452
463 def codeinside(self, ctx, line):
453 def codeinside(self, ctx, line):
464 if self._prefixre.match(line):
454 if self._prefixre.match(line):
465 return line[len(self._prefix) :] # strip prefix ' >>> '/' ... '
455 return line[len(self._prefix) :] # strip prefix ' >>> '/' ... '
466 return '\n' # an expected output line is treated as an empty line
456 return '\n' # an expected output line is treated as an empty line
467
457
468
458
469 class pyheredocmatcher(embeddedmatcher):
459 class pyheredocmatcher(embeddedmatcher):
470 """Detect "python << LIMIT" style embedded python code
460 """Detect "python << LIMIT" style embedded python code
471
461
472 >>> matcher = pyheredocmatcher()
462 >>> matcher = pyheredocmatcher()
473 >>> b2s(matcher.startsat(' $ python << EOF\\n'))
463 >>> b2s(matcher.startsat(' $ python << EOF\\n'))
474 ' > EOF\\n'
464 ' > EOF\\n'
475 >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
465 >>> b2s(matcher.startsat(' $ $PYTHON <<EOF\\n'))
476 ' > EOF\\n'
466 ' > EOF\\n'
477 >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
467 >>> b2s(matcher.startsat(' $ "$PYTHON"<< "EOF"\\n'))
478 ' > EOF\\n'
468 ' > EOF\\n'
479 >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
469 >>> b2s(matcher.startsat(" $ $PYTHON << 'ANYLIMIT'\\n"))
480 ' > ANYLIMIT\\n'
470 ' > ANYLIMIT\\n'
481 >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
471 >>> matcher.startsat(' $ "$PYTHON" < EOF\\n')
482 >>> start = ' $ python << EOF\\n'
472 >>> start = ' $ python << EOF\\n'
483 >>> ctx = matcher.startsat(start)
473 >>> ctx = matcher.startsat(start)
484 >>> matcher.codeatstart(ctx, start)
474 >>> matcher.codeatstart(ctx, start)
485 >>> matcher.filename(ctx)
475 >>> matcher.filename(ctx)
486 >>> matcher.ignores(ctx)
476 >>> matcher.ignores(ctx)
487 False
477 False
488 >>> inside = ' > foo = 1\\n'
478 >>> inside = ' > foo = 1\\n'
489 >>> matcher.endsat(ctx, inside)
479 >>> matcher.endsat(ctx, inside)
490 False
480 False
491 >>> matcher.isinside(ctx, inside)
481 >>> matcher.isinside(ctx, inside)
492 True
482 True
493 >>> b2s(matcher.codeinside(ctx, inside))
483 >>> b2s(matcher.codeinside(ctx, inside))
494 'foo = 1\\n'
484 'foo = 1\\n'
495 >>> end = ' > EOF\\n'
485 >>> end = ' > EOF\\n'
496 >>> matcher.endsat(ctx, end)
486 >>> matcher.endsat(ctx, end)
497 True
487 True
498 >>> matcher.codeatend(ctx, end)
488 >>> matcher.codeatend(ctx, end)
499 >>> matcher.endsat(ctx, ' > EOFEOF\\n')
489 >>> matcher.endsat(ctx, ' > EOFEOF\\n')
500 False
490 False
501 >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
491 >>> ctx = matcher.startsat(' $ python << NO_CHECK_EOF\\n')
502 >>> matcher.ignores(ctx)
492 >>> matcher.ignores(ctx)
503 True
493 True
504 """
494 """
505
495
506 _prefix = ' > '
496 _prefix = ' > '
507
497
508 _startre = re.compile(
498 _startre = re.compile(
509 r' {2}\$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
499 r' {2}\$ (\$PYTHON|"\$PYTHON"|python).*' + heredoclimitpat
510 )
500 )
511
501
512 def __init__(self):
502 def __init__(self):
513 super(pyheredocmatcher, self).__init__("heredoc python invocation")
503 super(pyheredocmatcher, self).__init__("heredoc python invocation")
514
504
515 def startsat(self, line):
505 def startsat(self, line):
516 # ctx is END-LINE-OF-EMBEDDED-CODE
506 # ctx is END-LINE-OF-EMBEDDED-CODE
517 matched = self._startre.match(line)
507 matched = self._startre.match(line)
518 if matched:
508 if matched:
519 return ' > %s\n' % matched.group('limit')
509 return ' > %s\n' % matched.group('limit')
520
510
521 def endsat(self, ctx, line):
511 def endsat(self, ctx, line):
522 return ctx == line
512 return ctx == line
523
513
524 def isinside(self, ctx, line):
514 def isinside(self, ctx, line):
525 return line.startswith(self._prefix)
515 return line.startswith(self._prefix)
526
516
527 def ignores(self, ctx):
517 def ignores(self, ctx):
528 return ' > %s\n' % heredocignorelimit == ctx
518 return ' > %s\n' % heredocignorelimit == ctx
529
519
530 def filename(self, ctx):
520 def filename(self, ctx):
531 return None # no filename
521 return None # no filename
532
522
533 def codeatstart(self, ctx, line):
523 def codeatstart(self, ctx, line):
534 return None # no embedded code at start line
524 return None # no embedded code at start line
535
525
536 def codeatend(self, ctx, line):
526 def codeatend(self, ctx, line):
537 return None # no embedded code at end line
527 return None # no embedded code at end line
538
528
539 def codeinside(self, ctx, line):
529 def codeinside(self, ctx, line):
540 return line[len(self._prefix) :] # strip prefix
530 return line[len(self._prefix) :] # strip prefix
541
531
542
532
543 _pymatchers = [
533 _pymatchers = [
544 pydoctestmatcher(),
534 pydoctestmatcher(),
545 pyheredocmatcher(),
535 pyheredocmatcher(),
546 # use '[^<]+' instead of '\S+', in order to match against
536 # use '[^<]+' instead of '\S+', in order to match against
547 # paths including whitespaces
537 # paths including whitespaces
548 fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
538 fileheredocmatcher('heredoc .py file', r'[^<]+\.py'),
549 ]
539 ]
550
540
551
541
552 def pyembedded(basefile, lines, errors):
542 def pyembedded(basefile, lines, errors):
553 return embedded(basefile, lines, errors, _pymatchers)
543 return embedded(basefile, lines, errors, _pymatchers)
554
544
555
545
556 ####
546 ####
557 # for embedded shell script
547 # for embedded shell script
558
548
559 _shmatchers = [
549 _shmatchers = [
560 # use '[^<]+' instead of '\S+', in order to match against
550 # use '[^<]+' instead of '\S+', in order to match against
561 # paths including whitespaces
551 # paths including whitespaces
562 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
552 fileheredocmatcher('heredoc .sh file', r'[^<]+\.sh'),
563 ]
553 ]
564
554
565
555
566 def shembedded(basefile, lines, errors):
556 def shembedded(basefile, lines, errors):
567 return embedded(basefile, lines, errors, _shmatchers)
557 return embedded(basefile, lines, errors, _shmatchers)
568
558
569
559
570 ####
560 ####
571 # for embedded hgrc configuration
561 # for embedded hgrc configuration
572
562
573 _hgrcmatchers = [
563 _hgrcmatchers = [
574 # use '[^<]+' instead of '\S+', in order to match against
564 # use '[^<]+' instead of '\S+', in order to match against
575 # paths including whitespaces
565 # paths including whitespaces
576 fileheredocmatcher(
566 fileheredocmatcher(
577 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
567 'heredoc hgrc file', r'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'
578 ),
568 ),
579 ]
569 ]
580
570
581
571
582 def hgrcembedded(basefile, lines, errors):
572 def hgrcembedded(basefile, lines, errors):
583 return embedded(basefile, lines, errors, _hgrcmatchers)
573 return embedded(basefile, lines, errors, _hgrcmatchers)
584
574
585
575
586 ####
576 ####
587
577
588 if __name__ == "__main__":
578 if __name__ == "__main__":
589 import optparse
579 import optparse
590 import sys
580 import sys
591
581
592 def showembedded(basefile, lines, embeddedfunc, opts):
582 def showembedded(basefile, lines, embeddedfunc, opts):
593 errors = []
583 errors = []
594 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
584 for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
595 if not name:
585 if not name:
596 name = '<anonymous>'
586 name = '<anonymous>'
597 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
587 writeout("%s:%d: %s starts\n" % (basefile, starts, name))
598 if opts.verbose and code:
588 if opts.verbose and code:
599 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
589 writeout(" |%s\n" % "\n |".join(l for l in code.splitlines()))
600 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
590 writeout("%s:%d: %s ends\n" % (basefile, ends, name))
601 for e in errors:
591 for e in errors:
602 writeerr("%s\n" % e)
592 writeerr("%s\n" % e)
603 return len(errors)
593 return len(errors)
604
594
605 def applyembedded(args, embeddedfunc, opts):
595 def applyembedded(args, embeddedfunc, opts):
606 ret = 0
596 ret = 0
607 if args:
597 if args:
608 for f in args:
598 for f in args:
609 with opentext(f) as fp:
599 with opentext(f) as fp:
610 if showembedded(f, fp, embeddedfunc, opts):
600 if showembedded(f, fp, embeddedfunc, opts):
611 ret = 1
601 ret = 1
612 else:
602 else:
613 lines = [l for l in sys.stdin.readlines()]
603 lines = [l for l in sys.stdin.readlines()]
614 if showembedded('<stdin>', lines, embeddedfunc, opts):
604 if showembedded('<stdin>', lines, embeddedfunc, opts):
615 ret = 1
605 ret = 1
616 return ret
606 return ret
617
607
618 commands = {}
608 commands = {}
619
609
620 def command(name, desc):
610 def command(name, desc):
621 def wrap(func):
611 def wrap(func):
622 commands[name] = (desc, func)
612 commands[name] = (desc, func)
623
613
624 return wrap
614 return wrap
625
615
626 @command("pyembedded", "detect embedded python script")
616 @command("pyembedded", "detect embedded python script")
627 def pyembeddedcmd(args, opts):
617 def pyembeddedcmd(args, opts):
628 return applyembedded(args, pyembedded, opts)
618 return applyembedded(args, pyembedded, opts)
629
619
630 @command("shembedded", "detect embedded shell script")
620 @command("shembedded", "detect embedded shell script")
631 def shembeddedcmd(args, opts):
621 def shembeddedcmd(args, opts):
632 return applyembedded(args, shembedded, opts)
622 return applyembedded(args, shembedded, opts)
633
623
634 @command("hgrcembedded", "detect embedded hgrc configuration")
624 @command("hgrcembedded", "detect embedded hgrc configuration")
635 def hgrcembeddedcmd(args, opts):
625 def hgrcembeddedcmd(args, opts):
636 return applyembedded(args, hgrcembedded, opts)
626 return applyembedded(args, hgrcembedded, opts)
637
627
638 availablecommands = "\n".join(
628 availablecommands = "\n".join(
639 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
629 [" - %s: %s" % (key, value[0]) for key, value in commands.items()]
640 )
630 )
641
631
642 parser = optparse.OptionParser(
632 parser = optparse.OptionParser(
643 """%prog COMMAND [file ...]
633 """%prog COMMAND [file ...]
644
634
645 Pick up embedded code fragments from given file(s) or stdin, and list
635 Pick up embedded code fragments from given file(s) or stdin, and list
646 up start/end lines of them in standard compiler format
636 up start/end lines of them in standard compiler format
647 ("FILENAME:LINENO:").
637 ("FILENAME:LINENO:").
648
638
649 Available commands are:
639 Available commands are:
650 """
640 """
651 + availablecommands
641 + availablecommands
652 + """
642 + """
653 """
643 """
654 )
644 )
655 parser.add_option(
645 parser.add_option(
656 "-v",
646 "-v",
657 "--verbose",
647 "--verbose",
658 help="enable additional output (e.g. actual code)",
648 help="enable additional output (e.g. actual code)",
659 action="store_true",
649 action="store_true",
660 )
650 )
661 (opts, args) = parser.parse_args()
651 (opts, args) = parser.parse_args()
662
652
663 if not args or args[0] not in commands:
653 if not args or args[0] not in commands:
664 parser.print_help()
654 parser.print_help()
665 sys.exit(255)
655 sys.exit(255)
666
656
667 sys.exit(commands[args[0]][1](args[1:], opts))
657 sys.exit(commands[args[0]][1](args[1:], opts))
General Comments 0
You need to be logged in to leave comments. Login now