Show More
This diff has been collapsed as it changes many lines, (630 lines changed) Show them Hide them | |||||
@@ -0,0 +1,630 | |||||
|
1 | # testparseutil.py - utilities to parse test script for check tools | |||
|
2 | # | |||
|
3 | # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others | |||
|
4 | # | |||
|
5 | # This software may be used and distributed according to the terms of the | |||
|
6 | # GNU General Public License version 2 or any later version. | |||
|
7 | ||||
|
8 | from __future__ import absolute_import, print_function | |||
|
9 | ||||
|
10 | import abc | |||
|
11 | import re | |||
|
12 | import sys | |||
|
13 | ||||
|
14 | #################### | |||
|
# for Python3 compatibility (mostly taken from mercurial/pycompat.py)
|
16 | ||||
|
# True when the running interpreter is Python 3 or later
ispy3 = sys.version_info[0] >= 3
|
18 | ||||
|
def identity(a):
    """Return the given object itself, unchanged"""
    return a
|
21 | ||||
|
22 | def _rapply(f, xs): | |||
|
23 | if xs is None: | |||
|
24 | # assume None means non-value of optional data | |||
|
25 | return xs | |||
|
26 | if isinstance(xs, (list, set, tuple)): | |||
|
27 | return type(xs)(_rapply(f, x) for x in xs) | |||
|
28 | if isinstance(xs, dict): | |||
|
29 | return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) | |||
|
30 | return f(xs) | |||
|
31 | ||||
|
def rapply(f, xs):
    """Apply f to every leaf value of xs recursively

    If f is the "identity" function, xs itself is returned without
    being walked (fast path mainly for py2).
    """
    return xs if f is identity else _rapply(f, xs)
|
37 | ||||
|
if ispy3:
    import builtins

    # TODO: .buffer might not exist if std streams were replaced; we'll need
    # a silly wrapper to make a bytes stream backed by a unicode one.
    stdin = sys.stdin.buffer
    stdout = sys.stdout.buffer
    stderr = sys.stderr.buffer

    def bytestr(s):
        # tiny version of pycompat.bytestr: encode a native str as latin1
        encoded = s.encode('latin1')
        return encoded

    def sysstr(s):
        # a native str is passed through; anything else is decoded as latin-1
        return s if isinstance(s, builtins.str) else s.decode(u'latin-1')

    def opentext(f):
        # lines are handled as BYTES, so open the file in binary mode
        return open(f, mode='rb')
else:
    # on py2, native str is already BYTES: use std streams and builtins as is
    stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr

    bytestr = str
    sysstr = identity

    opentext = open
|
67 | ||||
|
def b2s(x):
    """Convert BYTES elements in "x" to SYSSTR recursively"""
    converted = rapply(sysstr, x)
    return converted
|
71 | ||||
|
def writeout(data):
    """Write "data" in BYTES into the module-level stdout stream"""
    stdout.write(data)
|
75 | ||||
|
def writeerr(data):
    """Write "data" in BYTES into the module-level stderr stream"""
    stderr.write(data)
|
79 | ||||
|
80 | #################### | |||
|
81 | ||||
|
# A "__metaclass__" class attribute is honored only by Python 2; Python 3
# silently ignores it, which leaves @abc.abstractmethod unenforced. Deriving
# from a throwaway base class created by calling ABCMeta directly applies
# the metaclass on both Python 2 and Python 3.
class embeddedmatcher(abc.ABCMeta('_embeddedmatcherbase', (object,), {})):
    """Base class to detect embedded code fragments in *.t test script

    A concrete matcher must implement all of the abstract methods
    below; otherwise, instantiating it raises TypeError.

    :desc: description of the matcher, kept as the "desc" attribute
    """

    def __init__(self, desc):
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return arbitrary object, and it is used as 'ctx' for
        subsequent method invocations.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
|
138 | ||||
|
def embedded(basefile, lines, errors, matchers):
    """Extract embedded code fragments from given lines

    This is the parsing loop shared by all kinds of embedded code. A
    line outside of any fragment is offered to every matcher; while a
    fragment is being collected, only the matcher that started it is
    consulted.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher(b'ambiguous #1',
    ...                       lambda l: l.startswith(b' $ cat '))
    >>> ambig2 = ambigmatcher(b'ambiguous #2',
    ...                       lambda l: l.endswith(b'<< EOF\\n'))
    >>> lines = [b' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    active = None  # matcher of the fragment being collected, if any
    state = fragname = chunks = firstline = None # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith(b'\n'):
            line += b'\n' # to normalize EOF line

        if active: # a fragment is being collected
            if active.endsat(state, line):
                tail = active.codeatend(state, line)
                if tail is not None:
                    chunks.append(tail)
                if not active.ignores(state):
                    yield (fragname, firstline, lineno, b''.join(chunks))
                active = None
                # fall through: this line might start the next fragment
            elif active.isinside(state, line):
                chunks.append(active.codeinside(state, line))
                continue
            else:
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(b'%s:%d: unexpected line for "%s"'
                              % (basefile, lineno, active.desc))
                # give up the fragment collected so far, because an
                # unexpected line might mean that the expected
                # end-of-embedded-code line never appears
                active = None
                # fall through: this line might start the next fragment

        # from here, examine whether current line starts embedded code
        assert not active

        probes = ((m, m.startsat(line)) for m in matchers)
        candidates = [(m, found) for m, found in probes if found]
        if not candidates:
            continue
        if len(candidates) > 1:
            # this is an error of matchers, maybe
            names = b', '.join([b'"%s"' % m.desc for m, _c in candidates])
            errors.append(b'%s:%d: ambiguous line for %s'
                          % (basefile, lineno, names))
            # do not extract anything, because picking an arbitrary
            # matcher out of matched ones might fail to detect the end
            # of embedded code as expected.
            continue

        active, state = candidates[0]
        fragname = active.filename(state)
        head = active.codeatstart(state, line)
        if head is None:
            # the start line carries no code; fragment begins at the next
            chunks = []
            firstline = lineno + 1
        else:
            chunks = [head]
            firstline = lineno

    if not active:
        return

    # embedded code isn't yet ended explicitly: examine whether EOF
    # (treated as an empty line) ends it
    if active.endsat(state, b'\n'):
        tail = active.codeatend(state, b'\n')
        if tail is not None:
            chunks.append(tail)
        if not active.ignores(state):
            yield (fragname, firstline, lineno + 1, b''.join(chunks))
    else:
        # this is an error of basefile
        # (if matchers are implemented correctly)
        errors.append(b'%s:%d: unexpected end of file for "%s"'
                      % (basefile, lineno, active.desc))
|
253 | ||||
|
# a here document using this limit mark is detected but not reported, so
# that check-code.py or so ignores the code embedded in it
heredocignorelimit = b'NO_CHECK_EOF'

# the pattern fragment matching any of the forms below; the limit mark
# string itself is available as the 'limit' group
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
|
264 | ||||
|
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    'ctx' of this matcher is a (filename, END-LINE-OF-EMBEDDED-CODE)
    tuple.

    :desc: description of this matcher
    :namepat: regexp (in BYTES) matching against target filenames

    >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\\.py')
    >>> b2s(matcher.startsat(b' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(b' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')
    >>> start = b' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = b' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    # NOTE(review): this text was recovered from a rendering that seems to
    # collapse runs of spaces; confirm the prefix literals of this class
    # against the real file.
    _prefix = b' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # build the pattern to match against cases below (and ">>"
        # variants), and to return a target filename string as 'name'
        # group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        namepat = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
                   % namepat)
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(br' \$ \s*cat' + namepat + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(br' \$ \s*cat' + heredoclimitpat + namepat),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        # (None is returned implicitly, if no pattern matches)
        for filere in self._fileres:
            matched = filere.match(line)
            if matched:
                return (matched.group('name'),
                        b' > %s\n' % matched.group('limit'))

    def endsat(self, ctx, line):
        # only the exact end line terminates embedded code
        return ctx[1] == line

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        # ignored, if the limit mark is heredocignorelimit
        return b' > %s\n' % heredocignorelimit == ctx[1]

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None # no embedded code at start line

    def codeatend(self, ctx, line):
        return None # no embedded code at end line

    def codeinside(self, ctx, line):
        return line[len(self._prefix):] # strip prefix
|
352 | ||||
|
353 | #### | |||
|
354 | # for embedded python script | |||
|
355 | ||||
|
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    'ctx' of this matcher is just True. A fragment has neither a
    filename nor code at its end line, and is never ignored.

    >>> matcher = pydoctestmatcher()
    >>> startline = b' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(b' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = b' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = b' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = b' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = b'\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """
    # NOTE(review): this text was recovered from a rendering that seems to
    # collapse runs of spaces; confirm the prefix literals of this class
    # against the real file.
    _prefix = b' >>> '
    _prefixre = re.compile(br' (>>>|\.\.\.) ')

    # A line matching _outputre but not _prefixre is "an expected
    # output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(br' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__(b"doctest style python code")

    def startsat(self, line):
        # the result (True) doubles as ctx
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # ended by a line that is neither code nor expected output
        return (not self._prefixre.match(line)
                and not self._outputre.match(line))

    def isinside(self, ctx, line):
        # whatever isn't an end line belongs to the fragment
        return True

    def ignores(self, ctx):
        # doctest style code is always subject to checking
        return False

    def filename(self, ctx):
        # doctest style code has no filename
        return None

    def codeatstart(self, ctx, line):
        # drop the leading ' >>> '
        return line[len(self._prefix):]

    def codeatend(self, ctx, line):
        # the end line never carries embedded code
        return None

    def codeinside(self, ctx, line):
        if not self._prefixre.match(line):
            # an expected output line is treated as an empty line
            return b'\n'
        # ' >>> ' and ' ... ' are of the same length
        return line[len(self._prefix):]
|
450 | ||||
|
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    'ctx' of this matcher is the END-LINE-OF-EMBEDDED-CODE. A fragment
    has neither a filename nor code at its start/end lines.

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(b' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(b' $ "$PYTHON" < EOF\\n')
    >>> start = b' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = b' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    # NOTE(review): this text was recovered from a rendering that seems to
    # collapse runs of spaces; confirm the prefix literals of this class
    # against the real file.
    _prefix = b' > '

    _startre = re.compile(br' \$ (\$PYTHON|"\$PYTHON"|python).*' +
                          heredoclimitpat)

    def __init__(self):
        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE, built from the limit mark
        found = self._startre.match(line)
        if found is None:
            return None
        return b' > %s\n' % found.group('limit')

    def endsat(self, ctx, line):
        # only the exact end line terminates embedded code
        return line == ctx

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        # ignored, if the limit mark is heredocignorelimit
        return ctx == b' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        # code is fed to python directly; there is no filename
        return None

    def codeatstart(self, ctx, line):
        # the start line never carries embedded code
        return None

    def codeatend(self, ctx, line):
        # the end line never carries embedded code
        return None

    def codeinside(self, ctx, line):
        # drop the leading ' > '
        return line[len(self._prefix):]
|
521 | ||||
|
# matchers detecting embedded python code
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    # '[^<]+' is used instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
]

def pyembedded(basefile, lines, errors):
    """Pick embedded python code fragments up from given lines

    Arguments and yielded tuples are the same as those of "embedded".
    """
    return embedded(basefile, lines, errors, matchers=_pymatchers)
|
532 | ||||
|
533 | #### | |||
|
534 | # for embedded shell script | |||
|
535 | ||||
|
# matchers detecting embedded shell script
_shmatchers = [
    # '[^<]+' is used instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
]

def shembedded(basefile, lines, errors):
    """Pick embedded shell script fragments up from given lines

    Arguments and yielded tuples are the same as those of "embedded".
    """
    return embedded(basefile, lines, errors, matchers=_shmatchers)
|
544 | ||||
|
545 | #### | |||
|
546 | # for embedded hgrc configuration | |||
|
547 | ||||
|
# matchers detecting embedded hgrc configuration
_hgrcmatchers = [
    # '[^<]+' is used instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc hgrc file',
                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
]

def hgrcembedded(basefile, lines, errors):
    """Pick embedded hgrc configuration fragments up from given lines

    Arguments and yielded tuples are the same as those of "embedded".
    """
    return embedded(basefile, lines, errors, matchers=_hgrcmatchers)
|
557 | ||||
|
558 | #### | |||
|
559 | ||||
|
if __name__ == "__main__":
    import optparse
    import sys

    def showembedded(basefile, lines, embeddedfunc, opts):
        """Show fragments detected in lines by embeddedfunc

        Start/end lines of each fragment are written into stdout (with
        actual code, if opts.verbose), and detected errors are written
        into stderr. This returns the number of detected errors.
        """
        errors = []
        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
            if not name:
                name = b'<anonymous>'
            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
            if opts.verbose and code:
                writeout(b" |%s\n" %
                         b"\n |".join(code.splitlines()))
            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
        for e in errors:
            writeerr(b"%s\n" % e)
        return len(errors)

    def applyembedded(args, embeddedfunc, opts):
        """Apply showembedded on each file in args, or on stdin

        This returns 1 if any error is detected. Otherwise, 0.
        """
        ret = 0
        if args:
            for f in args:
                with opentext(f) as fp:
                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
                        ret = 1
        else:
            lines = stdin.readlines()
            if showembedded(b'<stdin>', lines, embeddedfunc, opts):
                ret = 1
        return ret

    # command name => (description, function) table
    commands = {}
    def command(name, desc):
        """Decorator to register a function into commands table"""
        def wrap(func):
            commands[name] = (desc, func)
            # return func, in order to keep the decorated name bound to
            # the function instead of None
            return func
        return wrap

    @command("pyembedded", "detect embedded python script")
    def pyembeddedcmd(args, opts):
        return applyembedded(args, pyembedded, opts)

    @command("shembedded", "detect embedded shell script")
    def shembeddedcmd(args, opts):
        return applyembedded(args, shembedded, opts)

    @command("hgrcembedded", "detect embedded hgrc configuration")
    def hgrcembeddedcmd(args, opts):
        return applyembedded(args, hgrcembedded, opts)

    availablecommands = "\n".join([" - %s: %s" % (key, value[0])
                                   for key, value in commands.items()])

    parser = optparse.OptionParser("""%prog COMMAND [file ...]

Pick up embedded code fragments from given file(s) or stdin, and list
up start/end lines of them in standard compiler format
("FILENAME:LINENO:").

Available commands are:
""" + availablecommands + """
""")
    parser.add_option("-v", "--verbose",
                      help="enable additional output (e.g. actual code)",
                      action="store_true")
    (opts, args) = parser.parse_args()

    # the first argument chooses the command; the rest are target files
    if not args or args[0] not in commands:
        parser.print_help()
        sys.exit(255)

    sys.exit(commands[args[0]][1](args[1:], opts))
@@ -0,0 +1,192 | |||||
|
1 | $ testparseutil="$TESTDIR"/../contrib/testparseutil.py | |||
|
2 | ||||
|
3 | Internal test by doctest | |||
|
4 | ||||
|
5 | $ "$PYTHON" -m doctest "$testparseutil" | |||
|
6 | ||||
|
7 | Tests for embedded python script | |||
|
8 | ||||
|
9 | Typical cases | |||
|
10 | ||||
|
11 | $ "$PYTHON" "$testparseutil" -v pyembedded <<NO_CHECK_EOF | |||
|
12 | > >>> for f in [1, 2, 3]: | |||
|
13 | > ... foo = 1 | |||
|
14 | > >>> foo = 2 | |||
|
15 | > $ echo "doctest is terminated by command, empty line, or comment" | |||
|
16 | > >>> foo = 31 | |||
|
17 | > expected output of doctest fragment | |||
|
18 | > >>> foo = 32 | |||
|
19 | > | |||
|
20 | > >>> foo = 33 | |||
|
21 | > | |||
|
22 | > >>> foo = 34 | |||
|
23 | > comment | |||
|
24 | > >>> foo = 35 | |||
|
25 | > | |||
|
26 | > $ "\$PYTHON" <<EOF | |||
|
27 | > > foo = 4 | |||
|
28 | > > | |||
|
29 | > > EOF | |||
|
30 | > $ cat > foo.py <<EOF | |||
|
31 | > > foo = 5 | |||
|
32 | > > EOF | |||
|
33 | > $ cat >> foo.py <<EOF | |||
|
34 | > > foo = 6 # appended | |||
|
35 | > > EOF | |||
|
36 | > | |||
|
37 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |||
|
38 | > (this is useful to use bad code intentionally) | |||
|
39 | > | |||
|
40 | > $ "\$PYTHON" <<NO_CHECK_EOF | |||
|
41 | > > foo = 7 # this should be ignored at detection | |||
|
42 | > > NO_CHECK_EOF | |||
|
43 | > $ cat > foo.py <<NO_CHECK_EOF | |||
|
44 | > > foo = 8 # this should be ignored at detection | |||
|
45 | > > NO_CHECK_EOF | |||
|
46 | > | |||
|
47 | > doctest fragment ended by EOF | |||
|
48 | > | |||
|
49 | > >>> foo = 9 | |||
|
50 | > NO_CHECK_EOF | |||
|
51 | <stdin>:1: <anonymous> starts | |||
|
52 | |for f in [1, 2, 3]: | |||
|
53 | | foo = 1 | |||
|
54 | |foo = 2 | |||
|
55 | <stdin>:4: <anonymous> ends | |||
|
56 | <stdin>:5: <anonymous> starts | |||
|
57 | |foo = 31 | |||
|
58 | | | |||
|
59 | |foo = 32 | |||
|
60 | | | |||
|
61 | |foo = 33 | |||
|
62 | <stdin>:10: <anonymous> ends | |||
|
63 | <stdin>:11: <anonymous> starts | |||
|
64 | |foo = 34 | |||
|
65 | <stdin>:12: <anonymous> ends | |||
|
66 | <stdin>:13: <anonymous> starts | |||
|
67 | |foo = 35 | |||
|
68 | <stdin>:14: <anonymous> ends | |||
|
69 | <stdin>:16: <anonymous> starts | |||
|
70 | |foo = 4 | |||
|
71 | | | |||
|
72 | <stdin>:18: <anonymous> ends | |||
|
73 | <stdin>:20: foo.py starts | |||
|
74 | |foo = 5 | |||
|
75 | <stdin>:21: foo.py ends | |||
|
76 | <stdin>:23: foo.py starts | |||
|
77 | |foo = 6 # appended | |||
|
78 | <stdin>:24: foo.py ends | |||
|
79 | <stdin>:38: <anonymous> starts | |||
|
80 | |foo = 9 | |||
|
81 | <stdin>:39: <anonymous> ends | |||
|
82 | ||||
|
83 | Invalid test script | |||
|
84 | ||||
|
85 | (similar test for shell script and hgrc configuration is omitted, | |||
|
86 | because this tests common base class of them) | |||
|
87 | ||||
|
88 | $ "$PYTHON" "$testparseutil" -v pyembedded <<NO_CHECK_EOF > detected | |||
|
89 | > $ "\$PYTHON" <<EOF | |||
|
90 | > > foo = 1 | |||
|
91 | > | |||
|
92 | > $ "\$PYTHON" <<EOF | |||
|
93 | > > foo = 2 | |||
|
94 | > $ cat > bar.py <<EOF | |||
|
95 | > > bar = 2 # this fragment will be detected as expected | |||
|
96 | > > EOF | |||
|
97 | > | |||
|
98 | > $ cat > foo.py <<EOF | |||
|
99 | > > foo = 3 | |||
|
100 | > NO_CHECK_EOF | |||
|
101 | <stdin>:3: unexpected line for "heredoc python invocation" | |||
|
102 | <stdin>:6: unexpected line for "heredoc python invocation" | |||
|
103 | <stdin>:11: unexpected end of file for "heredoc .py file" | |||
|
104 | [1] | |||
|
105 | $ cat detected | |||
|
106 | <stdin>:7: bar.py starts | |||
|
107 | |bar = 2 # this fragment will be detected as expected | |||
|
108 | <stdin>:8: bar.py ends | |||
|
109 | ||||
|
110 | Tests for embedded shell script | |||
|
111 | ||||
|
112 | $ "$PYTHON" "$testparseutil" -v shembedded <<NO_CHECK_EOF | |||
|
113 | > $ cat > foo.sh <<EOF | |||
|
114 | > > foo = 1 | |||
|
115 | > > | |||
|
116 | > > foo = 2 | |||
|
117 | > > EOF | |||
|
118 | > $ cat >> foo.sh <<EOF | |||
|
119 | > > foo = 3 # appended | |||
|
120 | > > EOF | |||
|
121 | > | |||
|
122 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |||
|
123 | > (this is useful to use bad code intentionally) | |||
|
124 | > | |||
|
125 | > $ cat > foo.sh <<NO_CHECK_EOF | |||
|
126 | > > # this should be ignored at detection | |||
|
127 | > > foo = 4 | |||
|
128 | > > NO_CHECK_EOF | |||
|
129 | > | |||
|
130 | > NO_CHECK_EOF | |||
|
131 | <stdin>:2: foo.sh starts | |||
|
132 | |foo = 1 | |||
|
133 | | | |||
|
134 | |foo = 2 | |||
|
135 | <stdin>:5: foo.sh ends | |||
|
136 | <stdin>:7: foo.sh starts | |||
|
137 | |foo = 3 # appended | |||
|
138 | <stdin>:8: foo.sh ends | |||
|
139 | ||||
|
140 | Tests for embedded hgrc configuration | |||
|
141 | ||||
|
142 | $ "$PYTHON" "$testparseutil" -v hgrcembedded <<NO_CHECK_EOF | |||
|
143 | > $ cat > .hg/hgrc <<EOF | |||
|
144 | > > [ui] | |||
|
145 | > > verbose = true | |||
|
146 | > > | |||
|
147 | > > # end of local configuration | |||
|
148 | > > EOF | |||
|
149 | > | |||
|
150 | > $ cat > \$HGRCPATH <<EOF | |||
|
151 | > > [extensions] | |||
|
152 | > > rebase = | |||
|
153 | > > # end of global configuration | |||
|
154 | > > EOF | |||
|
155 | > | |||
|
156 | > $ cat >> \$HGRCPATH <<EOF | |||
|
157 | > > # appended | |||
|
158 | > > [extensions] | |||
|
159 | > > rebase =! | |||
|
160 | > > EOF | |||
|
161 | > | |||
|
162 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |||
|
163 | > (this is useful to use bad code intentionally) | |||
|
164 | > | |||
|
165 | > $ cat > .hg/hgrc <<NO_CHECK_EOF | |||
|
166 | > > # this local configuration should be ignored at detection | |||
|
167 | > > [ui] | |||
|
168 | > > username = foo bar | |||
|
169 | > > NO_CHECK_EOF | |||
|
170 | > | |||
|
171 | > $ cat > \$HGRCPATH <<NO_CHECK_EOF | |||
|
172 | > > # this global configuration should be ignored at detection | |||
|
173 | > > [extensions] | |||
|
174 | > > foobar = | |||
|
175 | > > NO_CHECK_EOF | |||
|
176 | > NO_CHECK_EOF | |||
|
177 | <stdin>:2: .hg/hgrc starts | |||
|
178 | |[ui] | |||
|
179 | |verbose = true | |||
|
180 | | | |||
|
181 | |# end of local configuration | |||
|
182 | <stdin>:6: .hg/hgrc ends | |||
|
183 | <stdin>:9: $HGRCPATH starts | |||
|
184 | |[extensions] | |||
|
185 | |rebase = | |||
|
186 | |# end of global configuration | |||
|
187 | <stdin>:12: $HGRCPATH ends | |||
|
188 | <stdin>:15: $HGRCPATH starts | |||
|
189 | |# appended | |||
|
190 | |[extensions] | |||
|
191 | |rebase =! | |||
|
192 | <stdin>:18: $HGRCPATH ends |
General Comments 0
You need to be logged in to leave comments.
Login now