Show More
This diff has been collapsed as it changes many lines, (630 lines changed) Show them Hide them | |||
@@ -0,0 +1,630 b'' | |||
|
1 | # testparseutil.py - utilities to parse test script for check tools | |
|
2 | # | |
|
3 | # Copyright 2018 FUJIWARA Katsunori <foozy@lares.dti.ne.jp> and others | |
|
4 | # | |
|
5 | # This software may be used and distributed according to the terms of the | |
|
6 | # GNU General Public License version 2 or any later version. | |
|
7 | ||
|
8 | from __future__ import absolute_import, print_function | |
|
9 | ||
|
10 | import abc | |
|
11 | import re | |
|
12 | import sys | |
|
13 | ||
|
####################
# Python 3 compatibility helpers (mostly taken from mercurial/pycompat.py)

# True when running under Python 3 or later
ispy3 = (sys.version_info[0] >= 3)
|
18 | ||
|
def identity(a):
    """Return "a" as is (used as a no-op conversion function)"""
    return a
|
21 | ||
|
22 | def _rapply(f, xs): | |
|
23 | if xs is None: | |
|
24 | # assume None means non-value of optional data | |
|
25 | return xs | |
|
26 | if isinstance(xs, (list, set, tuple)): | |
|
27 | return type(xs)(_rapply(f, x) for x in xs) | |
|
28 | if isinstance(xs, dict): | |
|
29 | return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items()) | |
|
30 | return f(xs) | |
|
31 | ||
|
def rapply(f, xs):
    """Apply "f" on each element of "xs" recursively

    If "f" is "identity", "xs" itself is returned without any traversal.
    """
    # skipping traversal is a fast path, mainly for py2
    return xs if f is identity else _rapply(f, xs)
|
37 | ||
|
if ispy3:
    import builtins

    # TODO: .buffer might not exist if std streams were replaced; we'll need
    # a silly wrapper to make a bytes stream backed by a unicode one.
    stdin = sys.stdin.buffer
    stdout = sys.stdout.buffer
    stderr = sys.stderr.buffer

    def bytestr(s):
        """Encode "s" into bytes (tiny version of pycompat.bytestr)"""
        return s.encode('latin1')

    def sysstr(s):
        """Return "s" as native str, decoding it as latin-1 if needed"""
        return s if isinstance(s, builtins.str) else s.decode(u'latin-1')

    def opentext(f):
        """Open file "f" for reading in binary mode"""
        return open(f, 'rb')
else:
    # on Python 2, native str is already bytes: no conversion is needed
    stdin, stdout, stderr = sys.stdin, sys.stdout, sys.stderr

    bytestr = str
    sysstr = identity

    opentext = open
|
67 | ||
|
def b2s(x):
    """Convert BYTES elements in "x" to SYSSTR recursively

    This delegates to "rapply()" with "sysstr" as the conversion.
    """
    return rapply(sysstr, x)
|
71 | ||
|
def writeout(data):
    """Write "data" in BYTES into stdout"""
    stdout.write(data)
|
75 | ||
|
def writeerr(data):
    """Write "data" in BYTES into stderr"""
    stderr.write(data)
|
79 | ||
|
80 | #################### | |
|
81 | ||
|
class embeddedmatcher(object):
    """Base class to detect embedded code fragments in *.t test script

    "embedded()" examines "startsat()" of each matcher on lines outside
    of any fragment. The value returned by it is kept as 'ctx', and is
    passed to other methods until the fragment ends.
    """
    # Python 2 style metaclass declaration
    __metaclass__ = abc.ABCMeta

    def __init__(self, desc):
        # description of this matcher, which "embedded()" puts into
        # error messages
        self.desc = desc

    @abc.abstractmethod
    def startsat(self, line):
        """Examine whether embedded code starts at line

        This can return an arbitrary object, and it is used as 'ctx' for
        subsequent method invocations. A false value means that
        embedded code does not start at line.
        """

    @abc.abstractmethod
    def endsat(self, ctx, line):
        """Examine whether embedded code ends at line"""

    @abc.abstractmethod
    def isinside(self, ctx, line):
        """Examine whether line is inside embedded code, if not yet endsat

        "embedded()" treats a false result as an error of the parsed
        file, and stops extracting the current fragment.
        """

    @abc.abstractmethod
    def ignores(self, ctx):
        """Examine whether detected embedded code should be ignored"""

    @abc.abstractmethod
    def filename(self, ctx):
        """Return filename of embedded code

        If filename isn't specified for embedded code explicitly, this
        returns None.
        """

    @abc.abstractmethod
    def codeatstart(self, ctx, line):
        """Return actual code at the start line of embedded code

        This might return None, if the start line doesn't contain
        actual code.
        """

    @abc.abstractmethod
    def codeatend(self, ctx, line):
        """Return actual code at the end line of embedded code

        This might return None, if the end line doesn't contain actual
        code.
        """

    @abc.abstractmethod
    def codeinside(self, ctx, line):
        """Return actual code at line inside embedded code"""
|
138 | ||
|
def embedded(basefile, lines, errors, matchers):
    """Extract embedded code fragments from given lines

    This is the parsing logic shared by all kinds of embedded code: it
    drives the specified matchers over the given lines.

    :basefile: a name of a file, from which lines to be parsed come.
    :lines: to be parsed (might be a value returned by "open(basefile)")
    :errors: an array, into which messages for detected error are stored
    :matchers: an array of embeddedmatcher objects

    This function yields '(filename, starts, ends, code)' tuple.

    :filename: a name of embedded code, if it is explicitly specified
               (e.g. "foobar" of "cat >> foobar <<EOF").
               Otherwise, this is None
    :starts: line number (1-origin), at which embedded code starts (inclusive)
    :ends: line number (1-origin), at which embedded code ends (exclusive)
    :code: extracted embedded code, which is single-stringified

    >>> class ambigmatcher(object):
    ...     # mock matcher class to examine implementation of
    ...     # "ambiguous matching" corner case
    ...     def __init__(self, desc, matchfunc):
    ...         self.desc = desc
    ...         self.matchfunc = matchfunc
    ...     def startsat(self, line):
    ...         return self.matchfunc(line)
    >>> ambig1 = ambigmatcher(b'ambiguous #1',
    ...                       lambda l: l.startswith(b' $ cat '))
    >>> ambig2 = ambigmatcher(b'ambiguous #2',
    ...                       lambda l: l.endswith(b'<< EOF\\n'))
    >>> lines = [b' $ cat > foo.py << EOF\\n']
    >>> errors = []
    >>> matchers = [ambig1, ambig2]
    >>> list(t for t in embedded(b'<dummy>', lines, errors, matchers))
    []
    >>> b2s(errors)
    ['<dummy>:1: ambiguous line for "ambiguous #1", "ambiguous #2"']

    """
    matcher = None
    ctx = filename = code = startline = None # for pyflakes

    for lineno, line in enumerate(lines, 1):
        if not line.endswith(b'\n'):
            line += b'\n' # normalize the last line lacking a newline

        if matcher:
            # currently inside embedded code
            ended = matcher.endsat(ctx, line)
            if not ended and matcher.isinside(ctx, line):
                code.append(matcher.codeinside(ctx, line))
                continue

            if ended:
                tail = matcher.codeatend(ctx, line)
                if tail is not None:
                    code.append(tail)
                if not matcher.ignores(ctx):
                    yield (filename, startline, lineno, b''.join(code))
            else:
                # this is an error of basefile
                # (if matchers are implemented correctly)
                errors.append(b'%s:%d: unexpected line for "%s"'
                              % (basefile, lineno, matcher.desc))
                # extraction by the current matcher is given up here,
                # because an unexpected line might mean that the
                # expected end-of-embedded-code line never appears

            # the fragment is closed in both cases, but this line is
            # still examined below, because it might start next fragment
            matcher = None

        # examine whether current line starts embedded code or not
        candidates = []
        for m in matchers:
            started = m.startsat(line)
            if started:
                candidates.append((m, started))

        if not candidates:
            continue
        if len(candidates) > 1:
            # this is an error of matchers, maybe
            names = b', '.join([b'"%s"' % m.desc for m, c in candidates])
            errors.append(b'%s:%d: ambiguous line for %s' %
                          (basefile, lineno, names))
            # omit extracting embedded code, because choosing
            # arbitrary matcher from matched ones might fail to
            # detect the end of embedded code as expected.
            continue

        matcher, ctx = candidates[0]
        filename = matcher.filename(ctx)
        head = matcher.codeatstart(ctx, line)
        if head is None:
            # actual code starts at the next line
            code = []
            startline = lineno + 1
        else:
            code = [head]
            startline = lineno

    if not matcher:
        return

    # embedded code isn't yet ended explicitly: examine whether EOF
    # ends it, by feeding an empty line to the matcher
    if matcher.endsat(ctx, b'\n'):
        tail = matcher.codeatend(ctx, b'\n')
        if tail is not None:
            code.append(tail)
        if not matcher.ignores(ctx):
            yield (filename, startline, lineno + 1, b''.join(code))
    else:
        # this is an error of basefile
        # (if matchers are implemented correctly)
        errors.append(b'%s:%d: unexpected end of file for "%s"'
                      % (basefile, lineno, matcher.desc))
|
253 | ||
|
# heredoc limit mark to ignore embedded code at check-code.py or so
heredocignorelimit = b'NO_CHECK_EOF'

# the pattern to match against cases below, and to return a limit mark
# string as 'limit' group
#
# - << LIMITMARK
# - << "LIMITMARK"
# - << 'LIMITMARK'
heredoclimitpat = br'\s*<<\s*(?P<lquote>["\']?)(?P<limit>\w+)(?P=lquote)'
|
264 | ||
|
class fileheredocmatcher(embeddedmatcher):
    """Detect "cat > FILE << LIMIT" style embedded code

    >>> matcher = fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py')
    >>> b2s(matcher.startsat(b' $ cat > file.py << EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b' $ cat >>file.py <<EOF\\n'))
    ('file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b' $ cat> \\x27any file.py\\x27<< "EOF"\\n'))
    ('any file.py', ' > EOF\\n')
    >>> b2s(matcher.startsat(b" $ cat > file.py << 'ANYLIMIT'\\n"))
    ('file.py', ' > ANYLIMIT\\n')
    >>> b2s(matcher.startsat(b' $ cat<<ANYLIMIT>"file.py"\\n'))
    ('file.py', ' > ANYLIMIT\\n')
    >>> start = b' $ cat > file.py << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> b2s(matcher.filename(ctx))
    'file.py'
    >>> matcher.ignores(ctx)
    False
    >>> inside = b' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b' $ cat > file.py << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    # continuation prefix, which each line of heredoc has in *.t script
    _prefix = b' > '

    def __init__(self, desc, namepat):
        super(fileheredocmatcher, self).__init__(desc)

        # redirection part, which matches against cases below (and
        # ">>" variants), and captures a target filename string as
        # 'name' group
        #
        # - > NAMEPAT
        # - > "NAMEPAT"
        # - > 'NAMEPAT'
        redirect = (br'\s*>>?\s*(?P<nquote>["\']?)(?P<name>%s)(?P=nquote)'
                    % namepat)
        invocation = br' \$ \s*cat'
        self._fileres = [
            # "cat > NAME << LIMIT" case
            re.compile(invocation + redirect + heredoclimitpat),
            # "cat << LIMIT > NAME" case
            re.compile(invocation + heredoclimitpat + redirect),
        ]

    def startsat(self, line):
        # ctx is (filename, END-LINE-OF-EMBEDDED-CODE) tuple
        for filere in self._fileres:
            found = filere.match(line)
            if found is not None:
                endline = b' > %s\n' % found.group('limit')
                return (found.group('name'), endline)
        return None

    def endsat(self, ctx, line):
        # only the exact limit mark line ends embedded code
        return line == ctx[1]

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        # embedded code ended by the special limit mark is ignored
        return ctx[1] == b' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return ctx[0]

    def codeatstart(self, ctx, line):
        return None # the start line is a command line, not code

    def codeatend(self, ctx, line):
        return None # the end line is a limit mark, not code

    def codeinside(self, ctx, line):
        # drop the continuation prefix
        return line[len(self._prefix):]
|
352 | ||
|
353 | #### | |
|
354 | # for embedded python script | |
|
355 | ||
|
class pydoctestmatcher(embeddedmatcher):
    """Detect ">>> code" style embedded python code

    >>> matcher = pydoctestmatcher()
    >>> startline = b' >>> foo = 1\\n'
    >>> matcher.startsat(startline)
    True
    >>> matcher.startsat(b' ... foo = 1\\n')
    False
    >>> ctx = matcher.startsat(startline)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> b2s(matcher.codeatstart(ctx, startline))
    'foo = 1\\n'
    >>> inside = b' >>> foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b' ... foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> inside = b' expected output\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> inside = b' \\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    '\\n'
    >>> end = b' $ foo bar\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> end = b'\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    """
    # only a primary prompt line starts a fragment
    _prefix = b' >>> '
    # both primary and secondary prompt lines hold code inside a fragment
    _prefixre = re.compile(br' (>>>|\.\.\.) ')

    # If a line matches against not _prefixre but _outputre, that line
    # is "an expected output line" (= not a part of code fragment).
    #
    # Strictly speaking, a line matching against "(#if|#else|#endif)"
    # is also treated similarly in "inline python code" semantics by
    # run-tests.py. But "directive line inside inline python code"
    # should be rejected by Mercurial reviewers. Therefore, this
    # regexp does not match against such directive lines.
    _outputre = re.compile(br' $| [^$]')

    def __init__(self):
        super(pydoctestmatcher, self).__init__(b"doctest style python code")

    def startsat(self, line):
        # ctx is "True"
        return line.startswith(self._prefix)

    def endsat(self, ctx, line):
        # a fragment goes on while lines are code or expected output
        return (not self._prefixre.match(line)
                and not self._outputre.match(line))

    def isinside(self, ctx, line):
        return True # any line is inside, unless it ends the fragment

    def ignores(self, ctx):
        return False # doctest fragments are never ignored

    def filename(self, ctx):
        return None # doctest fragments have no filename

    def codeatstart(self, ctx, line):
        # drop the prompt prefix
        return line[len(self._prefix):]

    def codeatend(self, ctx, line):
        return None # the end line is outside of the fragment

    def codeinside(self, ctx, line):
        if self._prefixre.match(line) is None:
            # an expected output line is treated as an empty line
            return b'\n'
        # drop the prompt prefix (both prompts have the same length)
        return line[len(self._prefix):]
|
450 | ||
|
class pyheredocmatcher(embeddedmatcher):
    """Detect "python << LIMIT" style embedded python code

    >>> matcher = pyheredocmatcher()
    >>> b2s(matcher.startsat(b' $ python << EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b' $ $PYTHON <<EOF\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b' $ "$PYTHON"<< "EOF"\\n'))
    ' > EOF\\n'
    >>> b2s(matcher.startsat(b" $ $PYTHON << 'ANYLIMIT'\\n"))
    ' > ANYLIMIT\\n'
    >>> matcher.startsat(b' $ "$PYTHON" < EOF\\n')
    >>> start = b' $ python << EOF\\n'
    >>> ctx = matcher.startsat(start)
    >>> matcher.codeatstart(ctx, start)
    >>> matcher.filename(ctx)
    >>> matcher.ignores(ctx)
    False
    >>> inside = b' > foo = 1\\n'
    >>> matcher.endsat(ctx, inside)
    False
    >>> matcher.isinside(ctx, inside)
    True
    >>> b2s(matcher.codeinside(ctx, inside))
    'foo = 1\\n'
    >>> end = b' > EOF\\n'
    >>> matcher.endsat(ctx, end)
    True
    >>> matcher.codeatend(ctx, end)
    >>> matcher.endsat(ctx, b' > EOFEOF\\n')
    False
    >>> ctx = matcher.startsat(b' $ python << NO_CHECK_EOF\\n')
    >>> matcher.ignores(ctx)
    True
    """
    # continuation prefix, which each line of heredoc has in *.t script
    _prefix = b' > '

    # python invocation line followed by heredoc redirection
    _startre = re.compile(br' \$ (\$PYTHON|"\$PYTHON"|python).*' +
                          heredoclimitpat)

    def __init__(self):
        super(pyheredocmatcher, self).__init__(b"heredoc python invocation")

    def startsat(self, line):
        # ctx is END-LINE-OF-EMBEDDED-CODE
        found = self._startre.match(line)
        if found is None:
            return None
        return b' > %s\n' % found.group('limit')

    def endsat(self, ctx, line):
        # only the exact limit mark line ends embedded code
        return line == ctx

    def isinside(self, ctx, line):
        return line.startswith(self._prefix)

    def ignores(self, ctx):
        # embedded code ended by the special limit mark is ignored
        return ctx == b' > %s\n' % heredocignorelimit

    def filename(self, ctx):
        return None # code is fed to python directly, not saved as a file

    def codeatstart(self, ctx, line):
        return None # the start line is a command line, not code

    def codeatend(self, ctx, line):
        return None # the end line is a limit mark, not code

    def codeinside(self, ctx, line):
        # drop the continuation prefix
        return line[len(self._prefix):]
|
521 | ||
|
# matchers to detect embedded python code
_pymatchers = [
    pydoctestmatcher(),
    pyheredocmatcher(),
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc .py file', br'[^<]+\.py'),
]

def pyembedded(basefile, lines, errors):
    """Pick embedded python code fragments up from given lines

    This invokes "embedded()" with matchers for python code, and
    returns its result as is.
    """
    return embedded(basefile, lines, errors, _pymatchers)
|
532 | ||
|
533 | #### | |
|
534 | # for embedded shell script | |
|
535 | ||
|
# matchers to detect embedded shell script
_shmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc .sh file', br'[^<]+\.sh'),
]

def shembedded(basefile, lines, errors):
    """Pick embedded shell script fragments up from given lines

    This invokes "embedded()" with matchers for shell script, and
    returns its result as is.
    """
    return embedded(basefile, lines, errors, _shmatchers)
|
544 | ||
|
545 | #### | |
|
546 | # for embedded hgrc configuration | |
|
547 | ||
|
# matchers to detect embedded hgrc configuration
_hgrcmatchers = [
    # use '[^<]+' instead of '\S+', in order to match against
    # paths including whitespaces
    fileheredocmatcher(b'heredoc hgrc file',
                       br'(([^/<]+/)+hgrc|\$HGRCPATH|\${HGRCPATH})'),
]

def hgrcembedded(basefile, lines, errors):
    """Pick embedded hgrc configuration fragments up from given lines

    This invokes "embedded()" with matchers for hgrc configuration, and
    returns its result as is.
    """
    return embedded(basefile, lines, errors, _hgrcmatchers)
|
557 | ||
|
558 | #### | |
|
559 | ||
|
if __name__ == "__main__":
    # "sys" is already imported at the top of this file
    import optparse

    def showembedded(basefile, lines, embeddedfunc, opts):
        """Write embedded code fragments detected in lines into stdout

        Start/end lines of each fragment are listed up (with actual
        code, if opts.verbose is enabled). Errors detected at parsing
        are written into stderr, and the number of them is returned.
        """
        errors = []
        for name, starts, ends, code in embeddedfunc(basefile, lines, errors):
            if not name:
                name = b'<anonymous>'
            writeout(b"%s:%d: %s starts\n" % (basefile, starts, name))
            if opts.verbose and code:
                writeout(b" |%s\n" % b"\n |".join(code.splitlines()))
            writeout(b"%s:%d: %s ends\n" % (basefile, ends, name))
        for e in errors:
            writeerr(b"%s\n" % e)
        return len(errors)

    def applyembedded(args, embeddedfunc, opts):
        """Apply embeddedfunc on each file in args (or on stdin, if empty)

        This returns 1 if any error is detected, or 0 otherwise.
        """
        ret = 0
        if args:
            for f in args:
                with opentext(f) as fp:
                    if showembedded(bytestr(f), fp, embeddedfunc, opts):
                        ret = 1
        else:
            lines = stdin.readlines()
            if showembedded(b'<stdin>', lines, embeddedfunc, opts):
                ret = 1
        return ret

    # map from command name to (description, function) tuple
    commands = {}
    def command(name, desc):
        """Return a decorator to register a function as command "name" """
        def wrap(func):
            commands[name] = (desc, func)
            # return the function itself, in order to avoid rebinding
            # the decorated name to None
            return func
        return wrap

    @command("pyembedded", "detect embedded python script")
    def pyembeddedcmd(args, opts):
        return applyembedded(args, pyembedded, opts)

    @command("shembedded", "detect embedded shell script")
    def shembeddedcmd(args, opts):
        return applyembedded(args, shembedded, opts)

    @command("hgrcembedded", "detect embedded hgrc configuration")
    def hgrcembeddedcmd(args, opts):
        return applyembedded(args, hgrcembedded, opts)

    availablecommands = "\n".join([" - %s: %s" % (key, value[0])
                                   for key, value in commands.items()])

    parser = optparse.OptionParser("""%prog COMMAND [file ...]

Pick up embedded code fragments from given file(s) or stdin, and list
up start/end lines of them in standard compiler format
("FILENAME:LINENO:").

Available commands are:
""" + availablecommands + """
""")
    parser.add_option("-v", "--verbose",
                      help="enable additional output (e.g. actual code)",
                      action="store_true")
    (opts, args) = parser.parse_args()

    # the first argument chooses the command to be executed
    if not args or args[0] not in commands:
        parser.print_help()
        sys.exit(255)

    sys.exit(commands[args[0]][1](args[1:], opts))
@@ -0,0 +1,192 b'' | |||
|
1 | $ testparseutil="$TESTDIR"/../contrib/testparseutil.py | |
|
2 | ||
|
3 | Internal test by doctest | |
|
4 | ||
|
5 | $ "$PYTHON" -m doctest "$testparseutil" | |
|
6 | ||
|
7 | Tests for embedded python script | |
|
8 | ||
|
9 | Typical cases | |
|
10 | ||
|
11 | $ "$PYTHON" "$testparseutil" -v pyembedded <<NO_CHECK_EOF | |
|
12 | > >>> for f in [1, 2, 3]: | |
|
13 | > ... foo = 1 | |
|
14 | > >>> foo = 2 | |
|
15 | > $ echo "doctest is terminated by command, empty line, or comment" | |
|
16 | > >>> foo = 31 | |
|
17 | > expected output of doctest fragment | |
|
18 | > >>> foo = 32 | |
|
19 | > | |
|
20 | > >>> foo = 33 | |
|
21 | > | |
|
22 | > >>> foo = 34 | |
|
23 | > comment | |
|
24 | > >>> foo = 35 | |
|
25 | > | |
|
26 | > $ "\$PYTHON" <<EOF | |
|
27 | > > foo = 4 | |
|
28 | > > | |
|
29 | > > EOF | |
|
30 | > $ cat > foo.py <<EOF | |
|
31 | > > foo = 5 | |
|
32 | > > EOF | |
|
33 | > $ cat >> foo.py <<EOF | |
|
34 | > > foo = 6 # appended | |
|
35 | > > EOF | |
|
36 | > | |
|
37 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |
|
38 | > (this is useful to use bad code intentionally) | |
|
39 | > | |
|
40 | > $ "\$PYTHON" <<NO_CHECK_EOF | |
|
41 | > > foo = 7 # this should be ignored at detection | |
|
42 | > > NO_CHECK_EOF | |
|
43 | > $ cat > foo.py <<NO_CHECK_EOF | |
|
44 | > > foo = 8 # this should be ignored at detection | |
|
45 | > > NO_CHECK_EOF | |
|
46 | > | |
|
47 | > doctest fragment ended by EOF | |
|
48 | > | |
|
49 | > >>> foo = 9 | |
|
50 | > NO_CHECK_EOF | |
|
51 | <stdin>:1: <anonymous> starts | |
|
52 | |for f in [1, 2, 3]: | |
|
53 | | foo = 1 | |
|
54 | |foo = 2 | |
|
55 | <stdin>:4: <anonymous> ends | |
|
56 | <stdin>:5: <anonymous> starts | |
|
57 | |foo = 31 | |
|
58 | | | |
|
59 | |foo = 32 | |
|
60 | | | |
|
61 | |foo = 33 | |
|
62 | <stdin>:10: <anonymous> ends | |
|
63 | <stdin>:11: <anonymous> starts | |
|
64 | |foo = 34 | |
|
65 | <stdin>:12: <anonymous> ends | |
|
66 | <stdin>:13: <anonymous> starts | |
|
67 | |foo = 35 | |
|
68 | <stdin>:14: <anonymous> ends | |
|
69 | <stdin>:16: <anonymous> starts | |
|
70 | |foo = 4 | |
|
71 | | | |
|
72 | <stdin>:18: <anonymous> ends | |
|
73 | <stdin>:20: foo.py starts | |
|
74 | |foo = 5 | |
|
75 | <stdin>:21: foo.py ends | |
|
76 | <stdin>:23: foo.py starts | |
|
77 | |foo = 6 # appended | |
|
78 | <stdin>:24: foo.py ends | |
|
79 | <stdin>:38: <anonymous> starts | |
|
80 | |foo = 9 | |
|
81 | <stdin>:39: <anonymous> ends | |
|
82 | ||
|
83 | Invalid test script | |
|
84 | ||
|
85 | (similar test for shell script and hgrc configuration is omitted, | |
|
86 | because this tests common base class of them) | |
|
87 | ||
|
88 | $ "$PYTHON" "$testparseutil" -v pyembedded <<NO_CHECK_EOF > detected | |
|
89 | > $ "\$PYTHON" <<EOF | |
|
90 | > > foo = 1 | |
|
91 | > | |
|
92 | > $ "\$PYTHON" <<EOF | |
|
93 | > > foo = 2 | |
|
94 | > $ cat > bar.py <<EOF | |
|
95 | > > bar = 2 # this fragment will be detected as expected | |
|
96 | > > EOF | |
|
97 | > | |
|
98 | > $ cat > foo.py <<EOF | |
|
99 | > > foo = 3 | |
|
100 | > NO_CHECK_EOF | |
|
101 | <stdin>:3: unexpected line for "heredoc python invocation" | |
|
102 | <stdin>:6: unexpected line for "heredoc python invocation" | |
|
103 | <stdin>:11: unexpected end of file for "heredoc .py file" | |
|
104 | [1] | |
|
105 | $ cat detected | |
|
106 | <stdin>:7: bar.py starts | |
|
107 | |bar = 2 # this fragment will be detected as expected | |
|
108 | <stdin>:8: bar.py ends | |
|
109 | ||
|
110 | Tests for embedded shell script | |
|
111 | ||
|
112 | $ "$PYTHON" "$testparseutil" -v shembedded <<NO_CHECK_EOF | |
|
113 | > $ cat > foo.sh <<EOF | |
|
114 | > > foo = 1 | |
|
115 | > > | |
|
116 | > > foo = 2 | |
|
117 | > > EOF | |
|
118 | > $ cat >> foo.sh <<EOF | |
|
119 | > > foo = 3 # appended | |
|
120 | > > EOF | |
|
121 | > | |
|
122 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |
|
123 | > (this is useful to use bad code intentionally) | |
|
124 | > | |
|
125 | > $ cat > foo.sh <<NO_CHECK_EOF | |
|
126 | > > # this should be ignored at detection | |
|
127 | > > foo = 4 | |
|
128 | > > NO_CHECK_EOF | |
|
129 | > | |
|
130 | > NO_CHECK_EOF | |
|
131 | <stdin>:2: foo.sh starts | |
|
132 | |foo = 1 | |
|
133 | | | |
|
134 | |foo = 2 | |
|
135 | <stdin>:5: foo.sh ends | |
|
136 | <stdin>:7: foo.sh starts | |
|
137 | |foo = 3 # appended | |
|
138 | <stdin>:8: foo.sh ends | |
|
139 | ||
|
140 | Tests for embedded hgrc configuration | |
|
141 | ||
|
142 | $ "$PYTHON" "$testparseutil" -v hgrcembedded <<NO_CHECK_EOF | |
|
143 | > $ cat > .hg/hgrc <<EOF | |
|
144 | > > [ui] | |
|
145 | > > verbose = true | |
|
146 | > > | |
|
147 | > > # end of local configuration | |
|
148 | > > EOF | |
|
149 | > | |
|
150 | > $ cat > \$HGRCPATH <<EOF | |
|
151 | > > [extensions] | |
|
152 | > > rebase = | |
|
153 | > > # end of global configuration | |
|
154 | > > EOF | |
|
155 | > | |
|
156 | > $ cat >> \$HGRCPATH <<EOF | |
|
157 | > > # appended | |
|
158 | > > [extensions] | |
|
159 | > > rebase =! | |
|
160 | > > EOF | |
|
161 | > | |
|
162 | > NO_CHECK_EOF limit mark makes parsing ignore corresponded fragment | |
|
163 | > (this is useful to use bad code intentionally) | |
|
164 | > | |
|
165 | > $ cat > .hg/hgrc <<NO_CHECK_EOF | |
|
166 | > > # this local configuration should be ignored at detection | |
|
167 | > > [ui] | |
|
168 | > > username = foo bar | |
|
169 | > > NO_CHECK_EOF | |
|
170 | > | |
|
171 | > $ cat > \$HGRCPATH <<NO_CHECK_EOF | |
|
172 | > > # this global configuration should be ignored at detection | |
|
173 | > > [extensions] | |
|
174 | > > foobar = | |
|
175 | > > NO_CHECK_EOF | |
|
176 | > NO_CHECK_EOF | |
|
177 | <stdin>:2: .hg/hgrc starts | |
|
178 | |[ui] | |
|
179 | |verbose = true | |
|
180 | | | |
|
181 | |# end of local configuration | |
|
182 | <stdin>:6: .hg/hgrc ends | |
|
183 | <stdin>:9: $HGRCPATH starts | |
|
184 | |[extensions] | |
|
185 | |rebase = | |
|
186 | |# end of global configuration | |
|
187 | <stdin>:12: $HGRCPATH ends | |
|
188 | <stdin>:15: $HGRCPATH starts | |
|
189 | |# appended | |
|
190 | |[extensions] | |
|
191 | |rebase =! | |
|
192 | <stdin>:18: $HGRCPATH ends |
General Comments 0
You need to be logged in to leave comments.
Login now