Fixes for tokenize in Python 3.3
Thomas Kluyver
@@ -1,595 +1,595 b''
1 1 """Patched version of standard library tokenize, to deal with various bugs.
2 2
3 3 Based on Python 3.2 code.
4 4
5 5 Patches:
6 6
7 7 - Gareth Rees' patch for Python issue #12691 (untokenizing)
8 8 - Except we don't encode the output of untokenize
9 9 - Python 2 compatible syntax, so that it can be byte-compiled at installation
10 10 - Newlines in comments and blank lines should be either NL or NEWLINE, depending
11 11 on whether they are in a multi-line statement. Filed as Python issue #17061.
12 12 - Export generate_tokens & TokenError
13 13 - u and rb literals are allowed under Python 3.3 and above.
14 14
15 15 ------------------------------------------------------------------------------
16 16 Tokenization help for Python programs.
17 17
18 18 tokenize(readline) is a generator that breaks a stream of bytes into
19 19 Python tokens. It decodes the bytes according to PEP-0263 for
20 20 determining source file encoding.
21 21
22 22 It accepts a readline-like method which is called repeatedly to get the
23 23 next line of input (or b"" for EOF). It generates 5-tuples with these
24 24 members:
25 25
26 26 the token type (see token.py)
27 27 the token (a string)
28 28 the starting (row, column) indices of the token (a 2-tuple of ints)
29 29 the ending (row, column) indices of the token (a 2-tuple of ints)
30 30 the original line (string)
31 31
32 32 It is designed to match the working of the Python tokenizer exactly, except
33 33 that it produces COMMENT tokens for comments and gives type OP for all
34 34 operators. Additionally, all token lists start with an ENCODING token
35 35 which tells you which encoding was used to decode the bytes stream.
36 36 """
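A minimal usage sketch of the interface described above, assuming the tokenize() generator defined later in this file is in scope (the source bytes are made up):

from io import BytesIO

source = b"x = 1  # a comment\n"
for tok in tokenize(BytesIO(source).readline):
    print(tok.type, repr(tok.string))
# The first tuple emitted is the ENCODING token ('utf-8' here), as noted above.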
37 37 from __future__ import absolute_import
38 38
39 39 __author__ = 'Ka-Ping Yee <ping@lfw.org>'
40 40 __credits__ = ('GvR, ESR, Tim Peters, Thomas Wouters, Fred Drake, '
41 41 'Skip Montanaro, Raymond Hettinger, Trent Nelson, '
42 42 'Michael Foord')
43 43 import builtins
44 44 import re
45 45 import sys
46 46 from token import *
47 47 from codecs import lookup, BOM_UTF8
48 48 import collections
49 49 from io import TextIOWrapper
50 50 cookie_re = re.compile("coding[:=]\s*([-\w.]+)")
51 51
52 52 import token
53 53 __all__ = token.__all__ + ["COMMENT", "tokenize", "detect_encoding",
54 54 "NL", "untokenize", "ENCODING", "TokenInfo"]
55 55 del token
56 56
57 57 __all__ += ["generate_tokens", "TokenError"]
58 58
59 59 COMMENT = N_TOKENS
60 60 tok_name[COMMENT] = 'COMMENT'
61 61 NL = N_TOKENS + 1
62 62 tok_name[NL] = 'NL'
63 63 ENCODING = N_TOKENS + 2
64 64 tok_name[ENCODING] = 'ENCODING'
65 65 N_TOKENS += 3
66 66
67 67 class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
68 68 def __repr__(self):
69 69 annotated_type = '%d (%s)' % (self.type, tok_name[self.type])
70 70 return ('TokenInfo(type=%s, string=%r, start=%r, end=%r, line=%r)' %
71 71 self._replace(type=annotated_type))
72 72
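A hedged illustration of the namedtuple above (NAME is pulled in by the star import from the stdlib token module):

t = TokenInfo(NAME, 'spam', (1, 0), (1, 4), 'spam = 1\n')
print(t.string, t.start, t.end)   # spam (1, 0) (1, 4)
print(t)   # the repr annotates the numeric type with its name, e.g. "type=1 (NAME)"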
73 73 def group(*choices): return '(' + '|'.join(choices) + ')'
74 74 def any(*choices): return group(*choices) + '*'
75 75 def maybe(*choices): return group(*choices) + '?'
76 76
77 77 # Note: we use unicode matching for names ("\w") but ascii matching for
78 78 # number literals.
79 79 Whitespace = r'[ \f\t]*'
80 80 Comment = r'#[^\r\n]*'
81 81 Ignore = Whitespace + any(r'\\\r?\n' + Whitespace) + maybe(Comment)
82 82 Name = r'\w+'
83 83
84 84 Hexnumber = r'0[xX][0-9a-fA-F]+'
85 85 Binnumber = r'0[bB][01]+'
86 86 Octnumber = r'0[oO][0-7]+'
87 87 Decnumber = r'(?:0+|[1-9][0-9]*)'
88 88 Intnumber = group(Hexnumber, Binnumber, Octnumber, Decnumber)
89 89 Exponent = r'[eE][-+]?[0-9]+'
90 90 Pointfloat = group(r'[0-9]+\.[0-9]*', r'\.[0-9]+') + maybe(Exponent)
91 91 Expfloat = r'[0-9]+' + Exponent
92 92 Floatnumber = group(Pointfloat, Expfloat)
93 93 Imagnumber = group(r'[0-9]+[jJ]', Floatnumber + r'[jJ]')
94 94 Number = group(Imagnumber, Floatnumber, Intnumber)
95 95
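A quick, hedged spot-check of the number grammar assembled above (anchored with $ so partial matches do not count):

import re
for s in ('0xDEAD', '1.5e-3j', '1_000'):
    print(s, bool(re.match(Number + r'$', s)))
# 0xDEAD and 1.5e-3j match; 1_000 does not (underscore digit grouping postdates 3.3).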
96 96 if sys.version_info.minor >= 3:
97 97 StringPrefix = r'(?:[bB][rR]?|[rR][bB]?|[uU])?'
98 98 else:
99 99 StringPrefix = r'(?:[bB]?[rR]?)?'
100 100
101 101 # Tail end of ' string.
102 102 Single = r"[^'\\]*(?:\\.[^'\\]*)*'"
103 103 # Tail end of " string.
104 104 Double = r'[^"\\]*(?:\\.[^"\\]*)*"'
105 105 # Tail end of ''' string.
106 106 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
107 107 # Tail end of """ string.
108 108 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
109 109 Triple = group(StringPrefix + "'''", StringPrefix + '"""')
110 110 # Single-line ' or " string.
111 111 String = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
112 112 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
113 113
114 114 # Because of leftmost-then-longest match semantics, be sure to put the
115 115 # longest operators first (e.g., if = came before ==, == would get
116 116 # recognized as two instances of =).
117 117 Operator = group(r"\*\*=?", r">>=?", r"<<=?", r"!=",
118 118 r"//=?", r"->",
119 119 r"[+\-*/%&|^=<>]=?",
120 120 r"~")
121 121
122 122 Bracket = '[][(){}]'
123 123 Special = group(r'\r?\n', r'\.\.\.', r'[:;.,@]')
124 124 Funny = group(Operator, Bracket, Special)
125 125
126 126 PlainToken = group(Number, Funny, String, Name)
127 127 Token = Ignore + PlainToken
128 128
129 129 # First (or only) line of ' or " string.
130 130 ContStr = group(StringPrefix + r"'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
131 131 group("'", r'\\\r?\n'),
132 132 StringPrefix + r'"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
133 133 group('"', r'\\\r?\n'))
134 134 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
135 135 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
136 136
137 137 def _compile(expr):
138 138 return re.compile(expr, re.UNICODE)
139 139
140 140 tokenprog, pseudoprog, single3prog, double3prog = map(
141 141 _compile, (Token, PseudoToken, Single3, Double3))
142 142 endprogs = {"'": _compile(Single), '"': _compile(Double),
143 143 "'''": single3prog, '"""': double3prog,
144 144 "r'''": single3prog, 'r"""': double3prog,
145 145 "b'''": single3prog, 'b"""': double3prog,
146 146 "R'''": single3prog, 'R"""': double3prog,
147 147 "B'''": single3prog, 'B"""': double3prog,
148 148 "br'''": single3prog, 'br"""': double3prog,
149 149 "bR'''": single3prog, 'bR"""': double3prog,
150 150 "Br'''": single3prog, 'Br"""': double3prog,
151 151 "BR'''": single3prog, 'BR"""': double3prog,
152 152 'r': None, 'R': None, 'b': None, 'B': None}
153 153
154 154 triple_quoted = {}
155 155 for t in ("'''", '"""',
156 156 "r'''", 'r"""', "R'''", 'R"""',
157 157 "b'''", 'b"""', "B'''", 'B"""',
158 158 "br'''", 'br"""', "Br'''", 'Br"""',
159 159 "bR'''", 'bR"""', "BR'''", 'BR"""'):
160 160 triple_quoted[t] = t
161 161 single_quoted = {}
162 162 for t in ("'", '"',
163 163 "r'", 'r"', "R'", 'R"',
164 164 "b'", 'b"', "B'", 'B"',
165 165 "br'", 'br"', "Br'", 'Br"',
166 166 "bR'", 'bR"', "BR'", 'BR"' ):
167 167 single_quoted[t] = t
168 168
169 169 if sys.version_info.minor >= 3:
170 170 # Python 3.3
171 171 for _prefix in ['rb', 'rB', 'Rb', 'RB', 'u', 'U']:
172 _t2 = prefix+'"""'
172 _t2 = _prefix+'"""'
173 173 endprogs[_t2] = double3prog
174 174 triple_quoted[_t2] = _t2
175 _t1 = prefix + "'''"
175 _t1 = _prefix + "'''"
176 176 endprogs[_t1] = single3prog
177 177 triple_quoted[_t1] = _t1
178 178 single_quoted[_prefix+'"'] = _prefix+'"'
179 single_quoted[_prefix+"'"] + _prefix+"'"
179 single_quoted[_prefix+"'"] = _prefix+"'"
180 180 del _prefix, _t2, _t1
181 181 endprogs['u'] = None
182 182 endprogs['U'] = None
183 183
184 184 del _compile
185 185
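The net effect of the version-gated block above is simply to extend the lookup tables; a hedged spot-check under Python 3.3+:

print("u'''" in triple_quoted)            # True - u triple-quoted literals are recognised
print('rb"' in single_quoted)             # True - rb single-quoted literals are recognised
print(endprogs['rb"""'] is double3prog)   # True - reuses the ordinary double-quote tail matcher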
186 186 tabsize = 8
187 187
188 188 class TokenError(Exception): pass
189 189
190 190 class StopTokenizing(Exception): pass
191 191
192 192
193 193 class Untokenizer:
194 194
195 195 def __init__(self):
196 196 self.tokens = []
197 197 self.prev_row = 1
198 198 self.prev_col = 0
199 199 self.encoding = 'utf-8'
200 200
201 201 def add_whitespace(self, tok_type, start):
202 202 row, col = start
203 203 assert row >= self.prev_row
204 204 col_offset = col - self.prev_col
205 205 if col_offset > 0:
206 206 self.tokens.append(" " * col_offset)
207 207 elif row > self.prev_row and tok_type not in (NEWLINE, NL, ENDMARKER):
208 208 # Line was backslash-continued.
209 209 self.tokens.append(" ")
210 210
211 211 def untokenize(self, tokens):
212 212 iterable = iter(tokens)
213 213 for t in iterable:
214 214 if len(t) == 2:
215 215 self.compat(t, iterable)
216 216 break
217 217 tok_type, token, start, end = t[:4]
218 218 if tok_type == ENCODING:
219 219 self.encoding = token
220 220 continue
221 221 self.add_whitespace(tok_type, start)
222 222 self.tokens.append(token)
223 223 self.prev_row, self.prev_col = end
224 224 if tok_type in (NEWLINE, NL):
225 225 self.prev_row += 1
226 226 self.prev_col = 0
227 227 return "".join(self.tokens)
228 228
229 229 def compat(self, token, iterable):
230 230 # This import is here to avoid problems when the itertools
231 231 # module is not built yet and tokenize is imported.
232 232 from itertools import chain
233 233 startline = False
234 234 prevstring = False
235 235 indents = []
236 236 toks_append = self.tokens.append
237 237
238 238 for tok in chain([token], iterable):
239 239 toknum, tokval = tok[:2]
240 240 if toknum == ENCODING:
241 241 self.encoding = tokval
242 242 continue
243 243
244 244 if toknum in (NAME, NUMBER):
245 245 tokval += ' '
246 246
247 247 # Insert a space between two consecutive strings
248 248 if toknum == STRING:
249 249 if prevstring:
250 250 tokval = ' ' + tokval
251 251 prevstring = True
252 252 else:
253 253 prevstring = False
254 254
255 255 if toknum == INDENT:
256 256 indents.append(tokval)
257 257 continue
258 258 elif toknum == DEDENT:
259 259 indents.pop()
260 260 continue
261 261 elif toknum in (NEWLINE, NL):
262 262 startline = True
263 263 elif startline and indents:
264 264 toks_append(indents[-1])
265 265 startline = False
266 266 toks_append(tokval)
267 267
268 268
269 269 def untokenize(tokens):
270 270 """
271 271 Convert ``tokens`` (an iterable) back into Python source code. Return
272 272 a string. Unlike the standard library version, this patched untokenize does
273 273 not encode its output (see the patch notes at the top of this file).
274 274
275 275 The result is guaranteed to tokenize back to match the input so that
276 276 the conversion is lossless and round-trips are assured. The
277 277 guarantee applies only to the token type and token string as the
278 278 spacing between tokens (column positions) may change.
279 279
280 280 :func:`untokenize` has two modes. If the input tokens are sequences
281 281 of length 2 (``type``, ``string``) then spaces are added as necessary to
282 282 preserve the round-trip property.
283 283
284 284 If the input tokens are sequences of length 4 or more (``type``,
285 285 ``string``, ``start``, ``end``), as returned by :func:`tokenize`, then
286 286 spaces are added so that each token appears in the result at the
287 287 position indicated by ``start`` and ``end``, if possible.
288 288 """
289 289 return Untokenizer().untokenize(tokens)
290 290
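A hedged sketch of the two modes described in the docstring; note that, per the patch notes at the top of the file, this untokenize returns text rather than encoded bytes:

from io import BytesIO

src = b"a = (1 +\n     2)\n"
toks = list(tokenize(BytesIO(src).readline))           # tokenize() is defined further down

exact  = untokenize(toks)                               # 5-tuples: positions respected
compat = untokenize((t.type, t.string) for t in toks)   # 2-tuples: spacing regenerated
# Both results should tokenize back to the same (type, string) stream as the input.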
291 291
292 292 def _get_normal_name(orig_enc):
293 293 """Imitates get_normal_name in tokenizer.c."""
294 294 # Only care about the first 12 characters.
295 295 enc = orig_enc[:12].lower().replace("_", "-")
296 296 if enc == "utf-8" or enc.startswith("utf-8-"):
297 297 return "utf-8"
298 298 if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
299 299 enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
300 300 return "iso-8859-1"
301 301 return orig_enc
302 302
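Hedged examples of the normalisation rules implemented above:

print(_get_normal_name("UTF-8"))     # utf-8
print(_get_normal_name("Latin_1"))   # iso-8859-1  (via the latin-1 alias)
print(_get_normal_name("euc-jp"))    # euc-jp      (unknown names pass through unchanged)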
303 303 def detect_encoding(readline):
304 304 """
305 305 The detect_encoding() function is used to detect the encoding that should
306 306 be used to decode a Python source file. It requires one argument, readline,
307 307 in the same way as the tokenize() generator.
308 308
309 309 It will call readline a maximum of twice, and return the encoding used
310 310 (as a string) and a list of any lines (left as bytes) it has read in.
311 311
312 312 It detects the encoding from the presence of a utf-8 bom or an encoding
313 313 cookie as specified in pep-0263. If both a bom and a cookie are present,
314 314 but disagree, a SyntaxError will be raised. If the encoding cookie is an
315 315 invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
316 316 'utf-8-sig' is returned.
317 317
318 318 If no encoding is specified, then the default of 'utf-8' will be returned.
319 319 """
320 320 bom_found = False
321 321 encoding = None
322 322 default = 'utf-8'
323 323 def read_or_stop():
324 324 try:
325 325 return readline()
326 326 except StopIteration:
327 327 return b''
328 328
329 329 def find_cookie(line):
330 330 try:
331 331 # Decode as UTF-8. Either the line is an encoding declaration,
332 332 # in which case it should be pure ASCII, or it must be UTF-8
333 333 # per default encoding.
334 334 line_string = line.decode('utf-8')
335 335 except UnicodeDecodeError:
336 336 raise SyntaxError("invalid or missing encoding declaration")
337 337
338 338 matches = cookie_re.findall(line_string)
339 339 if not matches:
340 340 return None
341 341 encoding = _get_normal_name(matches[0])
342 342 try:
343 343 codec = lookup(encoding)
344 344 except LookupError:
345 345 # This behaviour mimics the Python interpreter
346 346 raise SyntaxError("unknown encoding: " + encoding)
347 347
348 348 if bom_found:
349 349 if encoding != 'utf-8':
350 350 # This behaviour mimics the Python interpreter
351 351 raise SyntaxError('encoding problem: utf-8')
352 352 encoding += '-sig'
353 353 return encoding
354 354
355 355 first = read_or_stop()
356 356 if first.startswith(BOM_UTF8):
357 357 bom_found = True
358 358 first = first[3:]
359 359 default = 'utf-8-sig'
360 360 if not first:
361 361 return default, []
362 362
363 363 encoding = find_cookie(first)
364 364 if encoding:
365 365 return encoding, [first]
366 366
367 367 second = read_or_stop()
368 368 if not second:
369 369 return default, [first]
370 370
371 371 encoding = find_cookie(second)
372 372 if encoding:
373 373 return encoding, [first, second]
374 374
375 375 return default, [first, second]
376 376
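A hedged sketch of detect_encoding() applied to a buffer carrying a PEP 263 cookie:

from io import BytesIO

buf = BytesIO(b"# -*- coding: latin-1 -*-\nprint('hi')\n")
enc, consumed = detect_encoding(buf.readline)
print(enc)              # iso-8859-1 (normalised by _get_normal_name)
print(len(consumed))    # 1 - only the cookie line was read ahead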
377 377
378 378 def open(filename):
379 379 """Open a file in read only mode using the encoding detected by
380 380 detect_encoding().
381 381 """
382 382 buffer = builtins.open(filename, 'rb')
383 383 encoding, lines = detect_encoding(buffer.readline)
384 384 buffer.seek(0)
385 385 text = TextIOWrapper(buffer, encoding, line_buffering=True)
386 386 text.mode = 'r'
387 387 return text
388 388
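Hedged usage of the open() wrapper above ('example.py' is a made-up path); it behaves like builtins.open in text mode but decodes with the detected source encoding:

f = open('example.py')          # the open() defined just above, not builtins.open
try:
    print(f.encoding)           # whatever detect_encoding() reported
    print(f.readline(), end='')
finally:
    f.close()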
389 389
390 390 def tokenize(readline):
391 391 """
392 392 The tokenize() generator requires one argument, readline, which
393 393 must be a callable object which provides the same interface as the
394 394 readline() method of built-in file objects. Each call to the function
395 395 should return one line of input as bytes. Alternately, readline
396 396 can be a callable function terminating with StopIteration:
397 397 readline = open(myfile, 'rb').__next__ # Example of alternate readline
398 398
399 399 The generator produces 5-tuples with these members: the token type; the
400 400 token string; a 2-tuple (srow, scol) of ints specifying the row and
401 401 column where the token begins in the source; a 2-tuple (erow, ecol) of
402 402 ints specifying the row and column where the token ends in the source;
403 403 and the line on which the token was found. The line passed is the
404 404 logical line; continuation lines are included.
405 405
406 406 The first token sequence will always be an ENCODING token
407 407 which tells you which encoding was used to decode the bytes stream.
408 408 """
409 409 # This import is here to avoid problems when the itertools module is not
410 410 # built yet and tokenize is imported.
411 411 from itertools import chain, repeat
412 412 encoding, consumed = detect_encoding(readline)
413 413 rl_gen = iter(readline, b"")
414 414 empty = repeat(b"")
415 415 return _tokenize(chain(consumed, rl_gen, empty).__next__, encoding)
416 416
417 417
418 418 def _tokenize(readline, encoding):
419 419 lnum = parenlev = continued = 0
420 420 numchars = '0123456789'
421 421 contstr, needcont = '', 0
422 422 contline = None
423 423 indents = [0]
424 424
425 425 if encoding is not None:
426 426 if encoding == "utf-8-sig":
427 427 # BOM will already have been stripped.
428 428 encoding = "utf-8"
429 429 yield TokenInfo(ENCODING, encoding, (0, 0), (0, 0), '')
430 430 while True: # loop over lines in stream
431 431 try:
432 432 line = readline()
433 433 except StopIteration:
434 434 line = b''
435 435
436 436 if encoding is not None:
437 437 line = line.decode(encoding)
438 438 lnum += 1
439 439 pos, max = 0, len(line)
440 440
441 441 if contstr: # continued string
442 442 if not line:
443 443 raise TokenError("EOF in multi-line string", strstart)
444 444 endmatch = endprog.match(line)
445 445 if endmatch:
446 446 pos = end = endmatch.end(0)
447 447 yield TokenInfo(STRING, contstr + line[:end],
448 448 strstart, (lnum, end), contline + line)
449 449 contstr, needcont = '', 0
450 450 contline = None
451 451 elif needcont and line[-2:] != '\\\n' and line[-3:] != '\\\r\n':
452 452 yield TokenInfo(ERRORTOKEN, contstr + line,
453 453 strstart, (lnum, len(line)), contline)
454 454 contstr = ''
455 455 contline = None
456 456 continue
457 457 else:
458 458 contstr = contstr + line
459 459 contline = contline + line
460 460 continue
461 461
462 462 elif parenlev == 0 and not continued: # new statement
463 463 if not line: break
464 464 column = 0
465 465 while pos < max: # measure leading whitespace
466 466 if line[pos] == ' ':
467 467 column += 1
468 468 elif line[pos] == '\t':
469 469 column = (column//tabsize + 1)*tabsize
470 470 elif line[pos] == '\f':
471 471 column = 0
472 472 else:
473 473 break
474 474 pos += 1
475 475 if pos == max:
476 476 break
477 477
478 478 if line[pos] in '#\r\n': # skip comments or blank lines
479 479 if line[pos] == '#':
480 480 comment_token = line[pos:].rstrip('\r\n')
481 481 nl_pos = pos + len(comment_token)
482 482 yield TokenInfo(COMMENT, comment_token,
483 483 (lnum, pos), (lnum, pos + len(comment_token)), line)
484 484 yield TokenInfo(NEWLINE, line[nl_pos:],
485 485 (lnum, nl_pos), (lnum, len(line)), line)
486 486 else:
487 487 yield TokenInfo(NEWLINE, line[pos:],
488 488 (lnum, pos), (lnum, len(line)), line)
489 489 continue
490 490
491 491 if column > indents[-1]: # count indents or dedents
492 492 indents.append(column)
493 493 yield TokenInfo(INDENT, line[:pos], (lnum, 0), (lnum, pos), line)
494 494 while column < indents[-1]:
495 495 if column not in indents:
496 496 raise IndentationError(
497 497 "unindent does not match any outer indentation level",
498 498 ("<tokenize>", lnum, pos, line))
499 499 indents = indents[:-1]
500 500 yield TokenInfo(DEDENT, '', (lnum, pos), (lnum, pos), line)
501 501
502 502 else: # continued statement
503 503 if not line:
504 504 raise TokenError("EOF in multi-line statement", (lnum, 0))
505 505 continued = 0
506 506
507 507 while pos < max:
508 508 pseudomatch = pseudoprog.match(line, pos)
509 509 if pseudomatch: # scan for tokens
510 510 start, end = pseudomatch.span(1)
511 511 spos, epos, pos = (lnum, start), (lnum, end), end
512 512 token, initial = line[start:end], line[start]
513 513
514 514 if (initial in numchars or # ordinary number
515 515 (initial == '.' and token != '.' and token != '...')):
516 516 yield TokenInfo(NUMBER, token, spos, epos, line)
517 517 elif initial in '\r\n':
518 518 yield TokenInfo(NL if parenlev > 0 else NEWLINE,
519 519 token, spos, epos, line)
520 520 elif initial == '#':
521 521 assert not token.endswith("\n")
522 522 yield TokenInfo(COMMENT, token, spos, epos, line)
523 523 elif token in triple_quoted:
524 524 endprog = endprogs[token]
525 525 endmatch = endprog.match(line, pos)
526 526 if endmatch: # all on one line
527 527 pos = endmatch.end(0)
528 528 token = line[start:pos]
529 529 yield TokenInfo(STRING, token, spos, (lnum, pos), line)
530 530 else:
531 531 strstart = (lnum, start) # multiple lines
532 532 contstr = line[start:]
533 533 contline = line
534 534 break
535 535 elif initial in single_quoted or \
536 536 token[:2] in single_quoted or \
537 537 token[:3] in single_quoted:
538 538 if token[-1] == '\n': # continued string
539 539 strstart = (lnum, start)
540 540 endprog = (endprogs[initial] or endprogs[token[1]] or
541 541 endprogs[token[2]])
542 542 contstr, needcont = line[start:], 1
543 543 contline = line
544 544 break
545 545 else: # ordinary string
546 546 yield TokenInfo(STRING, token, spos, epos, line)
547 547 elif initial.isidentifier(): # ordinary name
548 548 yield TokenInfo(NAME, token, spos, epos, line)
549 549 elif initial == '\\': # continued stmt
550 550 continued = 1
551 551 else:
552 552 if initial in '([{':
553 553 parenlev += 1
554 554 elif initial in ')]}':
555 555 parenlev -= 1
556 556 yield TokenInfo(OP, token, spos, epos, line)
557 557 else:
558 558 yield TokenInfo(ERRORTOKEN, line[pos],
559 559 (lnum, pos), (lnum, pos+1), line)
560 560 pos += 1
561 561
562 562 for indent in indents[1:]: # pop remaining indent levels
563 563 yield TokenInfo(DEDENT, '', (lnum, 0), (lnum, 0), '')
564 564 yield TokenInfo(ENDMARKER, '', (lnum, 0), (lnum, 0), '')
565 565
566 566
567 567 # An undocumented, backwards compatible, API for all the places in the standard
568 568 # library that expect to be able to use tokenize with strings
569 569 def generate_tokens(readline):
570 570 return _tokenize(readline, None)
571 571
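generate_tokens() is the text-mode entry point this patch adds to __all__; a hedged sketch using an already-decoded string:

from io import StringIO

for tok in generate_tokens(StringIO("x = 1\n").readline):
    print(tok)
# No ENCODING token is emitted here, since the input is already text.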
572 572 if __name__ == "__main__":
573 573 # Quick sanity check
574 574 s = b'''def parseline(self, line):
575 575 """Parse the line into a command name and a string containing
576 576 the arguments. Returns a tuple containing (command, args, line).
577 577 'command' and 'args' may be None if the line couldn't be parsed.
578 578 """
579 579 line = line.strip()
580 580 if not line:
581 581 return None, None, line
582 582 elif line[0] == '?':
583 583 line = 'help ' + line[1:]
584 584 elif line[0] == '!':
585 585 if hasattr(self, 'do_shell'):
586 586 line = 'shell ' + line[1:]
587 587 else:
588 588 return None, None, line
589 589 i, n = 0, len(line)
590 590 while i < n and line[i] in self.identchars: i = i+1
591 591 cmd, arg = line[:i], line[i:].strip()
592 592 return cmd, arg, line
593 593 '''
594 594 for tok in tokenize(iter(s.splitlines()).__next__):
595 595 print(tok)