tokenutil.py
r16578 | """Token-related utilities""" | ||
# Copyright (c) IPython Development Team. | ||||
# Distributed under the terms of the Modified BSD License. | ||||
from collections import namedtuple | ||||
from io import StringIO | ||||
from keyword import iskeyword | ||||
Thomas Kluyver
|
r24179 | import tokenize | ||
Matthias Bussonnier
|
r28475 | from tokenize import TokenInfo | ||
from typing import List, Optional | ||||
Srinivas Reddy Thatiparthy
|
r23669 | |||
MinRK
|
r16578 | |||
Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line']) | ||||
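
# Editor's note (illustrative, not part of the original module): Token simply
# mirrors the five fields of tokenize.TokenInfo, so tuples from the stdlib
# tokenizer can be rewrapped with Token(*tup), e.g.:
#   Token(token=tokenize.NAME, text='foo', start=(1, 0), end=(1, 3), line='foo\n')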


def generate_tokens(readline):
    """Wrap tokenize.generate_tokens to catch EOF errors."""
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError:
        # catch EOF error
        return
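

# Editor's illustration (not part of the original module): a minimal sketch
# of generate_tokens on incomplete input.  Plain tokenize.generate_tokens
# raises tokenize.TokenError when the '{' is never closed; the wrapper above
# simply stops yielding instead.
def _demo_generate_tokens():
    cell = "d = {\n"  # incomplete cell: unclosed bracket
    for tok in generate_tokens(StringIO(cell).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # falls through quietly at EOF instead of raising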


def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: Optional[List[str]] = None
):
    """Wrap tokenize.generate_tokens, turning a known set of tokenizer
    errors into a trailing ERRORTOKEN instead of an exception."""
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: List[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                # synthesize an approximate position from the last real token
                start = tokens[-1].start[0], tokens[-1].end[0]
                end = start
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # Catch EOF
            raise
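

# Editor's illustration (not part of the original module): on Python 3.12+,
# where the stdlib tokenizer raises TokenError with messages like
# "unterminated string literal (detected at line 1)", the wrapper above
# converts the failure into a final ERRORTOKEN; older Pythons report this
# case differently, so the exact output may vary.
def _demo_generate_tokens_catch_errors():
    cell = "s = 'abc\n"  # unterminated string literal
    for tok in generate_tokens_catch_errors(StringIO(cell).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))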


def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith('\n'):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return (line, offset)
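

# Editor's illustration (not part of the original module): cursor position 8
# falls inside the second line of the cell, so that line and its starting
# character offset (6, the length of "a = 1\n") are returned.
def _demo_line_at_cursor():
    cell = "a = 1\nb = 22\n"
    print(line_at_cursor(cell, cursor_pos=8))  # -> ('b = 22\n', 6)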


def token_at_cursor(cell: str, cursor_pos: int = 0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------
    str
        The name at the cursor (dotted names included), or an empty string
        if no suitable token is found.
    """
    names: List[str] = []
    tokens: List[Token] = []
    call_names = []

    offsets = {1: 0}  # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):
        tok = Token(*tup)
        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)
        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break
        if tok.token == tokenize.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize.OP and tokens[-1].text == '.':
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)
        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
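

# Editor's illustration (not part of the original module): a sketch of the
# call-prioritization rule documented above.  With the cursor inside the
# argument list of sum(...), the callable 'sum' is returned rather than the
# argument under the cursor; dotted names are joined into a single token.
def _demo_token_at_cursor():
    print(token_at_cursor("sum(data)", cursor_pos=5))  # -> 'sum'
    print(token_at_cursor("obj.attr", cursor_pos=6))   # -> 'obj.attr'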