Show More
@@ -1,102 +1,121 b'' | |||
|
1 | 1 | """Tests for tokenutil""" |
|
2 | 2 | # Copyright (c) IPython Development Team. |
|
3 | 3 | # Distributed under the terms of the Modified BSD License. |
|
4 | 4 | |
|
5 | 5 | import nose.tools as nt |
|
6 | 6 | |
|
7 | 7 | from IPython.utils.tokenutil import token_at_cursor, line_at_cursor |
|
8 | 8 | |
|
def expect_token(expected, cell, cursor_pos):
    """Assert that ``token_at_cursor(cell, cursor_pos)`` returns ``expected``.

    On failure, the assertion message shows the line that holds the cursor
    with a ``|`` inserted at the cursor column.

    Parameters
    ----------

    expected : text
        The token that should be found.
    cell : text
        Block of code handed to ``token_at_cursor``.
    cursor_pos : int
        Cursor position, as a character offset into ``cell``.
    """
    token = token_at_cursor(cell, cursor_pos)
    # Pre-set `line` so that an empty cell (no lines to iterate over) does not
    # leave it unbound when the failure message is built below.
    line = ''
    offset = 0
    for line in cell.splitlines():
        if offset + len(line) >= cursor_pos:
            break
        # +1 for the newline that splitlines() stripped
        offset += len(line) + 1
    column = cursor_pos - offset
    line_with_cursor = '%s|%s' % (line[:column], line[column:])
    nt.assert_equal(token, expected,
        "Expected %r, got %r in: %r (pos %i)" % (
            expected, token, line_with_cursor, cursor_pos)
    )
|
23 | 23 | |
|
def test_simple():
    """Every cursor position inside a lone name should yield that name."""
    cell = "foo"
    for pos, _ in enumerate(cell):
        expect_token("foo", cell, pos)
|
28 | 28 | |
|
def test_function():
    """Anywhere inside a call's argument list, the callable's name is found."""
    cell = "foo(a=5, b='10')"
    expected = 'foo'
    # up to `foo(|a=`
    for i in range(cell.find('a=') + 1):
        expect_token(expected, cell, i)
    # find foo after `=`
    for i in [cell.find('=') + 1, cell.rfind('=') + 1]:
        expect_token(expected, cell, i)
    # in between `5,|` and `|b=`
    for i in range(cell.find(','), cell.find('b=')):
        expect_token(expected, cell, i)
|
41 | 41 | |
|
def test_multiline():
    """A call on the second line of a cell is found from inside its name."""
    cell = '\n'.join([
        'a = 5',
        'b = hello("string", there)'
    ])
    expected = 'hello'
    # start one character into the name, i.e. `h|ello`
    start = cell.index(expected) + 1
    for i in range(start, start + len(expected)):
        expect_token(expected, cell, i)
|
55 | 55 | |
|
def test_multiline_token():
    """Offsets stay correct after tokens that span several lines.

    The cell opens with triple-quoted strings and a bracketed list that each
    cover more than one line before the call under test.
    """
    cell = '\n'.join([
        '"""\n\nxxxxxxxxxx\n\n"""',
        '5, """',
        'docstring',
        'multiline token',
        '""", [',
        '2, 3, "complicated"]',
        'b = hello("string", there)'
    ])
    expected = 'hello'
    # start one character into the name, i.e. `h|ello`
    start = cell.index(expected) + 1
    for i in range(start, start + len(expected)):
        expect_token(expected, cell, i)
|
74 | ||
|
def test_nested_call():
    """The innermost call still open at the cursor is the one reported."""
    cell = "foo(bar(a=5), b=10)"
    inner_name = cell.index('bar') + 1
    inner_kwarg = cell.index('a=')
    after_inner = cell.index(')') + 1
    # (expected token, first cursor position, one past the last position)
    spans = [
        ('foo', inner_name, inner_name + 3),
        ('bar', inner_kwarg, inner_kwarg + 3),
        ('foo', after_inner, len(cell) - 1),
    ]
    for expected, first, stop in spans:
        for pos in range(first, stop):
            expect_token(expected, cell, pos)
|
70 | 89 | |
|
def test_attrs():
    """Dotted attribute access accumulates into a single dotted name."""
    cell = "a = obj.attr.subattr"
    obj_at = cell.find('obj') + 1
    attr_at = cell.find('.attr') + 2
    subattr_at = cell.find('.subattr') + 2
    # (expected token, first cursor position, one past the last position)
    spans = [
        ('obj', obj_at, obj_at + 3),
        ('obj.attr', attr_at, attr_at + 4),
        ('obj.attr.subattr', subattr_at, len(cell)),
    ]
    for expected, first, stop in spans:
        for pos in range(first, stop):
            expect_token(expected, cell, pos)
|
85 | 104 | |
|
def test_line_at_cursor():
    """An empty cell yields an empty line at offset 0, whatever the cursor."""
    cell = ""
    (line, offset) = line_at_cursor(cell, cursor_pos=11)
    assert line == "", ("Expected '', got %r" % line)
    # report the offset (not the line) when the offset check fails
    assert offset == 0, ("Expected 0, got %r" % offset)
|
91 | 110 | |
|
def test_muliline_statement():
    """Yield one check per cursor position on the `int()` and `map()` lines.

    Each yielded callable takes no arguments and runs ``expect_token`` for
    one cursor position.
    """
    # NOTE(review): the four-space indent before `3)` is reconstructed from
    # the cursor ranges below (offset 16 must be the start of `int()`);
    # confirm against the original file.
    cell = "a = (1,\n    3)\n\nint()\nmap()\n"
    for c in range(16, 22):
        # bind c as a default so each callable keeps its own cursor position
        # even if it is invoked after the loop has advanced
        yield lambda c=c: expect_token("int", cell, c)
    for c in range(22, 28):
        yield lambda c=c: expect_token("map", cell, c)
@@ -1,121 +1,128 b'' | |||
|
1 | 1 | """Token-related utilities""" |
|
2 | 2 | |
|
3 | 3 | # Copyright (c) IPython Development Team. |
|
4 | 4 | # Distributed under the terms of the Modified BSD License. |
|
5 | 5 | |
|
6 | 6 | from __future__ import absolute_import, print_function |
|
7 | 7 | |
|
8 | 8 | from collections import namedtuple |
|
9 | 9 | from io import StringIO |
|
10 | 10 | from keyword import iskeyword |
|
11 | 11 | |
|
12 | 12 | from . import tokenize2 |
|
13 | 13 | from .py3compat import cast_unicode_py2 |
|
14 | 14 | |
|
15 | 15 | Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line']) |
|
16 | 16 | |
|
def generate_tokens(readline):
    """Yield tokens from ``tokenize2.generate_tokens``, ending quietly on ``TokenError``.

    Tokens produced before the error are still yielded; the error itself
    simply ends the iteration instead of propagating to the caller.
    """
    token_stream = tokenize2.generate_tokens(readline)
    try:
        for tok in token_stream:
            yield tok
    except tokenize2.TokenError:
        # e.g. EOF in the middle of a statement: stop without raising
        pass
|
25 | 25 | |
|
def line_at_cursor(cell, cursor_pos=0):
    """Find the line of a cell that holds the cursor.

    Useful for line-based APIs that cannot take multi-line input.

    Parameters
    ----------

    cell: text
        multiline block of text
    cursor_pos: integer
        the cursor position

    Returns
    -------

    (line, offset): (text, integer)
        The first line (line ending included) whose end offset is at or past
        the cursor, together with the character offset at which that line
        starts. If no line reaches the cursor, the line is ``""`` and the
        offset is the total length of the lines scanned.
    """
    line_start = 0
    for candidate in cell.splitlines(True):
        line_end = line_start + len(candidate)
        if line_end >= cursor_pos:
            return (candidate, line_start)
        line_start = line_end
    # ran out of lines before reaching the cursor
    return ("", line_start)
|
55 | 55 | |
|
def token_at_cursor(cell, cursor_pos=0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------

    cell : unicode
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------

    name : unicode
        The name of the innermost call still open at the cursor if there is
        one, otherwise the last (possibly dotted) name seen, otherwise ``''``.
    """
    cell = cast_unicode_py2(cell)
    names = []
    tokens = []
    call_names = []

    # Character offset at which each line starts, keyed by 1-based line
    # number. Kept per line (not as one running total) so that tokens
    # spanning several lines are accounted for.
    offsets = {1: 0} # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):

        tok = Token(*tup)

        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno-1] + len(line)

        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break

        if tok.token == tokenize2.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
                # attribute access: extend the previous name into a dotted name
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize2.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)

        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
|
120 | 127 | |
|
121 | 128 |
General Comments 0
You need to be logged in to leave comments.
Login now