Show More
@@ -1,102 +1,121 b'' | |||||
1 | """Tests for tokenutil""" |
|
1 | """Tests for tokenutil""" | |
2 | # Copyright (c) IPython Development Team. |
|
2 | # Copyright (c) IPython Development Team. | |
3 | # Distributed under the terms of the Modified BSD License. |
|
3 | # Distributed under the terms of the Modified BSD License. | |
4 |
|
4 | |||
5 | import nose.tools as nt |
|
5 | import nose.tools as nt | |
6 |
|
6 | |||
7 | from IPython.utils.tokenutil import token_at_cursor, line_at_cursor |
|
7 | from IPython.utils.tokenutil import token_at_cursor, line_at_cursor | |
8 |
|
8 | |||
def expect_token(expected, cell, cursor_pos):
    """Assert that ``token_at_cursor(cell, cursor_pos)`` returns ``expected``.

    Parameters
    ----------

    expected : text
        The token that should be reported.
    cell : text
        The block of code handed to ``token_at_cursor``.
    cursor_pos : int
        Cursor offset into ``cell``.

    On mismatch, the failure message shows the line around the cursor with
    a ``|`` inserted at the cursor column.
    """
    token = token_at_cursor(cell, cursor_pos)

    # Everything below only serves to build a readable failure message.
    offset = 0
    line = ""  # keeps the message code working when ``cell`` has no lines
    for line in cell.splitlines():
        if offset + len(line) >= cursor_pos:
            break
        # +1 accounts for the newline that splitlines() dropped
        offset += len(line) + 1
    column = cursor_pos - offset
    line_with_cursor = '%s|%s' % (line[:column], line[column:])
    nt.assert_equal(token, expected,
        "Expected %r, got %r in: %r (pos %i)" % (
            expected, token, line_with_cursor, cursor_pos)
    )
23 |
|
23 | |||
def test_simple():
    # Every cursor position from the start of a bare name up to (but not
    # including) its end must report that name.
    cell = "foo"
    for pos, _ in enumerate(cell):
        expect_token("foo", cell, pos)
28 |
|
28 | |||
def test_function():
    # Cursor positions in and around the argument list of a call are all
    # expected to report the callable.
    cell = "foo(a=5, b='10')"
    expected = 'foo'

    # up to `foo(|a=`
    positions = list(range(cell.find('a=') + 1))
    # find foo after `=`
    positions += [cell.find('=') + 1, cell.rfind('=') + 1]
    # in between `5,|` and `|b=`
    positions += list(range(cell.find(','), cell.find('b=')))

    for pos in positions:
        expect_token(expected, cell, pos)
41 |
|
41 | |||
def test_multiline():
    # A name on the second line of a cell must be found from every cursor
    # position inside it.
    cell = '\n'.join([
        'a = 5',
        'b = hello("string", there)'
    ])
    expected = 'hello'
    # Positions run from just after the first character of the name to
    # just after its last character.
    start = cell.index(expected) + 1
    for i in range(start, start + len(expected)):
        expect_token(expected, cell, i)
55 |
|
55 | |||
|
def test_multiline_token():
    # Triple-quoted strings spanning several physical lines precede the
    # line under test; the name on the last line must still be located at
    # the right offsets.
    cell = '\n'.join([
        '"""\n\nxxxxxxxxxx\n\n"""',
        '5, """',
        'docstring',
        'multiline token',
        '""", [',
        '2, 3, "complicated"]',
        'b = hello("string", there)'
    ])
    expected = 'hello'
    # Positions run from just after the first character of the name to
    # just after its last character.
    start = cell.index(expected) + 1
    for i in range(start, start + len(expected)):
        expect_token(expected, cell, i)
|
74 | ||||
def test_nested_call():
    # With one call nested in another, the expected token depends on which
    # parentheses have been opened (and not yet closed) before the cursor.
    cell = "foo(bar(a=5), b=10)"

    inside_bar_name = cell.index('bar') + 1
    inner_kwarg = cell.index('a=')
    after_inner_call = cell.index(')') + 1

    cases = [
        # cursor within the name `bar`: only `foo(` is open
        ('foo', range(inside_bar_name, inside_bar_name + 3)),
        # cursor within `a=5`: `bar(` is the innermost open call
        ('bar', range(inner_kwarg, inner_kwarg + 3)),
        # cursor after `bar(...)` has closed: back to `foo`
        ('foo', range(after_inner_call, len(cell) - 1)),
    ]
    for expected, positions in cases:
        for pos in positions:
            expect_token(expected, cell, pos)
70 |
|
89 | |||
def test_attrs():
    # Dotted attribute access: the expected token is the dotted path up to
    # and including the component the cursor is in.
    cell = "a = obj.attr.subattr"

    in_obj = cell.find('obj') + 1
    in_attr = cell.find('.attr') + 2
    in_subattr = cell.find('.subattr') + 2

    cases = [
        ('obj', range(in_obj, in_obj + 3)),
        ('obj.attr', range(in_attr, in_attr + 4)),
        ('obj.attr.subattr', range(in_subattr, len(cell))),
    ]
    for expected, positions in cases:
        for pos in positions:
            expect_token(expected, cell, pos)
85 |
|
104 | |||
def test_line_at_cursor():
    # An empty cell must yield an empty line at offset 0, even when the
    # requested cursor position lies beyond the end of the text.
    cell = ""
    (line, offset) = line_at_cursor(cell, cursor_pos=11)
    assert line == "", ("Expected '', got %r" % line)
    # Report the offset (not the line) when the offset check fails.
    assert offset == 0, ("Expected 0, got %r" % offset)
91 |
|
110 | |||
def test_muliline_statement():
    # Generator test: yields one check per cursor position.  The first
    # statement spans two physical lines; positions 16-21 cover the
    # `int()` line and 22-27 the `map()` line (each including its newline).
    cell = '\n'.join([
        'a = (1,',
        '    3)',
        '',
        'int()',
        'map()',
        '',
    ])
    for c in range(16, 22):
        # bind ``c`` now: a plain closure would see whatever value the loop
        # variable has when the callable is eventually invoked
        yield lambda c=c: expect_token("int", cell, c)
    for c in range(22, 28):
        yield lambda c=c: expect_token("map", cell, c)
@@ -1,121 +1,128 b'' | |||||
1 | """Token-related utilities""" |
|
1 | """Token-related utilities""" | |
2 |
|
2 | |||
3 | # Copyright (c) IPython Development Team. |
|
3 | # Copyright (c) IPython Development Team. | |
4 | # Distributed under the terms of the Modified BSD License. |
|
4 | # Distributed under the terms of the Modified BSD License. | |
5 |
|
5 | |||
6 | from __future__ import absolute_import, print_function |
|
6 | from __future__ import absolute_import, print_function | |
7 |
|
7 | |||
8 | from collections import namedtuple |
|
8 | from collections import namedtuple | |
9 | from io import StringIO |
|
9 | from io import StringIO | |
10 | from keyword import iskeyword |
|
10 | from keyword import iskeyword | |
11 |
|
11 | |||
12 | from . import tokenize2 |
|
12 | from . import tokenize2 | |
13 | from .py3compat import cast_unicode_py2 |
|
13 | from .py3compat import cast_unicode_py2 | |
14 |
|
14 | |||
# Named view over a 5-item token tuple: type, text, start, end, source line.
Token = namedtuple(
    'Token',
    ['token', 'text', 'start', 'end', 'line'],
)
16 |
|
16 | |||
def generate_tokens(readline):
    """Yield tokens from ``tokenize2.generate_tokens``, stopping on ``TokenError``.

    Parameters
    ----------

    readline : callable
        Passed straight through to ``tokenize2.generate_tokens``.

    A ``tokenize2.TokenError`` raised while iterating ends the stream
    instead of propagating; tokens produced before it are still yielded.
    """
    try:
        for tok in tokenize2.generate_tokens(readline):
            yield tok
    except tokenize2.TokenError:
        # EOF error from the tokenizer: treat it as the end of the stream
        pass
25 |
|
25 | |||
def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------

    cell: text
        multiline block of text
    cursor_pos: integer
        the cursor position

    Returns
    -------

    (line, offset): (text, integer)
        The line with the current cursor (including its line terminator, if
        any), and the character offset of the start of the line.

        A cursor placed immediately after a line terminator is on the
        following line.  A cursor at the very end of the text is on the last
        line when that line has no terminator.  If no line contains the
        cursor, ``line`` is ``""`` and ``offset`` is the length of the text
        that was scanned.
    """
    offset = 0
    for line in cell.splitlines(True):
        line_end = offset + len(line)
        # With keepends=True a line equals its terminator-less form only
        # when it has no terminator (possible for the final line only).
        terminated = line.splitlines()[0] != line
        # After a terminator the cursor already belongs to the next line;
        # without one, the end-of-text position still belongs to this line.
        limit = line_end if terminated else line_end + 1
        if cursor_pos < limit:
            return (line, offset)
        offset = line_end
    return ("", offset)
55 |
|
55 | |||
def token_at_cursor(cell, cursor_pos=0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------

    cell : unicode
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------

    name : text
        The name of the innermost call still open at the cursor if there is
        one, otherwise the most recent (possibly dotted) name seen, otherwise
        the empty string.
    """
    cell = cast_unicode_py2(cell)
    names = []
    tokens = []
    call_names = []

    # Character offset of the start of each physical line, keyed by line
    # number.  A per-line table (rather than one running counter) keeps
    # columns correct after a token that spans several lines.
    offsets = {1: 0}  # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):

        tok = Token(*tup)

        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            # (tok.line is split because it may cover several physical
            # lines when the token itself does)
            lines = tok.line.splitlines(True)
            for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno-1] + len(line)

        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break

        if tok.token == tokenize2.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
                # previous token was a dot: extend the dotted name
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize2.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)

        # Use the offset of the line the token *ends* on, which differs
        # from ``offset`` for a multi-line token.
        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
120 |
|
127 | |||
121 |
|
128 |
General Comments 0
You need to be logged in to leave comments.
Login now