handle multi-line tokens in token_at_cursor...
Min RK
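In short: the old token_at_cursor kept a single running offset, bumped by
len(tok.line) whenever a NEWLINE/NL token came through. A triple-quoted
string is emitted as one STRING token spanning several physical lines (with
no NL tokens in between, and with .line holding the whole span), so the
running offset drifted for everything after it. The rewrite below records a
per-line offset table instead. A minimal sketch of the token shape, using
the stdlib tokenize module rather than the tokenize2 backport this code
actually imports:

    from io import StringIO
    from tokenize import generate_tokens, tok_name

    cell = 'x = """\nmulti\nline\n"""\ny = 1\n'
    for tok_type, text, start, end, line in generate_tokens(StringIO(cell).readline):
        # the STRING token runs from (1, 4) to (4, 3): one token, four lines
        print(tok_name[tok_type], start, end)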
IPython/utils/tests/test_tokenutil.py
@@ -1,102 +1,121 @@
1 1 """Tests for tokenutil"""
2 2 # Copyright (c) IPython Development Team.
3 3 # Distributed under the terms of the Modified BSD License.
4 4
5 5 import nose.tools as nt
6 6
7 7 from IPython.utils.tokenutil import token_at_cursor, line_at_cursor
8 8
9 9 def expect_token(expected, cell, cursor_pos):
10 10 token = token_at_cursor(cell, cursor_pos)
11 11 offset = 0
12 12 for line in cell.splitlines():
13 13 if offset + len(line) >= cursor_pos:
14 14 break
15 15 else:
16 16 offset += len(line)+1
17 17 column = cursor_pos - offset
18 18 line_with_cursor = '%s|%s' % (line[:column], line[column:])
19 19 nt.assert_equal(token, expected,
20 20 "Expected %r, got %r in: %r (pos %i)" % (
21 21 expected, token, line_with_cursor, cursor_pos)
22 22 )
23 23
24 24 def test_simple():
25 25 cell = "foo"
26 26 for i in range(len(cell)):
27 27 expect_token("foo", cell, i)
28 28
29 29 def test_function():
30 30 cell = "foo(a=5, b='10')"
31 31 expected = 'foo'
32 32 # up to `foo(|a=`
33 33 for i in range(cell.find('a=') + 1):
34 34 expect_token("foo", cell, i)
35 35 # find foo after `=`
36 36 for i in [cell.find('=') + 1, cell.rfind('=') + 1]:
37 37 expect_token("foo", cell, i)
38 38 # in between `5,|` and `|b=`
39 39 for i in range(cell.find(','), cell.find('b=')):
40 40 expect_token("foo", cell, i)
41 41
42 42 def test_multiline():
43 43 cell = '\n'.join([
44 44 'a = 5',
45 45 'b = hello("string", there)'
46 46 ])
47 47 expected = 'hello'
48 48 start = cell.index(expected) + 1
49 49 for i in range(start, start + len(expected)):
50 50 expect_token(expected, cell, i)
51 51 expected = 'there'
52 52 start = cell.index(expected) + 1
53 53 for i in range(start, start + len(expected)):
54 54 expect_token(expected, cell, i)
55 55
56 def test_multiline_token():
57 cell = '\n'.join([
58 '"""\n\nxxxxxxxxxx\n\n"""',
59 '5, """',
60 'docstring',
61 'multiline token',
62 '""", [',
63 '2, 3, "complicated"]',
64 'b = hello("string", there)'
65 ])
66 expected = 'hello'
67 start = cell.index(expected) + 1
68 for i in range(start, start + len(expected)):
69 expect_token(expected, cell, i)
70 expected = 'there'
71 start = cell.index(expected) + 1
72 for i in range(start, start + len(expected)):
73 expect_token(expected, cell, i)
74
56 75 def test_nested_call():
57 76 cell = "foo(bar(a=5), b=10)"
58 77 expected = 'foo'
59 78 start = cell.index('bar') + 1
60 79 for i in range(start, start + 3):
61 80 expect_token(expected, cell, i)
62 81 expected = 'bar'
63 82 start = cell.index('a=')
64 83 for i in range(start, start + 3):
65 84 expect_token(expected, cell, i)
66 85 expected = 'foo'
67 86 start = cell.index(')') + 1
68 87 for i in range(start, len(cell)-1):
69 88 expect_token(expected, cell, i)
70 89
71 90 def test_attrs():
72 91 cell = "a = obj.attr.subattr"
73 92 expected = 'obj'
74 93 idx = cell.find('obj') + 1
75 94 for i in range(idx, idx + 3):
76 95 expect_token(expected, cell, i)
77 96 idx = cell.find('.attr') + 2
78 97 expected = 'obj.attr'
79 98 for i in range(idx, idx + 4):
80 99 expect_token(expected, cell, i)
81 100 idx = cell.find('.subattr') + 2
82 101 expected = 'obj.attr.subattr'
83 102 for i in range(idx, len(cell)):
84 103 expect_token(expected, cell, i)
85 104
86 105 def test_line_at_cursor():
87 106 cell = ""
88 107 (line, offset) = line_at_cursor(cell, cursor_pos=11)
89 108 assert line == "", ("Expected '', got %r" % line)
90 109 assert offset == 0, ("Expected 0, got %r" % offset)
91 110
92 111 def test_multiline_statement():
93 112 cell = """a = (1,
94 113 3)
95 114
96 115 int()
97 116 map()
98 117 """
99 118 for c in range(16, 22):
100 119 yield lambda c=c: expect_token("int", cell, c)
101 120 for c in range(22, 28):
102 121 yield lambda c=c: expect_token("map", cell, c)
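The new test_multiline_token case above pins down exactly that shape: cursor
positions after a triple-quoted string should still resolve to the right
token. A quick interactive check (assuming an IPython checkout with this
commit applied; the cell below is illustrative, not from the test suite):

    from IPython.utils.tokenutil import token_at_cursor

    cell = '"""\na multi-line\ndocstring\n"""\nb = hello("string", there)'
    cursor = cell.index('hello') + 2       # cursor inside 'hello'
    print(token_at_cursor(cell, cursor))   # -> 'hello'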
IPython/utils/tokenutil.py
@@ -1,121 +1,128 @@
1 1 """Token-related utilities"""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 from __future__ import absolute_import, print_function
7 7
8 8 from collections import namedtuple
9 9 from io import StringIO
10 10 from keyword import iskeyword
11 11
12 12 from . import tokenize2
13 13 from .py3compat import cast_unicode_py2
14 14
15 15 Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])
16 16
17 17 def generate_tokens(readline):
18 18 """wrap generate_tokens to catch EOF errors"""
19 19 try:
20 20 for token in tokenize2.generate_tokens(readline):
21 21 yield token
22 22 except tokenize2.TokenError:
23 23 # catch EOF error
24 24 return
25 25
26 26 def line_at_cursor(cell, cursor_pos=0):
27 27 """Return the line in a cell at a given cursor position
28 28
29 29 Used for calling line-based APIs that don't support multi-line input yet.
30 30
31 31 Parameters
32 32 ----------
33 33
34 34 cell: text
35 35 multiline block of text
36 36 cursor_pos: integer
37 37 the cursor position
38 38
39 39 Returns
40 40 -------
41 41
42 42 (line, offset): (text, integer)
43 43 The line with the current cursor, and the character offset of the start of the line.
44 44 """
45 45 offset = 0
46 46 lines = cell.splitlines(True)
47 47 for line in lines:
48 48 next_offset = offset + len(line)
49 49 if next_offset >= cursor_pos:
50 50 break
51 51 offset = next_offset
52 52 else:
53 53 line = ""
54 54 return (line, offset)
55 55
56 56 def token_at_cursor(cell, cursor_pos=0):
57 57 """Get the token at a given cursor
58 58
59 59 Used for introspection.
60 60
61 61 Function calls are prioritized, so the token for the callable will be returned
62 62 if the cursor is anywhere inside the call.
63 63
64 64 Parameters
65 65 ----------
66 66
67 67 cell : unicode
68 68 A block of Python code
69 69 cursor_pos : int
70 70 The location of the cursor in the block where the token should be found
71 71 """
72 72 cell = cast_unicode_py2(cell)
73 73 names = []
74 74 tokens = []
75 offset = 0
76 75 call_names = []
76
77 offsets = {1: 0} # lines start at 1
77 78 for tup in generate_tokens(StringIO(cell).readline):
78 79
79 80 tok = Token(*tup)
80 81
81 82 # token, text, start, end, line = tup
82 start_col = tok.start[1]
83 end_col = tok.end[1]
83 start_line, start_col = tok.start
84 end_line, end_col = tok.end
85 if end_line + 1 not in offsets:
86 # keep track of offsets for each line
87 lines = tok.line.splitlines(True)
88 for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
89 if lineno not in offsets:
90 offsets[lineno] = offsets[lineno-1] + len(line)
91
92 offset = offsets[start_line]
84 93 # allow '|foo' to find 'foo' at the beginning of a line
85 94 boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
86 95 if offset + start_col >= boundary:
87 96 # current token starts after the cursor,
88 97 # don't consume it
89 98 break
90 99
91 100 if tok.token == tokenize2.NAME and not iskeyword(tok.text):
92 101 if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
93 102 names[-1] = "%s.%s" % (names[-1], tok.text)
94 103 else:
95 104 names.append(tok.text)
96 105 elif tok.token == tokenize2.OP:
97 106 if tok.text == '=' and names:
98 107 # don't inspect the lhs of an assignment
99 108 names.pop(-1)
100 109 if tok.text == '(' and names:
101 110 # if we are inside a function call, inspect the function
102 111 call_names.append(names[-1])
103 112 elif tok.text == ')' and call_names:
104 113 call_names.pop(-1)
105 114
106 if offset + end_col > cursor_pos:
115 tokens.append(tok)
116
117 if offsets[end_line] + end_col > cursor_pos:
107 118 # we found the cursor, stop reading
108 119 break
109 120
110 tokens.append(tok)
111 if tok.token in (tokenize2.NEWLINE, tokenize2.NL):
112 offset += len(tok.line)
113
114 121 if call_names:
115 122 return call_names[-1]
116 123 elif names:
117 124 return names[-1]
118 125 else:
119 126 return ''
120 127
121 128
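line_at_cursor is untouched by this change but shares the same cursor-offset
convention; a small usage sketch for reference:

    from IPython.utils.tokenutil import line_at_cursor

    cell = 'a = 1\nb = 2\n'
    line, offset = line_at_cursor(cell, cursor_pos=8)   # cursor on the second line
    print(repr(line), offset)                           # -> 'b = 2\n' 6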