upstream/ipython Commit - r21701:a85e1dcd

handle multi-line tokens in token_at_cursor...

Min RK -

r21701:a85e1dcd

parent child

IPython/utils/tests/test_tokenutil.py

0 +19 0

             """Tests for tokenutil"""
             # Copyright (c) IPython Development Team.
             # Distributed under the terms of the Modified BSD License.
             import nose.tools as nt
             from IPython.utils.tokenutil import token_at_cursor, line_at_cursor
             def expect_token(expected, cell, cursor_pos):
                 """Assert that ``token_at_cursor(cell, cursor_pos)`` returns ``expected``.

                 On failure, the message shows the line of ``cell`` that holds the
                 cursor, with ``|`` inserted at the cursor's column.

                 Parameters
                 ----------
                 expected : str
                     The token that should be found.
                 cell : str
                     The block of code to inspect.
                 cursor_pos : int
                     Cursor position, as a character offset into ``cell``.
                 """
                 token = token_at_cursor(cell, cursor_pos)
                 offset = 0
                 # An empty cell has no lines, so the loop below never binds `line`;
                 # give it a default so the failure message can still be built.
                 line = ''
                 for line in cell.splitlines():
                     if offset + len(line) >= cursor_pos:
                         break
                     else:
                         # +1 accounts for the newline dropped by splitlines()
                         offset += len(line)+1
                 column = cursor_pos - offset
                 line_with_cursor = '%s|%s' % (line[:column], line[column:])
                 nt.assert_equal(token, expected,
                     "Expected %r, got %r in: %r (pos %i)" % (
                     expected, token, line_with_cursor, cursor_pos)
                 )
             def test_simple():
                 """A bare name is found from every cursor position before its end."""
                 source = "foo"
                 for pos, _ in enumerate(source):
                     expect_token("foo", source, pos)
             def test_function():
                 """Anywhere inside a call's arguments, the callable's name is reported."""
                 cell = "foo(a=5, b='10')"
                 expected = 'foo'
                 # up to `foo(|a=`
                 for i in range(cell.find('a=') + 1):
                     expect_token(expected, cell, i)
                 # find foo after `=`
                 for i in [cell.find('=') + 1, cell.rfind('=') + 1]:
                     expect_token(expected, cell, i)
                 # in between `5,|` and `|b=`
                 for i in range(cell.find(','), cell.find('b=')):
                     expect_token(expected, cell, i)
             def test_multiline():
                 """A name on the second line of a cell is found when the cursor is in it."""
                 cell = '\n'.join([
                     'a = 5',
                     'b = hello("string", there)'
                 ])
                 expected = 'hello'
                 # start one character into the name and stop just after its last character
                 start = cell.index(expected) + 1
                 for i in range(start, start + len(expected)):
                     expect_token(expected, cell, i)
+            def test_multiline_token():
+                cell = '\n'.join([
+                    '"""\n\nxxxxxxxxxx\n\n"""',
+                    '5, """',
+                    'docstring',
+                    'multiline token',
+                    '""", [',
+                    '2, 3, "complicated"]',
+                    'b = hello("string", there)'
+                ])
+                expected = 'hello'
+                start = cell.index(expected) + 1
+                for i in range(start, start + len(expected)):
+                    expect_token(expected, cell, i)
+                expected = 'hello'
+                start = cell.index(expected) + 1
+                for i in range(start, start + len(expected)):
+                    expect_token(expected, cell, i)
             def test_nested_call():
                 """Nested calls report the innermost call that is still open at the cursor."""
                 source = "foo(bar(a=5), b=10)"
                 inside_bar_name = source.index('bar') + 1
                 inside_bar_args = source.index('a=')
                 after_bar_call = source.index(')') + 1
                 # (expected token, cursor positions), checked in this order
                 cases = [
                     # cursor within the name `bar`: the enclosing call `foo` is expected
                     ('foo', range(inside_bar_name, inside_bar_name + 3)),
                     # cursor within bar's arguments
                     ('bar', range(inside_bar_args, inside_bar_args + 3)),
                     # bar's call has been closed: back to `foo`
                     ('foo', range(after_bar_call, len(source)-1)),
                 ]
                 for name, positions in cases:
                     for pos in positions:
                         expect_token(name, source, pos)
             def test_attrs():
                 """Dotted attribute access is reported up to the component under the cursor."""
                 source = "a = obj.attr.subattr"
                 obj_start = source.find('obj') + 1
                 attr_start = source.find('.attr') + 2
                 subattr_start = source.find('.subattr') + 2
                 # (expected token, first cursor position, end position (exclusive))
                 cases = (
                     ('obj', obj_start, obj_start + 3),
                     ('obj.attr', attr_start, attr_start + 4),
                     ('obj.attr.subattr', subattr_start, len(source)),
                 )
                 for dotted_name, first, stop in cases:
                     for pos in range(first, stop):
                         expect_token(dotted_name, source, pos)
             def test_line_at_cursor():
                 """An empty cell yields an empty line at offset 0, even with the cursor past the end."""
                 cell = ""
                 (line, offset) = line_at_cursor(cell, cursor_pos=11)
                 assert line == "", ("Expected '', got %r" % line)
                 # report the value actually being checked (offset, not line)
                 assert offset == 0, ("Expected 0, got %r" % offset)
             def test_muliline_statement():
                 cell = """a = (1,
 )
             int()
             map()
             """
                 for c in range(16, 22):
                     yield lambda: expect_token("int", cell, c)
                 for c in range(22, 28):
                     yield lambda: expect_token("map", cell, c)

IPython/utils/tokenutil.py

0 +15 -8

             """Token-related utilities"""
             # Copyright (c) IPython Development Team.
             # Distributed under the terms of the Modified BSD License.
             from __future__ import absolute_import, print_function
             from collections import namedtuple
             from io import StringIO
             from keyword import iskeyword
             from . import tokenize2
             from .py3compat import cast_unicode_py2
             Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])
             def generate_tokens(readline):
                 """Yield tokens from ``tokenize2.generate_tokens``, stopping quietly on ``TokenError``.

                 Tokens produced before the error are still yielded; the error itself
                 simply ends the iteration.
                 """
                 stream = tokenize2.generate_tokens(readline)
                 try:
                     for item in stream:
                         yield item
                 except tokenize2.TokenError:
                     # e.g. EOF in the middle of a multi-line construct: end the stream here
                     pass
             def line_at_cursor(cell, cursor_pos=0):
                 """Return the line in a cell at a given cursor position

                 Used for calling line-based APIs that don't support multi-line input, yet.

                 A cursor sitting immediately after a line terminator belongs to the
                 *following* line. A cursor at the very end of a final line that has no
                 terminator still belongs to that line.

                 Parameters
                 ----------
                 cell: text
                     multiline block of text
                 cursor_pos: integer
                     the cursor position

                 Returns
                 -------
                 (line, offset): (text, integer)
                     The line with the current cursor, and the character offset of the start of the line.
                     If the cursor lies beyond every line, the line is ``""`` and the
                     offset is the total length of the lines that were skipped.
                 """
                 offset = 0
                 for line in cell.splitlines(True):
                     line_end = offset + len(line)
                     # splitlines(True) keeps the terminator, so stripping it changes the
                     # text exactly when the line ends with a line break.
                     terminated = line.splitlines()[0] != line
                     if cursor_pos < line_end:
                         return (line, offset)
                     if cursor_pos == line_end and not terminated:
                         # end of an unterminated last line: still on this line
                         return (line, offset)
                     offset = line_end
                 # cursor is past all content (or the cell is empty)
                 return ("", offset)
             def token_at_cursor(cell, cursor_pos=0):
                 """Get the token at a given cursor
                 Used for introspection.
                 Function calls are prioritized, so the token for the callable will be returned
                 if the cursor is anywhere inside the call.
                 Parameters
                 ----------
                 cell : unicode
                     A block of Python code
                 cursor_pos : int
                     The location of the cursor in the block where the token should be found
                 """
                 cell = cast_unicode_py2(cell)
                 names = []
                 tokens = []
-                offset = 0
                 call_names = []
+                offsets = {1: 0} # lines start at 1
                 for tup in generate_tokens(StringIO(cell).readline):
                     tok = Token(*tup)
                     # token, text, start, end, line = tup
-                    start_col = tok.start[1]
+                    start_line, start_col = tok.start
-                    end_col = tok.end[1]
+                    end_line, end_col = tok.end
+                    if end_line + 1 not in offsets:
+                        # keep track of offsets for each line
+                        lines = tok.line.splitlines(True)
+                        for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
+                            if lineno not in offsets:
+                                offsets[lineno] = offsets[lineno-1] + len(line)
+                    offset = offsets[start_line]
                     # allow '|foo' to find 'foo' at the beginning of a line
                     boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
                     if offset + start_col >= boundary:
                         # current token starts after the cursor,
                         # don't consume it
                         break
                     if tok.token == tokenize2.NAME and not iskeyword(tok.text):
                         if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
                             names[-1] = "%s.%s" % (names[-1], tok.text)
                         else:
                             names.append(tok.text)
                     elif tok.token == tokenize2.OP:
                         if tok.text == '=' and names:
                             # don't inspect the lhs of an assignment
                             names.pop(-1)
                         if tok.text == '(' and names:
                             # if we are inside a function call, inspect the function
                             call_names.append(names[-1])
                         elif tok.text == ')' and call_names:
                             call_names.pop(-1)
-                    if offset + end_col > cursor_pos:
+                    tokens.append(tok)
+                    if offsets[end_line] + end_col > cursor_pos:
                         # we found the cursor, stop reading
                         break
-                    tokens.append(tok)
-                    if tok.token in (tokenize2.NEWLINE, tokenize2.NL):
-                        offset += len(tok.line)
                 if call_names:
                     return call_names[-1]
                 elif names:
                     return names[-1]
                 else:
                     return ''

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages