upstream/ipython Commit - r21701:a85e1dcd

handle multi-line tokens in token_at_cursor...

Min RK -

r21701:a85e1dcd

parent child

IPython/utils/tests/test_tokenutil.py

0 +19 0

              """Tests for tokenutil"""
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              import nose.tools as nt
              from IPython.utils.tokenutil import token_at_cursor, line_at_cursor
              def expect_token(expected, cell, cursor_pos):
                  """Assert that ``token_at_cursor(cell, cursor_pos)`` returns *expected*.

                  On failure the message shows the line holding the cursor, with a
                  ``|`` inserted at the cursor column, plus the absolute position.
                  """
                  token = token_at_cursor(cell, cursor_pos)
                  # Find the line that contains the cursor; it only feeds the failure
                  # message.  ``line`` starts out empty so that an empty cell (where the
                  # loop body never runs) does not leave it unbound.
                  offset = 0
                  line = ''
                  for line in cell.splitlines():
                      if offset + len(line) >= cursor_pos:
                          break
                      # +1 for the newline that splitlines() dropped
                      offset += len(line) + 1
                  column = cursor_pos - offset
                  line_with_cursor = '%s|%s' % (line[:column], line[column:])
                  nt.assert_equal(token, expected,
                      "Expected %r, got %r in: %r (pos %i)" % (
                      expected, token, line_with_cursor, cursor_pos)
                  )
              def test_simple():
                  """Every cursor position before the end of a bare name finds that name."""
                  source = "foo"
                  for pos, _ in enumerate(source):
                      expect_token("foo", source, pos)
              def test_function():
                  """Every probed position in a call expression resolves to the callable."""
                  cell = "foo(a=5, b='10')"
                  expected = 'foo'
                  # up to `foo(|a=`
                  for i in range(cell.find('a=') + 1):
                      expect_token(expected, cell, i)
                  # find foo after `=`
                  for i in [cell.find('=') + 1, cell.rfind('=') + 1]:
                      expect_token(expected, cell, i)
                  # in between `5,|` and `|b=`
                  for i in range(cell.find(','), cell.find('b=')):
                      expect_token(expected, cell, i)
              def test_multiline():
                  """A call on the second line of a cell is found from inside its name."""
                  cell = '\n'.join([
                      'a = 5',
                      'b = hello("string", there)'
                  ])
                  expected = 'hello'
                  # probe from one character into the name through the position just
                  # past its last character
                  start = cell.index(expected) + 1
                  for i in range(start, start + len(expected)):
                      expect_token(expected, cell, i)
+             def test_multiline_token():  # a call placed after tokens that span several lines
+                 cell = '\n'.join([
+                     '"""\n\nxxxxxxxxxx\n\n"""',  # triple-quoted string with embedded newlines
+                     '5, """',
+                     'docstring',
+                     'multiline token',
+                     '""", [',
+                     '2, 3, "complicated"]',
+                     'b = hello("string", there)'
+                 ])
+                 expected = 'hello'
+                 start = cell.index(expected) + 1  # one character into the name
+                 for i in range(start, start + len(expected)):
+                     expect_token(expected, cell, i)
+                 expected = 'hello'  # NOTE(review): this second loop repeats the one above verbatim
+                 start = cell.index(expected) + 1
+                 for i in range(start, start + len(expected)):
+                     expect_token(expected, cell, i)
              def test_nested_call():
                  """Inside nested calls, the innermost call still open at the cursor wins."""
                  cell = "foo(bar(a=5), b=10)"
                  inner_name = cell.index('bar') + 1
                  inner_kwarg = cell.index('a=')
                  after_inner = cell.index(')') + 1
                  # (expected token, cursor positions to probe), checked in this order
                  cases = (
                      ('foo', range(inner_name, inner_name + 3)),
                      ('bar', range(inner_kwarg, inner_kwarg + 3)),
                      ('foo', range(after_inner, len(cell) - 1)),
                  )
                  for expected, positions in cases:
                      for pos in positions:
                          expect_token(expected, cell, pos)
              def test_attrs():
                  """Attribute access is reported as one dotted name up to the cursor."""
                  cell = "a = obj.attr.subattr"
                  obj_pos = cell.find('obj') + 1
                  attr_pos = cell.find('.attr') + 2
                  subattr_pos = cell.find('.subattr') + 2
                  # (expected token, cursor positions to probe), checked in this order
                  cases = (
                      ('obj', range(obj_pos, obj_pos + 3)),
                      ('obj.attr', range(attr_pos, attr_pos + 4)),
                      ('obj.attr.subattr', range(subattr_pos, len(cell))),
                  )
                  for expected, positions in cases:
                      for pos in positions:
                          expect_token(expected, cell, pos)
              def test_line_at_cursor():
                  """An empty cell gives an empty line at offset 0, even for a cursor past the end."""
                  cell = ""
                  (line, offset) = line_at_cursor(cell, cursor_pos=11)
                  assert line == "", ("Expected '', got %r" % line)
                  # report the offset (not the line) when the offset check fails
                  assert offset == 0, ("Expected 0, got %r" % offset)
              def test_muliline_statement():
                  cell = """a = (1,
 )
              int()
              map()
              """
                  for c in range(16, 22):
                      yield lambda: expect_token("int", cell, c)
                  for c in range(22, 28):
                      yield lambda: expect_token("map", cell, c)

IPython/utils/tokenutil.py

0 +15 -8

              """Token-related utilities"""
              # Copyright (c) IPython Development Team.
              # Distributed under the terms of the Modified BSD License.
              from __future__ import absolute_import, print_function
              from collections import namedtuple
              from io import StringIO
              from keyword import iskeyword
              from . import tokenize2
              from .py3compat import cast_unicode_py2
              # Named view of the 5-tuples produced by generate_tokens();
              # token_at_cursor builds one per token with Token(*tup).
              Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])
              def generate_tokens(readline):
                  """Yield tokens from ``tokenize2.generate_tokens``, stopping on ``TokenError``.

                  Parameters
                  ----------
                  readline : callable
                      Passed straight through to ``tokenize2.generate_tokens``.
                  """
                  try:
                      token_stream = tokenize2.generate_tokens(readline)
                      for item in token_stream:
                          yield item
                  except tokenize2.TokenError:
                      # EOF error: end the stream instead of propagating
                      pass
              def line_at_cursor(cell, cursor_pos=0):
                  """Find the line of a cell that holds the cursor.

                  Useful for line-based APIs that cannot take multi-line input.

                  Parameters
                  ----------
                  cell: text
                      multiline block of text
                  cursor_pos: integer
                      the cursor position

                  Returns
                  -------
                  (line, offset): (text, integer)
                      The line containing the cursor (line ending included) and the
                      character offset at which that line starts.  A cursor sitting
                      exactly at the end of a line still counts as being on that line.
                      When the cursor lies beyond the last line, the line is ``""`` and
                      the offset is the total length of the lines scanned.
                  """
                  line_start = 0
                  for text in cell.splitlines(True):
                      line_end = line_start + len(text)
                      if line_end >= cursor_pos:
                          return (text, line_start)
                      line_start = line_end
                  # ran out of lines (or the cell was empty) before reaching the cursor
                  return ("", line_start)
              def token_at_cursor(cell, cursor_pos=0):
                  """Get the token at a given cursor

                  Used for introspection.

                  Function calls are prioritized, so the token for the callable will be returned
                  if the cursor is anywhere inside the call.

                  Parameters
                  ----------
                  cell : unicode
                      A block of Python code
                  cursor_pos : int
                      The location of the cursor in the block where the token should be found

                  Returns
                  -------
                  name : text
                      The name recorded for the innermost call still open when scanning
                      stops, if there is one; otherwise the last (possibly dotted) name
                      collected; otherwise the empty string.  A name immediately followed
                      by ``=`` is discarded.
                  """
                  cell = cast_unicode_py2(cell)
                  names = []
                  tokens = []
-                 offset = 0
                  call_names = []
+                 offsets = {1: 0} # lines start at 1
                  for tup in generate_tokens(StringIO(cell).readline):
                      tok = Token(*tup)
                      # token, text, start, end, line = tup
-                     start_col = tok.start[1]
-                     end_col = tok.end[1]
+                     start_line, start_col = tok.start
+                     end_line, end_col = tok.end
+                     if end_line + 1 not in offsets:
+                         # keep track of offsets for each line
+                         lines = tok.line.splitlines(True)
+                         for lineno, line in zip(range(start_line + 1, end_line + 2), lines):
+                             if lineno not in offsets:
+                                 offsets[lineno] = offsets[lineno-1] + len(line)
+                     offset = offsets[start_line]
                      # allow '|foo' to find 'foo' at the beginning of a line
                      boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
                      if offset + start_col >= boundary:
                          # current token starts after the cursor,
                          # don't consume it
                          break
                      # collect names; a name that directly follows a `.` is joined
                      # onto the previous name to form a dotted name
                      if tok.token == tokenize2.NAME and not iskeyword(tok.text):
                          if names and tokens and tokens[-1].token == tokenize2.OP and tokens[-1].text == '.':
                              names[-1] = "%s.%s" % (names[-1], tok.text)
                          else:
                              names.append(tok.text)
                      elif tok.token == tokenize2.OP:
                          if tok.text == '=' and names:
                              # don't inspect the lhs of an assignment
                              names.pop(-1)
                          if tok.text == '(' and names:
                              # if we are inside a function call, inspect the function
                              call_names.append(names[-1])
                          elif tok.text == ')' and call_names:
                              call_names.pop(-1)
-                     if offset + end_col > cursor_pos:
+                     tokens.append(tok)
+                     if offsets[end_line] + end_col > cursor_pos:
                          # we found the cursor, stop reading
                          break
-                     tokens.append(tok)
-                     if tok.token in (tokenize2.NEWLINE, tokenize2.NL):
-                         offset += len(tok.line)
                  # an open call takes priority over a plain name
                  if call_names:
                      return call_names[-1]
                  elif names:
                      return names[-1]
                  else:
                      return ''

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages