upstream/ipython Files · IPython/utils/tokenutil.py

Merge pull request from Carreau/clean-enumerate...

Merge pull request from Carreau/clean-enumerate Update a couple of iteration idioms.

Matthias Bussonnier - - Load All Authors

File last commit:

r23365:76f8d4f3


                r23366:e5f3033d

Download file

             tokenutil.py
        
                    127 lines
            
             | 3.7 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tokenutil.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Token-related utilities"""

      # Copyright (c) IPython Development Team.

      # Distributed under the terms of the Modified BSD License.

      from collections import namedtuple

      from io import StringIO

      from keyword import iskeyword

      from . import tokenize2

      from .py3compat import cast_unicode_py2

      # Named view over the 5-tuples yielded by the tokenizer (built with
      # ``Token(*tup)`` in token_at_cursor): ``token`` is the token type that is
      # compared against tokenize2.NAME / tokenize2.OP, ``text`` the token string,
      # ``start`` / ``end`` are (line, column) pairs and ``line`` the source text.
      Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

      def generate_tokens(readline):
          """Yield tokens read through ``readline``, ending quietly on a tokenizer error

          Thin wrapper around ``tokenize2.generate_tokens``: every token it
          produces is passed on unchanged.  When the tokenizer raises
          ``tokenize2.TokenError`` the stream simply ends instead of propagating
          the exception; tokens produced before the error have already been
          yielded.
          """
          try:
              token_stream = tokenize2.generate_tokens(readline)
              for tok in token_stream:
                  yield tok
          except tokenize2.TokenError:
              # EOF error from the tokenizer: finish the generator silently
              pass

      def line_at_cursor(cell, cursor_pos=0):
          """Return the line in a cell at a given cursor position

          Used for calling line-based APIs that don't support multi-line input, yet.

          A cursor placed immediately after a line terminator belongs to the
          *following* line.  A cursor at the very end of a cell whose last line
          has no terminator still belongs to that last line.

          Parameters
          ----------
          cell : str
              multiline block of text
          cursor_pos : integer
              the cursor position

          Returns
          -------
          (line, offset) : (str, integer)
              The line with the current cursor, and the character offset of the
              start of the line.  ``line`` is ``""`` when the cursor is not on any
              line (empty cell, cursor beyond the text, or cursor right after a
              trailing newline); ``offset`` is then the length of the text that
              was skipped.
          """
          offset = 0
          lines = cell.splitlines(True)
          last_index = len(lines) - 1
          for index, line in enumerate(lines):
              next_offset = offset + len(line)
              if next_offset > cursor_pos:
                  # the cursor is strictly before the start of the next line
                  break
              if (index == last_index and next_offset == cursor_pos
                      and line.splitlines()[0] == line):
                  # last line without a terminator: a cursor at the end of the
                  # text is still on this line
                  break
              offset = next_offset
          else:
              line = ""
          return (line, offset)

      def token_at_cursor(cell, cursor_pos=0):
          """Find the name to introspect at a cursor position

          Used for introspection.

          The tokens of ``cell`` are scanned up to the cursor.  If the cursor lies
          inside a call whose opening parenthesis has not been closed yet, the
          name of that callable is returned; otherwise the most recently seen
          (possibly dotted) name is.  Keywords are never candidates, and the most
          recent name is dropped when an ``=`` is met so that the left-hand side
          of an assignment is not inspected.

          Parameters
          ----------
          cell : unicode
              A block of Python code
          cursor_pos : int
              The location of the cursor in the block where the token should be found

          Returns
          -------
          name : unicode
              The selected name, or ``''`` when no name was found.
          """
          cell = cast_unicode_py2(cell)
          names = []       # candidate names; attribute access is merged into one entry
          call_names = []  # callables whose '(' is still open
          previous = None  # last token that was consumed

          line_offsets = {1: 0}  # lines start at 1
          for tup in generate_tokens(StringIO(cell).readline):
              tok = Token(*tup)
              (start_line, start_col), (end_line, end_col) = tok.start, tok.end

              if end_line + 1 not in line_offsets:
                  # remember the absolute offset at which each following line starts
                  for lineno, text in enumerate(tok.line.splitlines(True), start_line + 1):
                      if lineno not in line_offsets:
                          line_offsets[lineno] = line_offsets[lineno - 1] + len(text)

              token_start = line_offsets[start_line] + start_col
              # allow '|foo' to find 'foo' at the beginning of a line
              if start_col == 0:
                  boundary = cursor_pos + 1
              else:
                  boundary = cursor_pos
              if token_start >= boundary:
                  # this token begins after the cursor: leave it unconsumed
                  break

              if tok.token == tokenize2.NAME and not iskeyword(tok.text):
                  after_dot = (
                      bool(names)
                      and previous is not None
                      and previous.token == tokenize2.OP
                      and previous.text == '.'
                  )
                  if after_dot:
                      # attribute access: extend the last name into a dotted one
                      names[-1] = "%s.%s" % (names[-1], tok.text)
                  else:
                      names.append(tok.text)
              elif tok.token == tokenize2.OP:
                  if tok.text == '=':
                      if names:
                          # don't inspect the lhs of an assignment
                          names.pop()
                  elif tok.text == '(':
                      if names:
                          # inside a function call: the callable takes priority
                          call_names.append(names[-1])
                  elif tok.text == ')':
                      if call_names:
                          call_names.pop()

              previous = tok

              if line_offsets[end_line] + end_col > cursor_pos:
                  # the cursor is inside (or at the end of) this token: stop reading
                  break

          for candidates in (call_names, names):
              if candidates:
                  return candidates[-1]
          return ''

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Token-related utilities"""

				# Copyright (c) IPython Development Team.
				# Distributed under the terms of the Modified BSD License.


				from collections import namedtuple
				from io import StringIO
				from keyword import iskeyword

				from . import tokenize2
				from .py3compat import cast_unicode_py2

				# Named view over the 5-tuples yielded by the tokenizer (built with
				# ``Token(*tup)`` in token_at_cursor): ``token`` is the token type that is
				# compared against tokenize2.NAME / tokenize2.OP, ``text`` the token string,
				# ``start`` / ``end`` are (line, column) pairs and ``line`` the source text.
				Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

				def generate_tokens(readline):
				    """Yield tokens read through ``readline``, ending quietly on a tokenizer error

				    Thin wrapper around ``tokenize2.generate_tokens``: every token it
				    produces is passed on unchanged.  When the tokenizer raises
				    ``tokenize2.TokenError`` the stream simply ends instead of propagating
				    the exception; tokens produced before the error have already been
				    yielded.
				    """
				    try:
				        token_stream = tokenize2.generate_tokens(readline)
				        for tok in token_stream:
				            yield tok
				    except tokenize2.TokenError:
				        # EOF error from the tokenizer: finish the generator silently
				        pass

				def line_at_cursor(cell, cursor_pos=0):
				    """Return the line in a cell at a given cursor position

				    Used for calling line-based APIs that don't support multi-line input, yet.

				    A cursor placed immediately after a line terminator belongs to the
				    *following* line.  A cursor at the very end of a cell whose last line
				    has no terminator still belongs to that last line.

				    Parameters
				    ----------
				    cell : str
				        multiline block of text
				    cursor_pos : integer
				        the cursor position

				    Returns
				    -------
				    (line, offset) : (str, integer)
				        The line with the current cursor, and the character offset of the
				        start of the line.  ``line`` is ``""`` when the cursor is not on any
				        line (empty cell, cursor beyond the text, or cursor right after a
				        trailing newline); ``offset`` is then the length of the text that
				        was skipped.
				    """
				    offset = 0
				    lines = cell.splitlines(True)
				    last_index = len(lines) - 1
				    for index, line in enumerate(lines):
				        next_offset = offset + len(line)
				        if next_offset > cursor_pos:
				            # the cursor is strictly before the start of the next line
				            break
				        if (index == last_index and next_offset == cursor_pos
				                and line.splitlines()[0] == line):
				            # last line without a terminator: a cursor at the end of the
				            # text is still on this line
				            break
				        offset = next_offset
				    else:
				        line = ""
				    return (line, offset)

				def token_at_cursor(cell, cursor_pos=0):
				    """Find the name to introspect at a cursor position

				    Used for introspection.

				    The tokens of ``cell`` are scanned up to the cursor.  If the cursor lies
				    inside a call whose opening parenthesis has not been closed yet, the
				    name of that callable is returned; otherwise the most recently seen
				    (possibly dotted) name is.  Keywords are never candidates, and the most
				    recent name is dropped when an ``=`` is met so that the left-hand side
				    of an assignment is not inspected.

				    Parameters
				    ----------
				    cell : unicode
				        A block of Python code
				    cursor_pos : int
				        The location of the cursor in the block where the token should be found

				    Returns
				    -------
				    name : unicode
				        The selected name, or ``''`` when no name was found.
				    """
				    cell = cast_unicode_py2(cell)
				    names = []       # candidate names; attribute access is merged into one entry
				    call_names = []  # callables whose '(' is still open
				    previous = None  # last token that was consumed

				    line_offsets = {1: 0}  # lines start at 1
				    for tup in generate_tokens(StringIO(cell).readline):
				        tok = Token(*tup)
				        (start_line, start_col), (end_line, end_col) = tok.start, tok.end

				        if end_line + 1 not in line_offsets:
				            # remember the absolute offset at which each following line starts
				            for lineno, text in enumerate(tok.line.splitlines(True), start_line + 1):
				                if lineno not in line_offsets:
				                    line_offsets[lineno] = line_offsets[lineno - 1] + len(text)

				        token_start = line_offsets[start_line] + start_col
				        # allow '|foo' to find 'foo' at the beginning of a line
				        if start_col == 0:
				            boundary = cursor_pos + 1
				        else:
				            boundary = cursor_pos
				        if token_start >= boundary:
				            # this token begins after the cursor: leave it unconsumed
				            break

				        if tok.token == tokenize2.NAME and not iskeyword(tok.text):
				            after_dot = (
				                bool(names)
				                and previous is not None
				                and previous.token == tokenize2.OP
				                and previous.text == '.'
				            )
				            if after_dot:
				                # attribute access: extend the last name into a dotted one
				                names[-1] = "%s.%s" % (names[-1], tok.text)
				            else:
				                names.append(tok.text)
				        elif tok.token == tokenize2.OP:
				            if tok.text == '=':
				                if names:
				                    # don't inspect the lhs of an assignment
				                    names.pop()
				            elif tok.text == '(':
				                if names:
				                    # inside a function call: the callable takes priority
				                    call_names.append(names[-1])
				            elif tok.text == ')':
				                if call_names:
				                    call_names.pop()

				        previous = tok

				        if line_offsets[end_line] + end_col > cursor_pos:
				            # the cursor is inside (or at the end of) this token: stop reading
				            break

				    for candidates in (call_names, names):
				        if candidates:
				            return candidates[-1]
				    return ''