upstream/ipython Files · IPython/utils/tokenutil.py

remove raw_print and raw_print_err deprecated since IPython 7.0

Matthias Bussonnier - - Load All Authors

File last commit:

r28480:87ab1c59


                r29006:0bee99f8

Download file

             tokenutil.py
        
                    160 lines
            
             | 5.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tokenutil.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Token-related utilities"""

      # Copyright (c) IPython Development Team.

      # Distributed under the terms of the Modified BSD License.

      from collections import namedtuple

      from io import StringIO

      from keyword import iskeyword

      import tokenize

      from tokenize import TokenInfo

      from typing import List, Optional

      # Five-field token record; token_at_cursor builds it positionally from
      # the tuples yielded by generate_tokens (kind, text, start, end, line).
      Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

      def generate_tokens(readline):
          """Yield tokens from ``tokenize.generate_tokens``, ending quietly on error.

          A ``tokenize.TokenError`` raised while tokenizing terminates the
          stream instead of propagating to the caller; every token produced
          before the error has already been yielded.
          """
          try:
              yield from tokenize.generate_tokens(readline)
          except tokenize.TokenError:
              # Tokenizing failed part-way: simply stop producing tokens.
              pass

      def generate_tokens_catch_errors(
          readline, extra_errors_to_catch: Optional[List[str]] = None
      ):
          """Tokenize like ``tokenize.generate_tokens`` but absorb selected errors.

          Tokens are yielded as they are produced. If tokenizing fails with a
          ``tokenize.TokenError`` whose message contains one of the recognised
          substrings, a single zero-width ``ERRORTOKEN`` is yielded in place of
          the exception and the stream ends.

          Parameters
          ----------
          readline : callable
              Passed unchanged to ``tokenize.generate_tokens``.
          extra_errors_to_catch : list of str, optional
              Extra message substrings to absorb, in addition to the built-in ones.

          Yields
          ------
          tokenize.TokenInfo
              The regular tokens, optionally followed by one empty ``ERRORTOKEN``.

          Raises
          ------
          tokenize.TokenError
              When the error message matches none of the recognised substrings.
          """
          default_errors_to_catch = [
              "unterminated string literal",
              "invalid non-printable character",
              "after line continuation character",
          ]
          assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
          errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

          # Only the most recent token is needed to position the error marker.
          last_token: Optional[TokenInfo] = None
          try:
              for token in tokenize.generate_tokens(readline):
                  last_token = token
                  yield token
          except tokenize.TokenError as exc:
              if not any(error in exc.args[0] for error in errors_to_catch):
                  # Unrecognised message: let the caller deal with it.
                  raise
              if last_token is not None:
                  # Anchor the zero-width marker at the (row, column) where the
                  # last good token ended.
                  start = end = last_token.end
                  line = last_token.line
              else:
                  start = end = (1, 0)
                  line = ""
              yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)

      def line_at_cursor(cell, cursor_pos=0):
          """Return the line in a cell at a given cursor position

          Used for calling line-based APIs that don't support multi-line input, yet.

          Parameters
          ----------
          cell : str
              multiline block of text
          cursor_pos : integer
              the cursor position

          Returns
          -------
          (line, offset): (string, integer)
              The line with the current cursor, and the character offset of the
              start of the line. If the cursor lies beyond the end of the cell,
              the line is the empty string.
          """
          offset = 0
          # Pair every line (terminator kept) with its terminator-free text, so a
          # missing terminator is detected whichever line-break character
          # ``str.splitlines`` split on, not just '\n'.
          for line, bare in zip(cell.splitlines(True), cell.splitlines()):
              next_offset = offset + len(line)
              if len(line) == len(bare):
                  # An unterminated line (only the last one can be) is treated as
                  # if it had a terminator, so that a cursor at the very end of
                  # the cell still counts as being on that line.
                  next_offset += 1
              if next_offset > cursor_pos:
                  break
              offset = next_offset
          else:
              line = ""
          return (line, offset)

      def token_at_cursor(cell: str, cursor_pos: int = 0):
          """Find the name of interest at a cursor position, for introspection.

          An enclosing call wins: when the cursor sits inside a call whose
          closing parenthesis has not been reached yet, the callable's name is
          returned rather than the name directly under the cursor.

          Parameters
          ----------
          cell : str
              A block of Python code
          cursor_pos : int
              The location of the cursor in the block where the token should be found

          Returns
          -------
          str
              The (possibly dotted) name that was found, or ``''`` if there is none.
          """
          seen_names: List[str] = []
          open_calls = []
          previous = None  # most recently consumed token, if any
          line_starts = {1: 0}  # absolute offset of each line; lines start at 1

          for raw in generate_tokens(StringIO(cell).readline):
              tok = Token(*raw)
              first_row, first_col = tok.start
              last_row, last_col = tok.end

              if last_row + 1 not in line_starts:
                  # Record where each line following this token's line(s) begins.
                  for row, text in enumerate(tok.line.splitlines(True), first_row + 1):
                      if row not in line_starts:
                          line_starts[row] = line_starts[row - 1] + len(text)

              # allow '|foo' to find 'foo' at the beginning of a line
              limit = cursor_pos if first_col != 0 else cursor_pos + 1
              if line_starts[first_row] + first_col >= limit:
                  # This token begins past the cursor; leave it unconsumed.
                  break

              if tok.token == tokenize.NAME and not iskeyword(tok.text):
                  follows_dot = (
                      previous is not None
                      and previous.token == tokenize.OP
                      and previous.text == '.'
                  )
                  if seen_names and follows_dot:
                      # Attribute access: extend the dotted name in place.
                      seen_names[-1] = "%s.%s" % (seen_names[-1], tok.text)
                  else:
                      seen_names.append(tok.text)
              elif tok.token == tokenize.OP:
                  if tok.text == '=':
                      if seen_names:
                          # don't inspect the lhs of an assignment
                          seen_names.pop()
                  elif tok.text == '(':
                      if seen_names:
                          # entering a call: remember which callable it belongs to
                          open_calls.append(seen_names[-1])
                  elif tok.text == ')':
                      if open_calls:
                          open_calls.pop()

              previous = tok

              if line_starts[last_row] + last_col > cursor_pos:
                  # The cursor lies within this token; nothing further matters.
                  break

          if open_calls:
              return open_calls[-1]
          return seen_names[-1] if seen_names else ''

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Token-related utilities"""

				# Copyright (c) IPython Development Team.
				# Distributed under the terms of the Modified BSD License.

				from collections import namedtuple
				from io import StringIO
				from keyword import iskeyword

				import tokenize
				from tokenize import TokenInfo
				from typing import List, Optional


				# Five-field token record; token_at_cursor builds it positionally from
				# the tuples yielded by generate_tokens (kind, text, start, end, line).
				Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

				def generate_tokens(readline):
				    """Wrap ``tokenize.generate_tokens`` so tokenizer errors end the stream.

				    A ``tokenize.TokenError`` raised while tokenizing is swallowed; every
				    token produced before the error has already been yielded.
				    """
				    try:
				        for token in tokenize.generate_tokens(readline):
				            yield token
				    except tokenize.TokenError:
				        # Tokenizing failed part-way: stop producing tokens.
				        return


				def generate_tokens_catch_errors(
				    readline, extra_errors_to_catch: Optional[List[str]] = None
				):
				    """Tokenize like ``tokenize.generate_tokens`` but absorb selected errors.

				    Tokens are yielded as they are produced. If tokenizing fails with a
				    ``tokenize.TokenError`` whose message contains one of the recognised
				    substrings, a single zero-width ``ERRORTOKEN`` is yielded in place of
				    the exception and the stream ends.

				    Parameters
				    ----------
				    readline : callable
				        Passed unchanged to ``tokenize.generate_tokens``.
				    extra_errors_to_catch : list of str, optional
				        Extra message substrings to absorb, in addition to the built-in ones.

				    Yields
				    ------
				    tokenize.TokenInfo
				        The regular tokens, optionally followed by one empty ``ERRORTOKEN``.

				    Raises
				    ------
				    tokenize.TokenError
				        When the error message matches none of the recognised substrings.
				    """
				    default_errors_to_catch = [
				        "unterminated string literal",
				        "invalid non-printable character",
				        "after line continuation character",
				    ]
				    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
				    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

				    # Only the most recent token is needed to position the error marker.
				    last_token: Optional[TokenInfo] = None
				    try:
				        for token in tokenize.generate_tokens(readline):
				            last_token = token
				            yield token
				    except tokenize.TokenError as exc:
				        if not any(error in exc.args[0] for error in errors_to_catch):
				            # Unrecognised message: let the caller deal with it.
				            raise
				        if last_token is not None:
				            # Anchor the zero-width marker at the (row, column) where the
				            # last good token ended.
				            start = end = last_token.end
				            line = last_token.line
				        else:
				            start = end = (1, 0)
				            line = ""
				        yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)


				def line_at_cursor(cell, cursor_pos=0):
				    """Return the line in a cell at a given cursor position

				    Used for calling line-based APIs that don't support multi-line input, yet.

				    Parameters
				    ----------
				    cell : str
				        multiline block of text
				    cursor_pos : integer
				        the cursor position

				    Returns
				    -------
				    (line, offset): (string, integer)
				        The line with the current cursor, and the character offset of the
				        start of the line. If the cursor lies beyond the end of the cell,
				        the line is the empty string.
				    """
				    offset = 0
				    # Pair every line (terminator kept) with its terminator-free text, so a
				    # missing terminator is detected whichever line-break character
				    # ``str.splitlines`` split on, not just '\n'.
				    for line, bare in zip(cell.splitlines(True), cell.splitlines()):
				        next_offset = offset + len(line)
				        if len(line) == len(bare):
				            # An unterminated line (only the last one can be) is treated as
				            # if it had a terminator, so that a cursor at the very end of
				            # the cell still counts as being on that line.
				            next_offset += 1
				        if next_offset > cursor_pos:
				            break
				        offset = next_offset
				    else:
				        line = ""
				    return (line, offset)


				def token_at_cursor(cell: str, cursor_pos: int = 0):
				    """Get the token at a given cursor

				    Used for introspection.

				    Function calls are prioritized, so the token for the callable will be returned
				    if the cursor is anywhere inside the call.

				    Parameters
				    ----------
				    cell : str
				        A block of Python code
				    cursor_pos : int
				        The location of the cursor in the block where the token should be found

				    Returns
				    -------
				    str
				        The (possibly dotted) name that was found, or ``''`` if there is none.
				    """
				    names: List[str] = []
				    tokens: List[Token] = []
				    call_names = []

				    offsets = {1: 0}  # absolute offset of each line; lines start at 1
				    for tup in generate_tokens(StringIO(cell).readline):
				        tok = Token(*tup)

				        start_line, start_col = tok.start
				        end_line, end_col = tok.end
				        if end_line + 1 not in offsets:
				            # keep track of offsets for each line
				            lines = tok.line.splitlines(True)
				            for lineno, line in enumerate(lines, start_line + 1):
				                if lineno not in offsets:
				                    offsets[lineno] = offsets[lineno - 1] + len(line)

				        offset = offsets[start_line]
				        # allow '|foo' to find 'foo' at the beginning of a line
				        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
				        if offset + start_col >= boundary:
				            # current token starts after the cursor,
				            # don't consume it
				            break

				        if tok.token == tokenize.NAME and not iskeyword(tok.text):
				            if names and tokens and tokens[-1].token == tokenize.OP and tokens[-1].text == '.':
				                # attribute access: extend the dotted name in place
				                names[-1] = "%s.%s" % (names[-1], tok.text)
				            else:
				                names.append(tok.text)
				        elif tok.token == tokenize.OP:
				            if tok.text == '=' and names:
				                # don't inspect the lhs of an assignment
				                names.pop(-1)
				            if tok.text == '(' and names:
				                # if we are inside a function call, inspect the function
				                call_names.append(names[-1])
				            elif tok.text == ')' and call_names:
				                call_names.pop(-1)

				        tokens.append(tok)

				        if offsets[end_line] + end_col > cursor_pos:
				            # we found the cursor, stop reading
				            break

				    if call_names:
				        return call_names[-1]
				    elif names:
				        return names[-1]
				    else:
				        return ''