tokenutil.py
r16578 | """Token-related utilities""" | ||
# Copyright (c) IPython Development Team. | ||||
# Distributed under the terms of the Modified BSD License. | ||||
from collections import namedtuple | ||||
from io import StringIO | ||||
from keyword import iskeyword | ||||
Thomas Kluyver
|
r24179 | import tokenize | ||
Matthias Bussonnier
|
r28475 | from tokenize import TokenInfo | ||
from typing import List, Optional | ||||
Srinivas Reddy Thatiparthy
|
r23669 | |||
MinRK
|
r16578 | |||
Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line']) | ||||
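
# Editor's note (illustrative, not part of the original module): Token simply
# mirrors the five fields of tokenize.TokenInfo, so tuples from the stdlib
# tokenizer can be rewrapped with Token(*tup), e.g.:
#   Token(token=tokenize.NAME, text='foo', start=(1, 0), end=(1, 3), line='foo\n')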


def generate_tokens(readline):
    """Wrap tokenize.generate_tokens to catch EOF errors."""
    try:
        for token in tokenize.generate_tokens(readline):
            yield token
    except tokenize.TokenError:
        # catch EOF error
        return
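

# Editor's illustration (not part of the original module): a minimal sketch
# of generate_tokens on incomplete input.  Plain tokenize.generate_tokens
# raises tokenize.TokenError when the '{' is never closed; the wrapper above
# simply stops yielding instead.
def _demo_generate_tokens():
    cell = "d = {\n"  # incomplete cell: unclosed bracket
    for tok in generate_tokens(StringIO(cell).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))
    # falls through quietly at EOF instead of raising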


def generate_tokens_catch_errors(
    readline, extra_errors_to_catch: Optional[List[str]] = None
):
    """Wrap tokenize.generate_tokens, turning a known set of tokenizer
    errors into a trailing ERRORTOKEN instead of an exception."""
    default_errors_to_catch = [
        "unterminated string literal",
        "invalid non-printable character",
        "after line continuation character",
    ]
    assert extra_errors_to_catch is None or isinstance(extra_errors_to_catch, list)
    errors_to_catch = default_errors_to_catch + (extra_errors_to_catch or [])

    tokens: List[TokenInfo] = []
    try:
        for token in tokenize.generate_tokens(readline):
            tokens.append(token)
            yield token
    except tokenize.TokenError as exc:
        if any(error in exc.args[0] for error in errors_to_catch):
            if tokens:
                # synthesize an approximate position from the last real token
                start = tokens[-1].start[0], tokens[-1].end[0]
                end = start
                line = tokens[-1].line
            else:
                start = end = (1, 0)
                line = ""
            yield tokenize.TokenInfo(tokenize.ERRORTOKEN, "", start, end, line)
        else:
            # Catch EOF
            raise
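

# Editor's illustration (not part of the original module): on Python 3.12+,
# where the stdlib tokenizer raises TokenError with messages like
# "unterminated string literal (detected at line 1)", the wrapper above
# converts the failure into a final ERRORTOKEN; older Pythons report this
# case differently, so the exact output may vary.
def _demo_generate_tokens_catch_errors():
    cell = "s = 'abc\n"  # unterminated string literal
    for tok in generate_tokens_catch_errors(StringIO(cell).readline):
        print(tokenize.tok_name[tok.type], repr(tok.string))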


def line_at_cursor(cell, cursor_pos=0):
    """Return the line in a cell at a given cursor position

    Used for calling line-based APIs that don't support multi-line input, yet.

    Parameters
    ----------
    cell : str
        multiline block of text
    cursor_pos : integer
        the cursor position

    Returns
    -------
    (line, offset): (string, integer)
        The line with the current cursor, and the character offset of the start of the line.
    """
    offset = 0
    lines = cell.splitlines(True)
    for line in lines:
        next_offset = offset + len(line)
        if not line.endswith('\n'):
            # If the last line doesn't have a trailing newline, treat it as if
            # it does so that the cursor at the end of the line still counts
            # as being on that line.
            next_offset += 1
        if next_offset > cursor_pos:
            break
        offset = next_offset
    else:
        line = ""
    return (line, offset)
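

# Editor's illustration (not part of the original module): cursor position 8
# falls inside the second line of the cell, so that line and its starting
# character offset (6, the length of "a = 1\n") are returned.
def _demo_line_at_cursor():
    cell = "a = 1\nb = 22\n"
    print(line_at_cursor(cell, cursor_pos=8))  # -> ('b = 22\n', 6)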


def token_at_cursor(cell: str, cursor_pos: int = 0):
    """Get the token at a given cursor

    Used for introspection.

    Function calls are prioritized, so the token for the callable will be returned
    if the cursor is anywhere inside the call.

    Parameters
    ----------
    cell : str
        A block of Python code
    cursor_pos : int
        The location of the cursor in the block where the token should be found

    Returns
    -------
    str
        The name at the cursor (dotted names included), or an empty string
        if no suitable token is found.
    """
    names: List[str] = []
    tokens: List[Token] = []
    call_names = []

    offsets = {1: 0}  # lines start at 1
    for tup in generate_tokens(StringIO(cell).readline):
        tok = Token(*tup)
        # token, text, start, end, line = tup
        start_line, start_col = tok.start
        end_line, end_col = tok.end
        if end_line + 1 not in offsets:
            # keep track of offsets for each line
            lines = tok.line.splitlines(True)
            for lineno, line in enumerate(lines, start_line + 1):
                if lineno not in offsets:
                    offsets[lineno] = offsets[lineno - 1] + len(line)
        offset = offsets[start_line]
        # allow '|foo' to find 'foo' at the beginning of a line
        boundary = cursor_pos + 1 if start_col == 0 else cursor_pos
        if offset + start_col >= boundary:
            # current token starts after the cursor,
            # don't consume it
            break
        if tok.token == tokenize.NAME and not iskeyword(tok.text):
            if names and tokens and tokens[-1].token == tokenize.OP and tokens[-1].text == '.':
                names[-1] = "%s.%s" % (names[-1], tok.text)
            else:
                names.append(tok.text)
        elif tok.token == tokenize.OP:
            if tok.text == '=' and names:
                # don't inspect the lhs of an assignment
                names.pop(-1)
            if tok.text == '(' and names:
                # if we are inside a function call, inspect the function
                call_names.append(names[-1])
            elif tok.text == ')' and call_names:
                call_names.pop(-1)

        tokens.append(tok)
        if offsets[end_line] + end_col > cursor_pos:
            # we found the cursor, stop reading
            break

    if call_names:
        return call_names[-1]
    elif names:
        return names[-1]
    else:
        return ''
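

# Editor's illustration (not part of the original module): a sketch of the
# call-prioritization rule documented above.  With the cursor inside the
# argument list of sum(...), the callable 'sum' is returned rather than the
# argument under the cursor; dotted names are joined into a single token.
def _demo_token_at_cursor():
    print(token_at_cursor("sum(data)", cursor_pos=5))  # -> 'sum'
    print(token_at_cursor("obj.attr", cursor_pos=6))   # -> 'obj.attr'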