upstream/ipython Files · IPython/utils/tokenutil.py

support for unicode identifiers...

support for unicode identifiers This rewrites some of the regular expressions that are used to match Python identifiers, so that they are unicode compatible. In Python 3, identifiers can contain unicode characters as long as the first character is not numeric. Examples for the changes: • inputtransformer: ``` In [1]: π = 3.14 In [2]: π.is_integer? Object `is_integer` not found. ``` ---------- • namespace: ``` π.is_integ*? ``` or ``` In [1]: %psearch π.is_integ Python identifiers can only contain ascii characters. ``` ---------- • prefilter: ``` %autocall 1 φ = float get_ipython().prefilter("φ 3") # should be 'φ(3)', but returns 'φ 3' ``` ---------- • completerlib: If there is a file e.g. named `π.py` in the current directory, then ``` import IPython IPython.core.completerlib.module_list('.') # should contain module 'π' ```

Thomas Kluyver - - Load All Authors

File last commit:

r24179:14e53cdd


                r25595:d9c0e690

Download file

             tokenutil.py
        
                    130 lines
            
             | 3.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / utils / tokenutil.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Token-related utilities"""

      # Copyright (c) IPython Development Team.

      # Distributed under the terms of the Modified BSD License.

      from collections import namedtuple

      from io import StringIO

      from keyword import iskeyword

      import tokenize

      # Named view of the 5-tuples yielded by the tokenizer; fields follow the
      # tuple order: token type, token text, start (row, col), end (row, col),
      # and the source line(s) the token was found on.
      Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

      def generate_tokens(readline):
          """Yield tokens like ``tokenize.generate_tokens``, but stop quietly on EOF errors.

          Parameters
          ----------
          readline : callable
              Passed straight through to ``tokenize.generate_tokens``.

          Yields
          ------
          The token tuples produced by ``tokenize.generate_tokens``.  If the
          tokenizer raises ``tokenize.TokenError``, the generator simply ends
          after the tokens already produced instead of propagating the error.
          """
          try:
              yield from tokenize.generate_tokens(readline)
          except tokenize.TokenError:
              # Incomplete input (EOF inside a multi-line construct): keep
              # whatever was tokenized so far and finish normally.
              return

      def line_at_cursor(cell, cursor_pos=0):
          """Return the line in a cell at a given cursor position

          Used for calling line-based APIs that don't support multi-line input, yet.

          Parameters
          ----------
          cell : str
              multiline block of text
          cursor_pos : integer
              the cursor position, as a character offset into ``cell``

          Returns
          -------
          (line, offset) : (string, integer)
              The line with the current cursor (including its line terminator,
              if any), and the character offset of the start of the line.
              If the cursor is past the end of the text, ``line`` is ``""``.
          """
          offset = 0
          for line in cell.splitlines(True):
              next_offset = offset + len(line)
              if line.splitlines()[0] == line:
                  # This line has no terminator at all, which can only happen
                  # for the last line.  Treat it as if it had one so that the
                  # cursor at the end of the line still counts as being on it.
                  # Checking only for '\n' is not enough: str.splitlines also
                  # breaks on '\r', form feed, '\u2028', etc., and lines ending
                  # in those must not be widened or later offsets drift.
                  next_offset += 1
              if next_offset > cursor_pos:
                  return (line, offset)
              offset = next_offset
          # Cursor lies beyond every line (or the cell is empty).
          return ("", offset)

      def token_at_cursor(cell, cursor_pos=0):
          """Get the token at a given cursor

          Used for introspection.

          The cell is tokenized up to the cursor.  Dotted attribute access is
          merged into a single name (``a.b``), an ``=`` operator discards the most
          recently collected name, and function calls are prioritized: while the
          cursor is inside an unclosed call, the name recorded for that call is
          returned instead of the name under the cursor.

          Parameters
          ----------
          cell : unicode
              A block of Python code
          cursor_pos : int
              The location of the cursor in the block where the token should be found

          Returns
          -------
          str
              The selected (possibly dotted) name, or ``''`` if none was found.
          """
          name_stack = []
          open_calls = []
          # True when the token consumed just before the current one was a '.'.
          after_dot = False
          # Absolute character offset at which each line begins.
          line_starts = {1: 0}  # lines start at 1

          for raw in generate_tokens(StringIO(cell).readline):
              tok = Token(*raw)
              first_row, first_col = tok.start
              last_row, last_col = tok.end

              if last_row + 1 not in line_starts:
                  # keep track of offsets for each line
                  for row, text in enumerate(tok.line.splitlines(True), first_row + 1):
                      if row not in line_starts:
                          line_starts[row] = line_starts[row - 1] + len(text)

              # allow '|foo' to find 'foo' at the beginning of a line
              if first_col == 0:
                  limit = cursor_pos + 1
              else:
                  limit = cursor_pos
              if line_starts[first_row] + first_col >= limit:
                  # current token starts after the cursor,
                  # don't consume it
                  break

              if tok.token == tokenize.NAME and not iskeyword(tok.text):
                  if name_stack and after_dot:
                      # attribute access: extend the previous name
                      name_stack[-1] = "%s.%s" % (name_stack[-1], tok.text)
                  else:
                      name_stack.append(tok.text)
              elif tok.token == tokenize.OP:
                  if tok.text == '=':
                      # don't inspect the lhs of an assignment
                      if name_stack:
                          name_stack.pop()
                  elif tok.text == '(':
                      # if we are inside a function call, inspect the function
                      if name_stack:
                          open_calls.append(name_stack[-1])
                  elif tok.text == ')':
                      if open_calls:
                          open_calls.pop()

              after_dot = tok.token == tokenize.OP and tok.text == '.'

              if line_starts[last_row] + last_col > cursor_pos:
                  # we found the cursor, stop reading
                  break

          if open_calls:
              return open_calls[-1]
          if name_stack:
              return name_stack[-1]
          return ''

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Token-related utilities"""

				# Copyright (c) IPython Development Team.
				# Distributed under the terms of the Modified BSD License.

				from collections import namedtuple
				from io import StringIO
				from keyword import iskeyword

				import tokenize


				# Named view of the 5-tuples yielded by the tokenizer; fields follow the
				# tuple order: token type, token text, start (row, col), end (row, col),
				# and the source line(s) the token was found on.
				Token = namedtuple('Token', ['token', 'text', 'start', 'end', 'line'])

				def generate_tokens(readline):
				    """Yield tokens like ``tokenize.generate_tokens``, but stop quietly on EOF errors.

				    Parameters
				    ----------
				    readline : callable
				        Passed straight through to ``tokenize.generate_tokens``.

				    Yields
				    ------
				    The token tuples produced by ``tokenize.generate_tokens``.  If the
				    tokenizer raises ``tokenize.TokenError``, the generator simply ends
				    after the tokens already produced instead of propagating the error.
				    """
				    try:
				        yield from tokenize.generate_tokens(readline)
				    except tokenize.TokenError:
				        # Incomplete input (EOF inside a multi-line construct): keep
				        # whatever was tokenized so far and finish normally.
				        return

				def line_at_cursor(cell, cursor_pos=0):
				    """Return the line in a cell at a given cursor position

				    Used for calling line-based APIs that don't support multi-line input, yet.

				    Parameters
				    ----------
				    cell : str
				        multiline block of text
				    cursor_pos : integer
				        the cursor position, as a character offset into ``cell``

				    Returns
				    -------
				    (line, offset) : (string, integer)
				        The line with the current cursor (including its line terminator,
				        if any), and the character offset of the start of the line.
				        If the cursor is past the end of the text, ``line`` is ``""``.
				    """
				    offset = 0
				    for line in cell.splitlines(True):
				        next_offset = offset + len(line)
				        if line.splitlines()[0] == line:
				            # This line has no terminator at all, which can only happen
				            # for the last line.  Treat it as if it had one so that the
				            # cursor at the end of the line still counts as being on it.
				            # Checking only for '\n' is not enough: str.splitlines also
				            # breaks on '\r', form feed, '\u2028', etc., and lines ending
				            # in those must not be widened or later offsets drift.
				            next_offset += 1
				        if next_offset > cursor_pos:
				            return (line, offset)
				        offset = next_offset
				    # Cursor lies beyond every line (or the cell is empty).
				    return ("", offset)

				def token_at_cursor(cell, cursor_pos=0):
				    """Get the token at a given cursor

				    Used for introspection.

				    The cell is tokenized up to the cursor.  Dotted attribute access is
				    merged into a single name (``a.b``), an ``=`` operator discards the most
				    recently collected name, and function calls are prioritized: while the
				    cursor is inside an unclosed call, the name recorded for that call is
				    returned instead of the name under the cursor.

				    Parameters
				    ----------
				    cell : unicode
				        A block of Python code
				    cursor_pos : int
				        The location of the cursor in the block where the token should be found

				    Returns
				    -------
				    str
				        The selected (possibly dotted) name, or ``''`` if none was found.
				    """
				    names = []
				    call_names = []
				    # True when the token consumed just before the current one was a '.'.
				    after_dot = False
				    # Absolute character offset at which each line begins.
				    offsets = {1: 0}  # lines start at 1

				    for tup in generate_tokens(StringIO(cell).readline):
				        tok = Token(*tup)
				        start_line, start_col = tok.start
				        end_line, end_col = tok.end

				        if end_line + 1 not in offsets:
				            # keep track of offsets for each line
				            for lineno, text in enumerate(tok.line.splitlines(True), start_line + 1):
				                if lineno not in offsets:
				                    offsets[lineno] = offsets[lineno - 1] + len(text)

				        # allow '|foo' to find 'foo' at the beginning of a line
				        if start_col == 0:
				            boundary = cursor_pos + 1
				        else:
				            boundary = cursor_pos
				        if offsets[start_line] + start_col >= boundary:
				            # current token starts after the cursor,
				            # don't consume it
				            break

				        if tok.token == tokenize.NAME and not iskeyword(tok.text):
				            if names and after_dot:
				                # attribute access: extend the previous name
				                names[-1] = "%s.%s" % (names[-1], tok.text)
				            else:
				                names.append(tok.text)
				        elif tok.token == tokenize.OP:
				            if tok.text == '=':
				                # don't inspect the lhs of an assignment
				                if names:
				                    names.pop()
				            elif tok.text == '(':
				                # if we are inside a function call, inspect the function
				                if names:
				                    call_names.append(names[-1])
				            elif tok.text == ')':
				                if call_names:
				                    call_names.pop()

				        after_dot = tok.token == tokenize.OP and tok.text == '.'

				        if offsets[end_line] + end_col > cursor_pos:
				            # we found the cursor, stop reading
				            break

				    if call_names:
				        return call_names[-1]
				    if names:
				        return names[-1]
				    return ''