Backport PR #2738: Unicode content crashes the pager (console)

We've run into an interesting bug in the astropy project: https://github.com/astropy/astropy/issues/600. When displaying a docstring that contains Unicode and is also long enough to be sent to the pager, the operation fails because the docstring cannot be encoded as ASCII. The crash happens in the middle of sending content to the pager, so the shell is left in an inconsistent state and stops echoing the keyboard, etc.

The fix (attached) is simply to encode the content sent to the pager in the same encoding as the terminal (`sys.stdout.encoding`). Strictly speaking, this isn't always the right thing to do, since the pager may be configured to expect a different encoding than the terminal, but that is sort of an irrational way to configure a machine... ;) For example, `less`, in the absence of any special environment variables telling it otherwise, uses the standard `LC*` environment variables to decide what to do, which should be the same mechanism the terminal uses by default.

If anyone can suggest a better fix, I'm all for it. Perhaps it should be configurable, defaulting to `sys.stdout.encoding`?
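To make the idea concrete, here is a minimal sketch of the approach (not the actual patch; `write_to_pager` and the UTF-8 fallback are assumptions for illustration): encode the text with the terminal's encoding before streaming it to the pager, so a single unencodable character cannot kill the stream halfway through.

    import sys

    def page(text, write_to_pager):
        # Hypothetical helper: `write_to_pager` feeds bytes to the pager's
        # stdin. Use the terminal's encoding; fall back to UTF-8 when Python
        # cannot determine one (e.g. stdout is not attached to a tty).
        encoding = sys.stdout.encoding or 'utf-8'
        # 'replace' substitutes unencodable characters instead of raising
        # UnicodeEncodeError mid-stream and leaving the shell in a bad state.
        write_to_pager(text.encode(encoding, 'replace'))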

File last commit: r4872:34c10438
completion_lexer.py
# System library imports
from pygments.token import Token, is_token_subtype


class CompletionLexer(object):
    """ Uses Pygments and some auxiliary information to lex code snippets for
        symbol contexts.
    """

    # Maps lexer names to a list of possible name separators.
    separator_map = { 'C'      : [ '.', '->' ],
                      'C++'    : [ '.', '->', '::' ],
                      'Python' : [ '.' ] }

    def __init__(self, lexer):
        """ Create a CompletionLexer using the specified Pygments lexer.
        """
        self.lexer = lexer

    def get_context(self, string):
        """ Assuming the cursor is at the end of the specified string, get the
            context (a list of names) for the symbol at the cursor position.
        """
        context = []
        reversed_tokens = list(self._lexer.get_tokens(string))
        reversed_tokens.reverse()

        # Pygments often tacks on a newline when none is specified in the
        # input. Remove this newline.
        if reversed_tokens and reversed_tokens[0][1].endswith('\n') and \
                not string.endswith('\n'):
            reversed_tokens.pop(0)

        current_op = ''
        for token, text in reversed_tokens:

            if is_token_subtype(token, Token.Name):

                # Handle a trailing separator, e.g. 'foo.bar.'
                if current_op in self._name_separators:
                    if not context:
                        context.insert(0, '')

                # Handle non-separator operators and punctuation.
                elif current_op:
                    break

                context.insert(0, text)
                current_op = ''

            # Pygments doesn't understand that, e.g., '->' is a single operator
            # in C++. This is why we have to build up an operator from
            # potentially several tokens.
            elif token is Token.Operator or token is Token.Punctuation:
                current_op = text + current_op

            # Break on anything that is not an Operator, Punctuation, or Name.
            else:
                break

        return context

    def get_lexer(self):
        return self._lexer

    def set_lexer(self, lexer, name_separators=None):
        self._lexer = lexer
        if name_separators is None:
            self._name_separators = self.separator_map.get(lexer.name, ['.'])
        else:
            self._name_separators = list(name_separators)

    lexer = property(get_lexer, set_lexer)
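For illustration, a quick usage sketch (assuming Pygments is installed; the expected outputs follow from the token loop above):

    from pygments.lexers import CppLexer, PythonLexer

    py = CompletionLexer(PythonLexer())
    print(py.get_context('foo.bar.ba'))   # ['foo', 'bar', 'ba']
    print(py.get_context('foo.bar.'))     # ['foo', 'bar', ''] -- trailing separator

    cpp = CompletionLexer(CppLexer())
    print(cpp.get_context('obj->field'))  # ['obj', 'field'] -- '->' built up
                                          # from two Operator tokens

Note the `lexer` property at the end: assigning to `.lexer` runs `set_lexer`, which also refreshes the separator list from `separator_map`, so swapping lexers at runtime keeps the name separators in sync automatically.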