upstream/ipython Files · IPython/quarantine/ipy_lookfor.py

Backport PR : Unicode content crashes the pager (console)...

Backport PR : Unicode content crashes the pager (console) We've run into an interesting bug in the astropy project. https://github.com/astropy/astropy/issues/600 When displaying a docstring that contains Unicode and is also long enough that it gets sent to the pager it fails since the docstring can't be sent to the pager as ascii. This crashes in the middle of sending content to the pager, so the shell ends up in an inconsistent state and stops echoing the keyboard etc. The fix (attached) is merely to encode the content sent to the pager in the same encoding as the terminal (`sys.stdout.encoding`). Strictly speaking, this isn't always the right thing to do, since the pager may be configured to expect a different encoding than the terminal, but that is sort of an irrational way to configure a machine... ;) For example, `less`, in the absence of any special environment variables to tell it otherwise, uses the standard `LC*` environment variables to determine what to do, which should be the same mechanism the terminal also uses by default. If anyone can suggest a better fix, I'm all for it. Perhaps it should be configurable, defaulting to `sys.stdout.encoding`?

Bernardo B. Marques - - Load All Authors

File last commit:

r4872:34c10438


                r9853:7f9a133e

Download file

             ipy_lookfor.py
        
                    234 lines
            
             | 6.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / quarantine / ipy_lookfor.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """

      IPython extension: %lookfor command for searching docstrings

      """

      # Pauli Virtanen <pav@iki.fi>, 2008.

      import re, inspect, pkgutil, pydoc

      #------------------------------------------------------------------------------

      # Lookfor functionality

      #------------------------------------------------------------------------------

      # Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}

      # where kind: "func", "class", "module", "object"

      # and index: index in breadth-first namespace traversal

      _lookfor_caches = {}

      # regexp whose match indicates that the string may contain a function signature

      _function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)

      def lookfor(what, modules=None, import_modules=True, regenerate=False):
          """
          Search for objects whose documentation contains all given words.
          Shows a summary of matching objects, sorted roughly by relevance.

          Parameters
          ----------
          what : str
              String containing words to look for.  Matching is
              case-insensitive and every word must occur in the docstring.
          modules : None, str, module, or sequence of str / module
              Module(s) whose docstrings to go through.  A single name or
              module is accepted as well as a sequence of them; ``None`` is
              handed on to `_lookfor_generate_cache`, which picks its default.
          import_modules : bool
              Whether to import sub-modules in packages.
              Will import only modules in __all__
          regenerate: bool
              Re-generate the docstring cache

          Returns
          -------
          None
              The summary is printed, or sent to the pydoc pager when the
              listing (two header lines included) has more than 10 items.
          """
          # Normalise ``modules`` to a sequence.  ``None`` used to raise a
          # TypeError and a bare string used to be iterated character by
          # character.
          if modules is None:
              modules = [None]
          elif isinstance(modules, str) or inspect.ismodule(modules):
              modules = [modules]

          # Cache
          cache = {}
          for module in modules:
              try:
                  cache.update(
                      _lookfor_generate_cache(module, import_modules, regenerate))
              except ImportError:
                  # Best effort: modules that cannot be imported are skipped.
                  pass

          # Search
          # XXX: maybe using a real stemming search engine would be better?
          whats = str(what).lower().split()
          if not whats:
              return

          found = []
          for name, (docstring, kind, index) in cache.items():
              if kind in ('module', 'object'):
                  # don't show modules or objects
                  continue
              doc = docstring.lower()
              if all(w in doc for w in whats):
                  found.append(name)

          # Relevance sort
          # XXX: this is full Harrison-Stetson heuristics now,
          # XXX: it probably could be improved
          kind_relevance = {'func': 1000, 'class': 1000,
                            'module': -1000, 'object': -1000}

          def relevance(name, docstr, kind, index):
              r = 0
              # do the keywords occur within the start of the docstring?
              first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
              r += sum([200 for w in whats if w in first_doc])
              # do the keywords occur in the function name?  The keywords are
              # lower-case, so the name has to be compared lower-cased too.
              lowered_name = name.lower()
              r += sum([30 for w in whats if w in lowered_name])
              # is the full name long?
              r += -len(name) * 5
              # is the object of bad type?
              r += kind_relevance.get(kind, -1000)
              # is the object deep in namespace hierarchy?
              r += -name.count('.') * 10
              # floor division gives the same integer result on Python 2 and 3
              r += max(-index // 100, -100)
              return r

          # Most relevant first, ties broken alphabetically.  A key function
          # evaluates the relevance once per name and works on Python 2 and 3.
          found.sort(key=lambda name: (-relevance(name, *cache[name]), name))

          # Pretty-print
          s = "Search results for '%s'" % (' '.join(whats))
          help_text = [s, "-"*len(s)]
          for name in found:
              doc, kind, ix = cache[name]
              doclines = [line.strip() for line in doc.strip().split("\n")
                          if line.strip()]
              # find a suitable short description: skip a leading signature line
              try:
                  first_doc = doclines[0].strip()
                  if _function_signature_re.search(first_doc):
                      first_doc = doclines[1].strip()
              except IndexError:
                  first_doc = ""
              help_text.append("%s\n    %s" % (name, first_doc))

          # Output
          text = "\n".join(help_text)
          if len(help_text) > 10:
              pager = pydoc.getpager()
              pager(text)
          else:
              print(text)

      def _lookfor_generate_cache(module, import_modules, regenerate):

          """

          Generate docstring cache for given module.

          Parameters

          ----------

          module : str, None, module

              Module for which to generate docstring cache

          import_modules : bool

              Whether to import sub-modules in packages.

              Will import only modules in __all__

          regenerate: bool

              Re-generate the docstring cache

          Returns

          -------

          cache : dict {obj_full_name: (docstring, kind, index), ...}

              Docstring cache for the module, either cached one (regenerate=False)

              or newly generated.

          """

          global _lookfor_caches

          if module is None:

              module = "numpy"

          if isinstance(module, str):

              module = __import__(module)

          if id(module) in _lookfor_caches and not regenerate:

              return _lookfor_caches[id(module)]

          # walk items and collect docstrings

          cache = {}

          _lookfor_caches[id(module)] = cache

          seen = {}

          index = 0

          stack = [(module.__name__, module)]

          while stack:

              name, item = stack.pop(0)

              if id(item) in seen: continue

              seen[id(item)] = True

              index += 1

              kind = "object"

              if inspect.ismodule(item):

                  kind = "module"

                  try:

                      _all = item.__all__

                  except AttributeError:

                      _all = None

                  # import sub-packages

                  if import_modules and hasattr(item, '__path__'):

                      for m in pkgutil.iter_modules(item.__path__):

                          if _all is not None and m[1] not in _all:

                              continue

                          try:

                              __import__("%s.%s" % (name, m[1]))

                          except ImportError:

                              continue

                  for n, v in inspect.getmembers(item):

                      if _all is not None and n not in _all:

                          continue

                      stack.append(("%s.%s" % (name, n), v))

              elif inspect.isclass(item):

                  kind = "class"

                  for n, v in inspect.getmembers(item):

                      stack.append(("%s.%s" % (name, n), v))

              elif callable(item):

                  kind = "func"

              doc = inspect.getdoc(item)

              if doc is not None:

                  cache[name] = (doc, kind, index)

          return cache

      #------------------------------------------------------------------------------

      # IPython connectivity

      #------------------------------------------------------------------------------

      from IPython.core import ipapi

      # Object returned by ipapi.get(); used at the end of this file to register
      # the magics.  NOTE(review): presumably the active IPython API handle --
      # confirm against IPython.core.ipapi.
      ip = ipapi.get()

      # Default modules searched by %lookfor.  Passed to lookfor() by lookfor_f
      # and replaced by lookfor_modules_f.
      _lookfor_modules = ['numpy', 'scipy']

      def lookfor_f(self, arg=''):
          r"""
          Search for objects whose documentation contains all given words.
          Shows a summary of matching objects, sorted roughly by relevance.

          Usage
          -----
          %lookfor +numpy  some words
             Search module 'numpy'

          %lookfor_modules numpy scipy
             Set default modules whose docstrings to search

          """
          # Tokens of the form "+name" select the modules to search; every other
          # token is a search word.  Without any "+name" token the default
          # module list (see %lookfor_modules) is used.
          modules = []
          words = []
          for token in arg.split():
              if token.startswith('+') and len(token) > 1:
                  modules.append(token[1:])
              else:
                  words.append(token)
          lookfor(' '.join(words), modules=modules or _lookfor_modules)

      def lookfor_modules_f(self, arg=''):
          """
          Show or set the default modules searched by %lookfor.

          With an empty (or whitespace-only) `arg` the current module list is
          printed.  Otherwise `arg` is split on whitespace and the resulting
          names replace the list.
          """
          global _lookfor_modules
          names = arg.split()
          if not names:
              # Single-argument print() behaves the same on Python 2 and 3.
              # Concatenation is used because "%l" in the message would be
              # parsed as a conversion by %-formatting.
              print("Modules included in %lookfor search: " + str(_lookfor_modules))
          else:
              _lookfor_modules = names

      # Register the two functions above with IPython under the magic names
      # 'lookfor' and 'lookfor_modules'.
      ip.define_magic('lookfor', lookfor_f)

      ip.define_magic('lookfor_modules', lookfor_modules_f)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""
				IPython extension: %lookfor command for searching docstrings

				"""
				# Pauli Virtanen <pav@iki.fi>, 2008.

				import re, inspect, pkgutil, pydoc

				#------------------------------------------------------------------------------
				# Lookfor functionality
				#------------------------------------------------------------------------------

				# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
				# where kind: "func", "class", "module", "object"
				# and index: index in breadth-first namespace traversal
				_lookfor_caches = {}

				# regexp whose match indicates that the string may contain a function signature
				_function_signature_re = re.compile(r"[a-z_]+\(.[,=].\)", re.I)

				def lookfor(what, modules=None, import_modules=True, regenerate=False):
				    """
				    Search for objects whose documentation contains all given words.
				    Shows a summary of matching objects, sorted roughly by relevance.

				    Parameters
				    ----------
				    what : str
				        String containing words to look for.  Matching is
				        case-insensitive and every word must occur in the docstring.
				    modules : None, str, module, or sequence of str / module
				        Module(s) whose docstrings to go through.  A single name or
				        module is accepted as well as a sequence of them; ``None`` is
				        handed on to `_lookfor_generate_cache`, which picks its default.
				    import_modules : bool
				        Whether to import sub-modules in packages.
				        Will import only modules in __all__
				    regenerate: bool
				        Re-generate the docstring cache

				    Returns
				    -------
				    None
				        The summary is printed, or sent to the pydoc pager when the
				        listing (two header lines included) has more than 10 items.
				    """
				    # Normalise ``modules`` to a sequence.  ``None`` used to raise a
				    # TypeError and a bare string used to be iterated character by
				    # character.
				    if modules is None:
				        modules = [None]
				    elif isinstance(modules, str) or inspect.ismodule(modules):
				        modules = [modules]

				    # Cache
				    cache = {}
				    for module in modules:
				        try:
				            cache.update(
				                _lookfor_generate_cache(module, import_modules, regenerate))
				        except ImportError:
				            # Best effort: modules that cannot be imported are skipped.
				            pass

				    # Search
				    # XXX: maybe using a real stemming search engine would be better?
				    whats = str(what).lower().split()
				    if not whats:
				        return

				    found = []
				    for name, (docstring, kind, index) in cache.items():
				        if kind in ('module', 'object'):
				            # don't show modules or objects
				            continue
				        doc = docstring.lower()
				        if all(w in doc for w in whats):
				            found.append(name)

				    # Relevance sort
				    # XXX: this is full Harrison-Stetson heuristics now,
				    # XXX: it probably could be improved
				    kind_relevance = {'func': 1000, 'class': 1000,
				                      'module': -1000, 'object': -1000}

				    def relevance(name, docstr, kind, index):
				        r = 0
				        # do the keywords occur within the start of the docstring?
				        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
				        r += sum([200 for w in whats if w in first_doc])
				        # do the keywords occur in the function name?  The keywords are
				        # lower-case, so the name has to be compared lower-cased too.
				        lowered_name = name.lower()
				        r += sum([30 for w in whats if w in lowered_name])
				        # is the full name long?
				        r += -len(name) * 5
				        # is the object of bad type?
				        r += kind_relevance.get(kind, -1000)
				        # is the object deep in namespace hierarchy?
				        r += -name.count('.') * 10
				        # floor division gives the same integer result on Python 2 and 3
				        r += max(-index // 100, -100)
				        return r

				    # Most relevant first, ties broken alphabetically.  The cache entry
				    # is a (docstring, kind, index) tuple and must be star-unpacked into
				    # relevance().
				    found.sort(key=lambda name: (-relevance(name, *cache[name]), name))

				    # Pretty-print
				    s = "Search results for '%s'" % (' '.join(whats))
				    help_text = [s, "-"*len(s)]
				    for name in found:
				        doc, kind, ix = cache[name]
				        doclines = [line.strip() for line in doc.strip().split("\n")
				                    if line.strip()]
				        # find a suitable short description: skip a leading signature line
				        try:
				            first_doc = doclines[0].strip()
				            if _function_signature_re.search(first_doc):
				                first_doc = doclines[1].strip()
				        except IndexError:
				            first_doc = ""
				        help_text.append("%s\n    %s" % (name, first_doc))

				    # Output
				    text = "\n".join(help_text)
				    if len(help_text) > 10:
				        pager = pydoc.getpager()
				        pager(text)
				    else:
				        print(text)

				def _lookfor_generate_cache(module, import_modules, regenerate):
				"""
				Generate docstring cache for given module.

				Parameters
				----------
				module : str, None, module
				Module for which to generate docstring cache
				import_modules : bool
				Whether to import sub-modules in packages.
				Will import only modules in __all__
				regenerate: bool
				Re-generate the docstring cache

				Returns
				-------
				cache : dict {obj_full_name: (docstring, kind, index), ...}
				Docstring cache for the module, either cached one (regenerate=False)
				or newly generated.

				"""
				global _lookfor_caches

				if module is None:
				module = "numpy"

				if isinstance(module, str):
				module = __import__(module)

				if id(module) in _lookfor_caches and not regenerate:
				return _lookfor_caches[id(module)]

				# walk items and collect docstrings
				cache = {}
				_lookfor_caches[id(module)] = cache
				seen = {}
				index = 0
				stack = [(module.__name__, module)]
				while stack:
				name, item = stack.pop(0)
				if id(item) in seen: continue
				seen[id(item)] = True

				index += 1
				kind = "object"

				if inspect.ismodule(item):
				kind = "module"
				try:
				_all = item.__all__
				except AttributeError:
				_all = None
				# import sub-packages
				if import_modules and hasattr(item, '__path__'):
				for m in pkgutil.iter_modules(item.__path__):
				if _all is not None and m[1] not in _all:
				continue
				try:
				__import__("%s.%s" % (name, m[1]))
				except ImportError:
				continue
				for n, v in inspect.getmembers(item):
				if _all is not None and n not in _all:
				continue
				stack.append(("%s.%s" % (name, n), v))
				elif inspect.isclass(item):
				kind = "class"
				for n, v in inspect.getmembers(item):
				stack.append(("%s.%s" % (name, n), v))
				elif callable(item):
				kind = "func"

				doc = inspect.getdoc(item)
				if doc is not None:
				cache[name] = (doc, kind, index)

				return cache

				#------------------------------------------------------------------------------
				# IPython connectivity
				#------------------------------------------------------------------------------

				from IPython.core import ipapi
				# Object returned by ipapi.get(); used at the end of this file to register
				# the magics.  NOTE(review): presumably the active IPython API handle --
				# confirm against IPython.core.ipapi.
				ip = ipapi.get()

				# Default modules searched by %lookfor.  Passed to lookfor() by lookfor_f
				# and replaced by lookfor_modules_f.
				_lookfor_modules = ['numpy', 'scipy']

				def lookfor_f(self, arg=''):
				    r"""
				    Search for objects whose documentation contains all given words.
				    Shows a summary of matching objects, sorted roughly by relevance.

				    Usage
				    -----
				    %lookfor +numpy some words
				       Search module 'numpy'

				    %lookfor_modules numpy scipy
				       Set default modules whose docstrings to search

				    """
				    # Tokens of the form "+name" select the modules to search; every other
				    # token is a search word.  Without any "+name" token the default
				    # module list (see %lookfor_modules) is used.
				    modules = []
				    words = []
				    for token in arg.split():
				        if token.startswith('+') and len(token) > 1:
				            modules.append(token[1:])
				        else:
				            words.append(token)
				    lookfor(' '.join(words), modules=modules or _lookfor_modules)

				def lookfor_modules_f(self, arg=''):
				    """
				    Show or set the default modules searched by %lookfor.

				    With an empty (or whitespace-only) `arg` the current module list is
				    printed.  Otherwise `arg` is split on whitespace and the resulting
				    names replace the list.
				    """
				    global _lookfor_modules
				    names = arg.split()
				    if not names:
				        # Single-argument print() behaves the same on Python 2 and 3.
				        # Concatenation is used because "%l" in the message would be
				        # parsed as a conversion by %-formatting.
				        print("Modules included in %lookfor search: " + str(_lookfor_modules))
				    else:
				        _lookfor_modules = names

				# Register the two functions above with IPython under the magic names
				# 'lookfor' and 'lookfor_modules'.
				ip.define_magic('lookfor', lookfor_f)
				ip.define_magic('lookfor_modules', lookfor_modules_f)