##// END OF EJS Templates
Backport PR #2738: Unicode content crashes the pager (console)...
Backport PR #2738: Unicode content crashes the pager (console) We've run into an interesting bug in the astropy project. https://github.com/astropy/astropy/issues/600 When displaying a docstring that contains Unicode and is also long enough that it gets sent to the pager it fails since the docstring can't be sent to the pager as ascii. This crashes in the middle of sending content to the pager, so the shell ends up in an inconsistent state and stops echoing the keyboard etc. The fix (attached) is merely to encode the content sent to the pager in the same encoding as the terminal (`sys.stdout.encoding`). Strictly speaking, this isn't always the right thing to do, since the pager may be configured to expect a different encoding than the terminal, but that is sort of an irrational way to configure a machine... ;) For example, `less`, in the absence of any special environment variables to tell it otherwise, uses the standard `LC*` environment variables to determine what to do, which should be the same mechanism the terminal also uses by default. If anyone can suggest a better fix, I'm all for it. Perhaps it should be configurable, defaulting to `sys.stdout.encoding`?

File last commit:

r4872:34c10438
r9853:7f9a133e
Show More
ipy_lookfor.py
234 lines | 6.9 KiB | text/x-python | PythonLexer
"""
IPython extension: %lookfor command for searching docstrings
"""
# Pauli Virtanen <pav@iki.fi>, 2008.
import re, inspect, pkgutil, pydoc
#------------------------------------------------------------------------------
# Lookfor functionality
#------------------------------------------------------------------------------
# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
# where kind: "func", "class", "module", "object"
# and index: index in breadth-first namespace traversal
_lookfor_caches = {}
# regexp whose match indicates that the string may contain a function signature
_function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)
def lookfor(what, modules=None, import_modules=True, regenerate=False):
"""
Search for objects whose documentation contains all given words.
Shows a summary of matching objects, sorted roughly by relevance.
Parameters
----------
what : str
String containing words to look for.
module : str, module
Module whose docstrings to go through.
import_modules : bool
Whether to import sub-modules in packages.
Will import only modules in __all__
regenerate: bool
Re-generate the docstring cache
"""
# Cache
cache = {}
for module in modules:
try:
c = _lookfor_generate_cache(module, import_modules, regenerate)
cache.update(c)
except ImportError:
pass
# Search
# XXX: maybe using a real stemming search engine would be better?
found = []
whats = str(what).lower().split()
if not whats: return
for name, (docstring, kind, index) in cache.iteritems():
if kind in ('module', 'object'):
# don't show modules or objects
continue
ok = True
doc = docstring.lower()
for w in whats:
if w not in doc:
ok = False
break
if ok:
found.append(name)
# Relevance sort
# XXX: this is full Harrison-Stetson heuristics now,
# XXX: it probably could be improved
kind_relevance = {'func': 1000, 'class': 1000,
'module': -1000, 'object': -1000}
def relevance(name, docstr, kind, index):
r = 0
# do the keywords occur within the start of the docstring?
first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
r += sum([200 for w in whats if w in first_doc])
# do the keywords occur in the function name?
r += sum([30 for w in whats if w in name])
# is the full name long?
r += -len(name) * 5
# is the object of bad type?
r += kind_relevance.get(kind, -1000)
# is the object deep in namespace hierarchy?
r += -name.count('.') * 10
r += max(-index / 100, -100)
return r
def relevance_sort(a, b):
dr = relevance(b, *cache[b]) - relevance(a, *cache[a])
if dr != 0: return dr
else: return cmp(a, b)
found.sort(relevance_sort)
# Pretty-print
s = "Search results for '%s'" % (' '.join(whats))
help_text = [s, "-"*len(s)]
for name in found:
doc, kind, ix = cache[name]
doclines = [line.strip() for line in doc.strip().split("\n")
if line.strip()]
# find a suitable short description
try:
first_doc = doclines[0].strip()
if _function_signature_re.search(first_doc):
first_doc = doclines[1].strip()
except IndexError:
first_doc = ""
help_text.append("%s\n %s" % (name, first_doc))
# Output
if len(help_text) > 10:
pager = pydoc.getpager()
pager("\n".join(help_text))
else:
print "\n".join(help_text)
def _lookfor_generate_cache(module, import_modules, regenerate):
"""
Generate docstring cache for given module.
Parameters
----------
module : str, None, module
Module for which to generate docstring cache
import_modules : bool
Whether to import sub-modules in packages.
Will import only modules in __all__
regenerate: bool
Re-generate the docstring cache
Returns
-------
cache : dict {obj_full_name: (docstring, kind, index), ...}
Docstring cache for the module, either cached one (regenerate=False)
or newly generated.
"""
global _lookfor_caches
if module is None:
module = "numpy"
if isinstance(module, str):
module = __import__(module)
if id(module) in _lookfor_caches and not regenerate:
return _lookfor_caches[id(module)]
# walk items and collect docstrings
cache = {}
_lookfor_caches[id(module)] = cache
seen = {}
index = 0
stack = [(module.__name__, module)]
while stack:
name, item = stack.pop(0)
if id(item) in seen: continue
seen[id(item)] = True
index += 1
kind = "object"
if inspect.ismodule(item):
kind = "module"
try:
_all = item.__all__
except AttributeError:
_all = None
# import sub-packages
if import_modules and hasattr(item, '__path__'):
for m in pkgutil.iter_modules(item.__path__):
if _all is not None and m[1] not in _all:
continue
try:
__import__("%s.%s" % (name, m[1]))
except ImportError:
continue
for n, v in inspect.getmembers(item):
if _all is not None and n not in _all:
continue
stack.append(("%s.%s" % (name, n), v))
elif inspect.isclass(item):
kind = "class"
for n, v in inspect.getmembers(item):
stack.append(("%s.%s" % (name, n), v))
elif callable(item):
kind = "func"
doc = inspect.getdoc(item)
if doc is not None:
cache[name] = (doc, kind, index)
return cache
#------------------------------------------------------------------------------
# IPython connectivity
#------------------------------------------------------------------------------
from IPython.core import ipapi
ip = ipapi.get()
_lookfor_modules = ['numpy', 'scipy']
def lookfor_f(self, arg=''):
r"""
Search for objects whose documentation contains all given words.
Shows a summary of matching objects, sorted roughly by relevance.
Usage
-----
%lookfor +numpy some words
Search module 'numpy'
%lookfor_modules numpy scipy
Set default modules whose docstrings to search
"""
lookfor(arg, modules=_lookfor_modules)
def lookfor_modules_f(self, arg=''):
global _lookfor_modules
if not arg:
print "Modules included in %lookfor search:", _lookfor_modules
else:
_lookfor_modules = arg.split()
ip.define_magic('lookfor', lookfor_f)
ip.define_magic('lookfor_modules', lookfor_modules_f)