upstream/ipython Files · IPython/extensions/ipy_lookfor.py

A few more small traitlets features.

Brian Granger - - Load All Authors

File last commit:

r2063:9650bd9e


                r2179:aa3d0030

Download file

             ipy_lookfor.py
        
                    234 lines
            
             | 7.0 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / extensions / ipy_lookfor.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Ville M. Vainio
    
add ipy_lookfor. closes #245

              r1164
            
      """

      IPython extension: %lookfor command for searching docstrings

      """

      # Pauli Virtanen <pav@iki.fi>, 2008.

      import re, inspect, pkgutil, pydoc

      #------------------------------------------------------------------------------

      # Lookfor functionality

      #------------------------------------------------------------------------------

      # Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
      # where kind: "func", "class", "module", "object"
      # and index: index in breadth-first namespace traversal
      # Filled and consulted by _lookfor_generate_cache(); keyed by id(module),
      # so an entry is only meaningful while that module object stays alive.
      _lookfor_caches = {}

      # regexp whose match indicates that the string may contain a function signature:
      # an identifier, then a parenthesised part containing a ',' or '='.
      # lookfor() uses it to skip a signature-looking first docstring line when
      # picking the one-line summary to display.
      _function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)

      def lookfor(what, modules=None, import_modules=True, regenerate=False):
          """
          Search for objects whose documentation contains all given words.

          Shows a summary of matching objects, sorted roughly by relevance.
          Only functions and classes are listed; modules and plain objects
          are collected in the cache but never shown.

          Parameters
          ----------
          what : str
              String containing words to look for.  Words are split on
              whitespace and matched as case-insensitive substrings; every
              word must occur in the docstring.
          modules : str, module, sequence of str/module, or None
              Module(s) whose docstrings to go through.  A single name or
              module object is accepted as well as a sequence.  None means
              "the default module", which _lookfor_generate_cache() resolves
              to numpy.
          import_modules : bool
              Whether to import sub-modules in packages.
              Will import only modules in __all__
          regenerate : bool
              Re-generate the docstring cache

          Returns
          -------
          None
              The result list is printed, or handed to the pydoc pager when
              it is longer than ten lines.  Nothing is printed for an empty
              query.
          """
          whats = str(what).lower().split()
          if not whats:
              # Nothing to search for: return before importing any module.
              return

          # Normalise `modules` to something iterable.  The default used to be
          # iterated directly, which raised TypeError for modules=None, and a
          # bare string would have been walked character by character.
          if modules is None:
              modules = [None]
          elif isinstance(modules, str) or inspect.ismodule(modules):
              modules = [modules]

          # Cache
          cache = {}
          for module in modules:
              try:
                  cache.update(
                      _lookfor_generate_cache(module, import_modules, regenerate))
              except ImportError:
                  # Deliberate best effort: a module that is not installed
                  # (e.g. scipy) is simply left out of the search.
                  pass

          # Search
          # XXX: maybe using a real stemming search engine would be better?
          found = []
          for name, (docstring, kind, index) in cache.items():
              if kind in ('module', 'object'):
                  # don't show modules or objects
                  continue
              doc = docstring.lower()
              for w in whats:
                  if w not in doc:
                      break
              else:
                  found.append(name)

          # Relevance sort
          # XXX: this is full Harrison-Stetson heuristics now,
          # XXX: it probably could be improved
          kind_relevance = {'func': 1000, 'class': 1000,
                            'module': -1000, 'object': -1000}

          def relevance(name, docstr, kind, index):
              """Heuristic score for one cache entry; higher sorts first."""
              r = 0
              # do the keywords occur within the start of the docstring?
              first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
              r += sum([200 for w in whats if w in first_doc])
              # do the keywords occur in the function name?  The keywords are
              # lower-cased, so compare against the lower-cased name as well.
              lowered_name = name.lower()
              r += sum([30 for w in whats if w in lowered_name])
              # is the full name long?
              r += -len(name) * 5
              # is the object of bad type?
              r += kind_relevance.get(kind, -1000)
              # is the object deep in namespace hierarchy?
              r += -name.count('.') * 10
              # late discovery in the traversal costs a little, capped at -100
              # (explicit floor division keeps the score an integer)
              r += max(-index // 100, -100)
              return r

          # Highest relevance first, ties broken alphabetically.  A key function
          # scores each name once instead of once per comparison.
          found.sort(key=lambda n: (-relevance(n, *cache[n]), n))

          # Pretty-print
          s = "Search results for '%s'" % (' '.join(whats))
          help_text = [s, "-"*len(s)]
          for name in found:
              doc, kind, ix = cache[name]
              doclines = [line.strip() for line in doc.strip().split("\n")
                          if line.strip()]
              # find a suitable short description
              try:
                  first_doc = doclines[0].strip()
                  if _function_signature_re.search(first_doc):
                      # first line looks like a signature; use the next one
                      first_doc = doclines[1].strip()
              except IndexError:
                  first_doc = ""
              help_text.append("%s\n    %s" % (name, first_doc))

          # Output
          if len(help_text) > 10:
              pager = pydoc.getpager()
              pager("\n".join(help_text))
          else:
              print("\n".join(help_text))

      def _lookfor_generate_cache(module, import_modules, regenerate):
          """
          Generate docstring cache for given module.

          Walks the module breadth-first: members of modules (restricted to
          __all__ when it is defined) and members of classes are visited, and
          every visited item that has a docstring gets a cache entry.

          Parameters
          ----------
          module : str, None, module
              Module for which to generate docstring cache.  None means
              "numpy".  A dotted name such as "package.sub" refers to the
              sub-module itself.
          import_modules : bool
              Whether to import sub-modules in packages.
              Will import only modules in __all__
          regenerate : bool
              Re-generate the docstring cache

          Returns
          -------
          cache : dict {obj_full_name: (docstring, kind, index), ...}
              Docstring cache for the module, either cached one (regenerate=False)
              or newly generated.

          Raises
          ------
          ImportError
              If `module` is given by name and cannot be imported.
          """
          import sys
          from collections import deque

          if module is None:
              module = "numpy"

          if isinstance(module, str):
              # __import__("a.b") returns the top-level package "a"; look the
              # requested module up in sys.modules after importing it.
              __import__(module)
              module = sys.modules[module]

          if id(module) in _lookfor_caches and not regenerate:
              return _lookfor_caches[id(module)]

          # walk items and collect docstrings
          cache = {}
          # id(item) -> item.  Holding the item keeps it alive for the whole
          # walk, so its id cannot be recycled by a later temporary object and
          # cause that object to be skipped as "already seen".
          seen = {}
          index = 0
          queue = deque([(module.__name__, module)])
          while queue:
              name, item = queue.popleft()
              if id(item) in seen:
                  continue
              seen[id(item)] = item

              index += 1
              kind = "object"

              if inspect.ismodule(item):
                  kind = "module"
                  _all = getattr(item, '__all__', None)

                  # import sub-packages
                  if import_modules and hasattr(item, '__path__'):
                      for m in pkgutil.iter_modules(item.__path__):
                          if _all is not None and m[1] not in _all:
                              continue
                          try:
                              __import__("%s.%s" % (name, m[1]))
                          except ImportError:
                              # an unimportable sub-module is just not indexed
                              continue

                  for n, v in inspect.getmembers(item):
                      if _all is not None and n not in _all:
                          continue
                      queue.append(("%s.%s" % (name, n), v))
              elif inspect.isclass(item):
                  kind = "class"
                  for n, v in inspect.getmembers(item):
                      queue.append(("%s.%s" % (name, n), v))
              elif callable(item):
                  kind = "func"

              doc = inspect.getdoc(item)
              if doc is not None:
                  cache[name] = (doc, kind, index)

          # Register only a completed walk, so an exception part-way through
          # cannot leave a truncated cache behind for later calls.
          _lookfor_caches[id(module)] = cache
          return cache

      #------------------------------------------------------------------------------

      # IPython connectivity

      #------------------------------------------------------------------------------

        Brian Granger
    
ipapi.py => core/ipapi.py and imports updated.

              r2027
            
      from IPython.core import ipapi

      # API handle used at the end of this file to register the magics.
      # NOTE(review): presumably the running IPython instance's API object --
      # confirm against IPython.core.ipapi.
      ip = ipapi.get()

        Ville M. Vainio
    
add ipy_lookfor. closes #245

              r1164
            
      # Default module names handed to lookfor() by the %lookfor magic;
      # replaced as a whole by "%lookfor_modules name1 name2 ...".
      _lookfor_modules = ['numpy', 'scipy']

      def lookfor_f(self, arg=''):
          r"""
          Search for objects whose documentation contains all given words.

          Shows a summary of matching objects, sorted roughly by relevance.

          Usage
          -----
          %lookfor some words
              Search the default modules (see %lookfor_modules)

          %lookfor +numpy  some words
              Search module 'numpy' only.  Several +module arguments may be
              given; they may appear anywhere on the line.

          %lookfor_modules numpy scipy
              Set default modules whose docstrings to search

          """
          words = []
          modules = []
          for token in arg.split():
              # "+name" selects a module; a lone "+" is an ordinary search word
              if token.startswith('+') and len(token) > 1:
                  modules.append(token[1:])
              else:
                  words.append(token)

          # Fall back to the configured defaults when no +module was given.
          lookfor(' '.join(words), modules=modules or _lookfor_modules)

      def lookfor_modules_f(self, arg=''):
          """
          Show or set the default modules searched by %lookfor.

          With an argument, the whitespace-separated names in `arg` replace
          the current list.  Without one, the current list is printed.
          """
          global _lookfor_modules
          if arg:
              _lookfor_modules = arg.split()
              return
          print("Modules included in %lookfor search: " + str(_lookfor_modules))

      # Register the two functions above under the magic names used in their
      # usage text: %lookfor and %lookfor_modules.
      ip.expose_magic('lookfor', lookfor_f)

      ip.expose_magic('lookfor_modules', lookfor_modules_f)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Ville M. Vainio add ipy_lookfor. closes #245	r1164	"""
		IPython extension: %lookfor command for searching docstrings

		"""
		# Pauli Virtanen <pav@iki.fi>, 2008.

		import re, inspect, pkgutil, pydoc

		#------------------------------------------------------------------------------
		# Lookfor functionality
		#------------------------------------------------------------------------------

		# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
		# where kind: "func", "class", "module", "object"
		# and index: index in breadth-first namespace traversal
		# Filled and consulted by _lookfor_generate_cache(); keyed by id(module).
		_lookfor_caches = {}

		# regexp whose match indicates that the string may contain a function signature:
		# an identifier, then a parenthesised part containing a ',' or '='.
		# The ".*" quantifiers are required; without them only a three-character
		# argument list such as "(a,b)" would match.
		_function_signature_re = re.compile(r"[a-z_]+\(.*[,=].*\)", re.I)

		def lookfor(what, modules=None, import_modules=True, regenerate=False):
		    """
		    Search for objects whose documentation contains all given words.

		    Shows a summary of matching objects, sorted roughly by relevance.
		    Only functions and classes are listed; modules and plain objects
		    are collected in the cache but never shown.

		    Parameters
		    ----------
		    what : str
		        String containing words to look for.  Words are split on
		        whitespace and matched as case-insensitive substrings; every
		        word must occur in the docstring.
		    modules : str, module, sequence of str/module, or None
		        Module(s) whose docstrings to go through.  A single name or
		        module object is accepted as well as a sequence.  None means
		        "the default module", which _lookfor_generate_cache() resolves
		        to numpy.
		    import_modules : bool
		        Whether to import sub-modules in packages.
		        Will import only modules in __all__
		    regenerate : bool
		        Re-generate the docstring cache

		    Returns
		    -------
		    None
		        The result list is printed, or handed to the pydoc pager when
		        it is longer than ten lines.  Nothing is printed for an empty
		        query.
		    """
		    whats = str(what).lower().split()
		    if not whats:
		        # Nothing to search for: return before importing any module.
		        return

		    # Normalise `modules` to something iterable.  The default used to be
		    # iterated directly, which raised TypeError for modules=None, and a
		    # bare string would have been walked character by character.
		    if modules is None:
		        modules = [None]
		    elif isinstance(modules, str) or inspect.ismodule(modules):
		        modules = [modules]

		    # Cache
		    cache = {}
		    for module in modules:
		        try:
		            cache.update(
		                _lookfor_generate_cache(module, import_modules, regenerate))
		        except ImportError:
		            # Deliberate best effort: a module that is not installed
		            # (e.g. scipy) is simply left out of the search.
		            pass

		    # Search
		    # XXX: maybe using a real stemming search engine would be better?
		    found = []
		    for name, (docstring, kind, index) in cache.items():
		        if kind in ('module', 'object'):
		            # don't show modules or objects
		            continue
		        doc = docstring.lower()
		        for w in whats:
		            if w not in doc:
		                break
		        else:
		            found.append(name)

		    # Relevance sort
		    # XXX: this is full Harrison-Stetson heuristics now,
		    # XXX: it probably could be improved
		    kind_relevance = {'func': 1000, 'class': 1000,
		                      'module': -1000, 'object': -1000}

		    def relevance(name, docstr, kind, index):
		        """Heuristic score for one cache entry; higher sorts first."""
		        r = 0
		        # do the keywords occur within the start of the docstring?
		        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
		        r += sum([200 for w in whats if w in first_doc])
		        # do the keywords occur in the function name?  The keywords are
		        # lower-cased, so compare against the lower-cased name as well.
		        lowered_name = name.lower()
		        r += sum([30 for w in whats if w in lowered_name])
		        # is the full name long?
		        r += -len(name) * 5
		        # is the object of bad type?
		        r += kind_relevance.get(kind, -1000)
		        # is the object deep in namespace hierarchy?
		        r += -name.count('.') * 10
		        # late discovery in the traversal costs a little, capped at -100
		        # (explicit floor division keeps the score an integer)
		        r += max(-index // 100, -100)
		        return r

		    # Highest relevance first, ties broken alphabetically.  The cache
		    # tuple is unpacked into relevance()'s (docstr, kind, index) arguments.
		    found.sort(key=lambda n: (-relevance(n, *cache[n]), n))

		    # Pretty-print
		    s = "Search results for '%s'" % (' '.join(whats))
		    help_text = [s, "-"*len(s)]
		    for name in found:
		        doc, kind, ix = cache[name]
		        doclines = [line.strip() for line in doc.strip().split("\n")
		                    if line.strip()]
		        # find a suitable short description
		        try:
		            first_doc = doclines[0].strip()
		            if _function_signature_re.search(first_doc):
		                # first line looks like a signature; use the next one
		                first_doc = doclines[1].strip()
		        except IndexError:
		            first_doc = ""
		        help_text.append("%s\n    %s" % (name, first_doc))

		    # Output
		    if len(help_text) > 10:
		        pager = pydoc.getpager()
		        pager("\n".join(help_text))
		    else:
		        print("\n".join(help_text))

		def _lookfor_generate_cache(module, import_modules, regenerate):
		    """
		    Generate docstring cache for given module.

		    Walks the module breadth-first: members of modules (restricted to
		    __all__ when it is defined) and members of classes are visited, and
		    every visited item that has a docstring gets a cache entry.

		    Parameters
		    ----------
		    module : str, None, module
		        Module for which to generate docstring cache.  None means
		        "numpy".  A dotted name such as "package.sub" refers to the
		        sub-module itself.
		    import_modules : bool
		        Whether to import sub-modules in packages.
		        Will import only modules in __all__
		    regenerate : bool
		        Re-generate the docstring cache

		    Returns
		    -------
		    cache : dict {obj_full_name: (docstring, kind, index), ...}
		        Docstring cache for the module, either cached one (regenerate=False)
		        or newly generated.

		    Raises
		    ------
		    ImportError
		        If `module` is given by name and cannot be imported.
		    """
		    import sys
		    from collections import deque

		    if module is None:
		        module = "numpy"

		    if isinstance(module, str):
		        # __import__("a.b") returns the top-level package "a"; look the
		        # requested module up in sys.modules after importing it.
		        __import__(module)
		        module = sys.modules[module]

		    if id(module) in _lookfor_caches and not regenerate:
		        return _lookfor_caches[id(module)]

		    # walk items and collect docstrings
		    cache = {}
		    # id(item) -> item.  Holding the item keeps it alive for the whole
		    # walk, so its id cannot be recycled by a later temporary object.
		    seen = {}
		    index = 0
		    queue = deque([(module.__name__, module)])
		    while queue:
		        name, item = queue.popleft()
		        if id(item) in seen:
		            continue
		        seen[id(item)] = item

		        index += 1
		        kind = "object"

		        if inspect.ismodule(item):
		            kind = "module"
		            _all = getattr(item, '__all__', None)

		            # import sub-packages
		            if import_modules and hasattr(item, '__path__'):
		                for m in pkgutil.iter_modules(item.__path__):
		                    if _all is not None and m[1] not in _all:
		                        continue
		                    try:
		                        __import__("%s.%s" % (name, m[1]))
		                    except ImportError:
		                        # an unimportable sub-module is just not indexed
		                        continue

		            for n, v in inspect.getmembers(item):
		                if _all is not None and n not in _all:
		                    continue
		                queue.append(("%s.%s" % (name, n), v))
		        elif inspect.isclass(item):
		            kind = "class"
		            for n, v in inspect.getmembers(item):
		                queue.append(("%s.%s" % (name, n), v))
		        elif callable(item):
		            kind = "func"

		        doc = inspect.getdoc(item)
		        if doc is not None:
		            cache[name] = (doc, kind, index)

		    # Register only a completed walk, so an exception part-way through
		    # cannot leave a truncated cache behind for later calls.
		    _lookfor_caches[id(module)] = cache
		    return cache

		#------------------------------------------------------------------------------
		# IPython connectivity
		#------------------------------------------------------------------------------

Brian Granger ipapi.py => core/ipapi.py and imports updated.	r2027	from IPython.core import ipapi
		# API handle used at the end of this file to register the magics.
		# NOTE(review): presumably the running IPython instance's API object --
		# confirm against IPython.core.ipapi.
		ip = ipapi.get()
Ville M. Vainio add ipy_lookfor. closes #245	r1164
		# Default module names handed to lookfor() by the %lookfor magic;
		# replaced as a whole by "%lookfor_modules name1 name2 ...".
		_lookfor_modules = ['numpy', 'scipy']

		def lookfor_f(self, arg=''):
		    r"""
		    Search for objects whose documentation contains all given words.

		    Shows a summary of matching objects, sorted roughly by relevance.

		    Usage
		    -----
		    %lookfor some words
		        Search the default modules (see %lookfor_modules)

		    %lookfor +numpy some words
		        Search module 'numpy' only.  Several +module arguments may be
		        given; they may appear anywhere on the line.

		    %lookfor_modules numpy scipy
		        Set default modules whose docstrings to search

		    """
		    words = []
		    modules = []
		    for token in arg.split():
		        # "+name" selects a module; a lone "+" is an ordinary search word
		        if token.startswith('+') and len(token) > 1:
		            modules.append(token[1:])
		        else:
		            words.append(token)

		    # Fall back to the configured defaults when no +module was given.
		    lookfor(' '.join(words), modules=modules or _lookfor_modules)

		def lookfor_modules_f(self, arg=''):
		    """
		    Show or set the default modules searched by %lookfor.

		    With an argument, the whitespace-separated names in `arg` replace
		    the current list.  Without one, the current list is printed.
		    """
		    global _lookfor_modules
		    if not arg:
		        print("Modules included in %lookfor search: " + str(_lookfor_modules))
		    else:
		        _lookfor_modules = arg.split()

		# Register the two functions above under the magic names used in their
		# usage text: %lookfor and %lookfor_modules.
		ip.expose_magic('lookfor', lookfor_f)
		ip.expose_magic('lookfor_modules', lookfor_modules_f)