upstream/ipython Files · IPython/Extensions/ipy_lookfor.py

Disable typecheck for namespaces to be dicts....

Disable typecheck for namespaces to be dicts. This will allow more sophisticated objects to be used as namespaces, which can provide custom behavior.

Ville M. Vainio - - Load All Authors

File last commit:

r1164:3fe8916b


                r1421:9cb86e51

Download file

             ipy_lookfor.py
        
                    234 lines
            
             | 6.9 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / Extensions / ipy_lookfor.py
          
                    History
                
                 |
                  Source
                 | Raw
                 |Copy content
                 |Copy permalink

        Ville M. Vainio
    
add ipy_lookfor. closes #245

              r1164
            
      """

      IPython extension: %lookfor command for searching docstrings

      """

      # Pauli Virtanen <pav@iki.fi>, 2008.

      import re, inspect, pkgutil, pydoc

      #------------------------------------------------------------------------------

      # Lookfor functionality

      #------------------------------------------------------------------------------

      # Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
      # where kind: "func", "class", "module", "object"
      # and index: index in breadth-first namespace traversal
      _lookfor_caches = {}

      # Regexp whose match indicates that the string may contain a function
      # signature, e.g. "add(x1, x2[, out])".  Digits are accepted in the name
      # part so that signatures such as "log10(x[, out])" are recognised too.
      _function_signature_re = re.compile(r"[a-z0-9_]+\(.*[,=].*\)", re.I)

      def lookfor(what, modules=None, import_modules=True, regenerate=False):
          """
          Search for objects whose documentation contains all given words.

          Shows a summary of matching objects, sorted roughly by relevance.
          Short summaries are printed; longer ones are sent through the
          pydoc pager.

          Parameters
          ----------
          what : str
              String containing words to look for.  Words are matched
              case-insensitively as substrings of the docstrings.
          modules : str, module, or iterable of those, optional
              Module(s) whose docstrings to go through.  A single name or
              module object is accepted as shorthand for a one-element list.
              ``None`` is forwarded to `_lookfor_generate_cache`, which
              substitutes its own default module.
          import_modules : bool
              Whether to import sub-modules in packages.
              Will import only modules in __all__
          regenerate : bool
              Re-generate the docstring cache

          Returns
          -------
          None
              Results are displayed, not returned.  Nothing is displayed when
              `what` contains no words.
          """
          whats = str(what).lower().split()
          if not whats:
              # Nothing to search for: skip the (potentially slow) cache build.
              return

          # Accept a bare module / module name (and None) as a one-element list;
          # iterating a string would otherwise yield single characters.
          if (modules is None or isinstance(modules, str)
                  or inspect.ismodule(modules)):
              modules = [modules]

          # Cache
          cache = {}
          for module in modules:
              try:
                  c = _lookfor_generate_cache(module, import_modules, regenerate)
              except ImportError:
                  # Best effort: modules that cannot be imported are skipped.
                  continue
              cache.update(c)

          # Search
          # XXX: maybe using a real stemming search engine would be better?
          found = []
          for name, (docstring, kind, index) in cache.items():
              if kind in ('module', 'object'):
                  # don't show modules or objects
                  continue
              doc = docstring.lower()
              if all(w in doc for w in whats):
                  found.append(name)

          # Relevance sort
          # XXX: this is full Harrison-Stetson heuristics now,
          # XXX: it probably could be improved
          kind_relevance = {'func': 1000, 'class': 1000,
                            'module': -1000, 'object': -1000}

          def relevance(name, docstr, kind, index):
              r = 0
              # do the keywords occur within the start of the docstring?
              first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
              r += sum([200 for w in whats if w in first_doc])
              # do the keywords occur in the function name?
              # (keywords are lower-cased, so compare against a lower-cased name)
              name_lc = name.lower()
              r += sum([30 for w in whats if w in name_lc])
              # is the full name long?
              r += -len(name) * 5
              # is the object of bad type?
              r += kind_relevance.get(kind, -1000)
              # is the object deep in namespace hierarchy?
              r += -name.count('.') * 10
              # explicit floor division keeps the integer result on any Python
              r += max(-index // 100, -100)
              return r

          def sort_key(name):
              # Most relevant first; ties are broken alphabetically by name.
              # Relevance is evaluated once per name instead of per comparison.
              return (-relevance(name, *cache[name]), name)

          found.sort(key=sort_key)

          # Pretty-print
          s = "Search results for '%s'" % (' '.join(whats))
          help_text = [s, "-"*len(s)]
          for name in found:
              doc, kind, ix = cache[name]

              doclines = [line.strip() for line in doc.strip().split("\n")
                          if line.strip()]

              # find a suitable short description: the first non-empty line,
              # or the second one when the first looks like a call signature
              try:
                  first_doc = doclines[0].strip()
                  if _function_signature_re.search(first_doc):
                      first_doc = doclines[1].strip()
              except IndexError:
                  first_doc = ""
              help_text.append("%s\n    %s" % (name, first_doc))

          if not found:
              help_text.append("Nothing found.")

          # Output
          if len(help_text) > 10:
              pager = pydoc.getpager()
              pager("\n".join(help_text))
          else:
              print("\n".join(help_text))

      def _lookfor_generate_cache(module, import_modules, regenerate):
          """
          Generate docstring cache for given module.

          The module namespace is walked breadth-first.  Members of modules
          (restricted to ``__all__`` when it is defined) and of classes are
          visited; every visited object that has a docstring is recorded.

          Parameters
          ----------
          module : str, None, module
              Module for which to generate docstring cache.  ``None`` means
              "numpy".  A dotted name refers to that sub-module itself.
          import_modules : bool
              Whether to import sub-modules in packages.
              Will import only modules in __all__
          regenerate : bool
              Re-generate the docstring cache

          Returns
          -------
          cache : dict {obj_full_name: (docstring, kind, index), ...}
              Docstring cache for the module, either cached one (regenerate=False)
              or newly generated.

          Raises
          ------
          ImportError
              If `module` is given by name and cannot be imported.
          """
          global _lookfor_caches

          # Local imports: only this function needs these names.
          import sys
          from collections import deque

          if module is None:
              module = "numpy"

          if isinstance(module, str):
              # __import__("a.b") returns the top-level package "a"; the module
              # that was actually requested has to be taken from sys.modules.
              __import__(module)
              module = sys.modules[module]

          if id(module) in _lookfor_caches and not regenerate:
              return _lookfor_caches[id(module)]

          # walk items and collect docstrings
          cache = {}
          # Maps id(obj) -> obj.  Holding the object keeps it alive, so the id
          # of a short-lived member cannot be recycled for a different object
          # and make that one look as if it had been visited already.
          seen = {}
          index = 0
          queue = deque([(module.__name__, module)])
          while queue:
              name, item = queue.popleft()
              if id(item) in seen:
                  continue
              seen[id(item)] = item

              index += 1
              kind = "object"

              if inspect.ismodule(item):
                  kind = "module"
                  _all = getattr(item, '__all__', None)

                  # import sub-packages
                  if import_modules and hasattr(item, '__path__'):
                      for m in pkgutil.iter_modules(item.__path__):
                          if _all is not None and m[1] not in _all:
                              continue
                          try:
                              __import__("%s.%s" % (name, m[1]))
                          except ImportError:
                              continue

                  for n, v in inspect.getmembers(item):
                      if _all is not None and n not in _all:
                          continue
                      queue.append(("%s.%s" % (name, n), v))
              elif inspect.isclass(item):
                  kind = "class"
                  for n, v in inspect.getmembers(item):
                      queue.append(("%s.%s" % (name, n), v))
              elif callable(item):
                  kind = "func"

              doc = inspect.getdoc(item)
              if doc is not None:
                  cache[name] = (doc, kind, index)

          # Register only a completely built cache, so an error during the walk
          # cannot leave a partial result behind for later calls.
          _lookfor_caches[id(module)] = cache
          return cache

      #------------------------------------------------------------------------------

      # IPython connectivity

      #------------------------------------------------------------------------------

      import IPython.ipapi

      # Object returned by IPython.ipapi.get(); its expose_magic() method is
      # used at the end of this file to register the magic commands.
      ip = IPython.ipapi.get()

      # Default modules whose docstrings %lookfor searches.  The list is
      # replaced by lookfor_modules_f() when it is called with arguments.
      _lookfor_modules = ['numpy', 'scipy']

      def lookfor_f(self, arg=''):
          r"""
          Search for objects whose documentation contains all given words.

          Shows a summary of matching objects, sorted roughly by relevance.

          Usage
          -----
          %lookfor some words
              Search the default modules (see %lookfor_modules)

          %lookfor +numpy  some words
              Search module 'numpy'.  Several ``+module`` tokens may be given;
              they replace the default modules for this search only.

          %lookfor_modules numpy scipy
              Set default modules whose docstrings to search

          """
          modules = []
          words = []
          for token in arg.split():
              # "+name" selects a module; a lone "+" is kept as a search word.
              if token.startswith('+') and len(token) > 1:
                  modules.append(token[1:])
              else:
                  words.append(token)

          # Fall back to the configured defaults when no "+module" was given.
          lookfor(' '.join(words), modules=modules or _lookfor_modules)

      def lookfor_modules_f(self, arg=''):
          """
          Show or set the default modules searched by %lookfor.

          Usage
          -----
          %lookfor_modules
              Print the current list of default modules

          %lookfor_modules numpy scipy
              Replace the default module list with the given names

          """
          global _lookfor_modules

          names = arg.split()
          if names:
              _lookfor_modules = names
          else:
              # Empty or whitespace-only argument: report the list and leave it
              # unchanged instead of replacing it with an empty one.
              print("Modules included in %%lookfor search: %s"
                    % (_lookfor_modules,))

      # Register the two functions with IPython under the magic names
      # 'lookfor' and 'lookfor_modules'.
      ip.expose_magic('lookfor', lookfor_f)

      ip.expose_magic('lookfor_modules', lookfor_modules_f)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

Ville M. Vainio add ipy_lookfor. closes #245	r1164	"""
		IPython extension: %lookfor command for searching docstrings

		"""
		# Pauli Virtanen <pav@iki.fi>, 2008.

		import re, inspect, pkgutil, pydoc

		#------------------------------------------------------------------------------
		# Lookfor functionality
		#------------------------------------------------------------------------------

		# Cache for lookfor: {id(module): {name: (docstring, kind, index), ...}...}
		# where kind: "func", "class", "module", "object"
		# and index: index in breadth-first namespace traversal
		_lookfor_caches = {}

		# Regexp whose match indicates that the string may contain a function
		# signature, e.g. "add(x1, x2[, out])".  Digits are accepted in the name
		# part so that signatures such as "log10(x[, out])" are recognised too.
		_function_signature_re = re.compile(r"[a-z0-9_]+\(.*[,=].*\)", re.I)

		def lookfor(what, modules=None, import_modules=True, regenerate=False):
		    """
		    Search for objects whose documentation contains all given words.

		    Shows a summary of matching objects, sorted roughly by relevance.
		    Short summaries are printed; longer ones are sent through the
		    pydoc pager.

		    Parameters
		    ----------
		    what : str
		        String containing words to look for.  Words are matched
		        case-insensitively as substrings of the docstrings.
		    modules : str, module, or iterable of those, optional
		        Module(s) whose docstrings to go through.  A single name or
		        module object is accepted as shorthand for a one-element list.
		        ``None`` is forwarded to `_lookfor_generate_cache`, which
		        substitutes its own default module.
		    import_modules : bool
		        Whether to import sub-modules in packages.
		        Will import only modules in __all__
		    regenerate : bool
		        Re-generate the docstring cache

		    Returns
		    -------
		    None
		        Results are displayed, not returned.  Nothing is displayed when
		        `what` contains no words.
		    """
		    whats = str(what).lower().split()
		    if not whats:
		        # Nothing to search for: skip the (potentially slow) cache build.
		        return

		    # Accept a bare module / module name (and None) as a one-element list;
		    # iterating a string would otherwise yield single characters.
		    if (modules is None or isinstance(modules, str)
		            or inspect.ismodule(modules)):
		        modules = [modules]

		    # Cache
		    cache = {}
		    for module in modules:
		        try:
		            c = _lookfor_generate_cache(module, import_modules, regenerate)
		        except ImportError:
		            # Best effort: modules that cannot be imported are skipped.
		            continue
		        cache.update(c)

		    # Search
		    # XXX: maybe using a real stemming search engine would be better?
		    found = []
		    for name, (docstring, kind, index) in cache.items():
		        if kind in ('module', 'object'):
		            # don't show modules or objects
		            continue
		        doc = docstring.lower()
		        if all(w in doc for w in whats):
		            found.append(name)

		    # Relevance sort
		    # XXX: this is full Harrison-Stetson heuristics now,
		    # XXX: it probably could be improved
		    kind_relevance = {'func': 1000, 'class': 1000,
		                      'module': -1000, 'object': -1000}

		    def relevance(name, docstr, kind, index):
		        r = 0
		        # do the keywords occur within the start of the docstring?
		        first_doc = "\n".join(docstr.lower().strip().split("\n")[:3])
		        r += sum([200 for w in whats if w in first_doc])
		        # do the keywords occur in the function name?
		        # (keywords are lower-cased, so compare against a lower-cased name)
		        name_lc = name.lower()
		        r += sum([30 for w in whats if w in name_lc])
		        # is the full name long?
		        r += -len(name) * 5
		        # is the object of bad type?
		        r += kind_relevance.get(kind, -1000)
		        # is the object deep in namespace hierarchy?
		        r += -name.count('.') * 10
		        # explicit floor division keeps the integer result on any Python
		        r += max(-index // 100, -100)
		        return r

		    def sort_key(name):
		        # Most relevant first; ties are broken alphabetically by name.
		        # Relevance is evaluated once per name instead of per comparison.
		        return (-relevance(name, *cache[name]), name)

		    found.sort(key=sort_key)

		    # Pretty-print
		    s = "Search results for '%s'" % (' '.join(whats))
		    help_text = [s, "-"*len(s)]
		    for name in found:
		        doc, kind, ix = cache[name]

		        doclines = [line.strip() for line in doc.strip().split("\n")
		                    if line.strip()]

		        # find a suitable short description: the first non-empty line,
		        # or the second one when the first looks like a call signature
		        try:
		            first_doc = doclines[0].strip()
		            if _function_signature_re.search(first_doc):
		                first_doc = doclines[1].strip()
		        except IndexError:
		            first_doc = ""
		        help_text.append("%s\n    %s" % (name, first_doc))

		    if not found:
		        help_text.append("Nothing found.")

		    # Output
		    if len(help_text) > 10:
		        pager = pydoc.getpager()
		        pager("\n".join(help_text))
		    else:
		        print("\n".join(help_text))

		def _lookfor_generate_cache(module, import_modules, regenerate):
		    """
		    Generate docstring cache for given module.

		    The module namespace is walked breadth-first.  Members of modules
		    (restricted to ``__all__`` when it is defined) and of classes are
		    visited; every visited object that has a docstring is recorded.

		    Parameters
		    ----------
		    module : str, None, module
		        Module for which to generate docstring cache.  ``None`` means
		        "numpy".  A dotted name refers to that sub-module itself.
		    import_modules : bool
		        Whether to import sub-modules in packages.
		        Will import only modules in __all__
		    regenerate : bool
		        Re-generate the docstring cache

		    Returns
		    -------
		    cache : dict {obj_full_name: (docstring, kind, index), ...}
		        Docstring cache for the module, either cached one (regenerate=False)
		        or newly generated.

		    Raises
		    ------
		    ImportError
		        If `module` is given by name and cannot be imported.
		    """
		    global _lookfor_caches

		    # Local imports: only this function needs these names.
		    import sys
		    from collections import deque

		    if module is None:
		        module = "numpy"

		    if isinstance(module, str):
		        # __import__("a.b") returns the top-level package "a"; the module
		        # that was actually requested has to be taken from sys.modules.
		        __import__(module)
		        module = sys.modules[module]

		    if id(module) in _lookfor_caches and not regenerate:
		        return _lookfor_caches[id(module)]

		    # walk items and collect docstrings
		    cache = {}
		    # Maps id(obj) -> obj.  Holding the object keeps it alive, so the id
		    # of a short-lived member cannot be recycled for a different object
		    # and make that one look as if it had been visited already.
		    seen = {}
		    index = 0
		    queue = deque([(module.__name__, module)])
		    while queue:
		        name, item = queue.popleft()
		        if id(item) in seen:
		            continue
		        seen[id(item)] = item

		        index += 1
		        kind = "object"

		        if inspect.ismodule(item):
		            kind = "module"
		            _all = getattr(item, '__all__', None)

		            # import sub-packages
		            if import_modules and hasattr(item, '__path__'):
		                for m in pkgutil.iter_modules(item.__path__):
		                    if _all is not None and m[1] not in _all:
		                        continue
		                    try:
		                        __import__("%s.%s" % (name, m[1]))
		                    except ImportError:
		                        continue

		            for n, v in inspect.getmembers(item):
		                if _all is not None and n not in _all:
		                    continue
		                queue.append(("%s.%s" % (name, n), v))
		        elif inspect.isclass(item):
		            kind = "class"
		            for n, v in inspect.getmembers(item):
		                queue.append(("%s.%s" % (name, n), v))
		        elif callable(item):
		            kind = "func"

		        doc = inspect.getdoc(item)
		        if doc is not None:
		            cache[name] = (doc, kind, index)

		    # Register only a completely built cache, so an error during the walk
		    # cannot leave a partial result behind for later calls.
		    _lookfor_caches[id(module)] = cache
		    return cache

		#------------------------------------------------------------------------------
		# IPython connectivity
		#------------------------------------------------------------------------------

		import IPython.ipapi
		# Object returned by IPython.ipapi.get(); its expose_magic() method is
		# used at the end of this file to register the magic commands.
		ip = IPython.ipapi.get()

		# Default modules whose docstrings %lookfor searches.  The list is
		# replaced by lookfor_modules_f() when it is called with arguments.
		_lookfor_modules = ['numpy', 'scipy']

		def lookfor_f(self, arg=''):
		    r"""
		    Search for objects whose documentation contains all given words.

		    Shows a summary of matching objects, sorted roughly by relevance.

		    Usage
		    -----
		    %lookfor some words
		        Search the default modules (see %lookfor_modules)

		    %lookfor +numpy  some words
		        Search module 'numpy'.  Several ``+module`` tokens may be given;
		        they replace the default modules for this search only.

		    %lookfor_modules numpy scipy
		        Set default modules whose docstrings to search

		    """
		    modules = []
		    words = []
		    for token in arg.split():
		        # "+name" selects a module; a lone "+" is kept as a search word.
		        if token.startswith('+') and len(token) > 1:
		            modules.append(token[1:])
		        else:
		            words.append(token)

		    # Fall back to the configured defaults when no "+module" was given.
		    lookfor(' '.join(words), modules=modules or _lookfor_modules)

		def lookfor_modules_f(self, arg=''):
		    """
		    Show or set the default modules searched by %lookfor.

		    Usage
		    -----
		    %lookfor_modules
		        Print the current list of default modules

		    %lookfor_modules numpy scipy
		        Replace the default module list with the given names

		    """
		    global _lookfor_modules

		    names = arg.split()
		    if names:
		        _lookfor_modules = names
		    else:
		        # Empty or whitespace-only argument: report the list and leave it
		        # unchanged instead of replacing it with an empty one.
		        print("Modules included in %%lookfor search: %s"
		              % (_lookfor_modules,))

		# Register the two functions with IPython under the magic names
		# 'lookfor' and 'lookfor_modules'.
		ip.expose_magic('lookfor', lookfor_f)
		ip.expose_magic('lookfor_modules', lookfor_modules_f)