# docscrape.py
# 497 lines | 14.5 KiB | text/x-python | PythonLexer
# Fernando Perez
# r1850
"""Extract reference documentation from the NumPy source tree.
"""
import inspect
import pydoc
import re
import textwrap
from warnings import warn

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3
class Reader(object):
    """A line-based string reader.

    The text is held as a list of lines together with a cursor (``_l``)
    that indexes the next line to be read.  All ``read*`` methods advance
    the cursor; ``peek`` does not.
    """

    def __init__(self, data):
        """
        Parameters
        ----------
        data : str or list of str
            Either a single string whose lines are separated by newline
            characters, or an already-split list of lines.  A list is
            stored as-is (not copied).
        """
        if isinstance(data, list):
            self._str = data
        else:
            self._str = data.split('\n')  # store string as list of lines
        self.reset()

    def __getitem__(self, n):
        """Return line ``n`` (or a list of lines when ``n`` is a slice)."""
        return self._str[n]

    def reset(self):
        """Move the cursor back to the first line."""
        self._l = 0  # current line nr

    def read(self):
        """Return the current line and advance; return '' at end of input."""
        if not self.eof():
            out = self[self._l]
            self._l += 1
            return out
        else:
            return ''

    def seek_next_non_empty_line(self):
        """Advance the cursor past any blank (whitespace-only) lines."""
        for l in self[self._l:]:
            if l.strip():
                break
            else:
                self._l += 1

    def eof(self):
        """Return True when the cursor is at or past the last line."""
        return self._l >= len(self._str)

    def read_to_condition(self, condition_func):
        """Read lines until ``condition_func(line)`` is true.

        Parameters
        ----------
        condition_func : callable
            Called with each line, starting at the cursor.

        Returns
        -------
        list of str
            The lines from the cursor up to, but not including, the first
            line for which the condition holds.  That line is left
            unconsumed.  If the condition never holds, all remaining lines
            are returned.  An empty list is returned when the cursor is
            already at the end of input.
        """
        start = self._l
        for line in self[start:]:
            if condition_func(line):
                return self[start:self._l]
            self._l += 1
            if self.eof():
                # Slicing past the end is harmless; this returns every
                # remaining line.
                return self[start:self._l+1]
        return []

    def read_to_next_empty_line(self):
        """Skip leading blank lines, then read up to the next blank line."""
        self.seek_next_non_empty_line()

        def is_empty(line):
            return not line.strip()
        return self.read_to_condition(is_empty)

    def read_to_next_unindented_line(self):
        """Read up to the next non-blank line that has no leading whitespace."""
        def is_unindented(line):
            return (line.strip() and (len(line.lstrip()) == len(line)))
        return self.read_to_condition(is_unindented)

    def peek(self, n=0):
        """Return the line ``n`` positions from the cursor without advancing.

        Returns '' when the requested index is at or past the end of the
        input.  A negative ``n`` looks backwards (``peek(-1)`` is the line
        just before the cursor).
        """
        if self._l + n < len(self._str):
            return self[self._l + n]
        else:
            return ''

    def is_empty(self):
        """Return True when the whole input contains only whitespace."""
        return not ''.join(self._str).strip()
class NumpyDocString(object):
    """Parse a NumPy-style docstring into named sections.

    The parsed sections are available through item access, e.g.
    ``doc['Parameters']``.  ``str(doc)`` renders the parsed data back to
    text.
    """

    # Matches either ':role:`name`' or a bare 'name' at the start of a line.
    # Compiled in verbose mode, so literal whitespace in the pattern is
    # ignored.
    _name_rgx = re.compile(r"^\s*(:(?P<role>\w+):`(?P<name>[a-zA-Z0-9_.-]+)`|"
                           r" (?P<name2>[a-zA-Z0-9_.-]+))\s*", re.X)

    # Matches a call-like first line such as 'a, b = mod.func(x, y)'.
    _signature_rgx = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$')

    def __init__(self, docstring):
        """
        Parameters
        ----------
        docstring : str
            The raw docstring text.  It is dedented before parsing.
        """
        docstring = textwrap.dedent(docstring).split('\n')

        self._doc = Reader(docstring)
        self._parsed_data = {
            'Signature': '',
            'Summary': [''],
            'Extended Summary': [],
            'Parameters': [],
            'Returns': [],
            'Raises': [],
            'Warns': [],
            'Other Parameters': [],
            'Attributes': [],
            'Methods': [],
            'See Also': [],
            'Notes': [],
            'Warnings': [],
            'References': '',
            'Examples': '',
            'index': {}
            }

        self._parse()

    def __getitem__(self, key):
        """Return the parsed content of section ``key``."""
        return self._parsed_data[key]

    def __setitem__(self, key, val):
        """Store ``val`` for a known section; warn and ignore unknown ones."""
        if key not in self._parsed_data:
            warn("Unknown section %s" % key)
        else:
            self._parsed_data[key] = val

    def _is_at_section(self):
        """Skip blank lines and report whether a section header follows.

        A header is either an ``.. index::`` directive or a line followed
        by a line of '-' or '=' at least as long as the header text.
        """
        self._doc.seek_next_non_empty_line()

        if self._doc.eof():
            return False

        l1 = self._doc.peek().strip()  # e.g. Parameters

        if l1.startswith('.. index::'):
            return True

        l2 = self._doc.peek(1).strip()  # ---------- or ==========
        return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))

    def _strip(self, doc):
        """Return ``doc`` without its leading and trailing blank lines."""
        i = 0
        j = 0
        for i, line in enumerate(doc):
            if line.strip():
                break

        for j, line in enumerate(doc[::-1]):
            if line.strip():
                break

        return doc[i:len(doc)-j]

    def _read_to_next_section(self):
        """Read lines up to the next section header (or end of input).

        Paragraph breaks inside the section are kept as single '' entries.
        """
        section = self._doc.read_to_next_empty_line()

        while not self._is_at_section() and not self._doc.eof():
            if not self._doc.peek(-1).strip():  # previous line was empty
                section += ['']

            section += self._doc.read_to_next_empty_line()

        return section

    def _read_sections(self):
        """Yield ``(name, content_lines)`` for each remaining section.

        Iteration stops early if a chunk is too short to hold a header
        and its underline.
        """
        while not self._doc.eof():
            data = self._read_to_next_section()
            name = data[0].strip()

            if name.startswith('..'):  # index section
                yield name, data[1:]
            elif len(data) < 2:
                # End the generator.  Yielding the StopIteration class
                # here would hand the caller a value it cannot unpack
                # into (section, content).
                return
            else:
                # data[1] is the underline; the content starts after it.
                yield name, self._strip(data[2:])

    def _parse_param_list(self, content):
        """Parse a parameter-style section.

        Returns
        -------
        list of tuple
            ``(name, type, description_lines)`` per entry.  ``type`` is ''
            when the header line has no ' : ' separator.
        """
        r = Reader(content)
        params = []
        while not r.eof():
            header = r.read().strip()
            if ' : ' in header:
                arg_name, arg_type = header.split(' : ')[:2]
            else:
                arg_name, arg_type = header, ''

            desc = r.read_to_next_unindented_line()
            desc = dedent_lines(desc)

            params.append((arg_name, arg_type, desc))

        return params

    def _parse_see_also(self, content):
        """Parse the lines of a 'See Also' section.

        Accepted forms::

            func_name : Descriptive text
                continued text
            another_func_name : Descriptive text
            func_name1, func_name2, :meth:`func_name`, func_name3

        Returns
        -------
        list of tuple
            ``(name, description_lines, role)`` per entry; ``role`` is
            None when the name was not written as ``:role:`name```.

        Raises
        ------
        ValueError
            If a non-blank entry is not a valid item name.
        """
        items = []

        def parse_item_name(text):
            """Match ':role:`name`' or 'name'"""
            m = self._name_rgx.match(text)
            if m:
                # groups: (whole match, role, name, name2)
                g = m.groups()
                if g[1] is None:
                    return g[3], None
                else:
                    return g[2], g[1]
            raise ValueError("%s is not a item name" % text)

        def push_item(name, rest):
            # Skip missing names and whitespace-only entries (e.g. the
            # piece after a trailing comma in a comma-separated list).
            if not name or not name.strip():
                return
            name, role = parse_item_name(name)
            items.append((name, list(rest), role))
            del rest[:]

        current_func = None
        rest = []

        for line in content:
            if not line.strip():
                continue

            m = self._name_rgx.match(line)
            if m and line[m.end():].strip().startswith(':'):
                # "name : description" starts a new entry.
                push_item(current_func, rest)
                current_func, line = line[:m.end()], line[m.end():]
                rest = [line.split(':', 1)[1].strip()]
                if not rest[0]:
                    rest = []
            elif not line.startswith(' '):
                # Unindented line without a description: one name or a
                # comma-separated list of names.
                push_item(current_func, rest)
                current_func = None
                if ',' in line:
                    for func in line.split(','):
                        push_item(func, [])
                elif line.strip():
                    current_func = line
            elif current_func is not None:
                # Indented continuation of the current description.
                rest.append(line.strip())
        push_item(current_func, rest)
        return items

    def _parse_index(self, section, content):
        """Parse an index directive.

        Expected form::

            .. index:: default
               :refguide: something, else, and more

        Returns
        -------
        dict
            ``'default'`` maps to the first name on the directive line;
            every ``:key: a, b`` line maps ``key`` to a list of names.
        """
        def strip_each_in(lst):
            return [s.strip() for s in lst]

        out = {}
        section = section.split('::')
        if len(section) > 1:
            out['default'] = strip_each_in(section[1].split(','))[0]
        for line in content:
            line = line.split(':')
            if len(line) > 2:
                out[line[1]] = strip_each_in(line[2].split(','))
        return out

    def _parse_summary(self):
        """Grab signature (if given) and summary"""
        if self._is_at_section():
            return

        summary = self._doc.read_to_next_empty_line()
        summary_str = " ".join([s.strip() for s in summary]).strip()
        if self._signature_rgx.match(summary_str):
            # The first paragraph is a call signature; the summary is the
            # paragraph after it, if any.
            self['Signature'] = summary_str
            if not self._is_at_section():
                self['Summary'] = self._doc.read_to_next_empty_line()
        else:
            self['Summary'] = summary

        if not self._is_at_section():
            self['Extended Summary'] = self._read_to_next_section()

    def _parse(self):
        """Parse the whole docstring into ``self._parsed_data``."""
        self._doc.reset()
        self._parse_summary()

        for (section, content) in self._read_sections():
            if not section.startswith('..'):
                # Normalise e.g. 'see also' to 'See Also'.
                section = ' '.join([s.capitalize()
                                    for s in section.split(' ')])
            if section in ('Parameters', 'Attributes', 'Methods',
                           'Returns', 'Raises', 'Warns'):
                self[section] = self._parse_param_list(content)
            elif section.startswith('.. index::'):
                self['index'] = self._parse_index(section, content)
            elif section == 'See Also':
                self['See Also'] = self._parse_see_also(content)
            else:
                self[section] = content

    # string conversion routines

    def _str_header(self, name, symbol='-'):
        """Return ``[name, underline]`` with the underline as long as name."""
        return [name, len(name)*symbol]

    def _str_indent(self, doc, indent=4):
        """Return the lines of ``doc`` prefixed with ``indent`` spaces."""
        out = []
        for line in doc:
            out += [' '*indent + line]
        return out

    def _str_signature(self):
        """Return the signature lines, with '*' backslash-escaped."""
        if self['Signature']:
            return [self['Signature'].replace('*', '\\*')] + ['']
        else:
            return ['']

    def _str_summary(self):
        """Return the summary lines followed by a blank line, if any."""
        if self['Summary']:
            return self['Summary'] + ['']
        else:
            return []

    def _str_extended_summary(self):
        """Return the extended summary followed by a blank line, if any."""
        if self['Extended Summary']:
            return self['Extended Summary'] + ['']
        else:
            return []

    def _str_param_list(self, name):
        """Render a parameter-style section, or [] when it is empty."""
        out = []
        if self[name]:
            out += self._str_header(name)
            for param, param_type, desc in self[name]:
                out += ['%s : %s' % (param, param_type)]
                out += self._str_indent(desc)
            out += ['']
        return out

    def _str_section(self, name):
        """Render a free-text section, or [] when it is empty."""
        out = []
        if self[name]:
            out += self._str_header(name)
            out += self[name]
            out += ['']
        return out

    def _str_see_also(self, func_role):
        """Render the 'See Also' section.

        Parameters
        ----------
        func_role : str
            Role used for entries that carry no explicit role; when empty
            such entries are rendered as ```name`_`` references.
        """
        if not self['See Also']:
            return []
        out = []
        out += self._str_header("See Also")
        last_had_desc = True
        for func, desc, role in self['See Also']:
            if role:
                link = ':%s:`%s`' % (role, func)
            elif func_role:
                link = ':%s:`%s`' % (func_role, func)
            else:
                link = "`%s`_" % func
            if desc or last_had_desc:
                out += ['']
                out += [link]
            else:
                # Consecutive entries without description share one line.
                out[-1] += ", %s" % link
            if desc:
                out += self._str_indent([' '.join(desc)])
                last_had_desc = True
            else:
                last_had_desc = False
        out += ['']
        return out

    def _str_index(self):
        """Render the index directive from the parsed 'index' data."""
        idx = self['index']
        out = []
        out += ['.. index:: %s' % idx.get('default', '')]
        for section, references in idx.items():
            if section == 'default':
                continue
            # NOTE(review): the leading whitespace of this literal may have
            # been collapsed when the file was extracted -- confirm against
            # the canonical source.
            out += [' :%s: %s' % (section, ', '.join(references))]
        return out

    def __str__(self, func_role=''):
        """Render the parsed docstring back to text."""
        out = []
        out += self._str_signature()
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in ('Parameters', 'Returns', 'Raises'):
            out += self._str_param_list(param_list)
        out += self._str_section('Warnings')
        out += self._str_see_also(func_role)
        for s in ('Notes', 'References', 'Examples'):
            out += self._str_section(s)
        out += self._str_index()
        return '\n'.join(out)
def indent(str, indent=4):
    """Indent every line of a string.

    Parameters
    ----------
    str : str or None
        Text whose lines are separated by newline characters.
    indent : int, optional
        Number of spaces to prepend to each line.

    Returns
    -------
    str
        The indented text.  When `str` is None, just the indentation
        itself is returned.
    """
    indent_str = ' '*indent
    if str is None:
        return indent_str
    lines = str.split('\n')
    return '\n'.join(indent_str + l for l in lines)
def dedent_lines(lines):
    """Deindent a list of lines maximally

    The lines are joined, passed through ``textwrap.dedent`` and split
    again, so the common leading whitespace is removed while relative
    indentation is kept.
    """
    return textwrap.dedent("\n".join(lines)).split("\n")
def header(text, style='-'):
    """Return `text` underlined with `style`, each on its own line.

    Parameters
    ----------
    text : str
        The header text.
    style : str, optional
        Character repeated ``len(text)`` times to form the underline.
    """
    return text + '\n' + style*len(text) + '\n'
class FunctionDoc(NumpyDocString):
    """Parsed docstring of a function or method.

    When the docstring does not start with a signature, one is derived
    from the callable by introspection.
    """

    def __init__(self, func, role='func', doc=None):
        """
        Parameters
        ----------
        func : callable
            The object being documented.
        role : str, optional
            Role used when rendering, e.g. "func" or "meth".
        doc : str, optional
            Docstring text; taken from `func` via ``inspect.getdoc`` when
            not given.
        """
        self._f = func
        self._role = role  # e.g. "func" or "meth"
        if doc is None:
            doc = inspect.getdoc(func) or ''
        try:
            NumpyDocString.__init__(self, doc)
        except ValueError as e:
            # Best effort: report the parse error and carry on with
            # whatever sections were parsed before it.
            print('*'*78)
            print("ERROR: '%s' while parsing `%s`" % (e, self._f))
            print('*'*78)

        if not self['Signature']:
            func, func_name = self.get_func()
            try:
                # try to read signature
                if hasattr(inspect, 'signature'):
                    argspec = str(inspect.signature(func))
                else:
                    # Older interpreters without inspect.signature.
                    argspec = inspect.getargspec(func)
                    argspec = inspect.formatargspec(*argspec)
                argspec = argspec.replace('*', '\\*')
                signature = '%s%s' % (func_name, argspec)
            except (TypeError, ValueError):
                # The callable cannot be introspected.
                signature = '%s()' % func_name
            self['Signature'] = signature

    def get_func(self):
        """Return ``(callable, name)`` for the documented object.

        For a class the callable is its ``__call__`` attribute, falling
        back to ``__init__``.  The name falls back to this class's name
        when the object has no ``__name__``.
        """
        func_name = getattr(self._f, '__name__', self.__class__.__name__)
        if inspect.isclass(self._f):
            func = getattr(self._f, '__call__', self._f.__init__)
        else:
            func = self._f
        return func, func_name

    def __str__(self):
        """Render a directive line for the role, then the docstring."""
        out = ''

        func, func_name = self.get_func()

        roles = {'func': 'function',
                 'meth': 'method'}

        if self._role:
            if self._role not in roles:
                print("Warning: invalid role %s" % self._role)
            # NOTE(review): the whitespace between the newlines in this
            # literal may have been collapsed when the file was extracted
            # -- confirm against the canonical source.
            out += '.. %s:: %s\n \n\n' % (roles.get(self._role, ''),
                                          func_name)

        out += super(FunctionDoc, self).__str__(func_role=self._role)
        return out
class ClassDoc(NumpyDocString):
    """Parsed docstring of a class."""

    def __init__(self, cls, modulename='', func_doc=FunctionDoc, doc=None):
        """
        Parameters
        ----------
        cls : class
            The class being documented.
        modulename : str, optional
            Module prefix; a trailing '.' is appended when missing.
        func_doc : class, optional
            Stored on the instance as ``_func_doc``.
        doc : str, optional
            Docstring text; taken from `cls` via ``pydoc.getdoc`` when not
            given.

        Raises
        ------
        ValueError
            If `cls` is not a class.
        """
        if not inspect.isclass(cls):
            raise ValueError("Initialise using a class. Got %r" % cls)
        self._cls = cls

        if modulename and not modulename.endswith('.'):
            modulename += '.'
        self._mod = modulename
        self._name = cls.__name__
        self._func_doc = func_doc

        if doc is None:
            doc = pydoc.getdoc(cls)

        NumpyDocString.__init__(self, doc)

    @property
    def methods(self):
        """Names of the public callable members of the class."""
        return [name for name, func in inspect.getmembers(self._cls)
                if not name.startswith('_') and callable(func)]

    def __str__(self):
        """Render the class docstring followed by a blank line."""
        out = ''
        out += super(ClassDoc, self).__str__()
        out += "\n\n"

        # Per-method output is currently disabled:
        #for m in self.methods:
        #    print "Parsing `%s`" % m
        #    out += str(self._func_doc(getattr(self._cls,m), 'meth')) + '\n\n'
        #    out += '.. index::\n single: %s; %s\n\n' % (self._name, m)

        return out