```"""Extract reference documentation from the NumPy source tree.

"""

from __future__ import print_function

import inspect
from nose.plugins.skip import SkipTest
import re
import sys

import six

"""

def __init__(self, data):
"""
Parameters
----------
data : str
String with lines separated by '\n'.

"""
if isinstance(data, list):
self._str = data
else:
self._str = data.split('\n')  # store string as list of lines

self.reset()

def __getitem__(self, n):
return self._str[n]

def reset(self):
self._l = 0  # current line nr

if not self.eof():
out = self[self._l]
self._l += 1
return out
else:
return ''

def seek_next_non_empty_line(self):
for l in self[self._l:]:
if l.strip():
break
else:
self._l += 1

def eof(self):
return self._l >= len(self._str)

start = self._l
for line in self[start:]:
if condition_func(line):
return self[start:self._l]
self._l += 1
if self.eof():
return self[start:self._l + 1]
return []

self.seek_next_non_empty_line()

def is_empty(line):
return not line.strip()

def is_unindented(line):
return (line.strip() and (len(line.lstrip()) == len(line)))

def peek(self, n=0):
if self._l + n < len(self._str):
return self[self._l + n]
else:
return ''

def is_empty(self):
return not ''.join(self._str).strip()

def __iter__(self):
for line in self._str:
yield line

class NumpyDocString(object):

def __init__(self, docstring, name=None):
if name:
self.name = name
docstring = docstring.split('\n')

# De-indent paragraph
try:
indent = min(len(s) - len(s.lstrip()) for s in docstring
if s.strip())
except ValueError:
indent = 0

for n, line in enumerate(docstring):
docstring[n] = docstring[n][indent:]

self._parsed_data = {
'Signature': '',
'Summary': '',
'Extended Summary': [],
'Parameters': [],
'Other Parameters': [],
'Returns': [],
'Raises': [],
'Warns': [],
'Notes': [],
'References': '',
'Examples': '',
'index': {},
'Attributes': [],
'Methods': [],
}
self.section_order = []

self._parse()

def __getitem__(self, key):
return self._parsed_data[key]

def __setitem__(self, key, val):
if key not in self._parsed_data:
raise ValueError("Unknown section %s" % key)
else:
self._parsed_data[key] = val

def _is_at_section(self):
self._doc.seek_next_non_empty_line()

if self._doc.eof():
return False

l1 = self._doc.peek().strip()  # e.g. Parameters

if l1.startswith('.. index::'):
return True

l2 = self._doc.peek(1).strip()  # ----------
return (len(l1) == len(l2) and l2 == '-' * len(l1))

def _strip(self, doc):
i = 0
j = 0
for i, line in enumerate(doc):
if line.strip():
break

for j, line in enumerate(doc[::-1]):
if line.strip():
break

return doc[i:len(doc) - j]

while not self._is_at_section() and not self._doc.eof():
if not self._doc.peek(-1).strip():  # previous line was empty
section += ['']

return section

while not self._doc.eof():
name = data[0].strip()

if name.startswith('..'):  # index section
yield name, data[1:]
elif len(data) < 2:
yield StopIteration
else:
yield name, self._strip(data[2:])

def _parse_param_list(self, content):
params = []
while not r.eof():
if ' : ' in header:
arg_name, arg_type = header.split(' : ')[:2]
else:

for n, line in enumerate(desc):
desc[n] = line.strip()
desc = desc  # '\n'.join(desc)

params.append((arg_name, arg_type, desc))

return params

def _parse_see_also(self, content):
"""
func_name : Descriptive text
continued text
another_func_name : Descriptive text
func_name1, func_name2, func_name3

"""
functions = []
current_func = None
rest = []
for line in content:
if not line.strip():
continue
if ':' in line:
if current_func:
functions.append((current_func, rest))
r = line.split(':', 1)
current_func = r[0].strip()
r[1] = r[1].strip()
if r[1]:
rest = [r[1]]
else:
rest = []
elif not line.startswith(' '):
if current_func:
functions.append((current_func, rest))
current_func = None
rest = []
if ',' in line:
for func in line.split(','):
func = func.strip()
if func:
functions.append((func, []))
elif line.strip():
current_func = line.strip()
elif current_func is not None:
rest.append(line.strip())
if current_func:
functions.append((current_func, rest))
return functions

def _parse_index(self, section, content):
"""
.. index: default
:refguide: something, else, and more

"""
def strip_each_in(lst):
return [s.strip() for s in lst]

out = {}
section = section.split('::')
if len(section) > 1:
out['default'] = strip_each_in(section[1].split(','))[0]
for line in content:
line = line.split(':')
if len(line) > 2:
out[line[1]] = strip_each_in(line[2].split(','))
return out

def _parse_summary(self):
"""Grab signature (if given) and summary"""
summary_str = "\n".join([s.strip() for s in summary])
if re.compile('^([\\w. ]+=)?[\\w\\.]+\\(.*\\)\$').match(summary_str):
self['Signature'] = summary_str
if not self._is_at_section():
elif re.compile('^[\\w]+\n[-]+').match(summary_str):
self['Summary'] = ''
self._doc.reset()
else:
self['Summary'] = summary

if not self._is_at_section():

def _parse(self):
self._doc.reset()
self._parse_summary()
if not section.startswith('..'):
section = ' '.join([s.capitalize()
for s in section.split(' ')])
if section in ('Parameters', 'Other Parameters', 'Returns',
'Raises', 'Warns', 'Attributes', 'Methods'):
self[section] = self._parse_param_list(content)
self.section_order.append(section)
elif section.startswith('.. index::'):
self['index'] = self._parse_index(section, content)
self.section_order.append('index')
else:
self[section] = content
self.section_order.append(section)

# string conversion routines

return [name, len(name) * symbol]

def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' ' * indent + line]
return out

def _str_signature(self):
if not self['Signature']:
return []
return ["*%s*" % self['Signature'].replace('*', '\\*')] + ['']

def _str_summary(self):
return self['Summary'] + ['']

def _str_extended_summary(self):
return self['Extended Summary'] + ['']

def _str_param_list(self, name):
out = []
if self[name]:
for param, param_type, desc in self[name]:
out += ['%s : %s' % (param, param_type)]
out += self._str_indent(desc)
out += ['']
return out

def _str_section(self, name):
out = []
if self[name]:
out += self[name]
out += ['']
return out

def _str_see_also(self):
return []
out = []
out += ['']
out += ["`%s`_" % func]
else:
out[-1] += ", `%s`_" % func
if desc:
out += self._str_indent(desc)
else:
out += ['']
return out

def _str_index(self):
idx = self['index']
out = []
out += ['.. index:: %s' % idx.get('default', '')]
for section, references in six.iteritems(idx):
if section == 'default':
continue
out += ['   :%s: %s' % (section, ', '.join(references))]
return out

def __str__(self):
out = []
out += self._str_signature()
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Other Parameters',
'Returns', 'Raises', 'Warns'):
out += self._str_param_list(param_list)
out += self._str_see_also()
for s in ('Notes', 'References', 'Examples'):
out += self._str_section(s)
out += self._str_index()
return '\n'.join(out)

# --

def get_errors(self, check_order=True):
errors = []
self._doc.reset()
for j, line in enumerate(self._doc):
if len(line) > 75:
if hasattr(self, 'name'):
errors.append("%s: Line %d exceeds 75 chars"
": \"%s\"..." % (self.name, j + 1,
line[:30]))
else:
errors.append("Line %d exceeds 75 chars"
": \"%s\"..." % (j + 1, line[:30]))

if check_order:
canonical_order = ['Signature', 'Summary', 'Extended Summary',
'Attributes', 'Methods', 'Parameters',
'Other Parameters', 'Returns', 'Raises',
'Warns',
'index']

canonical_order_copy = list(canonical_order)

for s in self.section_order:
while canonical_order_copy and s != canonical_order_copy[0]:
canonical_order_copy.pop(0)
if not canonical_order_copy:
errors.append(
"Sections in wrong order (starting at %s). The"
" right order is %s" % (s, canonical_order))

return errors

def indent(str, indent=4):
indent_str = ' ' * indent
if str is None:
return indent_str
lines = str.split('\n')
return '\n'.join(indent_str + l for l in lines)

class NumpyFunctionDocString(NumpyDocString):

def __init__(self, docstring, function):
super(NumpyFunctionDocString, self).__init__(docstring)
args, varargs, keywords, defaults = inspect.getargspec(function)
if (args and args != ['self']) or varargs or keywords or defaults:
self.has_parameters = True
else:
self.has_parameters = False

def _parse(self):
self._parsed_data = {
'Signature': '',
'Summary': '',
'Extended Summary': [],
'Parameters': [],
'Other Parameters': [],
'Returns': [],
'Raises': [],
'Warns': [],
'Notes': [],
'References': '',
'Examples': '',
'index': {},
}
return NumpyDocString._parse(self)

def get_errors(self):
errors = NumpyDocString.get_errors(self)

if not self['Signature']:
# This check is currently too restrictive.
# Disabling it for now.
# errors.append("No function signature")
pass

if not self['Summary']:
errors.append("No function summary line")

if len(" ".join(self['Summary'])) > 3 * 80:
errors.append("Brief function summary is longer than 3 lines")

if not self['Parameters'] and self.has_parameters:
errors.append("No Parameters section")

return errors

class NumpyClassDocString(NumpyDocString):

def __init__(self, docstring, class_name, class_object):
super(NumpyClassDocString, self).__init__(docstring)
self.class_name = class_name
methods = dict((name, func) for name, func
in inspect.getmembers(class_object))

self.has_parameters = False
if '__init__' in methods:
# verify if __init__ is a Python function. If it isn't
# (e.g. the function is implemented in C), getargspec will fail
if not inspect.ismethod(methods['__init__']):
return
args, varargs, keywords, defaults = inspect.getargspec(
methods['__init__'])
if (args and args != ['self']) or varargs or keywords or defaults:
self.has_parameters = True

def _parse(self):
self._parsed_data = {
'Signature': '',
'Summary': '',
'Extended Summary': [],
'Parameters': [],
'Other Parameters': [],
'Raises': [],
'Warns': [],
'Notes': [],
'References': '',
'Examples': '',
'index': {},
'Attributes': [],
'Methods': [],
}
return NumpyDocString._parse(self)

def __str__(self):
out = []
out += self._str_signature()
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Attributes', 'Methods', 'Parameters', 'Raises',
'Warns'):
out += self._str_param_list(param_list)
out += self._str_see_also()
for s in ('Notes', 'References', 'Examples'):
out += self._str_section(s)
out += self._str_index()
return '\n'.join(out)

def get_errors(self):
errors = NumpyDocString.get_errors(self)
if not self['Parameters'] and self.has_parameters:
errors.append("%s class has no Parameters section"
% self.class_name)
return errors

class NumpyModuleDocString(NumpyDocString):

"""
Module doc strings: no parsing is done.

"""

def _parse(self):
self.out = []

def __str__(self):
return "\n".join(self._doc._str)

def get_errors(self):
errors = NumpyDocString.get_errors(self, check_order=False)
return errors

return text + '\n' + style * len(text) + '\n'

class SphinxDocString(NumpyDocString):
# string conversion routines

return ['**' + name + '**'] + [symbol * (len(name) + 4)]

def _str_indent(self, doc, indent=4):
out = []
for line in doc:
out += [' ' * indent + line]
return out

def _str_signature(self):
return ['``%s``' % self['Signature'].replace('*', '\\*')] + ['']

def _str_summary(self):
return self['Summary'] + ['']

def _str_extended_summary(self):
return self['Extended Summary'] + ['']

def _str_param_list(self, name):
out = []
if self[name]:
out += ['']
for param, param_type, desc in self[name]:
out += self._str_indent(['**%s** : %s' % (param, param_type)])
out += ['']
out += self._str_indent(desc, 8)
out += ['']
return out

def _str_section(self, name):
out = []
if self[name]:
out += ['']
content = self._str_indent(self[name])
out += content
out += ['']
return out

def _str_index(self):
idx = self['index']
out = []
out += ['.. index:: %s' % idx.get('default', '')]
for section, references in six.iteritems(idx):
if section == 'default':
continue
out += ['   :%s: %s' % (section, ', '.join(references))]
return out

def __str__(self, indent=0):
out = []
out += self._str_summary()
out += self._str_extended_summary()
for param_list in ('Parameters', 'Returns', 'Raises', 'Warns'):
out += self._str_param_list(param_list)
for s in ('Notes', 'References', 'Examples'):
out += self._str_section(s)
#        out += self._str_index()
out = self._str_indent(out, indent)
return '\n'.join(out)

class FunctionDoc(object):

def __init__(self, func):
self._f = func

def __str__(self):
out = ''
doclines = inspect.getdoc(self._f) or ''
try:
doc = SphinxDocString(doclines)
except Exception as e:
print('*' * 78)
print("ERROR: '%s' while parsing `%s`" % (e, self._f))
print('*' * 78)
# print "Docstring follows:"
# print doclines
# print '='*78
return out

if doc['Signature']:
out += '%s\n' % header('**%s**' %
doc['Signature'].replace('*', '\\*'), '-')
else:
try:
argspec = inspect.getargspec(self._f)
argspec = inspect.formatargspec(*argspec)
argspec = argspec.replace('*', '\\*')
out += header('%s%s' % (self._f.__name__, argspec), '-')
except TypeError as e:
out += '%s\n' % header('**%s()**' % self._f.__name__, '-')

out += str(doc)
return out

class ClassDoc(object):

def __init__(self, cls, modulename=''):
if not inspect.isclass(cls):
raise ValueError("Initialise using an object")
self._cls = cls

if modulename and not modulename.endswith('.'):
modulename += '.'
self._mod = modulename
self._name = cls.__name__

@property
def methods(self):
return [name for name, func in inspect.getmembers(self._cls)
if not name.startswith('_') and callable(func)]

def __str__(self):
out = ''

return '"' * (match.end() - match.start())

for m in self.methods:
print("Parsing `%s`" % m)
out += str(FunctionDoc(getattr(self._cls, m))) + '\n\n'
out += '.. index::\n   single: %s; %s\n\n' % (self._name, m)

return out

def handle_function(val, name):
func_errors = []
docstring = inspect.getdoc(val)
if docstring is None:
func_errors.append((name, '**missing** function-level docstring'))
else:
func_errors = [
(name, e) for e in
NumpyFunctionDocString(docstring, val).get_errors()
]
return func_errors

def handle_module(val, name):
module_errors = []
docstring = val
if docstring is None:
module_errors.append((name, '**missing** module-level docstring'))
else:
module_errors = [
(name, e) for e in NumpyModuleDocString(docstring).get_errors()
]
return module_errors

def handle_method(method, method_name, class_name):
method_errors = []

# Skip out-of-library inherited methods
module = inspect.getmodule(method)
if module is not None:
if not module.__name__.startswith('pylearn2'):
return method_errors

docstring = inspect.getdoc(method)
if docstring is None:
method_errors.append((class_name, method_name,
'**missing** method-level docstring'))
else:
method_errors = [
(class_name, method_name, e) for e in
NumpyFunctionDocString(docstring, method).get_errors()
]
return method_errors

def handle_class(val, class_name):
cls_errors = []
docstring = inspect.getdoc(val)
if docstring is None:
cls_errors.append((class_name,
'**missing** class-level docstring'))
else:
cls_errors = [
(e,) for e in
NumpyClassDocString(docstring, class_name, val).get_errors()
]
# Get public methods and parse their docstrings
methods = dict(((name, func) for name, func in inspect.getmembers(val)
if not name.startswith('_') and callable(func) and
type(func) is not type))
for m_name, method in six.iteritems(methods):
# skip error check if the method was inherited
# from a parent class (which means it wasn't
# defined in this source file)
if inspect.getmodule(method) is not None:
continue
cls_errors.extend(handle_method(method, m_name, class_name))
return cls_errors

def docstring_errors(filename, global_dict=None):
"""
Run a Python file, parse the docstrings of all the classes
and functions it declares, and return them.

Parameters
----------
filename : str
Filename of the module to run.

global_dict : dict, optional
Globals dictionary to pass along to `execfile()`.

Returns
-------
all_errors : list
Each entry of the list is a tuple, of length 2 or 3, with
format either

(func_or_class_name, docstring_error_description)
or
(class_name, method_name, docstring_error_description)
"""
if global_dict is None:
global_dict = {}
if '__file__' not in global_dict:
global_dict['__file__'] = filename
if '__doc__' not in global_dict:
global_dict['__doc__'] = None
try:
with open(filename) as f:
exec(code, global_dict)
except SystemExit:
pass
except SkipTest:
raise AssertionError("Couldn't verify format of " + filename +
"due to SkipTest")
all_errors = []
for key, val in six.iteritems(global_dict):
if not key.startswith('_'):
module_name = ""
if hasattr(inspect.getmodule(val), '__name__'):
module_name = inspect.getmodule(val).__name__
if (inspect.isfunction(val) or inspect.isclass(val)) and\
(inspect.getmodule(val) is None
or module_name == '__builtin__'):
if inspect.isfunction(val):
all_errors.extend(handle_function(val, key))
elif inspect.isclass(val):
all_errors.extend(handle_class(val, key))
elif key == '__doc__':
all_errors.extend(handle_module(val, key))
if all_errors:
all_errors.insert(0, ("%s:" % filename,))
return all_errors

if __name__ == "__main__":
all_errors = docstring_errors(sys.argv[1])
if len(all_errors) > 0:
print("*" * 30, "docstring errors", "*" * 30)
for line in all_errors:
print(':'.join(line))
sys.exit(int(len(all_errors) > 0))
```