# #!/usr/bin/env python # # $Id: pythondoc.py 3271 2007-09-09 09:45:14Z fredrik $ # pythondoc documentation generator # # history: # 2003-10-19 fl first preview release (2.0a1) # 2003-10-19 fl fix HTML in descriptor tags, 1.5.2 tweaks, etc (2.0a2) # 2003-10-20 fl added encoding support, default HTML generator, etc (2.0a3) # 2003-10-21 fl fixed some 1.5.2 issues, etc (2.0b1) # 2003-10-22 fl HTML tweaks, pluggable output generators, etc (2.0b2) # 2003-10-23 fl fixed encoding, added @author, @version, @since etc # 2003-10-24 fl disable XML output by default # 2003-10-25 fl moved info properties into an 'info' element # 2003-10-26 fl expand wildcards on windows (2.0b3) # 2003-10-30 fl added support for RISC OS # 2003-10-31 fl (experimental) support module-level comments # 2003-11-01 fl minor HTML tweaks (2.0b4) # 2003-11-03 fl pythondoc 2.0 final # 2003-11-15 fl added support for inline @link/@linkplain tags (2.1b1) # 2003-11-20 fl fixed class attribute parsing bug # 2004-03-27 fl handle multiple single-line methods # 2004-09-01 fl support Python 2.4 decorators (2.1b2) # 2004-09-21 fl fixed output filename for "pythondoc ." # 2005-03-25 fl added docstring extraction for classes and methods (2.1b3) # 2005-06-18 fl fixed correct HTML output when using ElementTree 1.3 (2.1b4) # 2005-12-23 fl use xml.etree where available # 2006-04-04 fl refactored comment parser code; added -s support (2.1b5) # 2006-04-06 fl handle multiple params in docstrings correctly (2.1b6) # 2007-09-09 fl moved HTML parser into pythondoc module itself # # Copyright (c) 2002-2007 by Fredrik Lundh. # ## # This is the PythonDoc tool. This tool parses Python source files # and generates API descriptions in XML and HTML. # <p> # For more information on the PythonDoc tool and the markup format, see # <a href="http://effbot.org/zone/pythondoc.htm">the PythonDoc page</a> # at <a href="http://effbot.org/">effbot.org</a>. ## # -------------------------------------------------------------------- # Software License # -------------------------------------------------------------------- # # Copyright (c) 2002-2007 by Fredrik Lundh # # By obtaining, using, and/or copying this software and/or its # associated documentation, you agree that you have read, understood, # and will comply with the following terms and conditions: # # Permission to use, copy, modify, and distribute this software and # its associated documentation for any purpose and without fee is # hereby granted, provided that the above copyright notice appears in # all copies, and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # Secret Labs AB or the author not be used in advertising or publicity # pertaining to distribution of the software without specific, written # prior permission. # # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE # OF THIS SOFTWARE. # # -------------------------------------------------------------------- # to do in later releases: # # TODO: test this release under 1.5.2 ! # TODO: better rendering of constructors/package modules # TODO: check @param names against @def/define tags # TODO: support recursive parsing (-R) # TODO: warn for tags that doesn't make sense for a given target type # TODO: HTML output localization (the %s module, returns, raises, etc) # TODO: make compactHTML generate an element tree instead of raw HTML # # nice to have, maybe: # # IDEA: support multiple output handlers (multiple -O statements); # make -x an alias for -Oxml # IDEA: make pythondoc self-contained (include stub element implementation) VERSION_DATE = "2.1b7-20070909" VERSION = VERSION_DATE.split("-")[0] COPYRIGHT = "(c) 2002-2007 by Fredrik Lundh" # explicitly import site (for exemaker etc) import site # stuff we use in this module import glob, os, re, string, sys, tokenize # make sure elementtree is available try: try: import xml.etree.ElementTree as ET except ImportError: import elementtree.ElementTree as ET except ImportError: raise RuntimeError( "PythonDoc %s requires ElementTree 1.1 or later " "(available from http://effbot.org/downloads)." % VERSION ) # extension separator (not all systems use a period) try: EXTSEP = os.extsep except AttributeError: EXTSEP = "." ## # Debug level. The higher the value, the more junk you'll see on # standard output. # <p> # You can use the <b>-V</b> option to <b>pythondoc</b> to increase # the debug level. DEBUG = 0 ## # Whitespace tokens. These are ignored when the parser is scanning # for a subject. WHITESPACE_TOKEN = ( tokenize.NL, tokenize.NEWLINE, tokenize.DEDENT, tokenize.INDENT ) ## # Default encoding. To override this for a module, put a "coding" # directive in your Python module (see PEP 263 for details). ENCODING = "iso-8859-1" ## # Known tags. The parser generates warnings for tags that are not in # this list, but it still copies them to the XML infoset. TAGS = ( "def", "defreturn", "param", "keyparam", "return", "throws", "exception", # javadoc tags not used by the standard generator "author", "deprecated", "see", "since", "version" ) ## # (Helper) Combines filename prefix with extension part. # # @param prefix Filename prefix. # @param ext Extension string, including a leading period. The # period is replaced with a platform-specific separator, if # necessary. # @return The combined name. def joinext(prefix, ext): assert ext[0] == "." # require leading separator, to match os.path.splitext return prefix + EXTSEP + ext[1:] ## # (Helper) Extracts block tags from a PythonDoc comment. # # @param comment Comment text. # @return A list of (lineno, tag, text) tuples, where the tag is None # for the initial description. # @defreturn List of tuples. def gettags(comment): tags = [] tag = None tag_lineno = lineno = 0 tag_text = [] for line in comment: if line[:1] == "@": tags.append((tag_lineno, tag, string.join(tag_text, "\n"))) line = string.split(line, " ", 1) tag = line[0][1:] if len(line) > 1: tag_text = [line[1]] else: tag_text = [] tag_lineno = lineno else: tag_text.append(line) lineno = lineno + 1 tags.append((tag_lineno, tag, string.join(tag_text, "\n"))) return tags ## # (Helper) Flattens an element tree, returning only the text contents. # # @param elem An element tree. # @return A text string. # @defreturn String. def flatten(elem): text = elem.text or "" for e in elem: text += flatten(e) if e.tail: text += e.tail return text ## # (Helper) Extracts summary from a PythonDoc comment. This function # gets the first complete sentence from the description string. # # @param description An element containing the description. # @return A summary string. # @defreturn String. def getsummary(description): description = flatten(description) # extract the first sentence from the description m = re.search("(?s)(.+?\.)\s", description + " ") if m: return m.group(1) return description # sorry ## # (Helper) Parses HTML descriptor text into an XHTML structure. # # @param parser Parser instance (provides a warning method). # @param text Text fragment. # @return An element tree containing XHTML data. # @defreturn Element. def parsehtml(parser, tag, text, lineno): # transcode if parser.encoding != "ascii": try: text = unicode(text, parser.encoding) except NameError: pass # 1.5.2 # process inline links (@link, @linkplain) # note that links are replaced with <a href='link:...> elements; # the href's are resolved in a later step if "{" in text: def fixlink(m, parser=parser, lineno=lineno): linkdef = string.split(m.group(1), None, 2) if len(linkdef) == 2: # default text is same as last part of target name href = linkdef[1] if href[:1] == "#": href = href[1:] linkdef.append(string.split(href, ".")[-1]) if len(linkdef) < 3 or linkdef[0] not in ("@link", "@linkplain"): parser.warning( (lineno, 0), "malformed @link near this line", ) return m.group(0) type, href, text = linkdef href = "link:" + html_encode(href) if type == "@link": return "<a href='%s' class='link'><b>%s</b></a>" % (href, text) else: return "<a href='%s' class='linkplain'>%s</a>" % (href, text) text = re.sub("\{(@link[^}]+)\}", fixlink, text) if "<" not in text and "&" not in text: # plain text elem = ET.Element(tag) elem.text = string.strip(text) return elem p = HTMLTreeBuilder() ix = 0 try: p.feed("<%s>" % tag) p.feed("<p>") # make sure everything's wrapped in a paragraph tag # feed line by line for line in string.split(text, "\n"): p.feed(line + "\n") ix = ix + 1 p.feed("</%s>" % tag) tree = p.close() except: parser.warning( (lineno+ix, 0), "HTML parser error near this line (%s)", sys.exc_value ) return ET.Element("p") return tree # -------------------------------------------------------------------- # copied from ElementTree/HTMLTreeBuilder.py import htmlentitydefs AUTOCLOSE = "p", "li", "tr", "th", "td", "head", "body" IGNOREEND = "img", "hr", "meta", "link", "br" if sys.version[:3] == "1.5": is_not_ascii = re.compile(r"[\x80-\xff]").search # 1.5.2 else: is_not_ascii = re.compile(eval(r'u"[\u0080-\uffff]"')).search try: from HTMLParser import HTMLParser except ImportError: from sgmllib import SGMLParser # hack to use sgmllib's SGMLParser to emulate 2.2's HTMLParser class HTMLParser(SGMLParser): # the following only works as long as this class doesn't # provide any do, start, or end handlers def unknown_starttag(self, tag, attrs): self.handle_starttag(tag, attrs) def unknown_endtag(self, tag): self.handle_endtag(tag) ## # ElementTree builder for HTML source code. This builder converts an # HTML document or fragment to an ElementTree. # <p> # The parser is relatively picky, and requires balanced tags for most # elements. However, elements belonging to the following group are # automatically closed: P, LI, TR, TH, and TD. In addition, the # parser automatically inserts end tags immediately after the start # tag, and ignores any end tags for the following group: IMG, HR, # META, and LINK. # # @keyparam builder Optional builder object. If omitted, the parser # uses the standard <b>elementtree</b> builder. # @keyparam encoding Optional character encoding, if known. If omitted, # the parser looks for META tags inside the document. If no tags # are found, the parser defaults to ISO-8859-1. Note that if your # document uses a non-ASCII compatible encoding, you must decode # the document before parsing. class HTMLTreeBuilder(HTMLParser): def __init__(self, encoding=None): self.__stack = [] self.__builder = ET.TreeBuilder() self.encoding = encoding or "iso-8859-1" HTMLParser.__init__(self) ## # Flushes parser buffers, and return the root element. # # @return An Element instance. def close(self): HTMLParser.close(self) return self.__builder.close() ## # (Internal) Handles start tags. def handle_starttag(self, tag, attrs): if tag == "meta": # look for encoding directives http_equiv = content = None for k, v in attrs: if k == "http-equiv": http_equiv = string.lower(v) elif k == "content": content = v if http_equiv == "content-type" and content: # use mimetools to parse the http header import mimetools, StringIO header = mimetools.Message( StringIO.StringIO("%s: %s\n\n" % (http_equiv, content)) ) encoding = header.getparam("charset") if encoding: self.encoding = encoding if tag in AUTOCLOSE: if self.__stack and self.__stack[-1] == tag: self.handle_endtag(tag) self.__stack.append(tag) attrib = {} if attrs: for k, v in attrs: attrib[string.lower(k)] = v self.__builder.start(tag, attrib) if tag in IGNOREEND: self.__stack.pop() self.__builder.end(tag) ## # (Internal) Handles end tags. def handle_endtag(self, tag): if tag in IGNOREEND: return lasttag = self.__stack.pop() if tag != lasttag and lasttag in AUTOCLOSE: self.handle_endtag(lasttag) self.__builder.end(tag) ## # (Internal) Handles character references. def handle_charref(self, char): if char[:1] == "x": char = int(char[1:], 16) else: char = int(char) if 0 <= char < 128: self.__builder.data(chr(char)) else: self.__builder.data(unichr(char)) ## # (Internal) Handles entity references. def handle_entityref(self, name): entity = htmlentitydefs.entitydefs.get(name) if entity: if len(entity) == 1: entity = ord(entity) else: entity = int(entity[2:-1]) if 0 <= entity < 128: self.__builder.data(chr(entity)) else: self.__builder.data(unichr(entity)) else: self.unknown_entityref(name) ## # (Internal) Handles character data. def handle_data(self, data): if isinstance(data, type('')) and is_not_ascii(data): # convert to unicode, but only if necessary data = unicode(data, self.encoding, "ignore") self.__builder.data(data) ## # (Hook) Handles unknown entity references. The default action # is to ignore unknown entities. def unknown_entityref(self, name): pass # ignore by default; override if necessary # -------------------------------------------------------------------- ## # (Helper) Parses a PythonDoc comment into an PythonDoc info structure. # # @param parser Parser instance (provides a warning method). # @param lineno Line number where this comment starts. # @param comment A list of text line making up the comment. # @param dedent If true, strip leading whitespace from all comment # lines except the first one. # @return An element tree containing XHTML data. # @defreturn Element. def parsecomment(parser, lineno, comment, dedent=0): subject_info = ET.Element("info") # untabify for ix in range(len(comment)): comment[ix] = string.expandtabs(comment[ix]) if dedent: margin = None for ix in range(1, len(comment)): s = string.lstrip(comment[ix]) if not s: continue m = len(comment[ix]) - len(s) if margin is None: margin = m else: margin = min(m, margin) if margin: for ix in range(1, len(comment)): comment[ix] = comment[ix][margin:] for ix, tag, text in gettags(comment): pos = lineno + ix + 1, 0 # check tag name if tag is None: tag = "description" else: if tag not in TAGS: parser.warning( pos, "unknown tag in description: @%s", tag ) if tag in ("throws", "exception"): tag = "exception" # PythonDoc extension # deal with "named" tags if tag in ("param", "keyparam", "exception"): text = string.split(text, " ", 1) name = text[0] if len(text) > 1: text = string.lstrip(text[1]) else: text = "" else: name = None tag_elem = parsehtml(parser, tag, text, pos[0]) # generate summaries if tag == "description": summary = getsummary(tag_elem) if summary: elem = ET.SubElement(subject_info, "summary") elem.text = summary subject_info.append(tag_elem) if name: tag_elem.set("name", name) return subject_info ## # Module parser. # <p> # This class implements the PythonDoc source code scanner. It reads # source code from a file or a file-like object, and builds an element # tree with information about the module. # <p> # Note that the constructor only sets things up for parsing. Use the # {@link ModuleParser.parse} method to parse the file. Or for # convenience, use the {@link parse} function to create a parser # object and parse a given file. # # @param file Name of the module source file, or a file object. If a # file object is used, it must provide a <b>name</b> attribute and # a <b>readline</b> method. # @param prefix Optional name prefix. If given, this is prepended to # the module name. For example, if the prefix is set to "prefix" # and the module filename is "name.py", the module is assumed to # contain the "prefix.name" namespace. class ModuleParser: ## # Module name. name = None def __init__(self, file, prefix=None): if hasattr(file, "readline"): self.file = file self.filename = file.name else: self.file = None self.filename = file name = os.path.splitext(os.path.basename(self.filename))[0] if prefix and prefix != ".": name = prefix + "." + name self.name = name self.stack = [ ET.Element( "module", name=name, filename=self.filename ) ] self.indent = 0 self.scope = [] # list of (indent, tag, name, ...) tuples self.handler = self.look_for_encoding self.encoding = ENCODING ## # Parses the file. # # @keyparam docstring If true, look for markup in docstrings. # @return An element tree containing information about the module. # @defreturn Element. # @exception IOError If the file could not be opened. def parse(self, docstring=0): if self.file is None: file = open(self.filename) else: file = self.file try: tokenize.tokenize(file.readline, self.handle_token) except tokenize.TokenError, v: message, lineno = v self.warning(lineno, "exception in tokenizer: %s", message) if len(self.stack) != 1: pass # FIXME: print warning? tree = self.stack[0] # may be incomplete # fixup internal links # 1) find all named elements elems = {} for elem in tree.getiterator(): name = elem.get("name") if name: elems[name] = elem # 2) find all link anchors for elem in tree.getiterator("a"): href = elem.get("href") if href[:5] == "link:": # FIXME: add support for external links href = href[5:] if href[:1] == "#": href = href[1:] target = elems.get(self.name + "." + href) if target: href = "#" + target.get("name") + "-" + target.tag elem.set("href", href) if docstring: # look for markup in docstrings for info in tree.getiterator("info"): docstring = info.findtext("docstring") if not docstring: continue comment = docstring.split("\n") newinfo = parsecomment(self, 0, comment, dedent=1) for elem in info: if newinfo.find(elem.tag) is None: newinfo.append(elem) info[:] = newinfo return tree ## # Prints a warning message to standard output. # # @param position A (line, column) tuple. The column can be set # to None if not known (or not relevant). # @param format Message or format string. # @param *args Optional arguments. def warning(self, position, format, *args): line, column = position message = "%s:%d: WARNING: %s" % (self.filename, line, format % args) sys.stderr.write(message) sys.stderr.write("\n") ## # Dispatches tokens to the current handler. Each handler should # return the handler to call for the next token. # <p> # This method also handles indentation and dedentation tokens, # and manages the scope stack. def handle_token(self, *args): # dispatch incoming tokens to the current handler if DEBUG > 1: print self.handler.im_func.func_name, self.indent, print tokenize.tok_name[args[0]], repr(args[1]) if args[0] == tokenize.DEDENT: self.indent = self.indent - 1 while self.scope and self.scope[-1][0] >= self.indent: del self.scope[-1] del self.stack[-1] self.handler = apply(self.handler, args) if args[0] == tokenize.INDENT: self.indent = self.indent + 1 ## # (Token handler) Scans for encoding directive. def look_for_encoding(self, type, token, start, end, line): if type == tokenize.COMMENT: if string.rstrip(token) == "##": return self.look_for_pythondoc(type, token, start, end, line) m = re.search("coding[:=]\s*([-_.\w]+)", token) if m: self.encoding = m.group(1) return self.look_for_pythondoc if start[0] > 2: return self.look_for_pythondoc return self.look_for_encoding ## # (Token handler) Scans for PythonDoc comments. def look_for_pythondoc(self, type, token, start, end, line): if type == tokenize.COMMENT and string.rstrip(token) == "##": # found a comment: set things up for comment processing self.comment_start = start self.comment = [] return self.process_comment_body else: # deal with "bare" subjects if token == "def" or token == "class": self.subject_indent = self.indent self.subject_parens = 0 self.subject_start = self.comment_start = None self.subject = [] return self.process_subject(type, token, start, end, line) return self.look_for_pythondoc ## # (Token handler) Processes a comment body. This handler adds # comment lines to the current comment. def process_comment_body(self, type, token, start, end, line): if type == tokenize.COMMENT: if start[1] != self.comment_start[1]: self.warning( start, "comment line should be aligned with marker" ) line = string.rstrip(token) if line == "##": # handle module comments (experimental) # FIXME: add more consistency checks? if self.stack[0].find("info") is not None: self.warning( self.comment_start, "multiple module comments are not allowed" ) # FIXME: ignore additional comments? self.process_subject_info(None, self.stack[0]) return self.look_for_pythondoc elif line[:2] == "# ": line = line[2:] elif line[:1] == "#": line = line[1:] self.comment.append(line) else: if not self.comment: self.warning( self.comment_start, "found pythondoc marker but no comment body" ) return self.look_for_pythondoc self.subject_start = None self.subject = [] if type != tokenize.NL: return self.process_subject(type, token, start, end, line) return self.process_subject # end of comment return self.process_comment_body ## # (Token handler) Processes the comment subject. The subject can # be either a plain variable, or a function/method or class # definition. # <p> # This method is also used to process "bare" subjects; that is, # functions, methods, and classes that don't have PythonDoc # markup. In that case, the comment_start variable is set to # None. def process_subject(self, type, token, start, end, line): # got an item; deal with it if self.subject: # method/function/class definition definition = self.subject[0] in ("def", "class") if definition: if type not in WHITESPACE_TOKEN: if token == "(": self.subject_parens = self.subject_parens + 1 elif token == ")": self.subject_parens = self.subject_parens - 1 if self.subject_parens or token != ":": self.subject.append(token) return self.process_subject else: # simple assignment if token != "=": self.warning( self.subject_start, "bad subject %s; ignoring description", repr(self.subject[0]) ) # might be a pythondoc marker; pass it to the scanner return self.look_for_pythondoc( type, token, start, end, line ) # FIXME: keep adding stuff until end of expression else: if type in WHITESPACE_TOKEN: return self.process_subject if type == tokenize.COMMENT: self.warning( start, "comment between description and subject; " + "ignoring description" ) # might be a pythondoc marker; pass it to the scanner return self.look_for_pythondoc( type, token, start, end, line ) # FIXME: check token type! # the @ token type is currently tokenize.ERRORTOKEN; hopefully # this will change before 2.4 final if token == "@": self.decorator_parens = 0 return self.skip_decorator self.subject_start = start self.subject.append(token) if token in ("def", "class"): # handle single-line subjects while self.scope and self.scope[-1][0] >= self.indent: self.scope.pop() self.stack.pop() self.subject_indent = self.indent self.subject_parens = 0 return self.process_subject # check if this is a method or a function method = self.scope and self.scope[-1][1] == "class" # calculate fully qualified subject name name = [self.name] for s in self.scope: name.append(s[2]) if definition: name.append(self.subject[1]) else: name.append(self.subject[0]) # calculate subject definition statement statement = [] for part in self.subject: if part in ("class", "def"): continue statement.append(part) if part == ",": statement.append(" ") if self.subject[0] == "def" and method: # ignore the first argument for methods # 'name', '(', 'self', ',', ' ', ...) del statement[2:min(5, len(statement)-1)] statement = string.join(statement, "") # create subject element if self.subject[0] == "class": subject_elem = ET.Element("class") elif self.subject[0] == "def": if method: subject_elem = ET.Element("method") else: subject_elem = ET.Element("function") else: subject_elem = ET.Element("variable") self.stack[-1].append(subject_elem) # add new subject to the scope and element stacks if definition: self.scope.append((self.subject_indent,) + tuple(self.subject)) self.stack.append(subject_elem) subject_info = self.process_subject_info(name, subject_elem) # add local name to info elem = ET.Element("name") elem.text = name[-1] subject_info.insert(0, elem) name = string.join(name, ".") subject_elem.set("name", name) subject_elem.set("lineno", str(self.subject_start[0])) if subject_info.find("def") is None and statement: # add subject definition (unless specified in comment) elem = ET.Element("def") elem.text = statement # add to front, to make the XML easier to read subject_info.insert(0, elem) if definition: return self.look_for_docstring(type, token, start, end, line) else: return self.look_for_pythondoc(type, token, start, end, line) ## # (Token handler) Skips a decorator. def skip_decorator(self, type, token, start, end, line): if token == "(": self.decorator_parens = self.decorator_parens + 1 elif token == ")": self.decorator_parens = self.decorator_parens - 1 if self.decorator_parens or type != tokenize.NEWLINE: return self.skip_decorator return self.process_subject ## # (Token handler helper) Processes a PythonDoc comment. This # method creates an "info" element based on the current comment, # and attaches it to the current subject element. # # @param subject_name Subject name (or None if the name is not known). # @param subject_elem The current subject element. # @return The info element. Note that this element has already # been attached to the subject element. # @defreturn Element def process_subject_info(self, subject_name, subject_elem): # process pythondoc comment (if any) if self.comment_start: subject_info = parsecomment( self, self.comment_start[0], self.comment ) else: subject_info = ET.Element("info") subject_elem.append(subject_info) if DEBUG: if subject_name: subject_name = string.join(subject_name, ".") else: subject_name = "<module>" print "---", subject_name prefix = " " * len(self.scope) for line in self.comment: print prefix + line return subject_info ## # (Token handler) Look for docstring inside a definition. def look_for_docstring(self, type, token, start, end, line): if type in WHITESPACE_TOKEN or token == ":": return self.look_for_docstring if type == tokenize.STRING: subject_elem = self.stack[-1] subject_info = subject_elem.find("info") if subject_info is not None: elem = ET.SubElement(subject_info, "docstring") # FIXME: add string sanity check here, before doing eval elem.text = eval(token) return self.skip_subject_body(type, token, start, end, line) ## # (Token handler) Skips over the subject body. def skip_subject_body(self, type, token, start, end, line): # for now, just hand control back to the pythondoc scanner, # and let it skip over the subject body while looking for the # next marker. return self.look_for_pythondoc(type, token, start, end, line) ## # Parses a module. # <p> # This function creates a {@link #ModuleParser} instance, and uses it # to parse the given file. For details, see {@linkplain #ModuleParser # the <b>ModuleParser</b> documentation}. # # @param file Name of the module source file, or a file object. # @param prefix Optional name prefix. # @keyparam docstring If true, look for markup in docstrings. # @return An element tree containing the module description. # @defreturn Element. # @exception IOError If the file could not be found, or could not # be opened for reading. def parse(file, prefix=None, docstring=0): m = ModuleParser(file, prefix) return m.parse(docstring=docstring) # -------------------------------------------------------------------- # default formatter if sys.version[:3] == "1.5": _escape = re.compile(r"[&<>\"\x80-\xff]") # 1.5.2 else: _escape = re.compile(eval(r'u"[&<>\"\u0080-\uffff]"')) _escape_map = { "&": "&", "<": "<", ">": ">", '"': """, } ## # Encodes reserved HTML characters and non-ASCII characters as HTML # character references. # # @def html_encode(text) # @param text Source text. # @return An encoded string. def html_encode(text, pattern=_escape): if not text: return "" def escape_entities(m, map=_escape_map): char = m.group() text = map.get(char) if text is None: text = "&#%d;" % ord(char) return text text = pattern.sub(escape_entities, text) try: return text.encode("ascii") except AttributeError: return text # 1.5.2 ## # Compact HTML formatter. This formatter turns a module XML # description into a minimal HTML document. # <p> # This formatter supports the following options: # </p> # <dl> # <dt><b>-Dstyle</b>=URL</dt> # <dd>Stylesheet URL. If this option is present, the formatter adds a # stylesheet <link> element to the HTML output.</dd> # <dt><b>-Dzone</b></dt> # <dd>Generate effbot.org zone documents.</dd> # </dl> # # @param options Options dictionary. class CompactHTML: def __init__(self, options=None): self.options = options or {} ## # Writes an element containing some text (plain or formatted). # # @param elem Element. # @param compact If true, try to minimize the amount of vertical # padding. def writetext(self, elem, compact=0): if len(elem): if compact and len(elem) == 1 and elem[0].tag == "p": elem = elem[0] self.file.write(html_encode(elem.text)) for e in elem: ET.ElementTree(e).write(self.file) self.file.write(html_encode(elem.tail)) else: for e in elem: ET.ElementTree(e).write(self.file) elif elem is not None and elem.text: if compact: self.file.write(html_encode(elem.text)) else: self.file.write("<p>%s</p>\n" % html_encode(elem.text)) ## # Writes an object description (the description text, parameters, # return values) etc. # # @param object The object element. # @param summary If true, use summary instead of full description. def writeobject(self, object, summary=0): name = object.get("name") info = object.find("info") localname = string.split(name, ".")[-1] define = info.findtext("def") anchor = html_encode(name + "-" + object.tag) if object.tag == "class": # look for the constructor for obj in object: if obj.get("name") == name + ".__init__": inf = obj.find("info") define = string.split(inf.findtext("def"), "(", 1) define = localname + "(" + define[1] self.file.write( "<dt><b>%s</b> (class)" % html_encode(define) ) break else: self.file.write( "<dt><b>%s</b> (class) " % html_encode(localname) ) elif object.tag == "variable": self.file.write( "<dt><a id='%s' name='%s'><b>%s</b></a> (variable)" % ( anchor, anchor, html_encode(localname) ) ) else: self.file.write( "<dt><a id='%s' name='%s'><b>%s</b></a>" % ( anchor, anchor, html_encode(define) ) ) if object.tag == "function" or object.tag == "method": defreturn = info.find("defreturn") if defreturn is not None: defreturn = flatten(defreturn) self.file.write(" ⇒ %s" % html_encode(defreturn)) self.file.write(" [<a href='#%s'>#</a>]</dt>\n" % anchor) self.file.write("<dd>\n") if summary: text = info.findtext("summary") if text: self.file.write("<p>%s</p>\n" % html_encode(text)) else: self.writetext(info.find("description")) param = info.findall("param") keyparam = info.findall("keyparam") exception = info.findall("exception") return_elem = info.find("return") if param or keyparam or exception or return_elem != None: self.file.write("<dl>\n") for p in param + keyparam: name = p.get("name") if p.tag == "keyparam": name = name + "=" self.file.write("<dt><i>%s</i></dt>\n" % html_encode(name)) self.file.write("<dd>\n") self.writetext(p, compact=1) self.file.write("</dd>\n") if return_elem is not None: self.file.write("<dt>Returns:</dt>\n") self.file.write("<dd>\n") self.writetext(return_elem, compact=1) self.file.write("</dd>\n") for e in exception: name = html_encode(e.get("name")) self.file.write("<dt>Raises <b>%s</b>:</dt>" % name) self.file.write("<dd>\n") self.writetext(e, compact=1) self.file.write("</dd>\n") self.file.write("</dl><br />\n") if object.tag == "class" and summary: self.file.write( "<p>For more information about this class, see " "<a href='#%s'><i>The %s Class</i></a>.</p>\n" % ( anchor, localname ) ) self.file.write("</dd>\n") ## # Writes a module description to file. # # @param module A module element tree, as returned by {@link # ModuleParser.parse}. # @param file Output file name (minus extension). # @return If successful, the output filename used to store the # module. # @defreturn String or None. def save(self, module, file): title = "The %s Module" % module.get("name") zone = self.options.has_key("zone") if zone: filename = joinext(file, ".txt") else: filename = joinext(file, ".html") self.file = open(filename, "w") if zone: # generate zone document self.file.write(title + "\n\n") else: self.file.write( "<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' " "'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>\n" ) self.file.write("<html>\n<head>\n") self.file.write( "<meta http-equiv='Content-Type' " "content='text/html; charset=us-ascii' />\n" ) self.file.write("<title>%s</title>\n" % html_encode(title)) try: style = self.options["style"] except KeyError: pass else: self.file.write( "<link rel='stylesheet' href='%s' type='text/css' />\n" % html_encode(style) ) self.file.write("</head>\n<body>\n") self.file.write("<h1>%s</h1>\n" % title) # 0) module comments info = module.find("info") if info is not None: self.writetext(info.find("description")) self.file.write("<h2>Module Contents</h2>\n") # 1) toplevel subjects (including class overviews) objects = [] for object in module: info = object.find("info") if info is None or info.find("description") is None: continue if object.tag in ("variable", "function", "class"): objects.append(object) objects.sort(lambda a, b: cmp( string.lower(string.split(a.get("name"), ".")[-1]), string.lower(string.split(b.get("name"), ".")[-1]) )) self.file.write("<dl>\n") for object in objects: self.writeobject(object, object.tag == "class") self.file.write("</dl>\n") # 2) class descriptions for object in objects: if object.tag != "class": continue name = object.get("name") localname = string.split(name, ".")[-1] anchor = name + "-class" self.file.write( "<h2><a id='%s' name='%s'>The %s Class</a></h2>\n" % ( anchor, anchor, localname ) ) self.file.write("<dl>\n") self.writeobject(object) objects = [] for object in object: info = object.find("info") if info is None or info.find("description") is None: continue if object.tag not in ("method", "variable"): continue objects.append(object) objects.sort(lambda a, b: cmp( string.lower(string.split(a.get("name"), ".")[-1]), string.lower(string.split(b.get("name"), ".")[-1]) )) for object in objects: if object.tag == "variable": object.tag = "attribute" self.writeobject(object) if object.tag == "attribute": object.tag = "variable" self.file.write("</dl>\n") if not zone: self.file.write("</body></html>\n") self.file.close() self.file = None return filename ## # Prints a usage message and exits. def usage(): print "PythonDoc", VERSION, COPYRIGHT print print "Usage:" print print " pythondoc [options] files..." print print "where the files can be either python modules or package" print "directories." print print "Options:" print print " -p prefix Prepend given prefix to symbol names." print " -f Generate output also for files without descriptions." print " -x Generate XML output (pythondoc infosets)." print print " -s Look for markup in docstrings (experimental)." print print "Output options:" print print " -O format Use given output format handler." print " -D name Define output variable." print " -D name=text Set output variable to given text." print print "For more information on PythonDoc and the PythonDoc comment syntax," print "see http://effbot.org/zone/pythondoc.htm" sys.exit(1) if __name__ == "__main__": import getopt try: opts, args = getopt.getopt(sys.argv[1:], "D:fO:p:Vsx") except getopt.error: usage() force = 0 prefix = None docstring = 0 output_xml = 0 output_handler = CompactHTML output_options = {} for k, v in opts: if k == "-f": force = 1 elif k == "-p": prefix = v elif k == "-s": docstring = 1 elif k == "-x": output_xml = 1 elif k == "-O": try: m = __import__(v) for k in string.split(v, ".")[1:]: m = getattr(m, k) output_handler = getattr(m, "PythonDocGenerator") except (ImportError, AttributeError): print "cannot find/load", repr(v), "generator" sys.exit(1) elif k == "-D": try: k, v = string.split(v, "=", 1) except ValueError: k = v; v = None output_options[k] = v elif k == "-V": DEBUG = DEBUG + 1 if not args: usage() # instantiate output handler output_handler = output_handler(output_options) # check if handler supports custom tags try: TAGS = TAGS + output_handler.tags except AttributeError: pass import time t0 = time.time() input = output = 0 for filename in args: this_prefix = prefix if os.path.isdir(filename): # FIXME: explicitly check if this is a package? files = glob.glob(os.path.join(filename, joinext("*", ".py"))) if not this_prefix: this_prefix = os.path.basename(filename) else: if sys.platform == "win32" and glob.has_magic(filename): files = glob.glob(filename) else: files = [filename] files.sort() for file in files: try: module = parse(file, this_prefix, docstring=docstring) except IOError, v: sys.stderr.write("%s error: %s\n" % (file, v[1])) continue input = input + 1 # check if any toplevel object has a description if not force: for n in module: i = n.find("info") if i and i.find("description") is not None: break else: continue # no documented subjects f = "pythondoc-" + string.replace(module.get("name"), ".", EXTSEP) if output_xml: # generate XML filename = joinext(f, ".xml") try: out = open(filename, "w") ET.ElementTree(module).write(out) out.close() except IOError, v: sys.stderr.write("%s error: %s\n" % (filename, v[1])) else: sys.stderr.write("%s ok\n" % filename) # generate output try: out = output_handler.save(module, f) except IOError, v: sys.stderr.write("%s error: %s\n" % (file, v[1])) else: if out: sys.stderr.write("%s ok\n" % out) output = output + 1 # flush output handler try: done = output_handler.done except AttributeError: pass else: out = output_handler.done() if out: sys.stderr.write("%s ok\n" % out) if DEBUG: sys.stderr.write( "%d files parsed, %d descriptions generated, in %.2f seconds\n" % ( input, output, time.time() - t0 ))