Python source code of doc/grammar.py

Project: runa (GitHub Link)

runa-master
- Makefile
- .coveragerc
- tb.txt
- runac
  - liveness.py
  - typer.py
  - util.py
  - ast.py
  - blocks.py
  - specialize.py
  - codegen.py
  - pretty.py
  - destructor.py
  - __main__.py
  - escapes.py
  - __init__.py
  - parser.py
  - types.py
- LICENSE
- runa
- misc
  - runa.nanorc
- README.rst
- .travis.yml
- tests
  - main-type-arg-0.rns
  - ast-err.rns
  - for.out
  - oddeven.out
  - hello.out
  - opt-resolve.out
  - immutable-ref.err
  - method-arg-name.rns
  - self-type.err
  - immutable-ref.rns
  - file.out
  - type-diff.err
  - oddeven.rns
  - break.rns
  - named-args.rns
  - check-rtype.rns
  - bool-ops.rns
  - opt-resolve.rns
  - arith-int.rns
  - catch.out
  - cmp.rns
  - arith-int.out
  - multi-return.out
  - unhandled.rns
  - iter-obj.rns
  - float.out
  - unmatched.rns
  - retval.out
  - owner-reassign.out
  - method-select-fail.err
  - no-self.rns
  - mutable-owner.out
  - mutable-ref.rns
  - function.out
  - unmatched.err
  - err-escaping-owner.rns
  - no-self.err
  - opt-check.rns
  - while.rns
  - rtype.err
  - continue.rns
  - cmp.out
  - owner-after-pass.rns
  - cycle-typing.out
  - elem-proto.err
  - init-rtype.err
  - zero.rns
  - no-init.err
  - bool-ops.out
  - main-type-arg-1.rns
  - print-var.rns
  - retype.rns
  - pos-after-named.rns
  - opt-check.out
  - elem-proto.rns
  - no-init.rns
  - check-rtype.err
  - file.lng
  - main-type-arg-0.err
  - no-method.err
  - print-var.out
  - named-args.out
  - opt-use-attrib.rns
  - pretty.out
  - catch.rns
  - early-return-owner.out
  - init-rtype.rns
  - num-params.rns
  - half-defined.rns
  - yield-type.err
  - no-func.err
  - ternary.rns
  - inline-catch.out
  - zero.out
  - force-void.err
  - void-print.rns
  - inline-catch.rns
  - class.rns
  - if.out
  - non-type.rns
  - item-call.rns
  - opt-return.rns
  - opt-return.out
  - no-arg-type.rns
  - no-arg-call.rns
  - pass-ref-as-owner.err
  - for.rns
  - method-arg-name.err
  - cycle-typing.rns
  - no-compare.err
  - bitwise.out
  - owner-reassign.rns
  - pass-ref-as-owner.rns
  - no-arg-type.err
  - pos-after-named.err
  - break.out
  - multi-return.rns
  - yield-type.rns
  - item-call.err
  - owner-after-pass.err
  - no-method.rns
  - undefined.err
  - none.rns
  - force-void.rns
  - void-print.err
  - mutable-ref.out
  - retype.err
  - early-return-owner.rns
  - method-select-fail.rns
  - bitwise.rns
  - ast-err.err
  - opt-use-attrib.err
  - pretty.rns
  - main-type-r.err
  - continue.out
  - bool-ops-precedence.out
  - ternary.out
  - function.rns
  - bool-bool.rns
  - no-compare.rns
  - no-arg-call.out
  - class.out
  - str-ops.out
  - half-defined.err
  - no-func.rns
  - rtype.rns
  - num-params.err
  - const.rns
  - str-ops.rns
  - type-diff.rns
  - mutable-owner.rns
  - iter-obj.out
  - undefined.rns
  - hello.rns
  - unhandled.err
  - if.rns
  - while.out
  - none.out
  - bool-ops-precedence.rns
  - main-type-arg-1.err
  - const.out
  - non-type.err
  - main-type-r.rns
  - ternop-err.err
  - err-escaping-owner.err
  - ternop-err.rns
  - self-type.rns
  - float.rns
  - retval.rns
  - bool-bool.out
- core
  - personality.c
  - rt.ll
  - __builtins__.rns
  - unwind.h
- test.py
- llize
- .gitignore
- doc
  - hacking.rst
  - Makefile
  - notes.rst
  - refs.rst
  - overview.rst
  - _themes
    - runa
      - theme.conf
      - layout.html
      - searchbox.html
      - static
        - runa.css_t
  - grammar.py
  - index.rst
  - conf.py

import ast, _ast, sys, collections
sys.path.append('..')
from runac import parser

# Heading of the generated document; main() prints it between two rows of
# asterisks of the same length.
TITLE = 'Language grammar'
# Path of the parser's source file that get_rules() opens and parses.  It is
# relative, so it is resolved against the current working directory.
PARSER_FILE = '../runac/parser.py'

# Introductory paragraph that main() prints between the heading and the table.
INTRO = '''The table below (which is generated from the parser's source code)
can serve as a guide to Runa's grammar.
Code literals in rules represent regular expressions.
The special INDENT and DEDENT tokens are inserted by a secondary pass,
after the initial tokenization of source code;
they represent the increase and decrease of the indentation level.'''

def get_rules(parser_file=None):
	'''Collect the grammar productions declared in the parser's source code.

	The file is parsed with ``ast`` (it is never imported or executed).  Every
	top-level function is inspected for decorators of the form
	``<something>.production('<name> : <symbol> <symbol> ...')``; each such
	decorator contributes one expansion for ``<name>``.

	parser_file -- path of the Python source to scan; defaults to the
	               module-level PARSER_FILE.

	Returns an OrderedDict mapping each rule name to a list of tuples of
	symbol names, with rules and expansions in the order they appear in the
	source.

	Raises ValueError if a production string does not have the
	``name : symbols`` shape.
	'''
	if parser_file is None:
		parser_file = PARSER_FILE

	with open(parser_file) as f:
		src = f.read()

	rules = collections.OrderedDict()
	for node in ast.parse(src).body:

		if not isinstance(node, ast.FunctionDef):
			continue

		# A function may carry several decorators (including stacked
		# productions); look at each one instead of assuming exactly one.
		for decorator in node.decorator_list:

			if not isinstance(decorator, ast.Call):
				continue

			func = decorator.func
			if not isinstance(func, ast.Attribute):
				continue

			# Other attribute-call decorators are not grammar rules.
			if func.attr != 'production' or not decorator.args:
				continue

			# literal_eval accepts the string node itself and avoids the
			# deprecated ``.s`` attribute.
			spec = ast.literal_eval(decorator.args[0])

			# Split on whitespace so irregular spacing around the colon
			# and an empty right-hand side are both handled.
			parts = spec.split()
			if len(parts) < 2 or parts[1] != ':':
				raise ValueError('malformed production %r on line %i' % (
					spec, decorator.lineno,
				))

			rules.setdefault(parts[0], []).append(tuple(parts[2:]))

	return rules

def get_tokens():
	'''Map each token name used in the grammar to the text displayed for it.

	The mapping starts with INDENT and DEDENT (each mapped to its own name),
	then gets one entry per rule in ``parser.LEXER.rules`` (rule name ->
	the pattern of the rule's regular expression), and finally one entry
	per word in ``parser.NAME_LIKE`` (upper-cased word -> the word itself).
	Entries added later replace earlier ones that share a key.

	Returns the resulting dict.
	'''
	tokens = {'INDENT': 'INDENT', 'DEDENT': 'DEDENT'}
	tokens.update((rule.name, rule.re.pattern) for rule in parser.LEXER.rules)
	tokens.update((word.upper(), word) for word in parser.NAME_LIKE)
	return tokens

def _table_lines(rules, tokens):
	'''Render the rules as the text lines of a two-column grid table.

	rules  -- mapping of rule name to a list of expansions (tuples of symbols).
	tokens -- mapping of token name to the text shown for that token.

	A symbol that equals its own upper-cased form is treated as a token and
	replaced by its entry in ``tokens`` wrapped in double backquotes; other
	symbols are shown as they are.  The rule name appears only on the first
	expansion of each rule.  Returns a list of strings without newlines.
	'''
	rows, widths = [], [0, 0]
	for name, expands in rules.items():
		widths[0] = max(widths[0], len(name))
		for i, expand in enumerate(expands):
			bits = [
				'``' + tokens[s] + '``' if s.upper() == s else s
				for s in expand
			]
			defn = ' '.join(bits)
			widths[1] = max(widths[1], len(defn))
			rows.append((name if not i else '', defn))

	# Each column is as wide as its longest cell plus one space of padding
	# on either side.
	separator = '+' + '-' * (widths[0] + 2) + '+' + '-' * (widths[1] + 2) + '+'
	fmt = '| %%-%is | %%-%is |' % tuple(widths)

	out = []
	for row in rows:
		out.append(separator)
		out.append(fmt % row)
	out.append(separator)
	return out

def main():
	'''Write the grammar document to standard output.

	The output is the title framed by rows of asterisks, a blank line, the
	introduction, another blank line, and then the table built from
	get_rules() and get_tokens().
	'''
	rules, tokens = get_rules(), get_tokens()
	banner = '*' * len(TITLE)
	out = [banner, TITLE, banner, '', INTRO, '']
	out.extend(_table_lines(rules, tokens))
	# A single write avoids the print statement, so this block parses on both
	# Python 2 and Python 3 while emitting the same bytes.
	sys.stdout.write('\n'.join(out) + '\n')

# Produce the document only when this file is executed as a script.
if __name__ == '__main__':
	main()