# Python source code of readai

from nltk.parse import stanford
from nltk import Tree
import os
import sys
import getopt

# sys.path.append('~/Downloads/en') # put where you downloaded the nodebox/linguistics

import en

from nltk.parse import stanford

# Location of the unpacked Stanford parser distribution, for example
# '~/Downloads/stanford-parser-full-2015-04-20/'.  Both variables point at
# the current directory by default; presumably NLTK's Stanford wrapper reads
# them to find the jars and models -- confirm against the NLTK docs.
os.environ.update({
    'STANFORD_PARSER': '.',
    'STANFORD_MODELS': '.',
})

# Semantic map: label -> Node for every named entity created via get_node().
smap = dict()


class Node(list):
    """A vertex of the semantic map.

    The node *is* a list of ``(key, child)`` pairs kept in insertion order;
    ``key`` is the word linking this node to ``child``.  The key ``'.'`` is
    the one ``describe`` uses for a direct object / complement.
    """

    def __init__(self, label):
        # Word (or upper-cased noun phrase) this node stands for.
        self.label = label
        # First node this one was attached to; a DummyNode until set() runs.
        self.prev = DummyNode()

    def set(self, key, value):
        """Attach ``value`` under ``key``; the first parent becomes its ``prev``."""
        self.append((key, value))
        if isinstance(value.prev, DummyNode):
            value.prev = self

    def get(self, key):
        """Return the first child stored under ``key``, or a ``DummyNode``."""
        for k, v in self:
            if key == k:
                return v
        return DummyNode()

    @staticmethod
    def _edge_key(token):
        """Return the edge key under which a parse-tree token is looked up."""
        word = get_word(token)
        # Verbs are stored under their root form so tenses compare equal.
        if token.label() in ["VB", "VBD", "VBZ"]:
            word = get_root_word(word)
        return word

    def _complete_why(self):
        """Build the answer to a "why" question asked at this node.

        Returns the label "Answer unclear" when no ``because``/``since``
        clause (or no clause body) is attached, instead of failing.
        """
        reason = self.get('because')
        if isinstance(reason, DummyNode):
            reason = self.get('since')
        if isinstance(reason, DummyNode):
            return reason.label

        clause = reason.get('.')
        if isinstance(clause, DummyNode):
            return clause.label

        words = [reason.label]

        # Walk back from the clause to the entity (a label known to smap)
        # it was attached to, stopping if the chain of parents runs out.
        ancestor = clause.prev
        while not isinstance(ancestor, DummyNode) and ancestor.label not in smap:
            words.append(ancestor.label)
            ancestor = ancestor.prev
        if not isinstance(ancestor, DummyNode):
            words.append(ancestor.label)

        # Walk forward along first children until an entity or a leaf.
        node = clause
        while node.label not in smap and len(node) > 0:
            words.append(node.label)
            node = node[0][1]
        words.append(node.label)
        return ' '.join(words)

    def complete(self, tokens, qtype):
        """Follow ``tokens`` from this node and return the answer text.

        Args:
            tokens: remaining question tokens (parse-tree nodes exposing
                ``label()``); noun phrases are skipped.
            qtype: the question word(s); "why" (any case) selects the
                because/since lookup once all tokens are consumed.

        Returns:
            The answer string, "Answer unclear" when a token has no matching
            edge, or "Unsure" when nothing is stored below this node.
        """
        if len(tokens) == 0:
            if qtype.lower() == "why":
                return self._complete_why()

            if self.label in smap:
                return self.label
            direct = self.get('.')
            if not isinstance(direct, DummyNode):
                return direct.label
            if len(self) > 0:
                # No direct object: read out the first edge and keep going.
                return self[0][0] + " " + self[0][1].complete(tokens, qtype)
            return "Unsure"

        if tokens[0].label() == "NP":
            return self.complete(tokens[1:], qtype)

        token = self._edge_key(tokens[0])
        for k, v in self:
            if k == token:
                return v.complete(tokens[1:], qtype)
        return "Answer unclear"

    def matches(self, tokens):
        """Return True if the path described by ``tokens`` exists from here."""
        if len(tokens) == 0:
            return True

        if tokens[0].label() == "NP":
            direct = self.get('.')
            if not isinstance(direct, DummyNode):
                # The noun phrase is compared against the direct object.
                return direct.matches(tokens)
            if self.label != get_word(tokens[0]).upper():
                return False
            return self.matches(tokens[1:])

        token = self._edge_key(tokens[0])
        for k, v in self:
            if k == token:
                return v.matches(tokens[1:])
        return False


class DummyNode(Node):
    """Null object returned for missing edges; always falsy."""

    def __init__(self):
        # Deliberately does not call Node.__init__, which would create
        # another DummyNode and recurse without end.
        self.label = "Answer unclear"
        # Its own predecessor, so reading ``prev`` never fails.
        self.prev = self

    def get(self, key):
        """Every lookup on a dummy yields the dummy itself."""
        return self

    def __nonzero__(self):
        return False

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__


def get_word(tree):
    """Return the text covered by ``tree``.

    A ``Tree`` yields the words of all its leaves joined by single spaces;
    anything else (a leaf) is returned unchanged.
    """
    if not isinstance(tree, Tree):
        return tree
    return ' '.join([get_word(child) for child in tree])


def get_root_word(word):
    """Return the form of a verb used as an edge key in the semantic map.

    'is' and 'was' both map to 'is'; every other word is passed to
    ``en.verb.present``.
    """
    copulas = ('is', 'was')
    return 'is' if word in copulas else en.verb.present(word)


def get_node(label):
    """Return the shared ``Node`` registered in ``smap`` for ``label``.

    The node is created and registered on first use.
    """
    try:
        return smap[label]
    except KeyError:
        node = Node(label)
        smap[label] = node
        return node


def flatten_tree(tree):
    """Flatten a parse (sub)tree into a list of token subtrees.

    Noun phrases and subtrees whose first child is not a ``Tree`` are kept
    whole; everything else is expanded child by child.  An empty input
    gives an empty list.
    """
    if len(tree) == 0:
        return []

    if not isinstance(tree[0], Tree):
        # First child is a leaf: keep this subtree as a single token.
        return [tree]

    if isinstance(tree, Tree) and tree.label() == "NP":
        # Noun phrases are never split.
        return [tree]

    flat = []
    for child in tree:
        flat.extend(flatten_tree(child))
    return flat


def get_tokens(tokens):
    """Parse a whitespace-split bracket pattern into a nested list.

    The first and last entries of ``tokens`` (the enclosing brackets) are
    dropped.  Each top-level bare token is kept as a string and each
    parenthesised group becomes a nested list parsed the same way, e.g.
    ``'( S ( NP ) ( VP ) )'.split()`` -> ``['S', ['NP'], ['VP']]``.

    Unbalanced brackets are reported on standard output.  A stray ``)`` is
    skipped so that the tokens after it are still parsed.

    Args:
        tokens: list of strings, including the outer ``(`` and ``)``.

    Returns:
        The nested list described above.
    """
    inner = tokens[1:-1]
    parsed = []
    group_start = 0
    depth = 0
    for index, token in enumerate(inner):
        if token == "(":
            if depth == 0:
                group_start = index
            depth += 1
        elif token == ")":
            if depth == 0:
                # Closing bracket with nothing open: report it and carry on
                # rather than letting a negative depth swallow the rest.
                print("Bracket mismatch: " + str(inner))
                continue
            depth -= 1
            if depth == 0:
                parsed.append(get_tokens(inner[group_start:index + 1]))
        elif depth == 0:
            parsed.append(token)
    if depth != 0:
        print("Bracket mismatch: " + str(inner))
    return parsed


def matches(match_str, tree):
    """Return whether ``tree`` fits the bracket pattern ``match_str``.

    The pattern is split on whitespace, parsed with ``get_tokens`` and then
    compared against the tree by ``match_tokens``.
    """
    return match_tokens(get_tokens(match_str.split()), tree)


def match_tokens(tokens, tree):
    """Return True if ``tree`` fits the nested pattern ``tokens``.

    Args:
        tokens: pattern as produced by ``get_tokens``.  ``tokens[0]`` is the
            label spec: ``'.'`` matches any label, ``'A/B'`` matches label
            ``A`` or ``B``.  The following entries are sub-patterns for the
            leading children.  A trailing ``'$'`` requires the tree to have
            no children beyond those listed.
        tree: node exposing ``label()``, ``len()`` and iteration over its
            children.

    Returns:
        True when the label and every listed child match; an empty pattern
        matches anything.
    """
    if len(tokens) == 0:
        return True

    label_spec = tokens[0]
    # Compare by value: identity of string literals is an interpreter detail.
    if label_spec != '.' and tree.label() not in label_spec.split('/'):
        return False

    if tokens[-1] == '$':
        tokens = tokens[:-1]
        if len(tree) != len(tokens) - 1:
            return False

    if len(tree) < len(tokens) - 1:
        return False

    # zip stops after the last sub-pattern, so extra children are ignored.
    return all(match_tokens(sub, child)
               for sub, child in zip(tokens[1:], tree))

# Returns a (key, node) pair for each recognised phrase


def describe(tree):
    """Record what a parsed declarative sentence (or phrase) states.

    The shape of ``tree`` is compared against a fixed list of bracket
    patterns, tried in order; the first pattern that matches decides how the
    phrase is turned into ``Node`` objects.  Noun phrases resolve to shared
    nodes from ``get_node`` (and so end up in ``smap``); verbs, particles
    and clause markers get fresh nodes.

    Args:
        tree: an ``nltk.Tree``; a ``ROOT`` tree is unwrapped to its first
            child.

    Returns:
        A ``(key, node)`` tuple, where ``key`` is the word under which
        ``node`` should be attached to whatever precedes the phrase.

    Raises:
        ValueError: if ``tree`` is not a ``Tree`` or matches no pattern.
    """
    if not isinstance(tree, Tree):
        # Fail with the exception type callers already handle instead of
        # crashing on the missing label() method below.
        raise ValueError("ERROR reading non-tree value " + repr(tree))
    if tree.label() == "ROOT":
        return describe(tree[0])

    # SENTENCES
    if matches('( S ( NP ) ( VP ) )', tree):
        _, subject = describe(tree[0])
        action, action_node = describe(tree[1])

        subject.set(action, action_node)
        return action, action_node
    if matches('( S ( VP ) )', tree):
        return describe(tree[0])

    # NOUNS
    if matches('( NP )', tree):
        # Ex: The dog
        word = get_word(tree).upper()
        return word, get_node(word)

    # PREPOSITIONS
    if matches('( PP ( . ) ( NP ) )', tree):
        # to the mall
        # with her parents
        _, obj = describe(tree[1])
        prep = get_word(tree[0])

        return prep, obj
    if matches('( PRT )', tree):
        prt = get_word(tree)
        return prt, Node(prt)

    # VERBS
    # NOTE(review): 'VPZ' in the patterns below looks like a typo (VBZ is
    # already listed); the pattern strings are left untouched.
    if matches('( VP ( VBD ) ( VP ) $ )', tree):
        # Only the leading verb is recorded; the nested VP is not described.
        action = get_root_word(get_word(tree[0]))

        return action, Node(action)

    if matches('( VP ( VB/VBD ) $ )', tree):
        action = get_root_word(get_word(tree))
        return action, Node(action)

    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( PP ) )', tree):
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)
        prep, prep_node = describe(tree[1])
        action_node.set(prep, prep_node)
        return action, action_node

    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( PRT ) ( NP ) )', tree):
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)
        prt, prt_node = describe(tree[1])
        action_node.set(prt, prt_node)
        _, obj = describe(tree[2])
        prt_node.set('.', obj)
        return action, action_node

    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( NP ) )', tree):
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)

        # The noun phrase is the direct object, stored under '.'.
        _, obj = describe(tree[1])
        action_node.set('.', obj)

        if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( NP ) ( PP ) )', tree):
            # Assume rest is PP
            for pp_node in tree[2:]:
                prep, prep_node = describe(pp_node)
                action_node.set(prep, prep_node)

        if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( NP ) ( SBAR ) )', tree):
            # SBAR at end
            sbar, sbar_node = describe(tree[2])
            action_node.set(sbar, sbar_node)

        return action, action_node

    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG ) ( S ) )', tree):
        s, s_node = describe(tree[1])
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)

        action_node.set(s, s_node)
        return action, action_node

    if matches('( VP ( TO ) ( VP ) )', tree):
        to_node = Node('to')
        action, action_node = describe(tree[1])

        to_node.set(action, action_node)

        return 'to', to_node

    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( ADJP ) )', tree):
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)

        # Adjectives are shared nodes, registered in smap like nouns.
        adj = get_node(get_word(tree[1]))

        action_node.set('.', adj)
        return action, action_node
    if matches('( VP ( VB/VBZ/VBP/VPZ/VBD/VBG/VBN ) ( SBAR ) )', tree):
        action = get_root_word(get_word(tree[0]))
        action_node = Node(action)

        sbar, sbar_node = describe(tree[1])
        action_node.set(sbar, sbar_node)
        return action, action_node

    # SBAR
    if matches('( SBAR ( IN ) ( S ) )', tree):
        marker = get_word(tree[0])
        marker_node = Node(marker)
        _, s_node = describe(tree[1])

        marker_node.set('.', s_node)

        return marker, marker_node

    raise ValueError("ERROR reading " + str(tree))


def answer(tree):
    """Answer a parsed question from the semantic map.

    Args:
        tree: the parser's ROOT tree; its first child must be labelled
            ``SBARQ``.

    Returns:
        The answer string, or ``None`` (after printing an error) when the
        input is not a question or has an unsupported shape.
    """
    question = tree[0]
    if question.label() != "SBARQ":
        print("ERROR not a question: " + str(question))
        return None

    # What did Mary / Where did Mary ( ... )
    if matches('( SBARQ ( WHNP/WHADVP ) ( SQ ( VBZ/VBD/VBP ) ( NP ) ) )',
               question):
        qtype = get_word(question[0])
        subject = get_word(question[1][1]).upper()
        verb = get_root_word(get_word(question[1][0]))

        # Compare by value: identity of equal strings is not guaranteed.
        if verb == 'is':
            return get_node(subject).get('is').complete([], qtype)
        # Everything after the auxiliary and the subject is the path to
        # follow from the subject's node.
        tokens = flatten_tree(question[1][2:])
        return get_node(subject).complete(tokens, qtype)

    # What has blue eyes
    if matches('( SBARQ ( WHNP ) ( SQ ( VP/VBZ ) ) )', question):
        tokens = flatten_tree(question[1])
        objs = [obj for obj in smap if smap[obj].matches(tokens)]
        if not objs:
            return "Nothing"
        return ','.join(objs)

    print("ERROR answering")
    return None

def usage():
    """Print the one-line command synopsis to standard output."""
    print("Usage: %s [-d]" % sys.argv[0])

def main(argv):
    """Run the interactive read/answer loop.

    Each input line is parsed with the Stanford parser.  Questions (first
    child labelled ``SBARQ``) are answered from the semantic map; any other
    sentence is added to it.  The loop ends on the line ``stop`` or at end
    of input.

    Args:
        argv: command-line arguments without the program name.  ``-h`` /
            ``--help`` prints the usage and exits; ``-d`` / ``--debug``
            prints each parse tree and the semantic map after each
            statement.
    """
    debug = False

    try:
        opts, _ = getopt.getopt(argv, "hd", ["help", "debug"])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, _ in opts:
        # getopt reports long options with their leading "--".
        if opt in ("-h", "--help"):
            usage()
            sys.exit(0)
        if opt in ("-d", "--debug"):
            debug = True

    parser = stanford.StanfordParser()

    while True:
        try:
            line = raw_input("Enter line: ")
        except EOFError:
            # Closed or exhausted stdin ends the session like 'stop'.
            break
        if line == 'stop':
            break

        sent = list(parser.raw_parse(line))[0]
        if debug:
            print(sent)  # print parse tree
        if sent[0].label() == "SBARQ":
            print(answer(sent))
        else:
            try:
                describe(sent)
            except ValueError as e:
                # The exception must be converted before concatenation.
                print("Error describing sentence. " + str(e))
            if debug:
                print(smap)  # print semantic map


# Start the interactive loop only when run as a script, passing the
# command-line arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])

# Example:
"""
Mary went sledding
Where did Mary go? sledding

The boy played soccer with a ball
What did the boy play? soccer
What did the boy play soccer with? a ball

Mary went to the mall
Where did Mary go? to the mall
Where did Mary go to? the mall

Mary likes eating peanuts
What does Mary like eating? peanuts
What does Mary like? eating peanuts

Mary likes to eat peanuts
What does Mary like? To eat peanuts
What does Mary like to eat? peanuts

Mark likes to smoke
What does Mark like? to smoke

Blueberries are blue
What color are blueberries? blue

James ran because James was scared
Why did James run? because James was scared
"""