# -*- coding: utf-8 -*-
import nltk
from nltk.tree import *
from nltk.parse import stanford
import nltk.data
import nltk.draw
import os

os.environ['STANFORD_PARSER'] = ''
os.environ['STANFORD_MODELS'] = ''


class SVO(object):
    """
    Class Methods to Extract Subject Verb Object Tuples from a Sentence
    """

    def __init__(self):
        """
        Initialize the SVO Methods
        """
        self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
        self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
        self.adjective_types = ["JJ", "JJR", "JJS"]
        self.pred_verb_phrase_siblings = None
        self.parser = stanford.StanfordParser()
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')

    def get_attributes(self, node, parent_node, parent_node_siblings):
        """
        returns the Attributes for a Node
        """

    def get_subject(self, sub_tree):
        """
        Returns the Subject and all attributes for a subject, sub_tree is a Noun Phrase

        :param sub_tree:
        :return:
        """
        sub_nodes = sub_tree.subtrees()
        sub_nodes = [each for each in sub_nodes if each.pos()]
        subject = None

        for each in sub_nodes:

            if each.label() in self.noun_types:
                subject = each.leaves()
                break

        return {'subject': subject}

    def get_object(self, sub_tree):
        """
        Returns an Object with all attributes of an object
        """
        siblings = self.pred_verb_phrase_siblings
        Object = None
        for each_tree in sub_tree:
            if each_tree.label() in ["NP", "PP"]:
                sub_nodes = each_tree.subtrees()
                sub_nodes = [each for each in sub_nodes if each.pos()]

                for each in sub_nodes:
                    if each.label() in self.noun_types:
                        Object = each.leaves()
                        break
                break
            else:
                sub_nodes = each_tree.subtrees()
                sub_nodes = [each for each in sub_nodes if each.pos()]
                for each in sub_nodes:
                    if each.label() in self.adjective_types:
                        Object = each.leaves()
                        break
                # Get first noun in the tree
        self.pred_verb_phrase_siblings = None
        return {'object': Object}

    def get_predicate(self, sub_tree):
        """
        Returns the Verb along with its attributes, Also returns a Verb Phrase

        :param sub_tree:
        :return:
        """

        sub_nodes = sub_tree.subtrees()
        sub_nodes = [each for each in sub_nodes if each.pos()]
        predicate = None
        sub_tree = ParentedTree.convert(sub_tree)
        for each in sub_nodes:
            if each.label() in self.verb_types:
                sub_tree = each
                predicate = each.leaves()

        # get all predicate_verb_phrase_siblings to be able to get the object
        sub_tree = ParentedTree.convert(sub_tree)
        if predicate:
            pred_verb_phrase_siblings = self.tree_root.subtrees()
            pred_verb_phrase_siblings = [each for each in pred_verb_phrase_siblings if
                                         each.label() in ["NP", "PP", "ADJP", "ADVP"]]
            self.pred_verb_phrase_siblings = pred_verb_phrase_siblings

        return {'predicate': predicate}

    def process_parse_tree(self, parse_tree):
        """
        Returns the Subject-Verb-Object Representation of a Parse Tree.
        Can Vary depending on number of 'sub-sentences' in a Parse Tree
        """
        self.tree_root = parse_tree
        # Step 1 - Extract all the parse trees that start with 'S'
        output_list = []
        output_dict = {}

        for idx, subtree in enumerate(parse_tree[0].subtrees()):
            subject = None
            predicate = None
            Object = None
            if subtree.label() in ["S", "SQ", "SBAR", "SBARQ", "SINV", "FRAG"]:
                children_list = subtree
                children_values = [each_child.label() for each_child in children_list]
                children_dict = dict(zip(children_values, children_list))

                # Extract Subject, Verb-Phrase, Objects from Sentence sub-trees
                if children_dict.get("NP") is not None:
                    subject = self.get_subject(children_dict["NP"])

                if children_dict.get("VP") is not None:
                    # Extract Verb and Object
                    # i+=1
                    # """
                    # if i==1:
                    #    pdb.set_trace()
                    # """
                    predicate = self.get_predicate(children_dict["VP"])
                    Object = self.get_object(children_dict["VP"])

                try:
                    if subject['subject'] and predicate['predicate'] and Object['object']:
                        output_dict['subject'] = subject['subject']
                        output_dict['predicate'] = predicate['predicate']
                        output_dict['object'] = Object['object']
                        output_list.append(output_dict)
                except Exception as e:
                    print(e)
                    continue

        return output_list

    def traverse(self, t):
        try:
            t.label()
        except AttributeError:
            print(t)
        else:
            # Now we know that t.node is defined
            print('(', t.label())
            for child in t:
                self.traverse(child)

            print(')')

    def sentence_split(self, text):
        """
        returns the Parse Tree of a Sample
        """
        sentences = self.sent_detector.tokenize(text)
        return sentences

    def get_parse_tree(self, sentence):
        """
        returns the Parse Tree of a Sample
        """
        parse_tree = self.parser.raw_parse(sentence)

        return parse_tree

    def List_To_Tree(self, lst):
        if (not isinstance(lst, basestring)):
            if (len(lst) == 2 and isinstance(lst[0], str) and isinstance(lst[1], str)):
                lst = Tree(str(lst[0]).split('+')[0], [str(lst[1])])
            elif (isinstance(lst[0], str) and not isinstance(lst[1], str)):
                lst = Tree(str(lst[0]).split('+')[0], map(self.List_To_Tree, lst[1: len(lst)]))
        return lst


if __name__ == "__main__":
    svo = SVO()
    sentence = "Andreas loves soccer. He is also very good at it. Barack Obama likes the legislation"
    sentences = svo.sentence_split(sentence)
    val = []
    for sent in sentences:
        root_tree = svo.get_parse_tree(sent)
        val.append(svo.process_parse_tree(next(root_tree)))

    print(val)