Python nltk.parse.stanford.StanfordParser() Examples

The following are 8 code examples of nltk.parse.stanford.StanfordParser(), drawn from open-source projects. Each example lists its source project, author, file, and license. You may also want to check out all available functions and classes of the module nltk.parse.stanford.
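
Before the project examples, here is a minimal usage sketch. It is not taken from any of the projects below; the CLASSPATH directory is a placeholder to adjust to your local Stanford Parser install. Note that several of the projects below are written in Python 2, and that nltk.parse.stanford.StanfordParser has since been deprecated in NLTK in favor of the CoreNLP-based nltk.parse.corenlp.CoreNLPParser.

import os
from nltk.parse import stanford

# Placeholder path: point CLASSPATH at the directory that holds
# stanford-parser.jar and the accompanying models jar.
os.environ['CLASSPATH'] = '/path/to/stanford-parser-full'

parser = stanford.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

# raw_parse yields nltk.Tree parses; take the top one.
tree = list(parser.raw_parse('The quick brown fox jumps over the lazy dog.'))[0]
tree.pretty_print()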
Example #1
Source Project: acl2017-interactive_summarizer   Author: UKPLab   File: corpus_cleaner.py    License: Apache License 2.0
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
        self.datasets_path = datasets_path
        self.corpus_name = corpus_name
        self.corpus_path = path.join(datasets_path, corpus_name)
        self.docs_path = path.join(self.corpus_path, "docs")
        self.topics_file = path.join(self.corpus_path, "topics.xml")
        self.models_path = path.join(self.corpus_path, "models")
        self.smodels_path = path.join(self.corpus_path, "smodels")
        self.jar_path = path.join(PROJECT_PATH, "summarizer", "jars")
        os.environ['CLASSPATH'] = self.jar_path  # let NLTK locate the bundled Stanford jars
        self.cleaned_path = path.join(datasets_path, "processed")

        if parse_type == 'parse':
            if lang == 'english':
                self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
            if lang == 'german':
                self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
                # self.cleaned_path = path.join(datasets_path, "processed.parse")
        if parse_type == 'props':  # TODO
            if lang == 'english':
                self.props_parser = ClausIE.get_instance()
            if lang == 'german':
                self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path)) 
Example #2
Source Project: py-nltk-svo   Author: klintan   File: svo.py    License: MIT License
def __init__(self):
        """
        Initialize the SVO Methods
        """
        self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
        self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
        self.adjective_types = ["JJ", "JJR", "JJS"]
        self.pred_verb_phrase_siblings = None
        self.parser = stanford.StanfordParser()  # jar locations resolved via the CLASSPATH / STANFORD_PARSER environment variables
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')  # pre-trained Punkt sentence tokenizer
Example #3
Source Project: readAI   Author: ayoungprogrammer   File: readai.py    License: GNU General Public License v2.0
def main(argv):

    debug = False

    try:
        opts, args = getopt.getopt(argv, "hd",["help","debug"])
    except getopt.GetoptError as e:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ["-h", "help"]:
            usage()
            sys.exit(2)
        if opt in ["-d", "debug"]:
            debug = True

    parser = stanford.StanfordParser()

    line = raw_input("Enter line: ")

    while line != 'stop':
        sent = list(parser.raw_parse(line))[0]
        if debug:
            print sent # print parse tree
        if sent[0].label() == "SBARQ":
            print answer(sent)
        else:
            try:
                describe(sent)
            except ValueError as e:
                print "Error describing sentence. " + e
            if debug:
                print smap  # semantic map; presumably a module-level global in the original readai.py
        line = raw_input("Enter line: ") 
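
Here sent is the best parse of the input line; its root node is labelled ROOT, so sent[0].label() reads the Penn Treebank tag of the top constituent. SBARQ marks a direct question, which is routed to answer(), while anything else is handed to describe().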
Example #4
Source Project: RDF-Triple-API   Author: tdpetrou   File: rdf_triple.py    License: MIT License
def clear_data(self):
        # Hard-coded local model path; adjust this to your own Stanford Parser install.
        self.parser = stanford.StanfordParser(model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
        self.first_NP = ''
        self.first_VP = ''
        self.parse_tree = None
        self.subject = RDF_Triple.RDF_SOP('subject')
        self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
        self.Object = RDF_Triple.RDF_SOP('object') 
Example #5
Source Project: StrepHit   Author: Wikidata   File: extract_sentences.py    License: GNU General Public License v3.0
def setup_extractor(self):
        self.splitter = PunktSentenceSplitter(self.language)
        self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                     path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                     java_options=' -mx2G -Djava.ext.dirs=dev/')

        self.token_to_lemma = {}
        for lemma, tokens in self.lemma_to_token.iteritems():
            for t in tokens:
                self.token_to_lemma[t] = lemma
        self.all_verbs = set(self.token_to_lemma.keys()) 
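
Unlike Example #1, which relies on the CLASSPATH environment variable, StrepHit hands the parser its jar locations explicitly through path_to_jar and path_to_models_jar, and raises the JVM heap through java_options (-mx2G is the short form of -Xmx2G).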
Example #6
Source Project: StrepHit   Author: Wikidata   File: compute_lu_distribution.py    License: GNU General Public License v3.0
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence
    """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    all_verbs = reduce(lambda x, y: x.union(y), imap(set, json.load(verbs).values()), set())
    all_verbs.discard('be')
    all_verbs.discard('have')

    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)

    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.iteritems():
            counter[k] += v

        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)

    counter = OrderedDict(sorted(counter.items(), key=lambda (k, v): k))
    for k, v in counter.iteritems():
        print k, v

    json.dump(counter, outfile, indent=2) 
Example #7
Source Project: Lango   Author: ayoungprogrammer   File: parser.py    License: GNU General Public License v2.0
def __init__(self):
        self.parser = StanfordParser() 
Example #8
Source Project: Lango   Author: ayoungprogrammer   File: parser.py    License: GNU General Public License v2.0
def __init__(self):
        self.parser = StanfordParser(
            model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
        stanford_dir = self.parser._classpath[0].rpartition('/')[0]
        self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
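
The last two lines patch up jar discovery: the parser's private _classpath attribute initially points only at the main parser jar, so the snippet takes that jar's directory and rebuilds the classpath from every jar found inside it. find_jars_within_path can be imported from nltk.internals.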