Python nltk.parse.stanford.StanfordParser() Examples

The following code examples show how to use nltk.parse.stanford.StanfordParser(). They are taken from open-source Python projects.
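All of these examples assume that the Stanford Parser jars and models are reachable from Java, either through explicit constructor arguments or through the STANFORD_PARSER / STANFORD_MODELS / CLASSPATH environment variables. A minimal, self-contained sketch of the usual setup (the directory paths are placeholders, not taken from any of the projects below):

import os
from nltk.parse import stanford

# Placeholder locations of the unpacked Stanford Parser distribution.
os.environ['STANFORD_PARSER'] = '/opt/stanford-parser'
os.environ['STANFORD_MODELS'] = '/opt/stanford-parser'

parser = stanford.StanfordParser(
    model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')

# raw_parse() returns an iterator of nltk.tree.Tree objects for one sentence.
for tree in parser.raw_parse('The quick brown fox jumps over the lazy dog.'):
    tree.pretty_print()

Note that recent NLTK releases deprecate StanfordParser in favor of nltk.parse.corenlp.CoreNLPParser; all of the examples below use the older interface.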

Example 1
Project: ijcai2019-relis   Author: UKPLab   File: corpus_cleaner.py    MIT License
def __init__(self, datasets_path, corpus_name, parse_type, lang='english'):
        self.datasets_path = datasets_path
        self.corpus_name = corpus_name
        self.corpus_path = path.join(datasets_path, corpus_name)
        self.docs_path = path.join(self.corpus_path, "docs")
        self.topics_file = path.join(self.corpus_path, "topics.xml")
        self.models_path = path.join(self.corpus_path, "models")
        self.smodels_path = path.join(self.corpus_path, "smodels")
        self.jar_path = JAR_PATH
        os.environ['CLASSPATH'] = self.jar_path
        self.cleaned_path = path.join(datasets_path, "processed_data")

        if parse_type == 'parse':
            if lang == 'english':
                self.parser = stanford.StanfordParser(model_path="%s/englishPCFG.ser.gz" % (self.jar_path))
            if lang == 'german':
                self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path))
                # self.cleaned_path = path.join(datasets_path, "processed.parse")
        #if parse_type == 'props':  # TODO
        #    if lang == 'english':
        #        self.props_parser = ClausIE.get_instance()
        #    if lang == 'german':
        #        self.parser = stanford.StanfordParser(model_path="%s/germanPCFG.ser.gz" % (self.jar_path)) 
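Once the cleaner is constructed with parse_type='parse', its parser can be applied to raw sentences in the chosen language. A brief illustrative sketch, assuming cleaner is an instance of the surrounding class (the instance name and the sentences are hypothetical, not part of the project):

sentences = ['The committee approved the budget.', 'The proposal was rejected.']
# raw_parse_sents() yields one iterator of parse trees per input sentence.
for parses in cleaner.parser.raw_parse_sents(sentences):
    for tree in parses:
        print(tree)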
Example 2
Project: py-nltk-svo   Author: klintan   File: svo.py    MIT License
def __init__(self):
        """
        Initialize the SVO Methods
        """
        self.noun_types = ["NN", "NNP", "NNPS", "NNS", "PRP"]
        self.verb_types = ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]
        self.adjective_types = ["JJ", "JJR", "JJS"]
        self.pred_verb_phrase_siblings = None
        self.parser = stanford.StanfordParser()
        self.sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') 
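The Punkt sentence detector and the parser are meant to work together on free text: split first, then parse sentence by sentence. An illustrative sketch, assuming svo is an instance of the surrounding class (the filtering below is only a demonstration, not the project's SVO extraction logic):

text = 'The dog chased the cat. The cat ran away.'
for sent in svo.sent_detector.tokenize(text):
    tree = next(svo.parser.raw_parse(sent))
    # Collect tokens whose POS tag is one of the noun types listed above.
    nouns = [' '.join(st.leaves()) for st in tree.subtrees()
             if st.label() in svo.noun_types]
    print(sent, '->', nouns)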
Example 3
Project: NLIDB   Author: VV123   File: dataset_utils.py    MIT License
def TD_Augmenting(TD, configdict):
    '''Given the information in TD, generate a test dataset
    '''
    parser = stanford.StanfordParser(model_path='/Users/richard_xiong/Documents/DeepLearningMaster/deep_parser/englishPCFG.ser.gz')
    
    queryCollect, logicCollect, schemaCollect = [], [], []
    for lo in TD['examples']:
        collect = TD['examples'][lo]
        queryTiny, logicTiny, schemaTiny = main(parser, TD['schema'], configdict, \
                                                 collect, lo, TD['schema'].keys())
        queryExpa, logicExpa, schemaExpa = expandDatasets(queryTiny, logicTiny, schemaTiny)
        queryCollect.extend(queryExpa)
        logicCollect.extend(logicExpa)
        schemaCollect.extend(schemaExpa)
    return queryCollect, logicCollect, schemaCollect 
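The model_path above is an absolute, machine-specific path. A more portable variant (purely illustrative; ENGLISH_PCFG is a made-up environment variable) falls back to the model bundled inside the models jar:

import os
from nltk.parse import stanford

model = os.environ.get('ENGLISH_PCFG',
                       'edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
parser = stanford.StanfordParser(model_path=model)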
Example 4
Project: RDF-Triple-API   Author: tdpetrou   File: rdf_triple.py    MIT License
def clear_data(self):
        self.parser = stanford.StanfordParser(model_path=r"/users/ted/stanford nlp/stanford-parser-full-2015-01-30/stanford-parser-3.5.1-models/edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz")
        self.first_NP = ''
        self.first_VP = ''
        self.parse_tree = None
        self.subject = RDF_Triple.RDF_SOP('subject')
        self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
        self.Object = RDF_Triple.RDF_SOP('object') 
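After clear_data() resets the state, the parser is typically used to locate the first NP and VP subtrees of a sentence's parse, which is where first_NP and first_VP come from. A minimal sketch of that pattern, assuming rdf is an instance of the surrounding RDF_Triple class (the sentence and traversal are illustrative, not the project's code):

rdf.clear_data()
tree = next(rdf.parser.raw_parse('The report describes the incident.'))
# First NP and VP subtrees encountered in a pre-order traversal of the parse tree.
first_np = next((st for st in tree.subtrees() if st.label() == 'NP'), None)
first_vp = next((st for st in tree.subtrees() if st.label() == 'VP'), None)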
Example 5
Project: WebCredibility   Author: DeFacto   File: rdf_triple.py    Apache License 2.0
def clear_data(self):
        self.parser = stanford.StanfordParser(model_path=STANFORD_MODEL_PATH)
        self.first_NP = ''
        self.first_VP = ''
        self.parse_tree = None
        self.subject = RDF_Triple.RDF_SOP('subject')
        self.predicate = RDF_Triple.RDF_SOP('predicate', 'VB')
        self.Object = RDF_Triple.RDF_SOP('object') 
Example 6
Project: StrepHit   Author: Wikidata   File: extract_sentences.py    GNU General Public License v3.0
def setup_extractor(self):
        self.splitter = PunktSentenceSplitter(self.language)
        self.parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                                     path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                                     java_options=' -mx2G -Djava.ext.dirs=dev/')

        self.token_to_lemma = {}
        for lemma, tokens in self.lemma_to_token.iteritems():
            for t in tokens:
                self.token_to_lemma[t] = lemma
        self.all_verbs = set(self.token_to_lemma.keys()) 
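Here the CoreNLP jars are passed explicitly instead of relying on CLASSPATH, and java_options raises the JVM heap to 2 GB. The token_to_lemma table built at the end is the kind of lookup that gets applied to the verb tokens of each parse; a sketch of that use inside another method of the extractor (illustrative only, not StrepHit's extraction logic):

tree = next(self.parser.raw_parse('Bach composed hundreds of cantatas.'))
# Tokens under verb POS tags, mapped back to their lemmas where known.
verb_tokens = [leaf for st in tree.subtrees()
               if st.label().startswith('VB') for leaf in st.leaves()]
lemmas = [self.token_to_lemma.get(t.lower()) for t in verb_tokens]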
Example 7
Project: StrepHit   Author: Wikidata   File: compute_lu_distribution.py    GNU General Public License v3.0
def main(corpus, verbs, processes, outfile, sub_sentences):
    """ Compute the LU distribution in the corpus, i.e. how many LUs per sentence
    """
    global splitter, tagger, parser, all_verbs
    splitter = PunktSentenceSplitter('en')
    tagger = TTPosTagger('en')
    parser = StanfordParser(path_to_jar='dev/stanford-corenlp-3.6.0.jar',
                            path_to_models_jar='dev/stanford-corenlp-3.6.0-models.jar',
                            java_options=' -mx1G -Djava.ext.dirs=dev/')  # no way to make classpath work
    all_verbs = reduce(lambda x, y: x.union(y), imap(set, json.load(verbs).values()), set())
    all_verbs.discard('be')
    all_verbs.discard('have')

    args = load_corpus(corpus, 'bio', text_only=True)
    worker = worker_with_sub_sentences if sub_sentences else worker_with_sentences
    counter = defaultdict(int)

    for i, counts in enumerate(parallel.map(worker, args, processes)):
        for k, v in counts.iteritems():
            counter[k] += v

        if (i + 1) % 10000 == 0:
            logger.info('Processed %d documents', i + 1)

    counter = OrderedDict(sorted(counter.items(), key=lambda (k, v): k))
    for k, v in counter.iteritems():
        print k, v

    json.dump(counter, outfile, indent=2) 
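This file is Python 2 code: iteritems(), imap(), the print statements, and the tuple-unpacking lambda in the sort are all Python 2 idioms. An equivalent Python 3 sketch of the verb-set construction and the sort (illustrative only):

all_verbs = set()
for tokens in json.load(verbs).values():
    all_verbs.update(tokens)
all_verbs.discard('be')
all_verbs.discard('have')

# Tuple-unpacking lambdas are gone in Python 3; index into the (key, value) pair instead.
counter = OrderedDict(sorted(counter.items(), key=lambda kv: kv[0]))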
Example 8
Project: NLIDB   Author: VV123   File: tagger.py    MIT License
def mainON(field2word, subset):
    ''' process data, from .qu, .lo, and .fi
        to .ta, .lox, .qux
        and .ficorr, .vacorr
    '''

    sub_folder = subset.split('_')[0]
    data_root = FLAGS.data_root
    os.environ['STANFORD_PARSER'] = FLAGS.stanford_parser
    os.environ['STANFORD_MODELS'] = FLAGS.stanford_models
    parser = stanford.StanfordParser(model_path=FLAGS.stanford_model_path)
    schema = ' '.join(field2word.keys())

    if not path.isdir(join(data_root, 'overnight_generated')):
        os.makedirs(join(data_root, 'overnight_generated'))

    (f_ta, f_lox, f_qux, f_ficorr, f_vacorr) = [
        open(
            join(data_root, 'overnight_generated', '%s.%s' % (subset, suffix)),
            'w') for suffix in ['ta', 'lox', 'qux', 'ficorr', 'vacorr']
    ]

    with open(data_root + 'overnight_source/%s/%s.qu' % (sub_folder,subset)) as f_qu, open(
            data_root + 'overnight_source/%s/%s.lon' % (sub_folder,subset)) as f_lo:
        query, logic = f_qu.readline(), f_lo.readline()
        idx = 0
        while query and logic:
            idx += 1
            print '### example: %d ###' % idx
            print query
            print logic
            tagged2, field_corr, value_corr, newquery, newlogical = codebase.tagger.sentTagging_treeON3(
                parser, field2word, query, schema, logic)
            print field_corr
            print value_corr
            print tagged2
            print newquery
            print newlogical
            print '\n'
            f_qux.write(newquery + '\n')
            f_lox.write(newlogical + '\n')
            f_ficorr.write(field_corr + '\n')
            f_vacorr.write(value_corr + '\n')
            f_ta.write(tagged2 + '\n')
            query, logic = f_qu.readline(), f_lo.readline()

    f_ta.close()
    f_lox.close()
    f_qux.close()
    f_vacorr.close()
    f_ficorr.close()
    return