Python nltk.tag.StanfordPOSTagger() Examples
The following are 7
code examples of nltk.tag.StanfordPOSTagger().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
nltk.tag
, or try the search function
.
Example #1
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License | 5 votes |
def get_postag_with_record(records, pairs): path = os.path.dirname(__file__) path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/' print(path) # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' jar = path + '/stanford-postagger.jar' model = path + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger' # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger' stanford_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) tagged_source = [] # Predict on testing data for idx, (record, pair) in enumerate(zip(records, pairs)): # len(test_data_plain) print('*' * 100) print('File: ' + record['name']) print('Input: ' + str(pair[0])) text = pos_tagger.tag(pair[0]) print('[%d/%d][%d] : %s' % (idx, len(records) , len(pair[0]), str(text))) tagged_source.append(text) return tagged_source
Example #2
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License | 5 votes |
def get_postag_with_index(sources, idx2word, word2idx): path = os.path.dirname(__file__) path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/' print(path) # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' jar = path + '/stanford-postagger.jar' model = path + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger' # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger' stanford_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) tagged_source = [] # Predict on testing data for idx in xrange(len(sources)): # len(test_data_plain) test_s_o = sources[idx] source_text = keyphrase_utils.cut_zero(test_s_o, idx2word) text = pos_tagger.tag(source_text) print('[%d/%d] : %s' % (idx, len(sources), str(text))) tagged_source.append(text) return tagged_source
Example #3
Source File: test_dataset_producer.py From seq2seq-keyphrase-pytorch with Apache License 2.0 | 5 votes |
def load_pos_tagger(): path = os.path.dirname(__file__) path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger') print(path) # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' jar = path + '/stanford-postagger.jar' model = path + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) stanford_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) return pos_tagger
Example #4
Source File: EnglishPOSTagger.py From NeuronBlocks with MIT License | 5 votes |
def __init__(self, model_type='english-bidirectional-distsim.tagger'): """ Args: model: model available in $STANFORD_MODELS: english-bidirectional-distsim.tagger english-caseless-left3words-distsim.tagger english-left3words-distsim.tagger """ #self.eng_tagger = StanfordPOSTagger(model_type, java_options='-mx16000m') self.eng_tagger = PerceptronTagger()
Example #5
Source File: test_dataset_producer.py From OpenNMT-kpg-release with MIT License | 5 votes |
def load_pos_tagger(): path = os.path.dirname(__file__) path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger') print(path) # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' jar = path + '/stanford-postagger.jar' model = path + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) stanford_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) return pos_tagger
Example #6
Source File: export_dataset.py From OpenNMT-kpg-release with MIT License | 5 votes |
def load_pos_tagger(stanford_base_dir): # path = os.path.dirname(__file__) # path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger') # print(path) # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' jar = stanford_base_dir + '/stanford-postagger.jar' model = stanford_base_dir + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model_filename=model, path_to_jar=jar) stanford_base_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_base_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) return pos_tagger
Example #7
Source File: keyphrase_test_dataset.py From seq2seq-keyphrase with MIT License | 4 votes |
def check_postag(config): train_set, validation_set, test_set, idx2word, word2idx = deserialize_from_file(config['dataset']) path = os.path.dirname(__file__) path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/' jar = path + '/stanford-postagger.jar' model = path + '/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) for dataset_name in config['testing_datasets']: # override the original test_set # test_set = load_testing_data(dataset_name, kwargs=dict(basedir=config['path']))(idx2word, word2idx, config['preprocess_type']) test_sets = load_additional_testing_data(config['testing_datasets'], idx2word, word2idx, config) test_set = test_sets[dataset_name] # print(dataset_name) # print('Avg length=%d, Max length=%d' % (np.average([len(s) for s in test_set['source']]), np.max([len(s) for s in test_set['source']]))) test_data_plain = zip(*(test_set['source'], test_set['target'])) test_size = len(test_data_plain) # Alternatively to setting the CLASSPATH add the jar and model via their path: jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar' # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger' model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger' pos_tagger = StanfordPOSTagger(model, jar) for idx in xrange(len(test_data_plain)): # len(test_data_plain) test_s_o, test_t_o = test_data_plain[idx] source = keyphrase_utils.cut_zero(test_s_o, idx2word) print(source) # Add other jars from Stanford directory stanford_dir = jar.rpartition('/')[0] stanford_jars = find_jars_within_path(stanford_dir) pos_tagger._stanford_jar = ':'.join(stanford_jars) text = pos_tagger.tag(source) print(text)