Python nltk.tag.StanfordPOSTagger() Examples

The following are 7 code examples of nltk.tag.StanfordPOSTagger(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module nltk.tag , or try the search function .
Example #1
Source File: keyphrase_test_dataset.py    From seq2seq-keyphrase with MIT License 5 votes vote down vote up
def get_postag_with_record(records, pairs):
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    tagged_source = []
    # Predict on testing data
    for idx, (record, pair) in enumerate(zip(records, pairs)):  # len(test_data_plain)
        print('*' * 100)
        print('File: '  + record['name'])
        print('Input: ' + str(pair[0]))
        text = pos_tagger.tag(pair[0])
        print('[%d/%d][%d] : %s' % (idx, len(records) , len(pair[0]), str(text)))
        tagged_source.append(text)

    return tagged_source 
Example #2
Source File: keyphrase_test_dataset.py    From seq2seq-keyphrase with MIT License 5 votes vote down vote up
def get_postag_with_index(sources, idx2word, word2idx):
    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
    # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    tagged_source = []
    # Predict on testing data
    for idx in xrange(len(sources)):  # len(test_data_plain)
        test_s_o = sources[idx]
        source_text = keyphrase_utils.cut_zero(test_s_o, idx2word)
        text = pos_tagger.tag(source_text)
        print('[%d/%d] : %s' % (idx, len(sources), str(text)))

        tagged_source.append(text)

    return tagged_source 
Example #3
Source File: test_dataset_producer.py    From seq2seq-keyphrase-pytorch with Apache License 2.0 5 votes vote down vote up
def load_pos_tagger():
    path = os.path.dirname(__file__)
    path =  os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger')
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger 
Example #4
Source File: EnglishPOSTagger.py    From NeuronBlocks with MIT License 5 votes vote down vote up
def __init__(self, model_type='english-bidirectional-distsim.tagger'):
        """
        Args:
            model:  model available in $STANFORD_MODELS:
                english-bidirectional-distsim.tagger
                english-caseless-left3words-distsim.tagger
                english-left3words-distsim.tagger
        """
        #self.eng_tagger = StanfordPOSTagger(model_type, java_options='-mx16000m')
        self.eng_tagger = PerceptronTagger() 
Example #5
Source File: test_dataset_producer.py    From OpenNMT-kpg-release with MIT License 5 votes vote down vote up
def load_pos_tagger():
    path = os.path.dirname(__file__)
    path =  os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger')
    print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    stanford_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger 
Example #6
Source File: export_dataset.py    From OpenNMT-kpg-release with MIT License 5 votes vote down vote up
def load_pos_tagger(stanford_base_dir):
    # path = os.path.dirname(__file__)
    # path = os.path.join(file_dir[: file_dir.rfind('pykp') + 4], 'stanford-postagger')
    # print(path)
    # jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
    jar = stanford_base_dir + '/stanford-postagger.jar'
    model = stanford_base_dir + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model_filename=model, path_to_jar=jar)

    stanford_base_dir = jar.rpartition('/')[0]
    stanford_jars = find_jars_within_path(stanford_base_dir)
    pos_tagger._stanford_jar = ':'.join(stanford_jars)

    return pos_tagger 
Example #7
Source File: keyphrase_test_dataset.py    From seq2seq-keyphrase with MIT License 4 votes vote down vote up
def check_postag(config):
    train_set, validation_set, test_set, idx2word, word2idx = deserialize_from_file(config['dataset'])

    path = os.path.dirname(__file__)
    path = path[:path.rfind(os.sep, 0, len(path)-10)+1] + 'stanford-postagger/'
    jar = path + '/stanford-postagger.jar'
    model = path + '/models/english-bidirectional-distsim.tagger'
    pos_tagger = StanfordPOSTagger(model, jar)

    for dataset_name in config['testing_datasets']:
        # override the original test_set
        # test_set = load_testing_data(dataset_name, kwargs=dict(basedir=config['path']))(idx2word, word2idx, config['preprocess_type'])

        test_sets = load_additional_testing_data(config['testing_datasets'], idx2word, word2idx, config)
        test_set = test_sets[dataset_name]

        # print(dataset_name)
        # print('Avg length=%d, Max length=%d' % (np.average([len(s) for s in test_set['source']]), np.max([len(s) for s in test_set['source']])))
        test_data_plain = zip(*(test_set['source'], test_set['target']))

        test_size = len(test_data_plain)

        # Alternatively to setting the CLASSPATH add the jar and model via their path:
        jar = '/Users/memray/Project/stanford/stanford-postagger/stanford-postagger.jar'
        # model = '/Users/memray/Project/stanford/stanford-postagger/models/english-left3words-distsim.tagger'
        model = '/Users/memray/Project/stanford/stanford-postagger/models/english-bidirectional-distsim.tagger'
        pos_tagger = StanfordPOSTagger(model, jar)

        for idx in xrange(len(test_data_plain)):  # len(test_data_plain)
            test_s_o, test_t_o = test_data_plain[idx]

            source = keyphrase_utils.cut_zero(test_s_o, idx2word)

            print(source)

            # Add other jars from Stanford directory
            stanford_dir = jar.rpartition('/')[0]
            stanford_jars = find_jars_within_path(stanford_dir)
            pos_tagger._stanford_jar = ':'.join(stanford_jars)

            text = pos_tagger.tag(source)
            print(text)