Python nltk.tokenize.moses.MosesTokenizer() Examples
The following are 5 code examples of nltk.tokenize.moses.MosesTokenizer().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module nltk.tokenize.moses, or try the search function.
Example #1
Source File: tokenizer.py From prenlp with Apache License 2.0 | 6 votes |
def __init__(self):
    """Create the wrapped NLTK Moses tokenizer and store it on ``self.tokenizer``.

    If importing ``MosesTokenizer`` fails for any reason, the
    'perluniprops' and 'nonbreaking_prefixes' NLTK resources are downloaded
    and the import is attempted once more.

    Raises:
        Exception: whatever the second import attempt raises if it still
            fails after the downloads (for example ``ImportError``).
    """
    try:
        from nltk.tokenize.moses import MosesTokenizer
    except Exception:
        import nltk
        nltk.download('perluniprops')
        nltk.download('nonbreaking_prefixes')
        # Retry after fetching the resources: without this the name
        # ``MosesTokenizer`` would be unbound below and the constructor
        # call would fail with a confusing UnboundLocalError.
        from nltk.tokenize.moses import MosesTokenizer
    self.tokenizer = MosesTokenizer()
Example #2
Source File: utils.py From decaNLP with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_tokenizer(tokenizer, decap=False):
    """Resolve a tokenizer specification to a tokenizing callable.

    Args:
        tokenizer: Either a callable, which is returned unchanged, or one
            of the names "spacy", "moses", "revtok" or "subword".
        decap: Consulted only for "subword". When true, the returned
            callable forwards ``decap=True`` to ``revtok.tokenize``. When
            false (the default), ``revtok.tokenize`` itself is returned,
            exactly as before this parameter was honoured.

    Returns:
        A callable intended to be applied to a single input string.

    Raises:
        ImportError: the package backing the requested tokenizer cannot be
            imported (an installation hint is printed first).
        AttributeError: re-raised from the SpaCy setup after printing a hint.
        LookupError: re-raised from the Moses setup after printing a hint.
        ValueError: ``tokenizer`` is neither callable nor a known name.
    """
    if callable(tokenizer):
        return tokenizer
    if tokenizer == "spacy":
        try:
            import spacy
            spacy_en = spacy.load('en')
            return lambda s: [tok.text for tok in spacy_en.tokenizer(s)]
        except (ImportError, AttributeError):
            print("Please install SpaCy and the SpaCy English tokenizer. "
                  "See the docs at https://spacy.io for more information.")
            raise
    elif tokenizer == "moses":
        try:
            from nltk.tokenize.moses import MosesTokenizer
            moses_tokenizer = MosesTokenizer()
            return moses_tokenizer.tokenize
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise
        except LookupError:
            print("Please install the necessary NLTK corpora. "
                  "See the docs at http://nltk.org for more information.")
            raise
    elif tokenizer == 'revtok':
        try:
            import revtok
            return revtok.tokenize
        except ImportError:
            print("Please install revtok.")
            raise
    elif tokenizer == 'subword':
        try:
            import revtok
            if decap:
                # Previously ``decap`` was silently ignored, which made
                # this branch indistinguishable from 'revtok'.
                return lambda x: revtok.tokenize(x, decap=True)
            return revtok.tokenize
        except ImportError:
            print("Please install revtok.")
            raise
    raise ValueError("Requested tokenizer {}, valid choices are a "
                     "callable that takes a single string as input, "
                     "\"revtok\" for the revtok reversible tokenizer, "
                     "\"subword\" for the revtok caps-aware tokenizer, "
                     "\"spacy\" for the SpaCy English tokenizer, or "
                     "\"moses\" for the NLTK port of the Moses tokenization "
                     "script.".format(tokenizer))
Example #3
Source File: utils.py From speaksee with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_tokenizer(tokenizer):
    """Turn a tokenizer spec into a callable.

    A callable argument is handed back untouched. Otherwise the argument is
    compared against the names "spacy", "moses", "revtok" and "subword" and
    the matching backend is imported on demand. If a backend cannot be set
    up, a hint is printed and the original exception propagates.

    Raises:
        ValueError: the argument is not callable and matches no known name.
    """
    # Guard clause: user-supplied callables need no resolution.
    if callable(tokenizer):
        return tokenizer

    if tokenizer == "spacy":
        try:
            import spacy
            nlp = spacy.load('en')
            return lambda s: [tok.text for tok in nlp.tokenizer(s)]
        except (ImportError, AttributeError):
            print("Please install SpaCy and the SpaCy English tokenizer. "
                  "See the docs at https://spacy.io for more information.")
            raise

    if tokenizer == "moses":
        try:
            from nltk.tokenize.moses import MosesTokenizer
            return MosesTokenizer().tokenize
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise
        except LookupError:
            print("Please install the necessary NLTK corpora. "
                  "See the docs at http://nltk.org for more information.")
            raise

    if tokenizer == 'revtok':
        try:
            import revtok
            return revtok.tokenize
        except ImportError:
            print("Please install revtok.")
            raise

    if tokenizer == 'subword':
        try:
            import revtok
            return lambda x: revtok.tokenize(x, decap=True)
        except ImportError:
            print("Please install revtok.")
            raise

    # Nothing matched: report the offending value and the accepted choices.
    template = ("Requested tokenizer {}, valid choices are a "
                "callable that takes a single string as input, "
                "\"revtok\" for the revtok reversible tokenizer, "
                "\"subword\" for the revtok caps-aware tokenizer, "
                "\"spacy\" for the SpaCy English tokenizer, or "
                "\"moses\" for the NLTK port of the Moses tokenization "
                "script.")
    raise ValueError(template.format(tokenizer))
Example #4
Source File: utils.py From meshed-memory-transformer with BSD 3-Clause "New" or "Revised" License | 4 votes |
def get_tokenizer(tokenizer):
    """Look up the tokenizing callable for ``tokenizer``.

    ``tokenizer`` may already be a callable, in which case it is returned
    as is, or one of the strings "spacy", "moses", "revtok" or "subword".
    Each named backend is imported only when requested; on failure an
    installation hint is printed and the exception is re-raised.

    Raises:
        ValueError: ``tokenizer`` is neither callable nor a supported name.
    """

    def _build_spacy():
        # Word-level tokens from the SpaCy English pipeline.
        try:
            import spacy
            english = spacy.load('en')
            return lambda s: [tok.text for tok in english.tokenizer(s)]
        except (ImportError, AttributeError):
            print("Please install SpaCy and the SpaCy English tokenizer. "
                  "See the docs at https://spacy.io for more information.")
            raise

    def _build_moses():
        # Bound ``tokenize`` method of a fresh NLTK MosesTokenizer.
        try:
            from nltk.tokenize.moses import MosesTokenizer
            instance = MosesTokenizer()
            return instance.tokenize
        except ImportError:
            print("Please install NLTK. "
                  "See the docs at http://nltk.org for more information.")
            raise
        except LookupError:
            print("Please install the necessary NLTK corpora. "
                  "See the docs at http://nltk.org for more information.")
            raise

    def _build_revtok():
        try:
            import revtok
            return revtok.tokenize
        except ImportError:
            print("Please install revtok.")
            raise

    def _build_subword():
        try:
            import revtok
            return lambda x: revtok.tokenize(x, decap=True)
        except ImportError:
            print("Please install revtok.")
            raise

    if callable(tokenizer):
        return tokenizer

    # Compared with ``==`` in a fixed order rather than looked up in a dict,
    # so unhashable arguments still reach the ValueError below.
    builders = (
        ("spacy", _build_spacy),
        ("moses", _build_moses),
        ('revtok', _build_revtok),
        ('subword', _build_subword),
    )
    for name, build in builders:
        if tokenizer == name:
            return build()

    raise ValueError("Requested tokenizer {}, valid choices are a "
                     "callable that takes a single string as input, "
                     "\"revtok\" for the revtok reversible tokenizer, "
                     "\"subword\" for the revtok caps-aware tokenizer, "
                     "\"spacy\" for the SpaCy English tokenizer, or "
                     "\"moses\" for the NLTK port of the Moses tokenization "
                     "script.".format(tokenizer))
Example #5
Source File: utils.py From pytorch-nlp with MIT License | 4 votes |
def get_tokenizer(tokenizer):
    """Return a tokenizing callable for the given specification.

    Callables pass straight through. The strings "spacy", "moses",
    "revtok" and "subword" select a backend that is imported lazily; when
    the backend cannot be prepared, a hint is printed and the underlying
    exception is re-raised unchanged.

    Raises:
        ValueError: the specification is not callable and not a known name.
    """
    # Installation hints, kept together so the branches below stay short.
    spacy_hint = ("Please install SpaCy and the SpaCy English tokenizer. "
                  "See the docs at https://spacy.io for more information.")
    nltk_hint = ("Please install NLTK. "
                 "See the docs at http://nltk.org for more information.")
    corpora_hint = ("Please install the necessary NLTK corpora. "
                    "See the docs at http://nltk.org for more information.")
    revtok_hint = "Please install revtok."

    if callable(tokenizer):
        return tokenizer
    elif tokenizer == "spacy":
        try:
            import spacy
            pipeline = spacy.load('en')
        except (ImportError, AttributeError):
            print(spacy_hint)
            raise
        else:
            def spacy_tokens(s):
                return [tok.text for tok in pipeline.tokenizer(s)]
            return spacy_tokens
    elif tokenizer == "moses":
        try:
            from nltk.tokenize.moses import MosesTokenizer
            moses = MosesTokenizer()
        except ImportError:
            print(nltk_hint)
            raise
        except LookupError:
            print(corpora_hint)
            raise
        else:
            return moses.tokenize
    elif tokenizer == 'revtok':
        try:
            import revtok
        except ImportError:
            print(revtok_hint)
            raise
        else:
            return revtok.tokenize
    elif tokenizer == 'subword':
        try:
            import revtok
        except ImportError:
            print(revtok_hint)
            raise
        else:
            def subword_tokens(x):
                return revtok.tokenize(x, decap=True)
            return subword_tokens
    else:
        raise ValueError("Requested tokenizer {}, valid choices are a "
                         "callable that takes a single string as input, "
                         "\"revtok\" for the revtok reversible tokenizer, "
                         "\"subword\" for the revtok caps-aware tokenizer, "
                         "\"spacy\" for the SpaCy English tokenizer, or "
                         "\"moses\" for the NLTK port of the Moses tokenization "
                         "script.".format(tokenizer))