Python gensim.models.Word2Vec.load_word2vec_format() Examples

The following are 9 code examples of gensim.models.Word2Vec.load_word2vec_format(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module gensim.models.Word2Vec , or try the search function .
Example #1
Source File: doc_word2vec.py    From KATE with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def load_w2v(file):
    model = Word2Vec.load_word2vec_format(file, binary=True)
    return model 
Example #2
Source File: score_alignments.py    From policy_diffusion with MIT License 5 votes vote down vote up
def load_word2vec():
    model = Word2Vec.load_word2vec_format('/mnt/data/sunlight/GoogleNews-vectors-negative300.bin', binary=True)

    return model 
Example #3
Source File: words2map.py    From words2map with MIT License 5 votes vote down vote up
def load_derived_vectors(filename):
	# loads derived vectors from a previous words2map as a standalone Gensim Word2Vec model (https://radimrehurek.com/gensim/models/word2vec.html)
	filepath = getcwd() + "/derived_vectors/" + filename
	model = Word2Vec.load_word2vec_format(filepath, binary=False)
	return model 
Example #4
Source File: similarity.py    From 4lang with MIT License 5 votes vote down vote up
def get_vec_sim(self):
        model_fn = self.config.get('vectors', 'model')
        model_type = self.config.get('vectors', 'model_type')
        logging.warning('Loading model: {0}'.format(model_fn))
        if model_type == 'word2vec':
            self.vec_model = Word2Vec.load_word2vec_format(model_fn,
                                                           binary=True)
        elif model_type == 'gensim':
            self.vec_model = Word2Vec.load(model_fn)
        else:
            raise Exception('Unknown LSA model format')
        logging.warning('Model loaded: {0}'.format(model_fn)) 
Example #5
Source File: word2vec_cluster.py    From word2vec-cluster with GNU General Public License v3.0 5 votes vote down vote up
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="word2vec model path")
    parser.add_argument("format", help="1 = binary format, 0 = text format", type=int)
    parser.add_argument("k", help="number of clusters", type=int)
    parser.add_argument("output", help="output file")
    args = parser.parse_args()

    start = time.time()
    print("Load word2vec model ... ", end="", flush=True)
    w2v_model = Word2Vec.load_word2vec_format(args.model, binary=bool(args.format))
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)
    word_vectors = w2v_model.wv.syn0
    n_words = word_vectors.shape[0]
    vec_size = word_vectors.shape[1]
    print("#words = {0}, vector size = {1}".format(n_words, vec_size))

    start = time.time()
    print("Compute clustering ... ", end="", flush=True)
    kmeans = KMeans(n_clusters=args.k, n_jobs=-1, random_state=0)
    idx = kmeans.fit_predict(word_vectors)
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    start = time.time()
    print("Generate output file ... ", end="", flush=True)
    word_centroid_list = list(zip(w2v_model.wv.index2word, idx))
    word_centroid_list_sort = sorted(word_centroid_list, key=lambda el: el[1], reverse=False)
    file_out = open(args.output, "w")
    file_out.write("WORD\tCLUSTER_ID\n")
    for word_centroid in word_centroid_list_sort:
        line = word_centroid[0] + '\t' + str(word_centroid[1]) + '\n'
        file_out.write(line)
    file_out.close()
    print("finished in {:.2f} sec.".format(time.time() - start), flush=True)

    return 
Example #6
Source File: feature_generator.py    From entity2vec with Apache License 2.0 5 votes vote down vote up
def get_e2v_embedding(embeddings_file):

	model = Word2Vec.load_word2vec_format(embeddings_file, binary=True)

	return model 
Example #7
Source File: wordtwovec.py    From aristo-mini with Apache License 2.0 5 votes vote down vote up
def __init__(self, model_file: str) -> None:
        if model_file.endswith(".bin"):
            self.model = Word2Vec.load_word2vec_format(model_file, binary=True)
        else:
            self.model = Word2Vec.load(model_file) 
Example #8
Source File: test_analogy.py    From conec with MIT License 5 votes vote down vote up
def evaluate_google():
    # see https://code.google.com/archive/p/word2vec/
    # load pretrained google embeddings and test
    from gensim.models import Word2Vec
    model_google = Word2Vec.load_word2vec_format('data/GoogleNews-vectors-negative300.bin.gz', binary=True)
    _ = accuracy(model_google, "data/questions-words.txt", False) 
Example #9
Source File: kaggle.py    From dl-models-for-qa with Apache License 2.0 4 votes vote down vote up
def get_weights_word2vec(word2idx, w2vfile, w2v_embed_size=300, 
                         is_custom=False):
    word2vec = None
    if is_custom:
        word2vec = Word2Vec.load(w2vfile)
    else:
        word2vec = Word2Vec.load_word2vec_format(w2vfile, binary=True)
    vocab_size = len(word2idx) + 1
    embedding_weights = np.zeros((vocab_size, w2v_embed_size))
    for word, index in word2idx.items():
        try:
            embedding_weights[index, :] = word2vec[word.lower()]
        except KeyError:
            pass  # keep as zero (not ideal, but what else can we do?)
    return embedding_weights