"""Small helpers for reading and writing JSON, pickle, text and sparse-matrix files."""

import os
import json
import codecs
import pickle

import numpy as np
from scipy import sparse


def makedirs(directory):
    """Create *directory* (and any missing parents) if it does not already exist.

    Uses ``exist_ok=True`` rather than a separate existence check so that a
    concurrent creation of the same directory does not raise.

    Raises:
        OSError: if the path cannot be created (e.g. it exists but is not a
            directory, or permissions are insufficient).
    """
    os.makedirs(directory, exist_ok=True)


def write_to_json(data, output_filename, indent=2, sort_keys=True):
    """Serialize *data* as JSON into *output_filename* (UTF-8).

    Args:
        data: any JSON-serializable object.
        output_filename: path of the file to (over)write.
        indent: indentation passed through to ``json.dump``.
        sort_keys: whether dictionary keys are written in sorted order.
    """
    with codecs.open(output_filename, 'w', encoding='utf-8') as output_file:
        json.dump(data, output_file, indent=indent, sort_keys=sort_keys)


def read_json(input_filename):
    """Load and return the JSON document stored in *input_filename* (UTF-8)."""
    with codecs.open(input_filename, 'r', encoding='utf-8') as input_file:
        # The stream is already decoded by codecs.open; json.load() no longer
        # accepts an ``encoding`` keyword (removed in Python 3.9).
        return json.load(input_file)


def pickle_data(data, output_filename):
    """Pickle *data* to *output_filename* using the highest available protocol."""
    with open(output_filename, 'wb') as outfile:
        pickle.dump(data, outfile, pickle.HIGHEST_PROTOCOL)


def unpickle_data(input_filename):
    """Load and return the object pickled in *input_filename*.

    NOTE: unpickling can execute arbitrary code; only use this on files that
    come from a trusted source.
    """
    with open(input_filename, 'rb') as infile:
        return pickle.load(infile)


def read_text(input_filename):
    """Return the lines of *input_filename* (UTF-8) as a list, line endings kept."""
    with codecs.open(input_filename, 'r', encoding='utf-8') as input_file:
        return input_file.readlines()


def write_list_to_text(lines, output_filename, add_newlines=True, add_final_newline=False):
    """Write an iterable of strings to *output_filename* (UTF-8).

    Args:
        lines: iterable of strings to write.
        output_filename: path of the file to (over)write.
        add_newlines: if True, join the items with ``'\\n'``; otherwise
            concatenate them unchanged (items are expected to carry their own
            line endings).
        add_final_newline: if True, append a trailing ``'\\n'`` to the output.
    """
    separator = '\n' if add_newlines else ''
    text = separator.join(lines)
    if add_final_newline:
        # Append to the joined text in both modes; the joined value is a str,
        # so it cannot be modified by index.
        text += '\n'

    with codecs.open(output_filename, 'w', encoding='utf-8') as output_file:
        output_file.write(text)


def save_sparse(sparse_matrix, output_filename):
    """Save a scipy sparse matrix to an ``.npz`` archive in COO form.

    The archive holds four arrays: ``row``, ``col``, ``data`` and ``shape``.
    Note that ``np.savez`` appends ``.npz`` to *output_filename* when the name
    does not already end with that suffix.

    Args:
        sparse_matrix: any scipy sparse matrix.
        output_filename: destination path.
    """
    assert sparse.issparse(sparse_matrix)
    # tocoo() hands back the matrix itself when it is already in COO format.
    coo = sparse_matrix.tocoo()
    np.savez(output_filename, row=coo.row, col=coo.col, data=coo.data, shape=coo.shape)


def load_sparse(input_filename):
    """Load an archive written by :func:`save_sparse` and return a CSC matrix."""
    # NpzFile keeps the underlying file open until closed, and its arrays are
    # read lazily, so pull everything out inside the context manager.
    with np.load(input_filename) as npy:
        data = npy['data']
        row = npy['row']
        col = npy['col']
        shape = tuple(int(dim) for dim in npy['shape'])
    coo_matrix = sparse.coo_matrix((data, (row, col)), shape=shape)
    return coo_matrix.tocsc()