Python zipfile.ZipFile() Examples
The following are 30 code examples of zipfile.ZipFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the zipfile module, or try the search function.

Example #1
Source File: demo.py From svviz with MIT License | 30 votes |
def downloadDemo(which):
    """Download and unpack the svviz demo dataset `which` into ./svviz-examples.

    Returns True on success, False if the download/extraction failed or the
    expected output directory is missing afterwards.
    """
    try:
        downloadDir = tempfile.mkdtemp()
        archivePath = "{}/svviz-data.zip".format(downloadDir)

        # logging.info("Downloading...")
        downloadWithProgress("http://svviz.github.io/svviz/assets/examples/{}.zip".format(which), archivePath)

        logging.info("Decompressing...")
        # Context manager guarantees the archive handle is closed even if
        # extractall() raises (the original leaked the open ZipFile).
        with zipfile.ZipFile(archivePath) as archive:
            archive.extractall("{}".format(downloadDir))

        if not os.path.exists("svviz-examples"):
            os.makedirs("svviz-examples/")

        shutil.move("{temp}/{which}".format(temp=downloadDir, which=which), "svviz-examples/")
    except Exception as e:
        print("error downloading and decompressing example data: {}".format(e))
        return False

    if not os.path.exists("svviz-examples"):
        print("error finding example data after download and decompression")
        return False
    return True
Example #2
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 10 votes |
def get_cifar10(data_dir):
    """Download and unpack the CIFAR-10 .rec files into `data_dir` (idempotent)."""
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    cwd = os.path.abspath(os.getcwd())
    os.chdir(data_dir)
    if (not os.path.exists('train.rec')) or \
       (not os.path.exists('test.rec')):
        import zipfile, glob
        # BUGFIX: `urllib.urlretrieve` exists only on Python 2; fall back so
        # the helper also runs on Python 3.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        dirname = os.getcwd()
        zippath = os.path.join(dirname, "cifar10.zip")
        urlretrieve("http://data.mxnet.io/mxnet/data/cifar10.zip", zippath)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(zippath, "r") as zf:
            zf.extractall()
        os.remove(zippath)
        # Flatten the extracted "cifar" directory into data_dir.
        for f in glob.glob(os.path.join(dirname, "cifar", "*")):
            name = f.split(os.path.sep)[-1]
            os.rename(f, os.path.join(dirname, name))
        os.rmdir(os.path.join(dirname, "cifar"))
    os.chdir(cwd)
Example #3
Source File: misc_util.py From HardRLWithYoutube with MIT License | 7 votes |
def pickle_load(path, compression=False):
    """Unpickle a possibly compressed pickle.

    Parameters
    ----------
    path: str
        path to the output file
    compression: bool
        if true assumes that pickle was compressed when created and
        attempts decompression.

    Returns
    -------
    obj: object
        the unpickled object
    """
    # Plain (uncompressed) case first: just read the raw pickle.
    if not compression:
        with open(path, "rb") as handle:
            return pickle.load(handle)
    # Compressed case: the pickle lives in a member named "data" inside a
    # deflate-compressed zip archive.
    with zipfile.ZipFile(path, "r", compression=zipfile.ZIP_DEFLATED) as archive:
        with archive.open("data") as member:
            return pickle.load(member)
Example #4
Source File: unique_objects.py From vergeml with MIT License | 6 votes |
def __call__(self, args, env):
    """Download the 'unique objects' archive and copy its images into samples-dir."""
    samples_dir = env.get('samples-dir')
    print("Downloading unique objects to {}.".format(samples_dir))
    src_dir = self.download_files([_URL], env=env, dir=env.get('cache-dir'))
    path = os.path.join(src_dir, "ObjectsAll.zip")
    # with-statement closes the archive even if extractall() raises
    # (the original leaked the open handle on error).
    with zipfile.ZipFile(path, 'r') as zipf:
        zipf.extractall(src_dir)
    for file in os.listdir(os.path.join(src_dir, "OBJECTSALL")):
        shutil.copy(os.path.join(src_dir, "OBJECTSALL", file), samples_dir)
    # The cache copy is no longer needed once the samples are in place.
    shutil.rmtree(src_dir)
    print("Finished downloading unique objects.")
Example #5
Source File: cats_and_dogs.py From vergeml with MIT License | 6 votes |
def __call__(self, args, env):
    """Download the cats-and-dogs dataset and split it into cat/dog sample dirs.

    Raises VergeMLError if either label directory already exists.
    """
    samples_dir = env.get('samples-dir')

    # Refuse to overwrite existing label directories.
    for label in ("cat", "dog"):
        dest = os.path.join(samples_dir, label)
        if os.path.exists(dest):
            raise VergeMLError("Directory {} already exists in samples dir: {}".format(label, dest))

    print("Downloading cats and dogs to {}.".format(samples_dir))
    src_dir = self.download_files([(_URL, "catsdogs.zip")], env)
    path = os.path.join(src_dir, "catsdogs.zip")

    print("Extracting data.")
    # with-statement closes the archive even if extractall() raises
    # (the original leaked the open handle on error).
    with zipfile.ZipFile(path, 'r') as zipf:
        zipf.extractall(src_dir)

    for file, dest in (("PetImages/Dog", "dog"), ("PetImages/Cat", "cat")):
        shutil.copytree(os.path.join(src_dir, file), os.path.join(samples_dir, dest))

    shutil.rmtree(src_dir)

    # NOTE(review): these two specific images are deliberately removed
    # (original comment was just "WTF?") — presumably corrupt upstream;
    # confirm before changing.
    os.unlink(os.path.join(samples_dir, "cat", "666.jpg"))
    os.unlink(os.path.join(samples_dir, "dog", "11702.jpg"))

    print("Finished downloading cats and dogs.")
Example #6
Source File: download.py From nmp_qc with MIT License | 6 votes |
def download_figshare(file_name, file_ext, dir_path='./', change_name=None):
    """Download figshare file `file_name` into `dir_path` and unpack it.

    .zip and .tar.bz2 archives are extracted (into `change_name` under
    dir_path when given) and the archive is deleted; other files are
    optionally renamed to `change_name`.
    """
    prepare_data_dir(dir_path)
    url = 'https://ndownloader.figshare.com/files/' + file_name
    wget.download(url, out=dir_path)
    file_path = os.path.join(dir_path, file_name)
    if file_ext == '.zip':
        if change_name is not None:
            dir_path = os.path.join(dir_path, change_name)
        # Context manager closes the archive even if extraction fails
        # (the original leaked the handle on error).
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(dir_path)
        os.remove(file_path)
    elif file_ext == '.tar.bz2':
        if change_name is not None:
            dir_path = os.path.join(dir_path, change_name)
        with tarfile.open(file_path, 'r:bz2') as tar_ref:
            tar_ref.extractall(dir_path)
        os.remove(file_path)
    elif change_name is not None:
        os.rename(file_path, os.path.join(dir_path, change_name))
Example #7
Source File: archive.py From CAMISIM with Apache License 2.0 | 6 votes |
def zip_stream(src_dir, output_stream):
    """
    @param src_dir:
    @type src_dir: str
    @param output_stream:
    @type output_stream: zipfile.ZipFile
    @return:
    """
    base_dir = os.path.dirname(src_dir)
    assert os.path.isdir(src_dir), "Invalid, not a directory: '{}'".format(src_dir)
    # Walk the tree and archive every file under a path relative to the
    # parent of src_dir, so the archive keeps src_dir's own name as prefix.
    for current_root, _subdirs, file_names in os.walk(src_dir):
        for name in file_names:
            absolute_path = os.path.join(current_root, name)
            archive_name = os.path.relpath(absolute_path, base_dir)
            output_stream.write(absolute_path, arcname=archive_name)
Example #8
Source File: fsm.py From neural-pipeline with MIT License | 6 votes |
def pack(self) -> None:
    """
    Pack all files in zip
    """
    def _remove(file_path: str):
        # Delete a regular file if it exists.
        if os.path.exists(file_path) and os.path.isfile(file_path):
            os.remove(file_path)

    def _backup(file_path: str):
        # Move an existing file aside to "<name>.old", replacing any
        # previous backup.
        backup_path = file_path + ".old"
        _remove(backup_path)
        if os.path.exists(file_path) and os.path.isfile(file_path):
            os.rename(file_path, backup_path)

    self._check_files([self._weights_file, self._state_file])

    _backup(self._checkpoint_file)

    # NOTE: the original bound this handle to the name `zipfile`, shadowing
    # the module; `archive` avoids that.
    with ZipFile(self._checkpoint_file, 'w') as archive:
        for member in (self._weights_file, self._state_file, self._trainer_file):
            archive.write(member, os.path.basename(member))

    self.clear_files()
Example #9
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_mnist(data_dir):
    """Download and unpack the MNIST idx files into `data_dir` (idempotent)."""
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    os.chdir(data_dir)
    if (not os.path.exists('train-images-idx3-ubyte')) or \
       (not os.path.exists('train-labels-idx1-ubyte')) or \
       (not os.path.exists('t10k-images-idx3-ubyte')) or \
       (not os.path.exists('t10k-labels-idx1-ubyte')):
        import zipfile
        # BUGFIX: `urllib.urlretrieve` exists only on Python 2; fall back so
        # the helper also runs on Python 3.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        zippath = os.path.join(os.getcwd(), "mnist.zip")
        urlretrieve("http://data.mxnet.io/mxnet/data/mnist.zip", zippath)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(zippath, "r") as zf:
            zf.extractall()
        os.remove(zippath)
    os.chdir("..")
Example #10
Source File: text.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    """Ensure the segment's data file exists locally (downloading and
    unpacking the dataset archive if needed), then load it into
    self._data / self._label."""
    archive_file_name, archive_hash = self._archive_file
    data_file_name, data_hash = self._data_file[self._segment]
    path = os.path.join(self._root, data_file_name)
    # Re-download when the file is missing or its SHA1 doesn't match.
    if not os.path.exists(path) or not check_sha1(path, data_hash):
        namespace = 'gluon/dataset/'+self._namespace
        downloaded_file_path = download(_get_repo_file_url(namespace, archive_file_name),
                                        path=self._root,
                                        sha1_hash=archive_hash)
        with zipfile.ZipFile(downloaded_file_path, 'r') as zf:
            # Flatten the archive: each member is written directly into
            # self._root under its basename; directory entries are skipped
            # because their basename is empty.
            for member in zf.namelist():
                filename = os.path.basename(member)
                if filename:
                    dest = os.path.join(self._root, filename)
                    with zf.open(member) as source, \
                            open(dest, "wb") as target:
                        shutil.copyfileobj(source, target)
    data, label = self._read_batch(path)
    # Reshape the flat token streams into (num_sequences, seq_len) batches.
    self._data = nd.array(data, dtype=data.dtype).reshape((-1, self._seq_len))
    self._label = nd.array(label, dtype=label.dtype).reshape((-1, self._seq_len))
Example #11
Source File: alarmdata.py From SecPi with GNU General Public License v3.0 | 6 votes |
def extract(self):
    """Extract an alarm-data zip archive in place.

    Expects a JSON request body with non-empty 'dir' and 'name' keys; the
    file <datapath>/<dir>/<name> is extracted into <datapath>/<dir>.
    Returns a status/message dict describing the outcome.
    """
    if(hasattr(cherrypy.request, 'json')):
        if('dir' in cherrypy.request.json and cherrypy.request.json['dir']!='' and 'name' in cherrypy.request.json and cherrypy.request.json['name']!=''):
            # `dir_name` instead of `dir` so the builtin isn't shadowed.
            dir_name = cherrypy.request.json['dir']
            name = cherrypy.request.json['name']
            fdir = path.join(self.datapath, dir_name)
            fp = path.join(fdir, name)

            if(path.exists(fp)):
                # SECURITY NOTE(review): extractall() on a user-supplied
                # archive is exposed to "zip slip" path traversal; consider
                # validating member names before extraction.
                with zipfile.ZipFile(fp, "r") as z:
                    z.extractall(fdir)

                return {'status': 'success', 'message': "File %s/%s extracted!"%(dir_name, name)}
            else:
                return {'status': 'error', 'message': "File doesn't exist!"}
        else:
            return {'status': 'error', 'message': "Invalid filename!"}
    else:
        return {'status': 'error', 'message': "No filename given!"}
Example #12
Source File: plugin.py From skelebot with MIT License | 6 votes |
def execute(self, config, args):
    """
    Execution Hook

    When the plugin command is provided the plugin from the zip file
    specified in the plugin argument is installed in the Skelebot Plugins
    folder inside Skelebot Home
    """
    # Create the ~/.skelebot directory if not already present
    skelebotHome = os.path.expanduser(SKELEBOT_HOME)
    if (os.path.exists(skelebotHome) is False):
        os.makedirs(skelebotHome, exist_ok=True)

    # Create the ~/.skelebot/plugins directory if not already present
    pluginsHome = os.path.expanduser(PLUGINS_HOME)
    if (os.path.exists(pluginsHome) is False):
        os.makedirs(pluginsHome, exist_ok=True)

    # Unzip the plugin into the plugins folder; the with-statement closes
    # the archive even if extractall() raises (the original leaked it).
    with zipfile.ZipFile(args.plugin, 'r') as zip_ref:
        zip_ref.extractall(pluginsHome)
Example #13
Source File: simple.py From lirpg with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as scratch:
        # Serialize the current TF state into the scratch directory.
        save_state(os.path.join(scratch, "model"))
        packed = os.path.join(scratch, "packed.zip")
        # Bundle everything in the scratch dir (except the archive itself).
        with zipfile.ZipFile(packed, 'w') as archive:
            for walk_root, _dirs, names in os.walk(scratch):
                for name in names:
                    member = os.path.join(walk_root, name)
                    if member != packed:
                        archive.write(member, os.path.relpath(member, scratch))
        with open(packed, "rb") as fh:
            model_data = fh.read()
    # Persist the archive bytes together with the act params.
    with open(path, "wb") as fh:
        cloudpickle.dump((model_data, self._act_params), fh)
Example #14
Source File: __init__.py From pyhanlp with Apache License 2.0 | 6 votes |
def install_hanlp_data(the_jar_version=None):
    """Download and install the HanLP data package matching the given jar
    version (defaults to the configured/latest jar version).

    Returns True if new data was installed, False if the installed data is
    already current for that jar version."""
    if not the_jar_version:
        the_jar_version = HANLP_JAR_VERSION if HANLP_JAR_VERSION else hanlp_latest_version()[0]
    for jar_version, jar_url, data_version, data_url in hanlp_releases():
        if jar_version == the_jar_version:
            if data_version == hanlp_installed_data_version():
                return False  # matching data already installed
            data_zip = 'data-for-{}.zip'.format(data_version)
            data_zip = os.path.join(STATIC_ROOT, data_zip)
            download(data_url, os.path.join(STATIC_ROOT, data_zip))
            print('解压 data.zip...')  # "Unpacking data.zip..."
            with zipfile.ZipFile(data_zip, "r") as zip_ref:
                zip_ref.extractall(STATIC_ROOT)
            os.remove(data_zip)
            write_config(root=STATIC_ROOT)
            # Record which data version is now installed.
            with open_(PATH_DATA_VERSION, 'w', encoding='utf-8') as f:
                f.write(data_version)
            global HANLP_DATA_VERSION
            HANLP_DATA_VERSION = data_version
            return True
Example #15
Source File: deepq.py From HardRLWithYoutube with MIT License | 6 votes |
def save_act(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as workdir:
        # Dump the TF variables into the temporary working directory.
        save_state(os.path.join(workdir, "model"))
        bundle_path = os.path.join(workdir, "packed.zip")
        # Zip up every file under workdir, skipping the bundle itself.
        with zipfile.ZipFile(bundle_path, 'w') as bundle:
            for parent, _subdirs, entries in os.walk(workdir):
                for entry in entries:
                    entry_path = os.path.join(parent, entry)
                    if entry_path != bundle_path:
                        bundle.write(entry_path, os.path.relpath(entry_path, workdir))
        with open(bundle_path, "rb") as src:
            model_data = src.read()
    # Store (archive bytes, act params) as the pickled payload.
    with open(path, "wb") as dst:
        cloudpickle.dump((model_data, self._act_params), dst)
Example #16
Source File: utils.py From TVQAplus with MIT License | 6 votes |
def make_zipfile(src_dir, save_path, enclosing_dir="", exclude_paths=None, exclude_extensions=None):
    """make a zip file of root_dir, save it to save_path.
    exclude_paths will be excluded if it is a subdir of root_dir.
    An enclosing_dir is added is specified.
    """
    abs_src = os.path.abspath(src_dir)
    with zipfile.ZipFile(save_path, "w") as zf:
        # BUGFIX: walk abs_src (not src_dir) so the arcname slicing below
        # (`[len(abs_src) + 1:]`) is correct when src_dir is given as a
        # relative path; previously relative inputs produced mangled names.
        for dirname, subdirs, files in os.walk(abs_src):
            if exclude_paths is not None:
                # Prune excluded directory names in-place so os.walk skips
                # their whole subtree.
                for e_p in exclude_paths:
                    if e_p in subdirs:
                        subdirs.remove(e_p)
            arcname = os.path.join(enclosing_dir, dirname[len(abs_src) + 1:])
            zf.write(dirname, arcname)
            for filename in files:
                if exclude_extensions is not None:
                    if os.path.splitext(filename)[1] in exclude_extensions:
                        continue  # do not zip it
                absname = os.path.join(dirname, filename)
                arcname = os.path.join(enclosing_dir, absname[len(abs_src) + 1:])
                zf.write(absname, arcname)
Example #17
Source File: files.py From glazier with Apache License 2.0 | 6 votes |
def Run(self):
    """Extract a zip archive: args[0] is the zip path, args[1] the output dir.

    Raises ActionError on missing args, directory-creation failure, or a
    bad/unreadable zip file.
    """
    try:
        zip_file = self._args[0]
        out_path = self._args[1]
    except IndexError:
        raise ActionError('Unable to determine desired paths from %s.' %
                          str(self._args))

    try:
        file_util.CreateDirectories(out_path)
    except file_util.Error:
        raise ActionError('Unable to create output path %s.' % out_path)

    try:
        # Context manager closes the archive even if extraction fails
        # (the original left the handle open).
        with zipfile.ZipFile(zip_file) as zf:
            zf.extractall(out_path)
    except (IOError, zipfile.BadZipfile) as e:
        raise ActionError('Bad zip file given as input. %s' % e)
Example #18
Source File: utils.py From script.module.inputstreamhelper with MIT License | 6 votes |
def unzip(source, destination, file_to_unzip=None, result=[]):  # pylint: disable=dangerous-default-value
    """Unzip files to specified path"""
    # NOTE: `result` is a deliberate mutable default used as an output
    # parameter, so callers running this inside a Thread can observe
    # completion by reference.
    if not exists(destination):
        mkdirs(destination)

    from zipfile import ZipFile
    zip_obj = ZipFile(compat_path(source))
    for filename in zip_obj.namelist():
        # When a single member was requested, skip all others.
        if file_to_unzip and filename != file_to_unzip:
            continue

        # Detect and remove (dangling) symlinks before extraction
        fullname = os.path.join(destination, filename)
        if os.path.islink(compat_path(fullname)):
            log(3, 'Remove (dangling) symlink at {symlink}', symlink=fullname)
            delete(fullname)

        zip_obj.extract(filename, compat_path(destination))
        result.append(True)  # Pass by reference for Thread

    # True when at least one member was extracted.
    return bool(result)
Example #19
Source File: widevine.py From script.module.inputstreamhelper with MIT License | 6 votes |
def widevine_eula():
    """Displays the Widevine EULA and prompts user to accept it.

    Returns True/False from the yes/no dialog, or False when the CDM
    download fails."""
    cdm_version = latest_widevine_version(eula=True)
    if 'x86' in arch():
        cdm_os = config.WIDEVINE_OS_MAP[system_os()]
        cdm_arch = config.WIDEVINE_ARCH_MAP_X86[arch()]
    else:  # grab the license from the x86 files
        log(0, 'Acquiring Widevine EULA from x86 files.')
        cdm_os = 'mac'
        cdm_arch = 'x64'
    url = config.WIDEVINE_DOWNLOAD_URL.format(version=cdm_version, os=cdm_os, arch=cdm_arch)

    downloaded = http_download(url, message=localize(30025), background=True)  # Acquiring EULA
    if not downloaded:
        return False

    from zipfile import ZipFile
    with ZipFile(compat_path(store('download_path'))) as archive:
        with archive.open(config.WIDEVINE_LICENSE_FILE) as file_obj:
            # Collapse the license text onto one line for the dialog.
            eula = file_obj.read().decode().strip().replace('\n', ' ')

    return yesno_dialog(localize(30026), eula, nolabel=localize(30028), yeslabel=localize(30027))  # Widevine CDM EULA
Example #20
Source File: dataset.py From tf2-yolo3 with Apache License 2.0 | 6 votes |
def download_m2nist_if_not_exist():
    """Download and unpack the m2nist dataset into ~/.m2nist.

    No-op when the zip is already present; retries the download, giving
    advice to the user after 6 consecutive failures.
    """
    data_rootdir = os.path.expanduser(os.path.join('~', '.m2nist'))
    m2nist_zip_path = os.path.join(data_rootdir, 'm2nist.zip')
    if os.path.exists(m2nist_zip_path):
        return
    os.makedirs(data_rootdir, exist_ok=True)
    m2nist_zip_url = 'https://raw.githubusercontent.com/akkaze/datasets/master/m2nist.zip'
    fail_counter = 0
    while True:
        try:
            print('Trying to download m2nist...')
            download_from_url(m2nist_zip_url, m2nist_zip_path)
            break
        except Exception as exc:
            fail_counter += 1
            print('Errors occured : {0}'.format(exc))
            if fail_counter >= 6:
                # BUGFIX: the original called .format() with one argument for
                # a two-placeholder template (raising IndexError) and passed
                # `data_rootdir` to print() instead; it also pointed the user
                # at the local zip path rather than the download URL.
                print('Please try to download dataset from {0} by yourself and put it under the directory {1}'.format(
                    m2nist_zip_url, data_rootdir))
            time.sleep(5)
            continue
    # Context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(m2nist_zip_path) as zipf:
        zipf.extractall(data_rootdir)
Example #21
Source File: utils.py From pywren-ibm-cloud with Apache License 2.0 | 6 votes |
def create_function_handler_zip(zip_location, main_exec_file, backend_location):
    """Build the function-handler deployment zip at `zip_location`.

    The archive contains the backend's `entry_point.py` stored as
    `main_exec_file`, plus the whole pywren_ibm_cloud package tree
    (skipping __pycache__ directories).
    """
    logger.debug("Creating function handler zip in {}".format(zip_location))

    def add_folder_to_zip(zip_file, full_dir_path, sub_dir=''):
        # Recursively mirror full_dir_path under 'pywren_ibm_cloud/<sub_dir>'.
        for file in os.listdir(full_dir_path):
            full_path = os.path.join(full_dir_path, file)
            if os.path.isfile(full_path):
                zip_file.write(full_path, os.path.join('pywren_ibm_cloud', sub_dir, file))
            elif os.path.isdir(full_path) and '__pycache__' not in full_path:
                add_folder_to_zip(zip_file, full_path, os.path.join(sub_dir, file))

    try:
        with zipfile.ZipFile(zip_location, 'w', zipfile.ZIP_DEFLATED) as pywren_zip:
            current_location = os.path.dirname(os.path.abspath(backend_location))
            module_location = os.path.dirname(os.path.abspath(pywren_ibm_cloud.__file__))
            main_file = os.path.join(current_location, 'entry_point.py')
            pywren_zip.write(main_file, main_exec_file)
            add_folder_to_zip(pywren_zip, module_location)
    except Exception as e:
        # BUGFIX: the original format string had two placeholders but only
        # one argument, so building the error message itself raised
        # IndexError and masked the real failure.
        raise Exception('Unable to create the {} package: {}'.format(zip_location, e))
Example #22
Source File: data_utils.py From IGMC with MIT License | 6 votes |
def download_dataset(dataset, files, data_dir):
    """ Downloads dataset if files are not present. """
    # Nothing to do when every expected file already exists.
    if np.all([os.path.isfile(data_dir + f) for f in files]):
        return

    url = "http://files.grouplens.org/datasets/movielens/" + dataset.replace('_', '-') + '.zip'
    request = urlopen(url)

    print('Downloading %s dataset' % dataset)

    # Map the dataset option to the folder name inside the archive.
    if dataset in ['ml_100k', 'ml_1m']:
        target_dir = 'raw_data/' + dataset.replace('_', '-')
    elif dataset == 'ml_10m':
        target_dir = 'raw_data/' + 'ml-10M100K'
    else:
        raise ValueError('Invalid dataset option %s' % dataset)

    with ZipFile(BytesIO(request.read())) as zip_ref:
        zip_ref.extractall('raw_data/')

    os.rename(target_dir, data_dir)
    #shutil.rmtree(target_dir)
Example #23
Source File: download_glue.py From BERT-Classification-Tutorial with Apache License 2.0 | 5 votes |
def download_and_extract(task, data_dir):
    """Download the GLUE archive for `task` (URL from TASK2PATH) and unpack
    it into `data_dir`, removing the archive afterwards."""
    print("Downloading and extracting %s..." % task)
    archive_name = "%s.zip" % task
    urllib.request.urlretrieve(TASK2PATH[task], archive_name)
    with zipfile.ZipFile(archive_name) as archive:
        archive.extractall(data_dir)
    os.remove(archive_name)
    print("\tCompleted!")
Example #24
Source File: ssm.py From aegea with Apache License 2.0 | 5 votes |
def download_session_manager_plugin_macos(target_path):
    """Fetch the macOS session-manager-plugin bundle from S3 and write the
    plugin binary it contains to `target_path`."""
    bundle = io.BytesIO()
    clients.s3.download_fileobj(sm_plugin_bucket, "plugin/latest/mac/sessionmanager-bundle.zip", bundle)
    with zipfile.ZipFile(bundle) as archive, open(target_path, "wb") as out:
        out.write(archive.read("sessionmanager-bundle/bin/session-manager-plugin"))
Example #25
Source File: image_batches.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _read_image_list(self, skip_image_ids=None):
    """Reads list of dataset images from the datastore.

    Returns a list of (generated_image_id, image_path) tuples sorted by
    path; images whose basename (without .png) is in skip_image_ids are
    omitted."""
    if skip_image_ids is None:
        skip_image_ids = []
    images = self._storage_client.list_blobs(
        prefix=os.path.join('dataset', self._dataset_name) + '/')
    zip_files = [i for i in images if i.endswith('.zip')]
    if len(zip_files) == 1:
        # we have a zip archive with images
        zip_name = zip_files[0]
        logging.info('Reading list of images from zip file %s', zip_name)
        blob = self._storage_client.get_blob(zip_name)
        buf = BytesIO()
        logging.info('Downloading zip')
        blob.download_to_file(buf)
        buf.seek(0)
        logging.info('Reading content of the zip')
        with zipfile.ZipFile(buf) as f:
            # Image "paths" are namespaced under the zip file name.
            images = [os.path.join(zip_name, os.path.basename(n))
                      for n in f.namelist() if n.endswith('.png')]
        buf.close()
        logging.info('Found %d images', len(images))
    else:
        # we have just a directory with images, filter non-PNG files
        logging.info('Reading list of images from png files in storage')
        images = [i for i in images if i.endswith('.png')]
        logging.info('Found %d images', len(images))
    # filter images which should be skipped
    images = [i for i in images
              if os.path.basename(i)[:-4] not in skip_image_ids]
    # assign IDs to images
    images = [(DATASET_IMAGE_ID_PATTERN.format(idx), i)
              for idx, i in enumerate(sorted(images))]
    return images
Example #26
Source File: start.py From Starx_Pixiv_Collector with MIT License | 5 votes |
def dynamic_download_and_Synthesizing(illust_id, title=None, prefix=None):
    """Download a Pixiv ugoira (animated illust) frame archive and
    synthesize the frames into an animated GIF under the save path."""
    tag = 'Dynamic_Download_And_Synthesizing'
    # Fetch the ugoira metadata: frame-zip URL, mime type and frame delays.
    d_json_data = 'https://www.pixiv.net/ajax/illust/' + str(illust_id) + '/ugoira_meta'
    d_json_decoded = json.loads(get_text_from_url(d_json_data))['body']
    src_zip_url = d_json_decoded['originalSrc']
    src_mime_type = d_json_decoded['mime_type']
    # NOTE(review): only the first frame's delay is used for the whole GIF
    # (converted ms -> s); per-frame delays from the metadata are ignored.
    src_img_delay = int(d_json_decoded['frames'][0]['delay']) / 1000
    src_saved_path = save_path + 'TEMP' + global_symbol + str(illust_id) + global_symbol + \
        src_zip_url.split('/')[-1]
    src_saved_dir = save_path + 'TEMP' + global_symbol + str(illust_id) + global_symbol
    src_final_dir = save_path + 'Dynamic' + global_symbol
    download_thread(src_zip_url, save_path, None, 'TEMP' + global_symbol + str(illust_id))
    # The download runs on another thread; poll for its ".done" marker file.
    while not os.path.exists(src_saved_path + '.done'):
        time.sleep(1)
        print_with_tag(tag, 'Waiting for complete...')
    print_with_tag(tag, ['Zip target downloaded:', src_saved_path])
    with zipfile.ZipFile(src_saved_path, 'r') as zip_file:
        zip_file.extractall(path=src_saved_dir)
    # get each frame
    sort_by_num = []
    frames = []
    for root, dirs, files in os.walk(src_saved_dir):
        for file in files:
            if file.endswith('jpg') or file.endswith('png'):
                sort_by_num.append(src_saved_dir + global_symbol + file)
    # NOTE(review): lexicographic sort; relies on zero-padded frame names
    # in the archive to yield numeric frame order.
    sort_by_num.sort()
    print_with_tag(tag, 'Reading each frame..')
    for each_frame in sort_by_num:
        frames.append(imageio.imread(each_frame))
    gif_save_dir = save_path + str(prefix) + global_symbol + year_month + str(
        day) + global_symbol + 'D-' + str(illust_id) + global_symbol
    # Replace characters that are illegal in file names with '_'.
    gif_name_format = re.sub('[\/:*?"<>|]', '_', str(title)) + '-' + str(illust_id) + '.gif'
    if not os.path.exists(gif_save_dir):
        os.makedirs(gif_save_dir)
    print_with_tag(tag, 'Synthesizing dynamic images..')
    try:
        imageio.mimsave(gif_save_dir + gif_name_format, frames, duration=src_img_delay)
    except Exception as e:
        # On failure, log the target path and the error, then abort the run.
        print_with_tag(tag, [gif_save_dir + gif_name_format])
        print_with_tag(tag, e)
        exit()
Example #27
Source File: download.py From nmp_qc with MIT License | 5 votes |
def download_file(url, file_ext, dir_path='./'):
    """Download `url` into `dir_path`; if it is a .zip archive, unpack it
    there and delete the archive afterwards."""
    file_name = wget.download(url, out=dir_path)
    file_path = os.path.join(dir_path, file_name)
    if file_ext == '.zip':
        # Context manager closes the archive even if extraction fails
        # (the original leaked the handle on error).
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(dir_path)
        os.remove(file_path)
Example #28
Source File: dataloader.py From models with MIT License | 5 votes |
def inflate_data_sources(input):
    """Extract the zip archive at `input` into a fresh temp directory.

    Returns a dict mapping each top-level entry's stem (name before the
    first '.') to its extracted path.
    """
    import zipfile
    import tempfile
    import shutil
    import os
    dirpath = tempfile.mkdtemp()
    # make sure the directory is empty
    shutil.rmtree(dirpath)
    os.makedirs(dirpath)
    # Load and extract the zip file; the context manager closes the archive
    # handle (the original never closed it).
    with zipfile.ZipFile(input) as zf:
        zf.extractall(dirpath)
    extracted_folders = os.listdir(dirpath)
    return {k.split(".")[0]: os.path.join(dirpath, k) for k in extracted_folders}
Example #29
Source File: utils.py From deep-learning-note with MIT License | 5 votes |
def load_data_jay_lyrics(zip_path='./data/JayChou/jaychou_lyrics.txt.zip',
                         member='jaychou_lyrics.txt', max_chars=20000):
    """Load the Jay Chou lyrics dataset.

    Generalized: the archive path, member name and corpus size are now
    parameters whose defaults reproduce the original hard-coded behavior.

    Returns (corpus_indices, char_to_idx, idx_to_char, vocab_size).
    """
    with zipfile.ZipFile(zip_path) as zin:
        with zin.open(member) as f:
            corpus_chars = f.read().decode('utf-8')
    # Newlines become spaces so the corpus is one continuous stream.
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[0:max_chars]
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size
Example #30
Source File: w2v_utils.py From deep-learning-note with MIT License | 5 votes |
def read_data(file_path):
    """ Read data into a list of tokens
    There should be 17,005,207 tokens
    """
    # The corpus is stored as the first (and only expected) member of the
    # zip archive; split on whitespace to get tokens.
    with zipfile.ZipFile(file_path) as archive:
        first_member = archive.namelist()[0]
        raw = archive.read(first_member)
    return tf.compat.as_str(raw).split()