Python zipfile.ZipFile() Examples
The following are 30 code examples of zipfile.ZipFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the zipfile module, or try the search function.

Example #1
Source File: demo.py From svviz with MIT License | 30 votes |
def downloadDemo(which):
    """Download and unpack the svviz demo dataset `which` into ./svviz-examples.

    Returns True on success, False if the download/extraction failed or the
    expected output directory is missing afterwards.
    """
    try:
        downloadDir = tempfile.mkdtemp()
        archivePath = "{}/svviz-data.zip".format(downloadDir)

        # logging.info("Downloading...")
        downloadWithProgress("http://svviz.github.io/svviz/assets/examples/{}.zip".format(which), archivePath)

        logging.info("Decompressing...")
        # Context manager guarantees the archive handle is closed even if
        # extractall() raises (the original leaked the open ZipFile).
        with zipfile.ZipFile(archivePath) as archive:
            archive.extractall("{}".format(downloadDir))

        if not os.path.exists("svviz-examples"):
            os.makedirs("svviz-examples/")

        shutil.move("{temp}/{which}".format(temp=downloadDir, which=which), "svviz-examples/")
    except Exception as e:
        print("error downloading and decompressing example data: {}".format(e))
        return False

    if not os.path.exists("svviz-examples"):
        print("error finding example data after download and decompression")
        return False
    return True
Example #2
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 10 votes |
def get_cifar10(data_dir):
    """Download and unpack the CIFAR-10 .rec files into `data_dir` (idempotent)."""
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    cwd = os.path.abspath(os.getcwd())
    os.chdir(data_dir)
    if (not os.path.exists('train.rec')) or \
       (not os.path.exists('test.rec')):
        import zipfile, glob
        # BUGFIX: `urllib.urlretrieve` exists only on Python 2; fall back so
        # the helper also runs on Python 3.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        dirname = os.getcwd()
        zippath = os.path.join(dirname, "cifar10.zip")
        urlretrieve("http://data.mxnet.io/mxnet/data/cifar10.zip", zippath)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(zippath, "r") as zf:
            zf.extractall()
        os.remove(zippath)
        # Flatten the extracted "cifar" directory into data_dir.
        for f in glob.glob(os.path.join(dirname, "cifar", "*")):
            name = f.split(os.path.sep)[-1]
            os.rename(f, os.path.join(dirname, name))
        os.rmdir(os.path.join(dirname, "cifar"))
    os.chdir(cwd)
Example #3
Source File: misc_util.py From HardRLWithYoutube with MIT License | 7 votes |
def pickle_load(path, compression=False):
    """Unpickle a possibly compressed pickle.

    Parameters
    ----------
    path: str
        path to the output file
    compression: bool
        if true assumes that pickle was compressed when created and
        attempts decompression.

    Returns
    -------
    obj: object
        the unpickled object
    """
    # Plain (uncompressed) case first: just read the raw pickle.
    if not compression:
        with open(path, "rb") as handle:
            return pickle.load(handle)
    # Compressed case: the pickle lives in a member named "data" inside a
    # deflate-compressed zip archive.
    with zipfile.ZipFile(path, "r", compression=zipfile.ZIP_DEFLATED) as archive:
        with archive.open("data") as member:
            return pickle.load(member)
Example #4
Source File: unique_objects.py From vergeml with MIT License | 6 votes |
def __call__(self, args, env):
    """Download the 'unique objects' archive and copy its images into samples-dir."""
    samples_dir = env.get('samples-dir')
    print("Downloading unique objects to {}.".format(samples_dir))
    src_dir = self.download_files([_URL], env=env, dir=env.get('cache-dir'))
    path = os.path.join(src_dir, "ObjectsAll.zip")
    # with-statement closes the archive even if extractall() raises
    # (the original leaked the open handle on error).
    with zipfile.ZipFile(path, 'r') as zipf:
        zipf.extractall(src_dir)
    for file in os.listdir(os.path.join(src_dir, "OBJECTSALL")):
        shutil.copy(os.path.join(src_dir, "OBJECTSALL", file), samples_dir)
    # The cache copy is no longer needed once the samples are in place.
    shutil.rmtree(src_dir)
    print("Finished downloading unique objects.")
Example #5
Source File: cats_and_dogs.py From vergeml with MIT License | 6 votes |
def __call__(self, args, env):
    """Download the cats-and-dogs dataset and split it into cat/dog sample dirs.

    Raises VergeMLError if either label directory already exists.
    """
    samples_dir = env.get('samples-dir')

    # Refuse to overwrite existing label directories.
    for label in ("cat", "dog"):
        dest = os.path.join(samples_dir, label)
        if os.path.exists(dest):
            raise VergeMLError("Directory {} already exists in samples dir: {}".format(label, dest))

    print("Downloading cats and dogs to {}.".format(samples_dir))
    src_dir = self.download_files([(_URL, "catsdogs.zip")], env)
    path = os.path.join(src_dir, "catsdogs.zip")

    print("Extracting data.")
    # with-statement closes the archive even if extractall() raises
    # (the original leaked the open handle on error).
    with zipfile.ZipFile(path, 'r') as zipf:
        zipf.extractall(src_dir)

    for file, dest in (("PetImages/Dog", "dog"), ("PetImages/Cat", "cat")):
        shutil.copytree(os.path.join(src_dir, file), os.path.join(samples_dir, dest))

    shutil.rmtree(src_dir)

    # NOTE(review): these two specific images are deliberately removed
    # (original comment was just "WTF?") — presumably corrupt upstream;
    # confirm before changing.
    os.unlink(os.path.join(samples_dir, "cat", "666.jpg"))
    os.unlink(os.path.join(samples_dir, "dog", "11702.jpg"))

    print("Finished downloading cats and dogs.")
Example #6
Source File: download.py From nmp_qc with MIT License | 6 votes |
def download_figshare(file_name, file_ext, dir_path='./', change_name=None):
    """Download figshare file `file_name` into `dir_path` and unpack it.

    .zip and .tar.bz2 archives are extracted (into `change_name` under
    dir_path when given) and the archive is deleted; other files are
    optionally renamed to `change_name`.
    """
    prepare_data_dir(dir_path)
    url = 'https://ndownloader.figshare.com/files/' + file_name
    wget.download(url, out=dir_path)
    file_path = os.path.join(dir_path, file_name)
    if file_ext == '.zip':
        if change_name is not None:
            dir_path = os.path.join(dir_path, change_name)
        # Context manager closes the archive even if extraction fails
        # (the original leaked the handle on error).
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(dir_path)
        os.remove(file_path)
    elif file_ext == '.tar.bz2':
        if change_name is not None:
            dir_path = os.path.join(dir_path, change_name)
        with tarfile.open(file_path, 'r:bz2') as tar_ref:
            tar_ref.extractall(dir_path)
        os.remove(file_path)
    elif change_name is not None:
        os.rename(file_path, os.path.join(dir_path, change_name))
Example #7
Source File: archive.py From CAMISIM with Apache License 2.0 | 6 votes |
def zip_stream(src_dir, output_stream):
    """
    @param src_dir:
    @type src_dir: str
    @param output_stream:
    @type output_stream: zipfile.ZipFile
    @return:
    """
    base_dir = os.path.dirname(src_dir)
    assert os.path.isdir(src_dir), "Invalid, not a directory: '{}'".format(src_dir)
    # Walk the tree and archive every file under a path relative to the
    # parent of src_dir, so the archive keeps src_dir's own name as prefix.
    for current_root, _subdirs, file_names in os.walk(src_dir):
        for name in file_names:
            absolute_path = os.path.join(current_root, name)
            archive_name = os.path.relpath(absolute_path, base_dir)
            output_stream.write(absolute_path, arcname=archive_name)
Example #8
Source File: fsm.py From neural-pipeline with MIT License | 6 votes |
def pack(self) -> None:
    """
    Pack all files in zip
    """
    def _remove(file_path: str):
        # Delete a regular file if it exists.
        if os.path.exists(file_path) and os.path.isfile(file_path):
            os.remove(file_path)

    def _backup(file_path: str):
        # Move an existing file aside to "<name>.old", replacing any
        # previous backup.
        backup_path = file_path + ".old"
        _remove(backup_path)
        if os.path.exists(file_path) and os.path.isfile(file_path):
            os.rename(file_path, backup_path)

    self._check_files([self._weights_file, self._state_file])

    _backup(self._checkpoint_file)

    # NOTE: the original bound this handle to the name `zipfile`, shadowing
    # the module; `archive` avoids that.
    with ZipFile(self._checkpoint_file, 'w') as archive:
        for member in (self._weights_file, self._state_file, self._trainer_file):
            archive.write(member, os.path.basename(member))

    self.clear_files()
Example #9
Source File: get_data.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def get_mnist(data_dir):
    """Download and unpack the MNIST idx files into `data_dir` (idempotent)."""
    if not os.path.isdir(data_dir):
        os.system("mkdir " + data_dir)
    os.chdir(data_dir)
    if (not os.path.exists('train-images-idx3-ubyte')) or \
       (not os.path.exists('train-labels-idx1-ubyte')) or \
       (not os.path.exists('t10k-images-idx3-ubyte')) or \
       (not os.path.exists('t10k-labels-idx1-ubyte')):
        import zipfile
        # BUGFIX: `urllib.urlretrieve` exists only on Python 2; fall back so
        # the helper also runs on Python 3.
        try:
            from urllib.request import urlretrieve  # Python 3
        except ImportError:
            from urllib import urlretrieve  # Python 2
        zippath = os.path.join(os.getcwd(), "mnist.zip")
        urlretrieve("http://data.mxnet.io/mxnet/data/mnist.zip", zippath)
        # Context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(zippath, "r") as zf:
            zf.extractall()
        os.remove(zippath)
    os.chdir("..")
Example #10
Source File: text.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def _get_data(self):
    """Ensure the segment's data file exists locally (downloading and
    unpacking the dataset archive if needed), then load it into
    self._data / self._label."""
    archive_file_name, archive_hash = self._archive_file
    data_file_name, data_hash = self._data_file[self._segment]
    path = os.path.join(self._root, data_file_name)
    # Re-download when the file is missing or its SHA1 doesn't match.
    if not os.path.exists(path) or not check_sha1(path, data_hash):
        namespace = 'gluon/dataset/'+self._namespace
        downloaded_file_path = download(_get_repo_file_url(namespace, archive_file_name),
                                        path=self._root,
                                        sha1_hash=archive_hash)
        with zipfile.ZipFile(downloaded_file_path, 'r') as zf:
            # Flatten the archive: each member is written directly into
            # self._root under its basename; directory entries are skipped
            # because their basename is empty.
            for member in zf.namelist():
                filename = os.path.basename(member)
                if filename:
                    dest = os.path.join(self._root, filename)
                    with zf.open(member) as source, \
                            open(dest, "wb") as target:
                        shutil.copyfileobj(source, target)
    data, label = self._read_batch(path)
    # Reshape the flat token streams into (num_sequences, seq_len) batches.
    self._data = nd.array(data, dtype=data.dtype).reshape((-1, self._seq_len))
    self._label = nd.array(label, dtype=label.dtype).reshape((-1, self._seq_len))
Example #11
Source File: alarmdata.py From SecPi with GNU General Public License v3.0 | 6 votes |
def extract(self):
    """Extract an alarm-data zip archive in place.

    Expects a JSON request body with non-empty 'dir' and 'name' keys; the
    file <datapath>/<dir>/<name> is extracted into <datapath>/<dir>.
    Returns a status/message dict describing the outcome.
    """
    if(hasattr(cherrypy.request, 'json')):
        if('dir' in cherrypy.request.json and cherrypy.request.json['dir']!='' and 'name' in cherrypy.request.json and cherrypy.request.json['name']!=''):
            # `dir_name` instead of `dir` so the builtin isn't shadowed.
            dir_name = cherrypy.request.json['dir']
            name = cherrypy.request.json['name']
            fdir = path.join(self.datapath, dir_name)
            fp = path.join(fdir, name)

            if(path.exists(fp)):
                # SECURITY NOTE(review): extractall() on a user-supplied
                # archive is exposed to "zip slip" path traversal; consider
                # validating member names before extraction.
                with zipfile.ZipFile(fp, "r") as z:
                    z.extractall(fdir)

                return {'status': 'success', 'message': "File %s/%s extracted!"%(dir_name, name)}
            else:
                return {'status': 'error', 'message': "File doesn't exist!"}
        else:
            return {'status': 'error', 'message': "Invalid filename!"}
    else:
        return {'status': 'error', 'message': "No filename given!"}
Example #12
Source File: plugin.py From skelebot with MIT License | 6 votes |
def execute(self, config, args):
    """
    Execution Hook

    When the plugin command is provided the plugin from the zip file
    specified in the plugin argument is installed in the Skelebot Plugins
    folder inside Skelebot Home
    """
    # Create the ~/.skelebot directory if not already present
    skelebotHome = os.path.expanduser(SKELEBOT_HOME)
    if (os.path.exists(skelebotHome) is False):
        os.makedirs(skelebotHome, exist_ok=True)

    # Create the ~/.skelebot/plugins directory if not already present
    pluginsHome = os.path.expanduser(PLUGINS_HOME)
    if (os.path.exists(pluginsHome) is False):
        os.makedirs(pluginsHome, exist_ok=True)

    # Unzip the plugin into the plugins folder; the with-statement closes
    # the archive even if extractall() raises (the original leaked it).
    with zipfile.ZipFile(args.plugin, 'r') as zip_ref:
        zip_ref.extractall(pluginsHome)
Example #13
Source File: simple.py From lirpg with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as scratch:
        # Serialize the current TF state into the scratch directory.
        save_state(os.path.join(scratch, "model"))
        packed = os.path.join(scratch, "packed.zip")
        # Bundle everything in the scratch dir (except the archive itself).
        with zipfile.ZipFile(packed, 'w') as archive:
            for walk_root, _dirs, names in os.walk(scratch):
                for name in names:
                    member = os.path.join(walk_root, name)
                    if member != packed:
                        archive.write(member, os.path.relpath(member, scratch))
        with open(packed, "rb") as fh:
            model_data = fh.read()
    # Persist the archive bytes together with the act params.
    with open(path, "wb") as fh:
        cloudpickle.dump((model_data, self._act_params), fh)
Example #14
Source File: __init__.py From pyhanlp with Apache License 2.0 | 6 votes |
def install_hanlp_data(the_jar_version=None):
    """Download and install the HanLP data package matching the given jar
    version (defaults to the configured/latest jar version).

    Returns True if new data was installed, False if the installed data is
    already current for that jar version."""
    if not the_jar_version:
        the_jar_version = HANLP_JAR_VERSION if HANLP_JAR_VERSION else hanlp_latest_version()[0]
    for jar_version, jar_url, data_version, data_url in hanlp_releases():
        if jar_version == the_jar_version:
            if data_version == hanlp_installed_data_version():
                return False  # matching data already installed
            data_zip = 'data-for-{}.zip'.format(data_version)
            data_zip = os.path.join(STATIC_ROOT, data_zip)
            download(data_url, os.path.join(STATIC_ROOT, data_zip))
            print('解压 data.zip...')  # "Unpacking data.zip..."
            with zipfile.ZipFile(data_zip, "r") as zip_ref:
                zip_ref.extractall(STATIC_ROOT)
            os.remove(data_zip)
            write_config(root=STATIC_ROOT)
            # Record which data version is now installed.
            with open_(PATH_DATA_VERSION, 'w', encoding='utf-8') as f:
                f.write(data_version)
            global HANLP_DATA_VERSION
            HANLP_DATA_VERSION = data_version
            return True
Example #15
Source File: deepq.py From HardRLWithYoutube with MIT License | 6 votes |
def save_act(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as workdir:
        # Dump the TF variables into the temporary working directory.
        save_state(os.path.join(workdir, "model"))
        bundle_path = os.path.join(workdir, "packed.zip")
        # Zip up every file under workdir, skipping the bundle itself.
        with zipfile.ZipFile(bundle_path, 'w') as bundle:
            for parent, _subdirs, entries in os.walk(workdir):
                for entry in entries:
                    entry_path = os.path.join(parent, entry)
                    if entry_path != bundle_path:
                        bundle.write(entry_path, os.path.relpath(entry_path, workdir))
        with open(bundle_path, "rb") as src:
            model_data = src.read()
    # Store (archive bytes, act params) as the pickled payload.
    with open(path, "wb") as dst:
        cloudpickle.dump((model_data, self._act_params), dst)
Example #16
Source File: utils.py From TVQAplus with MIT License | 6 votes |
def make_zipfile(src_dir, save_path, enclosing_dir="", exclude_paths=None, exclude_extensions=None):
    """make a zip file of root_dir, save it to save_path.
    exclude_paths will be excluded if it is a subdir of root_dir.
    An enclosing_dir is added is specified.
    """
    abs_src = os.path.abspath(src_dir)
    with zipfile.ZipFile(save_path, "w") as zf:
        # BUGFIX: walk abs_src (not src_dir) so the arcname slicing below
        # (`[len(abs_src) + 1:]`) is correct when src_dir is given as a
        # relative path; previously relative inputs produced mangled names.
        for dirname, subdirs, files in os.walk(abs_src):
            if exclude_paths is not None:
                # Prune excluded directory names in-place so os.walk skips
                # their whole subtree.
                for e_p in exclude_paths:
                    if e_p in subdirs:
                        subdirs.remove(e_p)
            arcname = os.path.join(enclosing_dir, dirname[len(abs_src) + 1:])
            zf.write(dirname, arcname)
            for filename in files:
                if exclude_extensions is not None:
                    if os.path.splitext(filename)[1] in exclude_extensions:
                        continue  # do not zip it
                absname = os.path.join(dirname, filename)
                arcname = os.path.join(enclosing_dir, absname[len(abs_src) + 1:])
                zf.write(absname, arcname)
Example #17
Source File: files.py From glazier with Apache License 2.0 | 6 votes |
def Run(self):
    """Extract a zip archive: args[0] is the zip path, args[1] the output dir.

    Raises ActionError on missing args, directory-creation failure, or a
    bad/unreadable zip file.
    """
    try:
        zip_file = self._args[0]
        out_path = self._args[1]
    except IndexError:
        raise ActionError('Unable to determine desired paths from %s.' %
                          str(self._args))

    try:
        file_util.CreateDirectories(out_path)
    except file_util.Error:
        raise ActionError('Unable to create output path %s.' % out_path)

    try:
        # Context manager closes the archive even if extraction fails
        # (the original left the handle open).
        with zipfile.ZipFile(zip_file) as zf:
            zf.extractall(out_path)
    except (IOError, zipfile.BadZipfile) as e:
        raise ActionError('Bad zip file given as input. %s' % e)
Example #18
Source File: utils.py From script.module.inputstreamhelper with MIT License | 6 votes |
def unzip(source, destination, file_to_unzip=None, result=[]):  # pylint: disable=dangerous-default-value
    """Unzip files to specified path"""
    # NOTE: `result` is a deliberate mutable default used as an output
    # parameter, so callers running this inside a Thread can observe
    # completion by reference.
    if not exists(destination):
        mkdirs(destination)

    from zipfile import ZipFile
    zip_obj = ZipFile(compat_path(source))
    for filename in zip_obj.namelist():
        # When a single member was requested, skip all others.
        if file_to_unzip and filename != file_to_unzip:
            continue

        # Detect and remove (dangling) symlinks before extraction
        fullname = os.path.join(destination, filename)
        if os.path.islink(compat_path(fullname)):
            log(3, 'Remove (dangling) symlink at {symlink}', symlink=fullname)
            delete(fullname)

        zip_obj.extract(filename, compat_path(destination))
        result.append(True)  # Pass by reference for Thread

    # True when at least one member was extracted.
    return bool(result)
Example #19
Source File: widevine.py From script.module.inputstreamhelper with MIT License | 6 votes |
def widevine_eula():
    """Displays the Widevine EULA and prompts user to accept it.

    Returns True/False from the yes/no dialog, or False when the CDM
    download fails."""
    cdm_version = latest_widevine_version(eula=True)
    if 'x86' in arch():
        cdm_os = config.WIDEVINE_OS_MAP[system_os()]
        cdm_arch = config.WIDEVINE_ARCH_MAP_X86[arch()]
    else:  # grab the license from the x86 files
        log(0, 'Acquiring Widevine EULA from x86 files.')
        cdm_os = 'mac'
        cdm_arch = 'x64'
    url = config.WIDEVINE_DOWNLOAD_URL.format(version=cdm_version, os=cdm_os, arch=cdm_arch)

    downloaded = http_download(url, message=localize(30025), background=True)  # Acquiring EULA
    if not downloaded:
        return False

    from zipfile import ZipFile
    with ZipFile(compat_path(store('download_path'))) as archive:
        with archive.open(config.WIDEVINE_LICENSE_FILE) as file_obj:
            # Collapse the license text onto one line for the dialog.
            eula = file_obj.read().decode().strip().replace('\n', ' ')

    return yesno_dialog(localize(30026), eula, nolabel=localize(30028), yeslabel=localize(30027))  # Widevine CDM EULA
Example #20
Source File: dataset.py From tf2-yolo3 with Apache License 2.0 | 6 votes |
def download_m2nist_if_not_exist():
    """Download and unpack the m2nist dataset into ~/.m2nist.

    No-op when the zip is already present; retries the download, giving
    advice to the user after 6 consecutive failures.
    """
    data_rootdir = os.path.expanduser(os.path.join('~', '.m2nist'))
    m2nist_zip_path = os.path.join(data_rootdir, 'm2nist.zip')
    if os.path.exists(m2nist_zip_path):
        return
    os.makedirs(data_rootdir, exist_ok=True)
    m2nist_zip_url = 'https://raw.githubusercontent.com/akkaze/datasets/master/m2nist.zip'
    fail_counter = 0
    while True:
        try:
            print('Trying to download m2nist...')
            download_from_url(m2nist_zip_url, m2nist_zip_path)
            break
        except Exception as exc:
            fail_counter += 1
            print('Errors occured : {0}'.format(exc))
            if fail_counter >= 6:
                # BUGFIX: the original called .format() with one argument for
                # a two-placeholder template (raising IndexError) and passed
                # `data_rootdir` to print() instead; it also pointed the user
                # at the local zip path rather than the download URL.
                print('Please try to download dataset from {0} by yourself and put it under the directory {1}'.format(
                    m2nist_zip_url, data_rootdir))
            time.sleep(5)
            continue
    # Context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(m2nist_zip_path) as zipf:
        zipf.extractall(data_rootdir)
Example #21
Source File: utils.py From pywren-ibm-cloud with Apache License 2.0 | 6 votes |
def create_function_handler_zip(zip_location, main_exec_file, backend_location):
    """Build the function-handler deployment zip at `zip_location`.

    The archive contains the backend's `entry_point.py` stored as
    `main_exec_file`, plus the whole pywren_ibm_cloud package tree
    (skipping __pycache__ directories).
    """
    logger.debug("Creating function handler zip in {}".format(zip_location))

    def add_folder_to_zip(zip_file, full_dir_path, sub_dir=''):
        # Recursively mirror full_dir_path under 'pywren_ibm_cloud/<sub_dir>'.
        for file in os.listdir(full_dir_path):
            full_path = os.path.join(full_dir_path, file)
            if os.path.isfile(full_path):
                zip_file.write(full_path, os.path.join('pywren_ibm_cloud', sub_dir, file))
            elif os.path.isdir(full_path) and '__pycache__' not in full_path:
                add_folder_to_zip(zip_file, full_path, os.path.join(sub_dir, file))

    try:
        with zipfile.ZipFile(zip_location, 'w', zipfile.ZIP_DEFLATED) as pywren_zip:
            current_location = os.path.dirname(os.path.abspath(backend_location))
            module_location = os.path.dirname(os.path.abspath(pywren_ibm_cloud.__file__))
            main_file = os.path.join(current_location, 'entry_point.py')
            pywren_zip.write(main_file, main_exec_file)
            add_folder_to_zip(pywren_zip, module_location)
    except Exception as e:
        # BUGFIX: the original format string had two placeholders but only
        # one argument, so building the error message itself raised
        # IndexError and masked the real failure.
        raise Exception('Unable to create the {} package: {}'.format(zip_location, e))
Example #22
Source File: data_utils.py From IGMC with MIT License | 6 votes |
def download_dataset(dataset, files, data_dir):
    """ Downloads dataset if files are not present. """
    # Nothing to do when every expected file already exists.
    if np.all([os.path.isfile(data_dir + f) for f in files]):
        return

    url = "http://files.grouplens.org/datasets/movielens/" + dataset.replace('_', '-') + '.zip'
    request = urlopen(url)

    print('Downloading %s dataset' % dataset)

    # Map the dataset option to the folder name inside the archive.
    if dataset in ['ml_100k', 'ml_1m']:
        target_dir = 'raw_data/' + dataset.replace('_', '-')
    elif dataset == 'ml_10m':
        target_dir = 'raw_data/' + 'ml-10M100K'
    else:
        raise ValueError('Invalid dataset option %s' % dataset)

    with ZipFile(BytesIO(request.read())) as zip_ref:
        zip_ref.extractall('raw_data/')

    os.rename(target_dir, data_dir)
    #shutil.rmtree(target_dir)
Example #23
Source File: download_glue.py From BERT-Classification-Tutorial with Apache License 2.0 | 5 votes |
def download_and_extract(task, data_dir):
    """Download the GLUE archive for `task` (URL from TASK2PATH) and unpack
    it into `data_dir`, removing the archive afterwards."""
    print("Downloading and extracting %s..." % task)
    archive_name = "%s.zip" % task
    urllib.request.urlretrieve(TASK2PATH[task], archive_name)
    with zipfile.ZipFile(archive_name) as archive:
        archive.extractall(data_dir)
    os.remove(archive_name)
    print("\tCompleted!")
Example #24
Source File: ssm.py From aegea with Apache License 2.0 | 5 votes |
def download_session_manager_plugin_macos(target_path):
    """Fetch the macOS session-manager-plugin bundle from S3 and write the
    plugin binary it contains to `target_path`."""
    bundle = io.BytesIO()
    clients.s3.download_fileobj(sm_plugin_bucket, "plugin/latest/mac/sessionmanager-bundle.zip", bundle)
    with zipfile.ZipFile(bundle) as archive, open(target_path, "wb") as out:
        out.write(archive.read("sessionmanager-bundle/bin/session-manager-plugin"))
Example #25
Source File: image_batches.py From neural-fingerprinting with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _read_image_list(self, skip_image_ids=None):
    """Reads list of dataset images from the datastore.

    Returns a list of (generated_image_id, image_path) tuples sorted by
    path; images whose basename (without .png) is in skip_image_ids are
    omitted."""
    if skip_image_ids is None:
        skip_image_ids = []
    images = self._storage_client.list_blobs(
        prefix=os.path.join('dataset', self._dataset_name) + '/')
    zip_files = [i for i in images if i.endswith('.zip')]
    if len(zip_files) == 1:
        # we have a zip archive with images
        zip_name = zip_files[0]
        logging.info('Reading list of images from zip file %s', zip_name)
        blob = self._storage_client.get_blob(zip_name)
        buf = BytesIO()
        logging.info('Downloading zip')
        blob.download_to_file(buf)
        buf.seek(0)
        logging.info('Reading content of the zip')
        with zipfile.ZipFile(buf) as f:
            # Image "paths" are namespaced under the zip file name.
            images = [os.path.join(zip_name, os.path.basename(n))
                      for n in f.namelist() if n.endswith('.png')]
        buf.close()
        logging.info('Found %d images', len(images))
    else:
        # we have just a directory with images, filter non-PNG files
        logging.info('Reading list of images from png files in storage')
        images = [i for i in images if i.endswith('.png')]
        logging.info('Found %d images', len(images))
    # filter images which should be skipped
    images = [i for i in images
              if os.path.basename(i)[:-4] not in skip_image_ids]
    # assign IDs to images
    images = [(DATASET_IMAGE_ID_PATTERN.format(idx), i)
              for idx, i in enumerate(sorted(images))]
    return images
Example #26
Source File: start.py From Starx_Pixiv_Collector with MIT License | 5 votes |
def dynamic_download_and_Synthesizing(illust_id, title=None, prefix=None):
    """Download a Pixiv ugoira (animated illust) frame archive and
    synthesize the frames into an animated GIF under the save path."""
    tag = 'Dynamic_Download_And_Synthesizing'
    # Fetch the ugoira metadata: frame-zip URL, mime type and frame delays.
    d_json_data = 'https://www.pixiv.net/ajax/illust/' + str(illust_id) + '/ugoira_meta'
    d_json_decoded = json.loads(get_text_from_url(d_json_data))['body']
    src_zip_url = d_json_decoded['originalSrc']
    src_mime_type = d_json_decoded['mime_type']
    # NOTE(review): only the first frame's delay is used for the whole GIF
    # (converted ms -> s); per-frame delays from the metadata are ignored.
    src_img_delay = int(d_json_decoded['frames'][0]['delay']) / 1000
    src_saved_path = save_path + 'TEMP' + global_symbol + str(illust_id) + global_symbol + \
        src_zip_url.split('/')[-1]
    src_saved_dir = save_path + 'TEMP' + global_symbol + str(illust_id) + global_symbol
    src_final_dir = save_path + 'Dynamic' + global_symbol
    download_thread(src_zip_url, save_path, None, 'TEMP' + global_symbol + str(illust_id))
    # The download runs on another thread; poll for its ".done" marker file.
    while not os.path.exists(src_saved_path + '.done'):
        time.sleep(1)
        print_with_tag(tag, 'Waiting for complete...')
    print_with_tag(tag, ['Zip target downloaded:', src_saved_path])
    with zipfile.ZipFile(src_saved_path, 'r') as zip_file:
        zip_file.extractall(path=src_saved_dir)
    # get each frame
    sort_by_num = []
    frames = []
    for root, dirs, files in os.walk(src_saved_dir):
        for file in files:
            if file.endswith('jpg') or file.endswith('png'):
                sort_by_num.append(src_saved_dir + global_symbol + file)
    # NOTE(review): lexicographic sort; relies on zero-padded frame names
    # in the archive to yield numeric frame order.
    sort_by_num.sort()
    print_with_tag(tag, 'Reading each frame..')
    for each_frame in sort_by_num:
        frames.append(imageio.imread(each_frame))
    gif_save_dir = save_path + str(prefix) + global_symbol + year_month + str(
        day) + global_symbol + 'D-' + str(illust_id) + global_symbol
    # Replace characters that are illegal in file names with '_'.
    gif_name_format = re.sub('[\/:*?"<>|]', '_', str(title)) + '-' + str(illust_id) + '.gif'
    if not os.path.exists(gif_save_dir):
        os.makedirs(gif_save_dir)
    print_with_tag(tag, 'Synthesizing dynamic images..')
    try:
        imageio.mimsave(gif_save_dir + gif_name_format, frames, duration=src_img_delay)
    except Exception as e:
        # On failure, log the target path and the error, then abort the run.
        print_with_tag(tag, [gif_save_dir + gif_name_format])
        print_with_tag(tag, e)
        exit()
Example #27
Source File: download.py From nmp_qc with MIT License | 5 votes |
def download_file(url, file_ext, dir_path='./'):
    """Download `url` into `dir_path`; if it is a .zip archive, unpack it
    there and delete the archive afterwards."""
    file_name = wget.download(url, out=dir_path)
    file_path = os.path.join(dir_path, file_name)
    if file_ext == '.zip':
        # Context manager closes the archive even if extraction fails
        # (the original leaked the handle on error).
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(dir_path)
        os.remove(file_path)
Example #28
Source File: dataloader.py From models with MIT License | 5 votes |
def inflate_data_sources(input):
    """Extract the zip archive at `input` into a fresh temp directory.

    Returns a dict mapping each top-level entry's stem (name before the
    first '.') to its extracted path.
    """
    import zipfile
    import tempfile
    import shutil
    import os
    dirpath = tempfile.mkdtemp()
    # make sure the directory is empty
    shutil.rmtree(dirpath)
    os.makedirs(dirpath)
    # Load and extract the zip file; the context manager closes the archive
    # handle (the original never closed it).
    with zipfile.ZipFile(input) as zf:
        zf.extractall(dirpath)
    extracted_folders = os.listdir(dirpath)
    return {k.split(".")[0]: os.path.join(dirpath, k) for k in extracted_folders}
Example #29
Source File: utils.py From deep-learning-note with MIT License | 5 votes |
def load_data_jay_lyrics(zip_path='./data/JayChou/jaychou_lyrics.txt.zip',
                         member='jaychou_lyrics.txt', max_chars=20000):
    """Load the Jay Chou lyrics dataset.

    Generalized: the archive path, member name and corpus size are now
    parameters whose defaults reproduce the original hard-coded behavior.

    Returns (corpus_indices, char_to_idx, idx_to_char, vocab_size).
    """
    with zipfile.ZipFile(zip_path) as zin:
        with zin.open(member) as f:
            corpus_chars = f.read().decode('utf-8')
    # Newlines become spaces so the corpus is one continuous stream.
    corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
    corpus_chars = corpus_chars[0:max_chars]
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([(char, i) for i, char in enumerate(idx_to_char)])
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size
Example #30
Source File: w2v_utils.py From deep-learning-note with MIT License | 5 votes |
def read_data(file_path):
    """ Read data into a list of tokens
    There should be 17,005,207 tokens
    """
    # The corpus is stored as the first (and only expected) member of the
    # zip archive; split on whitespace to get tokens.
    with zipfile.ZipFile(file_path) as archive:
        first_member = archive.namelist()[0]
        raw = archive.read(first_member)
    return tf.compat.as_str(raw).split()