Python tarfile.TarFile() Examples

The following are 30 code examples of tarfile.TarFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module tarfile, or try the search function.
Example #1
Source File: test_dirty_untar.py    From thefuck with MIT License 6 votes vote down vote up
def tar_error(tmpdir):
    """Build a fixture that reproduces a "dirty" untar inside ``tmpdir``.

    The returned callable takes an archive file name, switches the working
    directory to ``tmpdir``, creates an archive of that name containing
    ``a``, ``b``, ``c`` and ``d/e``, extracts it into the working directory
    and asserts on the resulting directory listing.
    """
    members = ('a', 'b', 'c', 'd/e')

    def build_and_extract(archive_path):
        # 'd' has to exist before 'd/e' can be written.
        os.mkdir('d')
        with tarfile.TarFile(archive_path, 'w') as writer:
            for member in members:
                with open(member, 'w') as handle:
                    handle.write('*')
                writer.add(member)
                # The source file is dropped as soon as it is archived.
                os.remove(member)

        with tarfile.TarFile(archive_path, 'r') as reader:
            reader.extractall()

    def fixture(filename):
        workdir = str(tmpdir)
        archive_path = os.path.join(workdir, filename)

        os.chdir(workdir)
        build_and_extract(archive_path)

        assert set(os.listdir('.')) == {filename, 'a', 'b', 'c', 'd'}
        assert set(os.listdir('./d')) == {'e'}

    return fixture
Example #2
Source File: test_tarfile.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option: a leading kilobyte of filler
        # must be skipped so that the "foo" header written after it is found.
        # The opener is picked from the compression suffix of self.mode so the
        # scratch file is written in the matching format.
        if self.mode.endswith(":gz"):
            _open = gzip.GzipFile
        elif self.mode.endswith(":bz2"):
            _open = bz2.BZ2File
        else:
            _open = open

        for char in ('\0', 'a'):
            # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
            # are ignored correctly.
            fobj = _open(tmpname, "wb")
            try:
                fobj.write(char * 1024)
                fobj.write(tarfile.TarInfo("foo").tobuf())
            finally:
                # Close even when a write fails so the handle is not leaked.
                fobj.close()

            tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
            try:
                self.assertListEqual(tar.getnames(), ["foo"],
                        "ignore_zeros=True should have skipped the %r-blocks" % char)
            finally:
                # A failing assertion must not leave the archive open.
                tar.close()
Example #3
Source File: debian.py    From fetchy with MIT License 6 votes vote down vote up
def _unpack_data(self, tar: TarFile, data_archive: TarFile):
        with io.BytesIO(
            str.encode(
                "\n".join(
                    [
                        member.name.lstrip(".")
                        for member in data_archive
                        if member.name.lstrip(".")
                    ]
                )
                + "\n"
            )
        ) as fileobj:
            info = TarInfo("list")
            info.size = fileobj.getbuffer().nbytes
            self._unpack_info_file(tar, info, fileobj)

        names = tar.getnames()

        for member in (member for member in data_archive if member.name not in names):
            if member.islnk() or member.issym() or member.isdir():
                tar.addfile(member)
            else:
                with data_archive.extractfile(member) as fileobj:
                    tar.addfile(member, fileobj) 
Example #4
Source File: download.py    From chainer-compiler with MIT License 6 votes vote down vote up
def extractall(file_path, destination, ext):
    """Unpack an archive into a directory.

    The archive reader is chosen from ``ext``; an extension that is not
    listed below results in nothing being extracted.

    Args:
        file_path (string): The path of a file to be extracted.
        destination (string): A directory path. The archive file
            will be extracted under this directory.
        ext (string): An extension suffix of the archive file.
            This function supports :obj:`'.zip'`, :obj:`'.tar'`,
            :obj:`'.gz'` and :obj:`'.tgz'`.

    """
    if ext == '.zip':
        archive = zipfile.ZipFile(file_path, 'r')
    elif ext == '.tar':
        # Plain, uncompressed tar.
        archive = tarfile.TarFile(file_path, 'r')
    elif ext in ('.gz', '.tgz'):
        archive = tarfile.open(file_path, 'r:gz')
    else:
        return

    with archive:
        archive.extractall(destination)
Example #5
Source File: test_tarfile.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: Close the internal file object in the TarFile
        # constructor in case of an error. For the test we rely on
        # the fact that opening an empty file raises a ReadError.
        empty = os.path.join(TEMPDIR, "empty")
        # Create the empty file through a context manager so the handle is
        # closed deterministically instead of being left to the garbage
        # collector. A bytes literal is what a binary-mode file expects.
        with open(empty, "wb") as fobj:
            fobj.write(b"")

        try:
            # Allocate without running __init__ so the instance is still
            # reachable for inspection after the constructor has failed.
            tar = object.__new__(tarfile.TarFile)
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                self.assertTrue(tar.fileobj.closed)
            else:
                self.fail("ReadError not raised")
        finally:
            os.remove(empty)
Example #6
Source File: test_tarfile.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option: a kilobyte of filler written in
        # front of a valid "foo" header must be skipped when reading.
        expected = ["foo"]
        for filler in (b'\0', b'a'):
            # b'\0' blocks trigger EOFHeaderError, b'a' blocks trigger
            # InvalidHeaderError; both have to be ignored.
            with self.open(tmpname, "w") as fobj:
                fobj.write(filler * 1024)
                fobj.write(tarfile.TarInfo("foo").tobuf())

            tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
            try:
                names = tar.getnames()
                message = ("ignore_zeros=True should have skipped the %r-blocks" %
                           filler)
                self.assertListEqual(names, expected, message)
            finally:
                tar.close()
Example #7
Source File: test_tarfile.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: the TarFile constructor has to close its internal
        # file object when it fails. Opening an empty file raises a
        # ReadError, which is the failure this test provokes.
        empty = os.path.join(TEMPDIR, "empty")
        with open(empty, "wb") as fobj:
            fobj.write(b"")

        try:
            # Allocate first, initialise second: the instance stays
            # reachable even though __init__ raises.
            tar = object.__new__(tarfile.TarFile)
            read_error_seen = False
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                read_error_seen = True
                self.assertTrue(tar.fileobj.closed)
            if not read_error_seen:
                self.fail("ReadError not raised")
        finally:
            support.unlink(empty)
Example #8
Source File: alexnet.py    From ray-legacy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def load_chunk(tarfile, size=None):
  """Load a number of images from a single imagenet .tar file.

  This function also converts the image from grayscale to RGB if necessary.

  Args:
    tarfile (tarfile.TarFile): The archive from which the files get loaded.
    size (Optional[Tuple[int, int]]): Resize the image to this size if provided.

  Returns:
    A tuple ``(images, filenames)``. ``images`` is a numpy.ndarray holding the
    image data in format [batch, w, h, c]; ``filenames`` lists the member
    paths in the same order.
  """
  result = []
  filenames = []
  for member in tarfile.getmembers():
    content = tarfile.extractfile(member)
    img = Image.open(content)
    # Pasting onto a fresh RGB canvas gives every image three channels.
    rgbimg = Image.new("RGB", img.size)
    rgbimg.paste(img)
    # Identity test: `!= None` would dispatch to the operand's __ne__.
    if size is not None:
      rgbimg = rgbimg.resize(size, Image.ANTIALIAS)
    # NOTE(review): np.array(rgbimg) is presumably laid out (height, width, c)
    # while rgbimg.size is (width, height); reshape does not transpose, so
    # confirm this is intended for non-square images.
    result.append(np.array(rgbimg).reshape(1, rgbimg.size[0], rgbimg.size[1], 3))
    filenames.append(member.path)
  return np.concatenate(result), filenames
Example #9
Source File: test_tarfile.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: the TarFile constructor has to close its internal
        # file object when it fails. Opening an empty file raises a
        # ReadError, which is the failure this test provokes.
        empty = os.path.join(TEMPDIR, "empty")
        with open(empty, "wb") as fobj:
            fobj.write("")

        try:
            # Allocate first, initialise second: the instance stays
            # reachable even though __init__ raises.
            tar = object.__new__(tarfile.TarFile)
            read_error_seen = False
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                read_error_seen = True
                self.assertTrue(tar.fileobj.closed)
            if not read_error_seen:
                self.fail("ReadError not raised")
        finally:
            support.unlink(empty)
Example #10
Source File: test_tarfile.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: the TarFile constructor has to close its internal
        # file object when it fails. Opening an empty file raises a
        # ReadError, which is the failure this test provokes.
        empty = os.path.join(TEMPDIR, "empty")
        with open(empty, "wb") as fobj:
            fobj.write(b"")

        try:
            # Allocate first, initialise second: the instance stays
            # reachable even though __init__ raises.
            tar = object.__new__(tarfile.TarFile)
            read_error_seen = False
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                read_error_seen = True
                self.assertTrue(tar.fileobj.closed)
            if not read_error_seen:
                self.fail("ReadError not raised")
        finally:
            support.unlink(empty)
Example #11
Source File: test_tarfile.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option: a kilobyte of filler written in
        # front of a valid "foo" header must be skipped when reading.
        expected = ["foo"]
        for filler in (b'\0', b'a'):
            # b'\0' blocks trigger EOFHeaderError, b'a' blocks trigger
            # InvalidHeaderError; both have to be ignored.
            with self.open(tmpname, "w") as fobj:
                fobj.write(filler * 1024)
                fobj.write(tarfile.TarInfo("foo").tobuf())

            tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
            try:
                names = tar.getnames()
                message = ("ignore_zeros=True should have skipped the %r-blocks" %
                           filler)
                self.assertListEqual(names, expected, message)
            finally:
                tar.close()
Example #12
Source File: download.py    From chainercv with MIT License 6 votes vote down vote up
def extractall(file_path, destination, ext):
    """Unpack an archive into a directory.

    The archive reader is chosen from ``ext``; an extension that is not
    listed below results in nothing being extracted.

    Args:
        file_path (string): The path of a file to be extracted.
        destination (string): A directory path. The archive file
            will be extracted under this directory.
        ext (string): An extension suffix of the archive file.
            This function supports :obj:`'.zip'`, :obj:`'.tar'`,
            :obj:`'.gz'` and :obj:`'.tgz'`.

    """
    if ext == '.zip':
        archive = zipfile.ZipFile(file_path, 'r')
    elif ext == '.tar':
        # Plain, uncompressed tar.
        archive = tarfile.TarFile(file_path, 'r')
    elif ext in ('.gz', '.tgz'):
        archive = tarfile.open(file_path, 'r:gz')
    else:
        return

    with archive:
        archive.extractall(destination)
Example #13
Source File: micropip.py    From micropython-samples with MIT License 6 votes vote down vote up
def install_pkg(pkg_spec, install_path):
    """Download the latest release of ``pkg_spec`` and install it.

    The release metadata is fetched with ``get_pkg_metadata``; the single
    release file is downloaded, decompressed with zlib, spooled into a
    temporary file and passed to ``install_tar`` as an open tar archive.

    Args:
        pkg_spec: Package specifier handed to ``get_pkg_metadata``.
        install_path: Destination handed on to ``install_tar``.

    Returns:
        Whatever ``install_tar`` returns for the unpacked archive.
    """
    data = get_pkg_metadata(pkg_spec)

    latest_ver = data["info"]["version"]
    packages = data["releases"][latest_ver]
    assert len(packages) == 1
    package_url = packages[0]["url"]
    print("Installing %s %s from %s" % (pkg_spec, latest_ver, package_url))
    f1 = url_open(package_url)
    try:
        # Read inside the try block so the connection is closed even when
        # the download itself fails part-way.
        s = read_lines(f1)
        str1 = zlib.decompress(s, gzdict_sz)
        with tempfile.TemporaryFile() as temp_file:
            temp_file.write(str1)
            temp_file.seek(0)
            with tarfile.TarFile(fileobj=temp_file) as tar_file:  # Expects a file object
                meta = install_tar(tar_file, install_path)
    finally:
        f1.close()
    return meta
Example #14
Source File: test_tarfile.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_init_close_fobj(self):
        # Issue #7341: Close the internal file object in the TarFile
        # constructor in case of an error. For the test we rely on
        # the fact that opening an empty file raises a ReadError.
        empty = os.path.join(TEMPDIR, "empty")
        # Create the empty file through a context manager so the handle is
        # closed deterministically instead of being left to the garbage
        # collector. A bytes literal is what a binary-mode file expects.
        with open(empty, "wb") as fobj:
            fobj.write(b"")

        try:
            # Allocate without running __init__ so the instance is still
            # reachable for inspection after the constructor has failed.
            tar = object.__new__(tarfile.TarFile)
            try:
                tar.__init__(empty)
            except tarfile.ReadError:
                self.assertTrue(tar.fileobj.closed)
            else:
                self.fail("ReadError not raised")
        finally:
            os.remove(empty)
Example #15
Source File: test_ubuntu_corpus_training.py    From ChatterBot with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _create_test_corpus(self, data):
        """
        Create a small tar in a similar format to the
        Ubuntu corpus file in memory for testing.
        """
        file_path = os.path.join(self.trainer.data_directory, 'ubuntu_dialogs.tgz')
        tar = tarfile.TarFile(file_path, 'w')

        tsv1 = BytesIO(data[0])
        tsv2 = BytesIO(data[1])

        tarinfo = tarfile.TarInfo('dialogs/3/1.tsv')
        tarinfo.size = len(data[0])
        tar.addfile(tarinfo, fileobj=tsv1)

        tarinfo = tarfile.TarInfo('dialogs/3/2.tsv')
        tarinfo.size = len(data[1])
        tar.addfile(tarinfo, fileobj=tsv2)

        tsv1.close()
        tsv2.close()
        tar.close()

        return file_path 
Example #16
Source File: memory.py    From flocker with Apache License 2.0 6 votes vote down vote up
def reader(self, remote_snapshots=None):
        """
        Yield an in-memory tarball of the children of ``self.path``.

        When ``remote_snapshots`` is non-empty, a text trailer naming those
        snapshots is appended after the tar data.
        """
        stream = BytesIO()
        archive = TarFile(fileobj=stream, mode="w")
        for entry in self.path.children():
            archive.add(entry.path, arcname=entry.basename(), recursive=True)
        archive.close()

        # Data appended after the end of a tar stream does not corrupt it.
        # That is used here to smuggle the snapshot names through, so tests
        # can verify that an incremental stream is really being produced
        # without actual incremental streams having to be implemented on top
        # of dumb directories.
        if remote_snapshots:
            snapshot_names = u"\n".join(
                snapshot.name for snapshot in remote_snapshots)
            trailer = u"\nincremental stream based on\n{}".format(snapshot_names)
            stream.write(trailer.encode("ascii"))
        stream.seek(0, 0)
        yield stream
Example #17
Source File: test_chute_api.py    From Paradrop with Apache License 2.0 6 votes vote down vote up
def test_extract_tarred_chute():
    # Both scenarios reuse the same two scratch files.
    yaml_path = "/tmp/paradrop.yaml"
    tar_path = "/tmp/test_chute.tar"

    # Normal case: tar file with a paradrop.yaml file.
    with open(yaml_path, "w") as output:
        output.write("name: test")

    tar = tarfile.TarFile(name=tar_path, mode="w")
    tar.add(yaml_path, arcname="paradrop.yaml")
    tar.close()

    with open(tar_path, "r") as source:
        extracted = chute_api.extract_tarred_chute(source)
        workdir, paradrop_yaml = extracted
        assert os.path.isdir(workdir)
        assert paradrop_yaml['name'] == "test"

    # Bad case: empty tar file, no paradrop.yaml. Re-opening in write mode
    # and closing straight away leaves an archive without members.
    tar = tarfile.TarFile(name=tar_path, mode="w")
    tar.close()

    with open(tar_path, "r") as source:
        assert_raises(Exception, chute_api.extract_tarred_chute, source)
Example #18
Source File: test_tarfile.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_ignore_zeros(self):
        # Test TarFile's ignore_zeros option: a leading kilobyte of filler
        # must be skipped so that the "foo" header written after it is found.
        # The opener is picked from the compression suffix of self.mode so the
        # scratch file is written in the matching format.
        if self.mode.endswith(":gz"):
            _open = gzip.GzipFile
        elif self.mode.endswith(":bz2"):
            _open = bz2.BZ2File
        else:
            _open = open

        for char in ('\0', 'a'):
            # Test if EOFHeaderError ('\0') and InvalidHeaderError ('a')
            # are ignored correctly.
            fobj = _open(tmpname, "wb")
            try:
                fobj.write(char * 1024)
                fobj.write(tarfile.TarInfo("foo").tobuf())
            finally:
                # Close even when a write fails so the handle is not leaked.
                fobj.close()

            tar = tarfile.open(tmpname, mode="r", ignore_zeros=True)
            try:
                self.assertListEqual(tar.getnames(), ["foo"],
                        "ignore_zeros=True should have skipped the %r-blocks" % char)
            finally:
                # A failing assertion must not leave the archive open.
                tar.close()
Example #19
Source File: archive.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def open_archive(self, file_path, compression_type=None, mode='r'):
        """
        Open an archive with the opener registered for its compression type.

        @param file_path: Path to file
        @type file_path: str | unicode
        @param compression_type: Key identifying the compression; when None
            it is looked up with get_compression_type(file_path), and a
            result of None is treated as 'tar'
        @param mode: Key into self._modes selecting the access mode
        @type mode: str

        @return: Result of the opener selected from self._open
            (presumably an open archive handle)
        @rtype: tarfile.TarFile
        """
        # The placeholder makes the offending mode show up in the message.
        assert mode in self._modes, "Unsupported mode '{}'".format(mode)
        if compression_type is None:
            compression_type = self.get_compression_type(file_path)
        assert compression_type in self._modes[mode], "Unsupported compression '{}' for archive files.".format(
            compression_type)
        assert self.is_archive(file_path)

        # No detected compression means a plain tar archive.
        if compression_type is None:
            compression_type = 'tar'

        # Translate the caller's mode key into the opener-specific mode.
        mode = self._modes[mode][compression_type]
        return self._open[compression_type](file_path, mode=mode)
Example #20
Source File: docker_build.py    From biweeklybudget with GNU Affero General Public License v3.0 6 votes vote down vote up
def _tar_add_string_file(self, tarobj, fpath, content):
        """
        Store ``content`` as a file named ``fpath`` inside an open tarfile
        object. The string is encoded as UTF-8 before it is written.

        Largely based on: http://stackoverflow.com/a/40392022

        :param tarobj: the tarfile to add to
        :type tarobj: tarfile.TarFile
        :param fpath: path to put the file at in the archive
        :type fpath: str
        :param content: file content
        :type content: str
        """
        logger.debug('Adding %d-length string to tarfile at %s',
                     len(content), fpath)
        payload = content.encode('utf-8')
        member = tarfile.TarInfo(name=fpath)
        # The size is that of the encoded bytes, not of the string.
        member.size = len(payload)
        tarobj.addfile(tarinfo=member, fileobj=BytesIO(payload))
Example #21
Source File: dirty_untar.py    From thefuck with MIT License 5 votes vote down vote up
def side_effect(old_cmd, command):
    """Delete the files that the archive of ``old_cmd`` names.

    The archive path is taken from ``_tar_file(old_cmd.script_parts)[0]``.
    Every member name that resolves to a location inside the current working
    directory is removed; names that point elsewhere are skipped.

    Args:
        old_cmd: The command that was run; its ``script_parts`` locate the
            archive.
        command: Unused here.
    """
    # The trailing separator keeps "/work" from matching "/workspace".
    cwd_prefix = os.path.join(os.path.realpath(os.getcwd()), '')

    with tarfile.TarFile(_tar_file(old_cmd.script_parts)[0]) as archive:
        for file in archive.getnames():
            # Member names come from the archive and are untrusted: an
            # absolute path or a "../" component must never cause a file
            # outside the working directory to be deleted. Only the parent
            # is resolved, so a symlink member itself is still removable.
            parent = os.path.realpath(
                os.path.dirname(os.path.join(cwd_prefix, file)))
            if not os.path.join(parent, '').startswith(cwd_prefix):
                continue

            try:
                os.remove(file)
            except OSError:
                # does not try to remove directories as we cannot know if they
                # already existed before
                pass
Example #22
Source File: docker_interface.py    From cloudify-manager with Apache License 2.0 5 votes vote down vote up
def tar_file_content_for_put_archive(content, filename):
        """Wrap a string in an in-memory tar archive.

        ``content`` is UTF-8 encoded and stored as the single member
        ``filename``, stamped with the current time. The returned BytesIO
        is rewound to the start.
        """
        payload = content.encode('utf-8')

        buffer = BytesIO()
        archive = tarfile.TarFile(fileobj=buffer, mode='w')

        member = tarfile.TarInfo(name=filename)
        member.size = len(payload)
        member.mtime = time.time()

        archive.addfile(member, BytesIO(payload))
        archive.close()

        buffer.seek(0)
        return buffer
Example #23
Source File: debian.py    From fetchy with MIT License 5 votes vote down vote up
def _unpack_control_data(self, tar: TarFile, control_archive: TarFile):
        for member in (member for member in control_archive if member.isfile()):
            with control_archive.extractfile(member) as fileobj:
                self._unpack_info_file(tar, member, fileobj) 
Example #24
Source File: test_tarfile.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_eof(self):
        # Leaving the context manager without an error has to finish the
        # archive, i.e. __exit__() must call TarFile.close() and thereby
        # write the end-of-archive blocks.
        with tarfile.open(tmpname, "w"):
            pass
        written = os.path.getsize(tmpname)
        self.assertNotEqual(written, 0,
                "context manager wrote no end-of-archive block")
Example #25
Source File: test_tarfile.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_closed(self):
        # Entering an already closed TarFile as a context manager is
        # expected to raise OSError from __enter__().
        closed_tar = tarfile.open(tarname)
        closed_tar.close()
        with self.assertRaises(OSError), closed_tar:
            pass
Example #26
Source File: test_tarfile.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def test_read_through(self):
        # Issue #11224: A poorly designed _FileInFile.read() method
        # caused seeking errors with stream tar files. Every regular
        # member is read to its end in 512-byte chunks.
        for tarinfo in self.tar:
            if tarinfo.isreg():
                with self.tar.extractfile(tarinfo) as fobj:
                    while True:
                        try:
                            chunk = fobj.read(512)
                        except tarfile.StreamError:
                            self.fail("simple read-through using "
                                      "TarFile.extractfile() failed")
                        if not chunk:
                            # An empty read marks the end of the member.
                            break
Example #27
Source File: util.py    From gobbli with Apache License 2.0 5 votes vote down vote up
def _extract_tar_junk_path(tarfile_obj: tarfile.TarFile, archive_extract_dir: Path):
    """
    Extract every non-directory member of a tarfile straight into
    ``archive_extract_dir``, discarding the directory part of its name.

    Each member's ``name`` is rewritten in place to its final path component
    before extraction.
    """
    # Directory members are skipped.
    file_members = (m for m in tarfile_obj.getmembers() if not m.isdir())
    for member in file_members:
        # Keep only the last path component.
        flat_name = Path(member.name).name
        member.name = flat_name
        output_file = archive_extract_dir / flat_name
        LOGGER.debug(f"Extracting member '{member.name}' to '{output_file}'")
        tarfile_obj.extract(member, path=archive_extract_dir)
Example #28
Source File: splunk_whisperer.py    From splunk_whisperer with MIT License 5 votes vote down vote up
def create_splunk_bundle(script_path):
    """Package a script as an uncompressed tar bundle with a scripted input.

    A temporary staging directory is populated with ``bin/<script>`` and a
    ``local/inputs.conf`` that references the script under
    ``SPLUNK_APP_NAME``. Both directories are archived into a fresh temporary
    file and the staging directory is removed.

    Args:
        script_path: Path of the script to bundle.

    Returns:
        Path of the temporary tar file. The caller is responsible for
        deleting it.
    """
    tmp_path = tempfile.mkdtemp()
    try:
        bin_dir = os.path.join(tmp_path, "bin")
        os.mkdir(bin_dir)
        shutil.copy(script_path, bin_dir)
        # make the script executable - not 100% certain this makes a difference
        os.chmod(os.path.join(bin_dir, os.path.basename(script_path)), 0o700)

        local_dir = os.path.join(tmp_path, "local")
        os.mkdir(local_dir)
        inputs_conf = os.path.join(local_dir, "inputs.conf")
        with open(inputs_conf, "w") as f:
            inputs = '[script://$SPLUNK_HOME/etc/apps/{}/bin/{}]\n'.format(SPLUNK_APP_NAME, os.path.basename(script_path))
            inputs += 'disabled = false\n'
            inputs += 'index = default\n'
            inputs += 'interval = 60.0\n'
            inputs += 'sourcetype = test\n'
            f.write(inputs)

        # mkstemp returns an open descriptor; only the path is needed.
        (fd, tmp_bundle) = tempfile.mkstemp()
        os.close(fd)
        with tarfile.TarFile(tmp_bundle, mode="w") as tf:
            tf.add(bin_dir, arcname="bin")
            tf.add(local_dir, arcname="local")
    finally:
        # Remove the staging directory even when a step above fails, so a
        # failed run does not leave it behind.
        shutil.rmtree(tmp_path)
    return tmp_bundle
Example #29
Source File: trace.py    From neurodocker with Apache License 2.0 5 votes vote down vote up
def copy_file_to_container(container, src, dest):
    """Copy the host file `src` into the directory `dest` of `container`.

    Parameters
    ----------
    container : str or container object
        Container to which file is copied. A value without a `put_archive`
        attribute is looked up through the docker client.
    src : str
        Filepath on the host.
    dest : str
        Directory inside container. Original filename is preserved.

    Returns
    -------
    success : bool
        The value returned by the container's `put_archive`.
    """
    # https://gist.github.com/zbyte64/6800eae10ce082bb78f0b7a2cca5cbc2

    from io import BytesIO
    import tarfile

    client = get_docker_client()

    # Probe for the attribute; only a reference that lacks it needs to be
    # resolved into a container object.
    try:
        container.put_archive
    except AttributeError:
        container = client.containers.get(container)

    # The file is stored in the archive under its bare name.
    filename = os.path.basename(src)
    with BytesIO() as tar_stream:
        with tarfile.TarFile(fileobj=tar_stream, mode='w') as tar:
            tar.add(src, arcname=filename, recursive=False)
        # Rewind so put_archive reads the archive from its beginning.
        tar_stream.seek(0)
        return container.put_archive(dest, tar_stream)
Example #30
Source File: trace.py    From neurodocker with Apache License 2.0 5 votes vote down vote up
def copy_file_to_container(container, src, dest):
    """Copy the host file `src` into the directory `dest` of `container`.

    Parameters
    ----------
    container : str or container object
        Container to which file is copied. A value without a `put_archive`
        attribute is looked up through the docker client.
    src : str
        Filepath on the host.
    dest : str
        Directory inside container. Original filename is preserved.

    Returns
    -------
    success : bool
        The value returned by the container's `put_archive`.
    """
    # https://gist.github.com/zbyte64/6800eae10ce082bb78f0b7a2cca5cbc2

    from io import BytesIO
    import tarfile

    client = get_docker_client()

    # Probe for the attribute; only a reference that lacks it needs to be
    # resolved into a container object.
    try:
        container.put_archive
    except AttributeError:
        container = client.containers.get(container)

    # The file is stored in the archive under its bare name.
    filename = os.path.basename(src)
    with BytesIO() as tar_stream:
        with tarfile.TarFile(fileobj=tar_stream, mode='w') as tar:
            tar.add(src, arcname=filename, recursive=False)
        # Rewind so put_archive reads the archive from its beginning.
        tar_stream.seek(0)
        return container.put_archive(dest, tar_stream)