Python zipfile.BadZipfile() Examples

The following are 30 code examples of zipfile.BadZipfile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module zipfile, or try the search function.
Example #1
Source File: test_zipfile.py    From oss-ftp with MIT License 6 votes vote down vote up
def check_read_with_bad_crc(self, compression):
        """Verify that every read path reports a corrupted CRC as BadZipfile.

        The archive bytes come from ``self.zips_with_bad_crc[compression]``.
        The member named 'afile' is read three ways: through ZipFile.read(),
        through one read() on the object returned by ZipFile.open(), and
        through repeated two-byte reads on such an object.
        """
        corrupt_bytes = self.zips_with_bad_crc[compression]

        # Whole-member read via ZipFile.read().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with self.assertRaises(zipfile.BadZipfile):
                archive.read('afile')

        # Whole-member read via the file object returned by ZipFile.open().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                with self.assertRaises(zipfile.BadZipfile):
                    member.read()

        # Tiny reads, so that the buffering logic is exercised as well.
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                member.MIN_READ_SIZE = 2
                with self.assertRaises(zipfile.BadZipfile):
                    chunk = member.read(2)
                    while chunk:
                        chunk = member.read(2)
Example #2
Source File: zip.py    From cuckoo-osx-analyzer with MIT License 6 votes vote down vote up
def _extract(self, filename, password):
        """Extract the archive at `filename` and return its member names.

        The path is first passed through `_prepare_archive_at_path`; a falsy
        result makes this method return None. Extraction goes to the directory
        named by the TEMP environment variable, or "/tmp" when it is unset.

        A RuntimeError from the first attempt triggers one retry with the
        password "infected". BadZipfile, and a RuntimeError from the retry,
        are re-raised as plain Exception. Nested archives are processed via
        `self._extract_nested_archives` in a `finally` clause, so that call
        happens whether or not extraction succeeded.
        """
        checked_path = _prepare_archive_at_path(filename)
        if not checked_path:
            return None
        target_dir = environ.get("TEMP", "/tmp")
        with ZipFile(checked_path, "r") as zf:
            try:
                zf.extractall(path=target_dir, pwd=password)
            except BadZipfile:
                raise Exception("Invalid Zip file")
            except RuntimeError:
                # First attempt failed; retry once with a default password.
                try:
                    zf.extractall(path=target_dir, pwd="infected")
                except RuntimeError as err:
                    raise Exception("Unable to extract Zip file: %s" % err)
            finally:
                self._extract_nested_archives(zf, target_dir, password)
        return zf.namelist()
Example #3
Source File: zip.py    From cuckoo-osx-analyzer with MIT License 6 votes vote down vote up
def _prepare_archive_at_path(filename):
    """Verify that there is a readable zip archive at the given path.

    Returns the path under which the archive should be used. Normally that
    is `filename` unchanged. When `_is_overwritten(filename)` is true (the
    archive contains a file named like itself, e.g. "foo.zip" containing
    "foo"), the archive is moved to `filename` plus a random extension so
    that it is not overwritten, and the new path is returned.

    Returns None when the file cannot be opened as a zip archive
    (BadZipfile).
    """
    # Opening the archive is the readability check; the `with` statement
    # closes it again, so no explicit close() is needed.
    try:
        with ZipFile(filename, "r"):
            pass
    except BadZipfile:
        return None
    # Test if zip file contains a file named as itself
    if _is_overwritten(filename):
        # Adjacent literals rather than a backslash continuation inside the
        # string, which put the source indentation into the logged message.
        log.debug("ZIP file contains a file with the same name, original is "
                  "going to be overwritten")
        # In this case we just change the file name
        new_zip_path = filename + _random_extension()
        move(filename, new_zip_path)
        filename = new_zip_path
    return filename
Example #4
Source File: zip.py    From mac-a-mal-cuckoo with MIT License 6 votes vote down vote up
def _extract(self, filename, password):
        """Unpack the zip at `filename` and return the names of its members.

        `_prepare_archive_at_path` validates (and possibly renames) the
        archive first; when it returns a falsy value this method returns None.
        Files are extracted into the TEMP environment directory, falling back
        to "/tmp".

        If extraction raises RuntimeError, it is attempted once more with the
        password "infected". BadZipfile and a second RuntimeError surface as
        plain Exception. `self._extract_nested_archives` is invoked from a
        `finally` clause, i.e. also when extraction failed.
        """
        usable_path = _prepare_archive_at_path(filename)
        if not usable_path:
            return None
        destination = environ.get("TEMP", "/tmp")
        with ZipFile(usable_path, "r") as opened:
            try:
                opened.extractall(path=destination, pwd=password)
            except BadZipfile:
                raise Exception("Invalid Zip file")
            except RuntimeError:
                # Second and last attempt, using a default password.
                try:
                    opened.extractall(path=destination, pwd="infected")
                except RuntimeError as err:
                    raise Exception("Unable to extract Zip file: %s" % err)
            finally:
                self._extract_nested_archives(opened, destination, password)
        return opened.namelist()
Example #5
Source File: zip.py    From mac-a-mal-cuckoo with MIT License 6 votes vote down vote up
def _prepare_archive_at_path(filename):
    """Verify that there is a readable zip archive at the given path.

    Returns the path under which the archive should be used. Normally that
    is `filename` unchanged. When `_is_overwritten(filename)` is true (the
    archive contains a file named like itself, e.g. "foo.zip" containing
    "foo"), the archive is moved to `filename` plus a random extension so
    that it is not overwritten, and the new path is returned.

    Returns None when the file cannot be opened as a zip archive
    (BadZipfile).
    """
    # Opening the archive is the readability check; the `with` statement
    # closes it again, so no explicit close() is needed.
    try:
        with ZipFile(filename, "r"):
            pass
    except BadZipfile:
        return None
    # Test if zip file contains a file named as itself
    if _is_overwritten(filename):
        # Adjacent literals rather than a backslash continuation inside the
        # string, which put the source indentation into the logged message.
        log.debug("ZIP file contains a file with the same name, original is "
                  "going to be overwritten")
        # In this case we just change the file name
        new_zip_path = filename + _random_extension()
        move(filename, new_zip_path)
        filename = new_zip_path
    return filename
Example #6
Source File: oxml.py    From plaso with Apache License 2.0 6 votes vote down vote up
def _ParseRelationshipsXMLFile(self, xml_data):
    """Collects the targets of property relationships from a .rels file.

    Every element of the parsed document whose 'Type' attribute mentions
    'properties' contributes the value of its 'Target' attribute, which is
    None when that attribute is absent.

    Args:
      xml_data (bytes): data of a _rels/.rels XML file.

    Returns:
      list[str]: property file paths. The path is relative to the root of
          the ZIP file.

    Raises:
      zipfile.BadZipfile: listed by the original documentation, although
          nothing in this method raises it directly.
          NOTE(review): confirm against the caller.
    """
    xml_root = ElementTree.fromstring(xml_data)

    # repr() lets elements without a 'Type' attribute (None) take part in
    # the substring test without a separate None check.
    return [
        element.get('Target')
        for element in xml_root.iter()
        if 'properties' in repr(element.get('Type'))]
Example #7
Source File: mangascrapper.py    From MangaScrapper with Apache License 2.0 6 votes vote down vote up
def _create_cbz_(dirpath, archivename):
        """
        Create a ZIP-based comic book archive from the files in a directory.

        Each entry listed directly inside `dirpath` is added under the name
        "<directory name>/<entry name>". DEFLATE compression is used when
        zlib can be imported, otherwise entries are stored uncompressed.

        :param dirpath: Directory whose contents are put into the archive.
        :param archivename: Path of the archive file to write.
        """
        currdir = os.getcwd()
        try:
            import zlib  # noqa: F401 -- imported only to probe availability

            compression = zipfile.ZIP_DEFLATED
        except ImportError:
            logging.warning("zlib library not available. Using ZIP_STORED compression.")
            compression = zipfile.ZIP_STORED
        try:
            with zipfile.ZipFile(archivename, "w", compression) as zf:
                # Work from the parent directory so that member names start
                # with the directory's own name.
                os.chdir(os.path.abspath(os.path.join(dirpath, os.pardir)))
                name = os.path.basename(dirpath)
                for entry in os.listdir(name):
                    zf.write(os.path.join(name, entry))
        except zipfile.BadZipfile:
            logging.error("Unable to compile CBR file ")
        finally:
            # Restore the caller's working directory on every exit path,
            # including errors other than BadZipfile (e.g. OSError).
            os.chdir(currdir)
Example #8
Source File: test_zipstream.py    From Safejumper-for-Desktop with GNU General Public License v2.0 6 votes vote down vote up
def test_unsupportedCompression(self):
        """
        Reading a member whose central directory entry names an unknown
        compression mechanism must raise BadZipfile.
        """
        archivePath = self.mktemp()
        with zipfile.ZipFile(archivePath, "w") as archive:
            member = zipfile.ZipInfo("0")
            archive.writestr(member, "some data")
            # The bogus compression type is applied only after writestr():
            # setting it earlier makes zipfile (correctly) refuse the value.
            # It ends up in the central directory when the archive is closed.
            member.compress_type = 1234

        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
Example #9
Source File: serial.py    From locality-sensitive-hashing with MIT License 6 votes vote down vote up
def main():
    """
    Read input zip file, minhash the documents in it and put them in buckets
    The zip file should have been created with data_prep/prepare_blobstore_zips

    The zip path is taken from the first command-line argument and the opened
    archive is handed to lsh_zipfile(). The process exits with status 1 when
    the argument is missing, the file cannot be read, or it is not a zip file.
    """
    try:
        filename = os.path.abspath(sys.argv[1])
    except IndexError:
        # print() with a single argument behaves the same on Python 2 and 3.
        print('filename not provided')
        sys.exit(1)
    try:
        zip_reader = zipfile.ZipFile(filename)
    except IOError:
        print('unable to read file {file}'.format(file=filename))
        sys.exit(1)
    except zipfile.BadZipfile:
        print('file {file} is not a zip file'.format(file=filename))
        sys.exit(1)

    lsh_zipfile(PeerbeltLine, zip_reader, 'bash', filename)
Example #10
Source File: test_zipstream.py    From Safejumper-for-Desktop with GNU General Public License v2.0 6 votes vote down vote up
def test_filenameMismatch(self):
        """
        readfile() must raise BadZipfile for an entry whose own header
        carries a different filename than the central directory, while the
        other entry stays readable.
        """
        archivePath = self.makeZipFile([b"test contents",
                                        b"more contents"])
        with zipfile.ZipFile(archivePath, "r") as archive:
            renamed = archive.getinfo("0")
            renamed.filename = "not zero"
        # Rewrite the header stored at entry "0"'s offset so that it carries
        # the new name; the central directory is left as it was.
        with open(archivePath, "r+b") as rawFile:
            rawFile.seek(renamed.header_offset, 0)
            rawFile.write(renamed.FileHeader())

        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
            with chunked.readfile("1") as secondEntry:
                self.assertEqual(secondEntry.read(), b"more contents")
Example #11
Source File: test_zipstream.py    From Safejumper-for-Desktop with GNU General Public License v2.0 6 votes vote down vote up
def test_invalidHeader(self):
        """
        readfile() must raise BadZipfile for an entry whose header starts
        with the wrong magic number, without affecting the other files in
        the archive.
        """
        archivePath = self.makeZipFile(["test contents",
                                        "more contents"])
        with zipfile.ZipFile(archivePath, "r") as archive:
            firstHeaderOffset = archive.getinfo("0").header_offset
        # Clobber only the first four bytes of entry "0"'s header, using
        # ASCII '0' characters.
        with open(archivePath, "r+b") as rawFile:
            rawFile.seek(firstHeaderOffset, 0)
            rawFile.write(b'0000')
        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
            with chunked.readfile("1") as secondEntry:
                self.assertEqual(secondEntry.read(), b"more contents")
Example #12
Source File: test_zipfile.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def check_read_with_bad_crc(self, compression):
        """Verify that every read path reports a corrupted CRC as BadZipfile.

        The archive bytes come from ``self.zips_with_bad_crc[compression]``.
        The member named 'afile' is read three ways: through ZipFile.read(),
        through one read() on the object returned by ZipFile.open(), and
        through repeated two-byte reads on such an object.
        """
        corrupt_bytes = self.zips_with_bad_crc[compression]

        # Whole-member read via ZipFile.read().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with self.assertRaises(zipfile.BadZipfile):
                archive.read('afile')

        # Whole-member read via the file object returned by ZipFile.open().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                with self.assertRaises(zipfile.BadZipfile):
                    member.read()

        # Tiny reads, so that the buffering logic is exercised as well.
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                member.MIN_READ_SIZE = 2
                with self.assertRaises(zipfile.BadZipfile):
                    chunk = member.read(2)
                    while chunk:
                        chunk = member.read(2)
Example #13
Source File: test_zipfile.py    From BinderFilter with MIT License 6 votes vote down vote up
def check_read_with_bad_crc(self, compression):
        """Verify that every read path reports a corrupted CRC as BadZipfile.

        The archive bytes come from ``self.zips_with_bad_crc[compression]``.
        The member named 'afile' is read three ways: through ZipFile.read(),
        through one read() on the object returned by ZipFile.open(), and
        through repeated two-byte reads on such an object.
        """
        corrupt_bytes = self.zips_with_bad_crc[compression]

        # Whole-member read via ZipFile.read().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with self.assertRaises(zipfile.BadZipfile):
                archive.read('afile')

        # Whole-member read via the file object returned by ZipFile.open().
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                with self.assertRaises(zipfile.BadZipfile):
                    member.read()

        # Tiny reads, so that the buffering logic is exercised as well.
        with zipfile.ZipFile(io.BytesIO(corrupt_bytes), mode="r") as archive:
            with archive.open('afile', 'r') as member:
                member.MIN_READ_SIZE = 2
                with self.assertRaises(zipfile.BadZipfile):
                    chunk = member.read(2)
                    while chunk:
                        chunk = member.read(2)
Example #14
Source File: test_zipstream.py    From learn_python3_spider with MIT License 6 votes vote down vote up
def test_invalidHeader(self):
        """
        readfile() must raise BadZipfile for an entry whose header starts
        with the wrong magic number, without affecting the other files in
        the archive.
        """
        archivePath = self.makeZipFile(["test contents",
                                        "more contents"])
        with zipfile.ZipFile(archivePath, "r") as archive:
            firstHeaderOffset = archive.getinfo("0").header_offset
        # Clobber only the first four bytes of entry "0"'s header, using
        # ASCII '0' characters.
        with open(archivePath, "r+b") as rawFile:
            rawFile.seek(firstHeaderOffset, 0)
            rawFile.write(b'0000')
        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
            with chunked.readfile("1") as secondEntry:
                self.assertEqual(secondEntry.read(), b"more contents")
Example #15
Source File: test_zipstream.py    From learn_python3_spider with MIT License 6 votes vote down vote up
def test_filenameMismatch(self):
        """
        readfile() must raise BadZipfile for an entry whose own header
        carries a different filename than the central directory, while the
        other entry stays readable.
        """
        archivePath = self.makeZipFile([b"test contents",
                                        b"more contents"])
        with zipfile.ZipFile(archivePath, "r") as archive:
            renamed = archive.getinfo("0")
            renamed.filename = "not zero"
        # Rewrite the header stored at entry "0"'s offset so that it carries
        # the new name; the central directory is left as it was.
        with open(archivePath, "r+b") as rawFile:
            rawFile.seek(renamed.header_offset, 0)
            rawFile.write(renamed.FileHeader())

        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
            with chunked.readfile("1") as secondEntry:
                self.assertEqual(secondEntry.read(), b"more contents")
Example #16
Source File: test_zipstream.py    From learn_python3_spider with MIT License 6 votes vote down vote up
def test_unsupportedCompression(self):
        """
        Reading a member whose central directory entry names an unknown
        compression mechanism must raise BadZipfile.
        """
        archivePath = self.mktemp()
        with zipfile.ZipFile(archivePath, "w") as archive:
            member = zipfile.ZipInfo("0")
            archive.writestr(member, "some data")
            # The bogus compression type is applied only after writestr():
            # setting it earlier makes zipfile (correctly) refuse the value.
            # It ends up in the central directory when the archive is closed.
            member.compress_type = 1234

        with zipstream.ChunkingZipFile(archivePath) as chunked:
            self.assertRaises(zipfile.BadZipfile, chunked.readfile, "0")
Example #17
Source File: input.py    From flatten-tool with MIT License 6 votes vote down vote up
def read_sheets(self):
        """Load the workbook and work out which of its sheets to use.

        The workbook named by `self.input_name` is opened with openpyxl; a
        BadZipFile from that step is reported as BadXLSXZipFile.
        `self.sheet_names_map` starts as an ordered name -> name mapping of
        all sheets, is narrowed to `self.include_sheets` when that is set,
        and then loses every sheet listed in `self.exclude_sheets`. The
        remaining names are stored in `self.sub_sheet_names` before
        `self.configure_sheets()` is called.
        """
        try:
            self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
        except BadZipFile as e:  # noqa
            # TODO when we have python3 only add 'from e' to show exception chain
            raise BadXLSXZipFile(
                "The supplied file has extension .xlsx but isn't an XLSX file."
            )

        titles = self.workbook.sheetnames
        self.sheet_names_map = OrderedDict(zip(titles, titles))

        if self.include_sheets:
            # Iterate over a snapshot because entries are removed on the way.
            for title in list(self.sheet_names_map):
                if title not in self.include_sheets:
                    del self.sheet_names_map[title]
        for title in self.exclude_sheets or []:
            self.sheet_names_map.pop(title, None)

        self.sub_sheet_names = list(self.sheet_names_map)
        self.configure_sheets()
Example #18
Source File: Backup.py    From Cura with GNU Lesser General Public License v3.0 6 votes vote down vote up
def _makeArchive(self, buffer: "io.BytesIO", root_path: str) -> Optional[ZipFile]:
        """Write everything below root_path into a deflated zip held in buffer.

        Folders and files whose absolute path matches any of the patterns in
        `self.IGNORED_FILES` are left out.

        :param buffer: The stream the zip data is written to.
        :param root_path: The root directory to archive recursively.
        :return: The ZipFile object, already closed, or None when an
            IOError, OSError or BadZipfile occurred (it is logged and shown
            to the user first).
        """

        skip_pattern = re.compile("|".join(self.IGNORED_FILES))
        try:
            archive = ZipFile(buffer, "w", ZIP_DEFLATED)
            # Member names are paths relative to root_path: drop the root
            # and the separator that follows it.
            prefix_length = len(root_path) + len(os.sep)
            for current_dir, dir_names, file_names in os.walk(root_path):
                for entry in dir_names + file_names:
                    full_path = os.path.join(current_dir, entry)
                    if not skip_pattern.search(full_path):
                        archive.write(full_path, full_path[prefix_length:])
            archive.close()
            return archive
        except (IOError, OSError, BadZipfile) as error:
            Logger.log("e", "Could not create archive from user data directory: %s", error)
            self._showMessage(
                self.catalog.i18nc("@info:backup_failed",
                                   "Could not create archive from user data directory: {}".format(error)))
            return None
Example #19
Source File: files.py    From glazier with Apache License 2.0 6 votes vote down vote up
def Run(self):
    """Extracts a zip archive into an output directory.

    The zip file path is read from self._args[0] and the output path from
    self._args[1]. The output directory is created through
    file_util.CreateDirectories before extraction.

    Raises:
      ActionError: if either argument is missing, the output path cannot be
          created, or the archive cannot be opened or extracted.
    """
    try:
      zip_file = self._args[0]
      out_path = self._args[1]
    except IndexError:
      raise ActionError('Unable to determine desired paths from %s.' %
                        str(self._args))

    try:
      file_util.CreateDirectories(out_path)
    except file_util.Error:
      raise ActionError('Unable to create output path %s.' % out_path)

    try:
      # The context manager closes the archive handle on success and on
      # failure alike.
      with zipfile.ZipFile(zip_file) as zf:
        zf.extractall(out_path)
    except (IOError, zipfile.BadZipfile) as e:
      raise ActionError('Bad zip file given as input.  %s' % e)
Example #20
Source File: release.py    From simplification with MIT License 5 votes vote down vote up
def retrieve(url):
    """Download the archive at `url` and unpack it into `path`.

    The payload is first opened as a zip file; when zipfile reports
    BadZipfile it is opened as a gzip-compressed tarball instead. `path` is
    not defined in this function and is taken from the enclosing scope.

    An error response makes `raise_for_status()` raise before anything is
    extracted.
    """
    print("Getting %s" % urlsplit(url).path.split("/")[-1])
    # Context managers release the session and the archive objects, none of
    # which were closed before.
    with requests.Session() as sess:
        retrieved = sess.get(url, stream=True)
        # don't continue if something's wrong
        retrieved.raise_for_status()
        payload = retrieved.content
    try:
        with zipfile.ZipFile(io.BytesIO(payload)) as raw_zip:
            raw_zip.extractall(path)
    except zipfile.BadZipfile:
        # it's a tar
        with tarfile.open(mode="r:gz", fileobj=io.BytesIO(payload)) as tar:
            # NOTE(security): extractall() trusts the member paths of a
            # downloaded archive; only use this with trusted URLs.
            tar.extractall(path)
Example #21
Source File: awslambda.py    From bash-lambda-layer with MIT License 5 votes vote down vote up
def _should_contain_zip_content(value):
    """Raise ValueError(ERROR_MSG) unless `value` can be opened as a zip.

    Values that are not bytes are encoded as UTF-8 before the check.
    """
    # Text is very unlikely to be valid zip content, but it is still
    # encoded and checked rather than rejected outright.
    raw = value if isinstance(value, bytes) else value.encode('utf-8')
    try:
        with closing(zipfile.ZipFile(six.BytesIO(raw))) as archive:
            archive.infolist()
    except zipfile.BadZipfile:
        raise ValueError(ERROR_MSG)
Example #22
Source File: decompression.py    From learn_python3_spider with MIT License 5 votes vote down vote up
def _is_zip(self, response):
        """Return a response built from the first member of a zipped body.

        The response body is opened as a zip archive and its first member
        becomes the new body; the response class is chosen by
        `responsetypes.from_args` from that member's name and content.

        Returns None when the body is not a zip archive or the archive has
        no members.
        """
        archive = BytesIO(response.body)
        try:
            zip_file = zipfile.ZipFile(archive)
        except zipfile.BadZipfile:
            return None

        # `with` closes the archive once the first member has been read.
        with zip_file:
            namelist = zip_file.namelist()
            if not namelist:
                # An empty archive has nothing to unpack; indexing
                # namelist[0] would raise IndexError.
                return None
            body = zip_file.read(namelist[0])
        respcls = responsetypes.from_args(filename=namelist[0], body=body)
        return response.replace(body=body, cls=respcls)
Example #23
Source File: test_excel.py    From quantipy with MIT License 5 votes vote down vote up
def _load_zip(path):
    """Open the zip archive at `path` for reading and return the ZipFile.

    BadZipfile and LargeZipFile errors are re-raised as BadZipfile with a
    message of the form "<path>: <original error>".
    """
    try:
        return ZipFile(path, 'r')
    except (BadZipfile, LargeZipFile) as error:
        raise BadZipfile('%s: %s' % (path, error))
Example #24
Source File: wheel.py    From pipenv with MIT License 5 votes vote down vote up
def read_wheel_metadata_file(source, path):
    # type: (ZipFile, str) -> bytes
    """Return the raw contents of member `path` from the archive `source`.

    Any BadZipFile, KeyError or RuntimeError raised by the read is reported
    as UnsupportedWheel.
    """
    try:
        contents = source.read(path)
    except (BadZipFile, KeyError, RuntimeError) as e:
        # BadZipFile for general corruption, KeyError for missing entry,
        # and RuntimeError for password-protected files
        raise UnsupportedWheel(
            "could not read {!r} file: {!r}".format(path, e)
        )
    return contents
Example #25
Source File: ez_setup.py    From grizli with MIT License 5 votes vote down vote up
def archive_context(filename):
    """
    Unzip filename into a fresh temporary directory and work from inside it.

    After extraction the working directory becomes the first entry found in
    the temporary directory, and control is yielded once. On the way out the
    previous working directory is restored and the temporary directory is
    removed. (Presumably wrapped by a context-manager decorator that is not
    visible here.)

    A BadZipfile raised during extraction is re-raised with an extra
    explanatory message appended to its args.
    """
    scratch = tempfile.mkdtemp()
    log.warn('Extracting in %s', scratch)
    previous_cwd = os.getcwd()
    try:
        os.chdir(scratch)
        try:
            with ContextualZipFile(filename) as archive:
                archive.extractall()
        except zipfile.BadZipfile as err:
            # Keep an empty first slot when the error carried no args, then
            # append the friendlier explanation.
            err.args = (err.args or ('', )) + (
                MEANINGFUL_INVALID_ZIP_ERR_MSG.format(filename),
            )
            raise

        # Move into the first entry of the extraction directory.
        subdir = os.path.join(scratch, os.listdir(scratch)[0])
        os.chdir(subdir)
        log.warn('Now working in %s', subdir)
        yield

    finally:
        os.chdir(previous_cwd)
        shutil.rmtree(scratch)
Example #26
Source File: mail.py    From abusehelper with MIT License 5 votes vote down vote up
def handle_application_zip(self, msg):
        """Open a ZIP attachment and parse a member of it as CSV.

        The decoded payload of `msg` is opened as a zip archive. When that
        fails with BadZipfile the error is logged and `idiokit.stop(False)`
        is called. Otherwise `self.parse_csv` is run on a member and its
        result is passed to `idiokit.stop`.

        NOTE(review): `idiokit.stop` is called inside the loop, so presumably
        only the first member is ever parsed -- confirm against idiokit.
        """
        self.log.info("Opening a ZIP attachment")
        payload = yield msg.get_payload(decode=True)
        try:
            archive = zipfile.ZipFile(StringIO(payload))
        except zipfile.BadZipfile as error:
            self.log.error("ZIP handling failed ({0})".format(error))
            idiokit.stop(False)

        for member_name in archive.namelist():
            member_file = archive.open(member_name)

            self.log.info("Parsing CSV data from the ZIP attachment")
            outcome = yield self.parse_csv(member_name, member_file)
            idiokit.stop(outcome)
Example #27
Source File: oxml.py    From plaso with Apache License 2.0 5 votes vote down vote up
def _ParsePropertiesXMLFile(self, xml_data):
    """Parses a properties XML file.

    Every element with non-empty text becomes one property, except elements
    named 'lpstr'. The key is looked up in self._PROPERTY_NAMES by the
    element name without its namespace; names missing there are converted
    with self._FormatPropertyName.

    Args:
      xml_data (bytes): data of a properties XML file.

    Returns:
      dict[str, object]: properties.

    Raises:
      zipfile.BadZipfile: listed by the original documentation, although
          nothing in this method raises it directly.
          NOTE(review): confirm against the caller.
    """
    xml_root = ElementTree.fromstring(xml_data)

    properties = {}
    for xml_element in xml_root.iter():
      text = xml_element.text
      if text:
        # The tag is formatted as: {URL}name
        # For example: {http://purl.org/dc/terms/}modified
        name = xml_element.tag.partition('}')[2]

        # 'lpstr' elements are left out because they are very verbose.
        if name != 'lpstr':
          property_name = (
              self._PROPERTY_NAMES.get(name, None) or
              self._FormatPropertyName(name))
          properties[property_name] = text

    return properties
Example #28
Source File: czip.py    From plaso with Apache License 2.0 5 votes vote down vote up
def ParseFileObject(self, parser_mediator, file_object):
    """Parses a compound ZIP file-like object.

    The file-like object is first checked with zipfile.is_zipfile, then
    opened as a ZIP file and handed to self._ProcessZipFileWithPlugins.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      file_object (dfvfs.FileIO): a file-like object.

    Raises:
      UnableToParseFile: when the file is not a ZIP file, or when opening or
          processing it fails with zipfile.BadZipfile or struct.error.
    """
    display_name = parser_mediator.GetDisplayName()

    if not zipfile.is_zipfile(file_object):
      message = (
          '[{0:s}] unable to parse file: {1:s} with error: {2:s}'.format(
              self.NAME, display_name, 'Not a Zip file.'))
      raise errors.UnableToParseFile(message)

    try:
      archive = zipfile.ZipFile(file_object, 'r', allowZip64=True)
      self._ProcessZipFileWithPlugins(parser_mediator, archive)
      archive.close()

    # Some non-ZIP files return true for is_zipfile but will fail with a
    # negative seek (IOError) or another error.
    except (zipfile.BadZipfile, struct.error) as error:
      message = (
          '[{0:s}] unable to parse file: {1:s} with error: {2!s}'.format(
              self.NAME, display_name, error))
      raise errors.UnableToParseFile(message)
Example #29
Source File: shadowservermail.py    From abusehelper with MIT License 5 votes vote down vote up
def handle_application_zip(self, headers, fileobj):
        """Open a ZIP attachment and parse a member of it as CSV.

        `fileobj` is passed through `self._decode` and opened as a zip
        archive. When that fails with BadZipfile the error is logged and
        `idiokit.stop(False)` is called. Otherwise a member is read into a
        StringIO, `self.parse_csv` is run on it and the result is passed to
        `idiokit.stop`.

        NOTE(review): `idiokit.stop` is called inside the loop, so presumably
        only the first member is ever parsed -- confirm against idiokit.
        """
        self.log.info("Opening a ZIP attachment")
        decoded = self._decode(headers, fileobj)
        try:
            archive = zipfile.ZipFile(decoded)
        except zipfile.BadZipfile as error:
            self.log.error("ZIP handling failed ({0})".format(error))
            idiokit.stop(False)

        for member_name in archive.namelist():
            member_data = StringIO(archive.read(member_name))

            self.log.info("Parsing CSV data from the ZIP attachment")
            outcome = yield self.parse_csv(headers, member_name, member_data)
            idiokit.stop(outcome)
Example #30
Source File: problem.py    From judge-server with GNU Affero General Public License v3.0 5 votes vote down vote up
def _resolve_archive_files(self):
        """Open the archive named in the configuration, if there is one.

        Returns None when `self.config.archive` is falsy. Otherwise the
        archive path is resolved relative to `self.root_dir` and the opened
        ZipFile is returned.

        Raises InvalidInitException when the archive file does not exist or
        is not a valid zip file.
        """
        if not self.config.archive:
            return None

        archive_path = os.path.join(self.root_dir, self.config.archive)
        if not os.path.exists(archive_path):
            raise InvalidInitException('archive file "%s" does not exist' % archive_path)
        try:
            return zipfile.ZipFile(archive_path, 'r')
        except zipfile.BadZipfile:
            raise InvalidInitException('bad archive: "%s"' % archive_path)