Python magic.from_file() Examples

The following are 30 code examples of magic.from_file(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module magic, or try the search function.
Example #1
Source File: __init__.py    From platypush with MIT License 6 votes vote down vote up
def get_mime_type(resource):
    """Return the MIME type of a local file or of an HTTP(S) resource.

    :param resource: A filesystem path, a ``file://`` URL, or an
        ``http://`` / ``https://`` URL.
    :return: The MIME type as a string, or ``None`` when the magic
        library returns a falsy result for a local file.
    :raises RuntimeError: If the installed ``magic`` module exposes
        neither ``detect_from_filename`` nor ``from_file``.
    """
    import magic

    if resource.startswith('file://'):
        resource = resource[len('file://'):]

    if resource.startswith(('http://', 'https://')):
        # Remote resources: trust the Content-Type reported by the server.
        with urllib.request.urlopen(resource) as response:
            return response.info().get_content_type()

    if hasattr(magic, 'detect_from_filename'):
        mime = magic.detect_from_filename(resource)
    elif hasattr(magic, 'from_file'):
        mime = magic.from_file(resource, mime=True)
    else:
        raise RuntimeError('The installed magic version provides neither detect_from_filename nor from_file')

    if not mime:
        return None

    # detect_from_filename() yields an object carrying ``mime_type``, whereas
    # from_file(mime=True) yields the MIME string itself; accept both shapes
    # instead of failing with AttributeError on the latter.
    return getattr(mime, 'mime_type', mime)
Example #2
Source File: engine.py    From refextract with GNU General Public License v2.0 6 votes vote down vote up
def get_plaintext_document_body(fpath, keep_layout=False):
    """Load the full text at ``fpath`` as a list of lines.

    The file's MIME type (as detected by ``magic``) selects the strategy:
    plain-text files are read directly, PDFs are converted through
    ``convert_PDF_to_plaintext``.

    @param fpath: (string) - the path to the fulltext file
    @param keep_layout: forwarded unchanged to ``convert_PDF_to_plaintext``
    @return: (list) of strings - each string being a line in the document.
    @raise UnknownDocumentTypeError: the detected MIME type is neither
        ``text/plain`` nor ``application/pdf``.
    """
    detected = magic.from_file(fpath, mime=True)

    if detected == "application/pdf":
        return convert_PDF_to_plaintext(fpath, keep_layout)

    if detected != "text/plain":
        raise UnknownDocumentTypeError(detected)

    with open(fpath, "r") as handle:
        return handle.readlines()
Example #3
Source File: app.py    From threatspec with MIT License 6 votes vote down vote up
def get_parser_for_path(self, path, config_path):
    """Pick the parser to use for the file at ``path``.

    :param path: Path of the file to be parsed.
    :param config_path: Object whose ``mime`` attribute, when truthy,
        overrides MIME detection.
    :return: A ``SourceFileParser`` for any non ``text/plain`` MIME type;
        for ``text/plain`` a ``YamlFileParser`` (``.yaml``/``.yml``/``.json``)
        or ``TextFileParser`` (``.txt``); ``None`` for any other extension.
    """
    mime = config_path.mime or magic.from_file(path, mime=True)
    _, ext = os.path.splitext(path)

    if mime != "text/plain":
        return parser.SourceFileParser(self.threatmodel, mime)

    # .json is deliberately kept on the YAML parser, as in the original
    # code (presumably because JSON documents are valid YAML - confirm).
    if ext in (".yaml", ".yml", ".json"):
        return parser.YamlFileParser(self.threatmodel)
    if ext == ".txt":
        return parser.TextFileParser(self.threatmodel)

    # Logger.warn() is a deprecated alias; warning() is the supported name.
    logger.warning("Unsupported file extension {} for mime type text/plain for file {}".format(ext, path))
    return None
Example #4
Source File: extractor.py    From imago-forensics with MIT License 6 votes vote down vote up
def ela(filename, output_path):
    """Write an Error Level Analysis (ELA) image for a JPEG file.

    The input is re-saved as JPEG at a fixed quality, the per-pixel
    difference to the original is computed, and that difference is
    brightened so its largest value maps to 255. The result is saved as
    ``<basename>.ela.jpg`` inside ``output_path``.

    :param filename: Path of the image to analyse; files not detected as
        ``image/jpeg`` are skipped with a message.
    :param output_path: Directory receiving the ELA image and the
        temporary re-compressed copy.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("****ELA is in BETA****")
    if magic.from_file(filename, mime=True) != "image/jpeg":
        print("ELA works only with JPEG")
        return

    quality_level = 85
    base_name = os.path.basename(filename)
    tmp_img = os.path.join(output_path, base_name + ".tmp.jpg")
    ela_path = os.path.join(output_path, base_name + ".ela.jpg")

    image = Image.open(filename)
    try:
        image.save(tmp_img, 'JPEG', quality=quality_level)
        tmp_img_file = Image.open(tmp_img)
        ela_image = ImageChops.difference(image, tmp_img_file)

        # getextrema() returns one (min, max) pair for single-band images
        # and a tuple of such pairs for multi-band images.
        extrema = ela_image.getextrema()
        if isinstance(extrema[0], tuple):
            max_diff = max(band[1] for band in extrema)
        else:
            max_diff = extrema[1]

        # Identical images give a zero maximum; avoid dividing by zero.
        if max_diff == 0:
            max_diff = 1

        scale = 255.0 / max_diff
        ela_image = ImageEnhance.Brightness(ela_image).enhance(scale)
        ela_image.save(ela_path)
    finally:
        # Always discard the temporary copy, even when processing failed.
        if os.path.exists(tmp_img):
            os.remove(tmp_img)


#Modified version of a gist by: https://github.com/erans 
Example #5
Source File: file.py    From fame with GNU General Public License v3.0 6 votes vote down vote up
def _compute_default_properties(self, hash_only=False):
    """Populate the default properties of this file record.

    Unless ``hash_only`` is true, the name list, libmagic description,
    MIME type and size are derived from ``self['filepath']``. In every
    case the per-module antivirus flags are reset to ``False`` and
    ``_set_type`` is invoked with ``hash_only``.
    """
    if not hash_only:
        filepath = self['filepath']
        self['names'] = [os.path.basename(filepath)]
        self['detailed_type'] = magic.from_file(filepath)
        self['mime'] = magic.from_file(filepath, mime=True)
        self['size'] = os.path.getsize(filepath)

    # Start with a clean antivirus map: one False entry per known module.
    self['antivirus'] = {}
    for av_module in dispatcher.get_antivirus_modules():
        self['antivirus'][av_module.name] = False

    self._set_type(hash_only)

    # initialize all necessary values for hash analysis 
Example #6
Source File: MsgDecorator.py    From efb-qq-slave with GNU General Public License v3.0 6 votes vote down vote up
def qq_record_wrapper(self, data, chat: Chat = None):  # Experimental!
    """Build a one-element message list for a QQ voice record.

    Asks the CoolQ API for the record as mp3, downloads it and fills in
    the file, path and MIME type. On any failure the message is marked
    ``Unsupported`` with an explanatory text and the error is logged.
    """
    efb_msg = Message()
    try:
        record = self.inst.coolq_api_query("get_record", file=data['file'], out_format='mp3')
        efb_msg.type = MsgType.Audio

        client_config = self.inst.client_config
        efb_msg.file = download_voice(
            record['file'],
            client_config['api_root'].rstrip("/"),
            client_config['access_token'],
        )

        voice_path = efb_msg.file.name
        detected = magic.from_file(voice_path, mime=True)
        # Some magic bindings hand back bytes rather than str.
        mime = detected.decode() if isinstance(detected, bytes) else detected
        efb_msg.path = voice_path
        efb_msg.mime = mime
    except Exception:
        efb_msg.type = MsgType.Unsupported
        efb_msg.text = self._('[Voice Message] Please check it on your QQ')
        logging.getLogger(__name__).exception("Failed to download voice")
    return [efb_msg]
Example #7
Source File: objects.py    From CIRTKit with MIT License 6 votes vote down vote up
def get_type(self):
    """Return a textual description of the file at ``self.path``.

    Three strategies are tried in order, each only if the previous one
    failed: the ``magic.open`` API, ``magic.from_file``, and finally the
    external ``file -b`` command.

    :return: The detected description, or ``''`` when every strategy
        fails.
    """
    ms = None
    try:
        ms = magic.open(magic.MAGIC_NONE)
        ms.load()
        return ms.file(self.path)
    except Exception:
        # This magic binding is unusable here; fall through to the next one.
        pass
    finally:
        # Only close a handle that was actually created.
        if ms is not None:
            try:
                ms.close()
            except Exception:
                pass

    try:
        return magic.from_file(self.path)
    except Exception:
        pass

    try:
        import subprocess
        file_process = subprocess.Popen(['file', '-b', self.path], stdout=subprocess.PIPE)
        # communicate() drains the pipe and reaps the child process.
        output, _ = file_process.communicate()
        return output.strip()
    except Exception:
        return ''
Example #8
Source File: MsgDecorator.py    From efb-qq-slave with GNU General Public License v3.0 6 votes vote down vote up
def qq_image_wrapper(self, data, chat: Chat = None):
    """Build a one-element message list for a QQ image.

    Returns a text message when ``data`` has no ``url`` or when the
    download yields ``None``. Otherwise the message carries the file,
    its path, filename and MIME type, and is typed ``Animation`` when the
    MIME type contains ``gif``, else ``Image``.
    """
    efb_msg = Message()

    def as_text(text):
        # Degrade the message to plain text and wrap it in a list.
        efb_msg.type = MsgType.Text
        efb_msg.text = text
        return [efb_msg]

    if 'url' not in data:
        return as_text(self._('[Image Source missing]'))

    efb_msg.file = cq_get_image(data['url'])
    if efb_msg.file is None:
        return as_text(self._('[Download image failed, please check on your QQ client]'))

    efb_msg.type = MsgType.Image
    image_path = efb_msg.file.name
    detected = magic.from_file(image_path, mime=True)
    # Some magic bindings hand back bytes rather than str.
    mime = detected.decode() if isinstance(detected, bytes) else detected

    efb_msg.filename = data['file'] if 'file' in data else image_path
    efb_msg.path = image_path
    efb_msg.mime = mime
    if "gif" in mime:
        efb_msg.type = MsgType.Animation
    return [efb_msg]
Example #9
Source File: models.py    From cadasta-platform with GNU Affero General Public License v3.0 6 votes vote down vote up
def create_spatial_resource(sender, instance, created, **kwargs):
    """Create ``SpatialResource`` rows for a newly stored GPX resource.

    Runs only when the instance was just created or its file URL changed,
    and only when the declared MIME type is one of ``GPX_MIME_TYPES``.
    The file is copied to a temporary file, its MIME type is re-checked
    with libmagic, and one ``SpatialResource`` is created per non-empty
    layer returned by ``GPXProcessor``.

    :raises InvalidGPXFile: If libmagic does not confirm a GPX MIME type.
    """
    if not (created or instance._original_url != instance.file.url):
        return
    if instance.mime_type not in GPX_MIME_TYPES:
        return

    temp = io.ensure_dirs()
    with tempfile.NamedTemporaryFile(mode='wb', dir=temp) as f:
        instance_file = instance.file.open()
        try:
            f.write(instance_file.read())
        finally:
            # Close the source even when reading or writing fails.
            instance_file.close()
        # Push buffered bytes to disk before path-based readers open f.name.
        f.flush()
        f.seek(0)

        # need to double check the mime-type here as browser detection
        # of gpx mime type is not reliable
        mime_type = magic.from_file(f.name, mime=True)
        if mime_type not in GPX_MIME_TYPES:
            # Translate the template first, then interpolate; formatting
            # before the lookup would never match a catalogue entry.
            raise InvalidGPXFile(
                _("Invalid GPX mime type: {error}").format(error=mime_type)
            )

        processor = GPXProcessor(f.name)
        layers = processor.get_layers()
        for layer in layers.keys():
            if len(layers[layer]) > 0:
                SpatialResource.objects.create(
                    resource=instance, name=layer,
                    geom=layers[layer])
Example #10
Source File: metamodel.py    From haros with MIT License 6 votes vote down vote up
def _get_language(self):
    """Classify this file into a language / file-kind label.

    The lower-cased libmagic description is checked first (C++, then
    Python); after that the file name is matched against the known
    suffixes and exact names, in a fixed order. Returns ``'unknown'``
    when nothing matches.
    """
    description = file_cmd.from_file(self.path).lower()
    if description.startswith(self.CPP):
        return 'cpp'
    if self.PYTHON in description:
        return 'python'

    name = self.name
    # Ordered (matched?, label) pairs; the first match wins.
    name_rules = (
        (name.endswith(self.LAUNCH), 'launch'),
        (name == self.PKG_XML, 'package'),
        (name.endswith(self.MSG), 'msg'),
        (name.endswith(self.SRV), 'srv'),
        (name.endswith(self.ACTION), 'action'),
        (name.endswith(self.YAML), 'yaml'),
        (name == self.CMAKELISTS, 'cmake'),
    )
    for matched, label in name_rules:
        if matched:
            return label
    return 'unknown'
Example #11
Source File: files.py    From piicatcher with Apache License 2.0 6 votes vote down vote up
def scan(self):
    """Collect the file(s) under ``self._path`` and scan each of them.

    A regular file is registered directly; otherwise the path is walked
    recursively. Every discovered file is wrapped in ``File`` with its
    detected MIME type, then scanned with a shared context of tokenizer,
    regex scanner and NER scanner.
    """
    logging.debug("Scanning %s" % self._path)

    if os.path.isfile(self._path):
        detected = magic.from_file(self._path, mime=True)
        self._files.append(File(self._path, detected))
        logging.debug('\t- full path: %s, mime_type: %s' % (os.path.abspath(self._path), detected))
    else:
        for dirpath, _dirnames, filenames in os.walk(self._path):
            for entry in filenames:
                full_path = os.path.join(dirpath, entry)
                detected = magic.from_file(full_path, mime=True)

                logging.debug('\t- full path: %s, mime_type: %s' % (full_path, detected))
                self._files.append(File(full_path, detected))

    # One set of scanners is shared by all files.
    scanners = {'tokenizer': Tokenizer(), 'regex': RegexScanner(), 'ner': NERScanner()}
    for scanned_file in self._files:
        scanned_file.scan(scanners)
Example #12
Source File: local.py    From S4 with GNU General Public License v3.0 6 votes vote down vote up
def _load_index(self):
    """Load and return the JSON index stored at ``self.index_path()``.

    Returns an empty dict when the index file does not exist. The file is
    read as plain text or through gzip depending on its detected MIME
    type.

    Raises:
        ValueError: the detected MIME type is neither JSON/plain text nor
            gzip.
    """
    location = self.index_path()
    if not os.path.exists(location):
        return {}

    content_type = magic.from_file(location, mime=True)
    if content_type in ("application/gzip", "application/x-gzip"):
        logger.debug("Detected gzip encoding for reading index")
        opener = gzip.open
    elif content_type in ("application/json", "text/plain"):
        logger.debug("Detected %s encoding for reading index", content_type)
        opener = open
    else:
        raise ValueError("Index is of unknown type", content_type)

    with opener(location, "rt") as stream:
        return json.load(stream)
Example #13
Source File: core_cook.py    From PeFixup with GNU General Public License v3.0 5 votes vote down vote up
def populate_metadata(self):
    """Fill the cooked payload's metadata in the JSON model, then print it."""
    metadata = self.model['cooked_payload']['metadata']
    live_path = self.args.LIVE

    metadata['file_name'] = live_path

    # Digests computed over the cooked payload (``self.cooked``).
    cooked_digests = (
        ('md5', core_hash.MD5),
        ('sha1', core_hash.SHA1),
        ('sha256', core_hash.SHA256),
        ('sha512', core_hash.SHA512),
    )
    for key, hasher in cooked_digests:
        metadata[key] = hasher.get_hash_hexdigest(self.cooked)

    # The import hash is taken from the LIVE path, not from self.cooked.
    metadata['imphash'] = core_hash.IMP.get_hash_hexdigest(live_path)
    metadata['ssdeep'] = core_hash.SSDEEP.get_hash_hexdigest(self.cooked)
    metadata['magic'] = magic.from_file(live_path)
    metadata['exif'] = exif.get_json(live_path)[0]

    self.print_cooked_payload_metadata()
Example #14
Source File: app.py    From web_develop with GNU General Public License v3.0 5 votes vote down vote up
def create_by_old_paste(cls, filehash, symlink):
    """Build an instance for the already-stored file identified by ``filehash``.

    The MIME type and size are read from the file on disk; ``filehash``
    is passed both as the first positional argument and as the
    ``filehash`` keyword, and ``symlink`` is forwarded as given.
    """
    stored_path = get_file_path(filehash)
    detected_mime = magic.from_file(stored_path, mime=True)
    byte_size = os.stat(stored_path).st_size

    return cls(filehash, detected_mime, byte_size, filehash=filehash, symlink=symlink)
Example #15
Source File: examine.py    From ypkg with GNU General Public License v3.0 5 votes vote down vote up
def add_solink(self, file, pretty):
    """ Record the versioned library that a ``.so`` symlink points to.

        .so links usually end up in -devel subpackages, but they are
        useless without the versioned library they link to, so the
        target is remembered in ``self.soname_links``, e.g.:

        zlib:
            /usr/lib64/libz.so.1.2.8
        zlib-devel:
            /usr/lib64/libz.so -> libz.so.1.2.8

        zlib-devel -> zlib

        Nothing is recorded when the target cannot be inspected by
        libmagic or its description does not match ``v_dyn``.
        """
    link_target = readlink(file)
    resolved = os.path.join(os.path.dirname(file), link_target)

    try:
        description = magic.from_file(resolved)
    except Exception:
        # Target could not be inspected: nothing to record.
        return

    if not v_dyn.match(description):
        return

    install_relative = remove_prefix(resolved, share_ctx.get_install_dir())
    if not self.soname_links:
        self.soname_links = set()
    self.soname_links.add(install_relative)
Example #16
Source File: onedrive.py    From packtpub-crawler with MIT License 5 votes vote down vote up
def __guess_info(self, file_path):
    """Store path, name and MIME type of ``file_path`` in ``self.info`` and log the name.

    Raises IOError when ``file_path`` does not exist.
    """
    if not exists(file_path):
        raise IOError('file not found!')

    # The name is whatever follows the last '/' in the path.
    self.info = dict(
        path=file_path,
        name=file_path.rsplit('/', 1)[-1],
        mime_type=magic.from_file(file_path, mime=True),
    )

    log_info('[+] new file upload on OneDrive:')
    log_info(self.info['name'])
Example #17
Source File: scpUpload.py    From packtpub-crawler with MIT License 5 votes vote down vote up
def __guess_info(self, file_path):
    """Store path, name and MIME type of ``file_path`` in ``self.info``.

    Raises IOError when ``file_path`` does not exist.
    """
    if not exists(file_path):
        raise IOError('file not found!')

    # The name is whatever follows the last '/' in the path.
    self.info = dict(
        path=file_path,
        name=file_path.rsplit('/', 1)[-1],
        mime_type=magic.from_file(file_path, mime=True),
    )

    log_info('[+] new file upload via scp:')
Example #18
Source File: Reader.py    From OCR-Manga with GNU Affero General Public License v3.0 5 votes vote down vote up
def main():
    """Command-line entry point: open a manga archive or directory in the reader.

    Parses the single ``mangafile`` argument, picks an image source
    (``Tree`` for directories, ``Zip`` for zip/tar archives, ``Rar`` for
    RAR archives) and starts the GUI. A missing file or an unsupported
    type prints an error and exits with status 1.
    """
    parser = argparse.ArgumentParser(description="OCR Manga Reader")
    parser.add_argument('mangafile', metavar='file', help="a .cbz/.zip, "
                        ".cbr/.rar, .tar, or directory containing your manga")
    args = parser.parse_args()
    filename = args.mangafile

    if os.path.isdir(filename):
        filetype = "Directory"
    else:
        try:
            filetype = str(magic.from_file(filename))
        except OSError:
            print("Error: file '%s' does not exist!" % filename)
            # Non-zero status so callers can tell the run failed.
            sys.exit(1)

    if filetype == "Directory":
        images = Tree(filename)
    elif "Zip archive data" in filetype or "tar archive" in filetype:
        images = Zip(filename)
    elif "RAR archive data" in filetype:
        images = Rar(filename)
    else:
        print("Error: Unsupported filetype for '%s'\n"
              "Please specify a valid .cbz/.zip, .cbr/.rar, .tar, or directory."
              % filename)
        sys.exit(1)

    app = Application(images)
    app.master.title('OCR Manga Reader')
    app.update_screen()
    app.mainloop()
Example #19
Source File: models.py    From ndrive with MIT License 5 votes vote down vote up
def put(self, file_path, upload_path = ''):
    """PUT

    Upload a local file to Ndrive.

    Args:
        file_path: Full path for a file you want to upload
        upload_path: Ndrive path where you want to upload file
            ex) /Picture/

    Returns:
        True: Upload success
        False: Upload failed

    """
    # Read raw bytes: text mode would corrupt or fail to decode binary
    # files. The context manager closes the handle once the data is read.
    with open(file_path, "rb") as f:
        c = f.read()

    file_name = os.path.basename(file_path)

    now = datetime.datetime.now().isoformat()
    url = nurls['put'] + upload_path + file_name

    headers = {'userid': self.user_id,
               'useridx': self.useridx,
               'MODIFYDATE': now,
               'Content-Type': magic.from_file(file_path, mime=True),
               'charset': 'UTF-8',
               'Origin': 'http://ndrive2.naver.com',
    }
    r = self.session.put(url = url, data = c, headers = headers)

    return self.resultManager(r.text)
Example #20
Source File: googledrive.py    From packtpub-crawler with MIT License 5 votes vote down vote up
def __guess_info(self, file_path):
    """Store path, name and MIME type of ``file_path`` in ``self.info``.

    Raises IOError when ``file_path`` does not exist.
    """
    if not exists(file_path):
        raise IOError('file not found!')

    # The name is whatever follows the last '/' in the path.
    self.info = dict(
        path=file_path,
        name=file_path.rsplit('/', 1)[-1],
        mime_type=magic.from_file(file_path, mime=True),
    )

    log_info('[+] new file upload on Google Drive:')
Example #21
Source File: basic_analyze.py    From MalAnalyzer with GNU General Public License v3.0 5 votes vote down vote up
def run(self):
    '''
    Collect basic information about ``self.filepath``.

    Results are stored on the instance rather than returned: filename,
    filetype (libmagic description), filesize (bytes), md5, sha256,
    crc32, ssdeep and strings ({"ascii": ..., "unicode": ...}).
    Any exception is logged with its traceback and swallowed.
    '''
    try:
        # get basic info
        self.filename = os.path.basename(self.filepath)
        self.filetype = magic.from_file(self.filepath)
        self.filesize = int(os.path.getsize(self.filepath))
        # get hash
        self.md5 = self.hash_file('md5')
        self.sha256 = self.hash_file('sha256')
        self.crc32 = self.get_crc32()
        self.ssdeep = self.get_ssdeep()

        # get strings
        self.get_strings()
        self.strings = {"ascii":self.ascii_strings,"unicode":self.unicode_strings}

        # get info (include packer info)
        #if self.filetype.startswith('PE32'):
        #    self.get_pe_info()
        #elif self.filetype.startswith('ELF'):
        #    self.get_elf_info()

    except Exception as e:
        # Report the concrete exception type, not the ``Exception`` base class.
        self.logger.exception('%s: %s' % (type(e).__name__, e))

    # output list 
Example #22
Source File: objects.py    From CuckooSploit with GNU General Public License v3.0 5 votes vote down vote up
def get_type(self):
    """Get the file type description of ``self.file_path``.

    When ``HAVE_MAGIC`` is set, the ``magic.open`` API is tried first and
    ``magic.from_file`` second. If no description was obtained, the
    external ``file -b`` command is used as a last resort.

    @return: file type description, or None if every strategy fails.
    """
    file_type = None
    if HAVE_MAGIC:
        ms = None
        try:
            ms = magic.open(magic.MAGIC_NONE)
            ms.load()
            file_type = ms.file(self.file_path)
        except Exception:
            try:
                file_type = magic.from_file(self.file_path)
            except Exception:
                pass
        finally:
            # Only close a handle that was actually created.
            if ms is not None:
                try:
                    ms.close()
                except Exception:
                    pass

    if file_type is None:
        try:
            p = subprocess.Popen(["file", "-b", self.file_path],
                                 stdout=subprocess.PIPE)
            # communicate() drains the pipe and reaps the child process.
            output, _ = p.communicate()
            file_type = output.strip()
        except Exception:
            pass

    return file_type
Example #23
Source File: forms.py    From Spirit with MIT License 5 votes vote down vote up
def clean_file(self):
    """Validate the uploaded file's extension and sniffed MIME type.

    The extension must be a key of
    ``settings.ST_ALLOWED_UPLOAD_FILE_MEDIA_TYPE`` and the MIME type
    detected by libmagic must equal the value configured for that
    extension.

    :return: The uploaded file, rewound to its start when it was read
        in memory.
    :raises forms.ValidationError: If magic is unavailable, detection
        fails, or the extension / MIME type is not allowed.
    """
    file = self.cleaned_data['file']

    if not magic:
        raise forms.ValidationError(_("The file could not be validated"))

    # Won't ever raise. Has at most one '.' so lstrip is fine here
    ext = os.path.splitext(file.name)[1].lstrip('.').lower()
    if ext not in settings.ST_ALLOWED_UPLOAD_FILE_MEDIA_TYPE:
        raise forms.ValidationError(
            _("Unsupported file extension %(extension)s. "
              "Supported extensions are %(supported)s.") % {
                'extension': ext,
                'supported': ", ".join(
                    sorted(settings.ST_ALLOWED_UPLOAD_FILE_MEDIA_TYPE.keys()))})

    try:
        if isinstance(file, TemporaryUploadedFile):
            file_mime = magic.from_file(file.temporary_file_path(), mime=True)
        else:  # In-memory file
            content = file.read()
            # Rewind so later consumers do not see an exhausted stream.
            file.seek(0)
            file_mime = magic.from_buffer(content, mime=True)
    except magic.MagicException as e:
        logger.exception(e)
        raise forms.ValidationError(_("The file could not be validated"))

    mime = settings.ST_ALLOWED_UPLOAD_FILE_MEDIA_TYPE.get(ext, None)
    if mime != file_mime:
        raise forms.ValidationError(
            _("Unsupported file mime type %(mime)s. "
              "Supported types are %(supported)s.") % {
                'mime': file_mime,
                'supported': ", ".join(
                    sorted(settings.ST_ALLOWED_UPLOAD_FILE_MEDIA_TYPE.values()))})

    return file
Example #24
Source File: MsgDecorator.py    From efb-qq-slave with GNU General Public License v3.0 5 votes vote down vote up
def qq_file_after_wrapper(self, data):
    """Wrap an already-available file (``data['file']``) into a File message.

    The MIME type is detected from the file on disk and the filename is
    taken from ``data['filename']`` after passing it through ``quote``.
    """
    efb_msg = Message()
    efb_msg.file = data['file']
    efb_msg.type = MsgType.File

    file_path = efb_msg.file.name
    detected = magic.from_file(file_path, mime=True)
    # Some magic bindings hand back bytes rather than str.
    mime = detected.decode() if isinstance(detected, bytes) else detected

    efb_msg.path = file_path
    efb_msg.mime = mime
    efb_msg.filename = quote(data['filename'])
    return efb_msg
Example #25
Source File: malwareclustering_api.py    From Cortex-Analyzers with GNU Affero General Public License v3.0 5 votes vote down vote up
def check_file(self, f):
    """Tell whether the file at path ``f`` should be accepted.

    A file is accepted when its libmagic description mentions ``PE32``
    but not ``self-extracting``, it parses as a PE file, and
    ``self.signatures.match_all`` (entry point only) finds no match.

    :return: ``True`` if accepted, ``False`` otherwise (including when PE
        parsing or signature matching raises).
    """
    # Query libmagic once; the description serves both checks.
    description = magic.from_file(f)
    if 'PE32' not in description:
        return False
    if 'self-extracting' in description:
        return False

    try:
        pe = pefile.PE(f)
        matches = self.signatures.match_all(pe, ep_only=True)
        return not matches
    except Exception:
        # Unparseable PE or matcher failure: reject, but let
        # KeyboardInterrupt / SystemExit propagate.
        return False
Example #26
Source File: models.py    From web_develop with GNU General Public License v3.0 5 votes vote down vote up
def create_by_old_paste(cls, filehash):
    """Build an instance for the already-stored file identified by ``filehash``.

    The MIME type and size are read from the file on disk; ``filehash``
    is passed both as the first positional argument and as the
    ``filehash`` keyword.
    """
    stored_path = get_file_path(filehash)
    detected_mime = magic.from_file(stored_path, mime=True)
    byte_size = os.stat(stored_path).st_size

    return cls(filehash, detected_mime, byte_size, filehash=filehash)
Example #27
Source File: models.py    From web_develop with GNU General Public License v3.0 5 votes vote down vote up
def create_by_old_paste(cls, filehash):
    """Build an instance for the already-stored file identified by ``filehash``.

    The MIME type and size are read from the file on disk; ``filehash``
    is passed both as the first positional argument and as the
    ``filehash`` keyword.
    """
    stored_path = get_file_path(filehash)
    detected_mime = magic.from_file(stored_path, mime=True)
    byte_size = os.stat(stored_path).st_size

    return cls(filehash, detected_mime, byte_size, filehash=filehash)
Example #28
Source File: graphityUtils.py    From r2graphity with MIT License 5 votes vote down vote up
def getFiletype(path):
	"""Return the libmagic description of the file at ``path``."""
	description = magic.from_file(path)
	return description
Example #29
Source File: models.py    From web_develop with GNU General Public License v3.0 5 votes vote down vote up
def create_by_old_paste(cls, filehash):
    """Build an instance for the already-stored file identified by ``filehash``.

    The MIME type and size are read from the file on disk; ``filehash``
    is passed both as the first positional argument and as the
    ``filehash`` keyword.
    """
    stored_path = get_file_path(filehash)
    detected_mime = magic.from_file(stored_path, mime=True)
    byte_size = os.stat(stored_path).st_size

    return cls(filehash, detected_mime, byte_size, filehash=filehash)
Example #30
Source File: models.py    From web_develop with GNU General Public License v3.0 5 votes vote down vote up
def create_by_old_paste(cls, filehash):
    """Build an instance for the already-stored file identified by ``filehash``.

    The MIME type and size are read from the file on disk; ``filehash``
    is passed both as the first positional argument and as the
    ``filehash`` keyword.
    """
    stored_path = get_file_path(filehash)
    detected_mime = magic.from_file(stored_path, mime=True)
    byte_size = os.stat(stored_path).st_size

    return cls(filehash, detected_mime, byte_size, filehash=filehash)