Python chardet.UniversalDetector() Examples

The following are 6 code examples of chardet.UniversalDetector(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chardet, or try the search function.
Example #1
Source File: utils.py    From doccano with MIT License 6 votes vote down vote up
def __init__(self, fobj, buffer_size=io.DEFAULT_BUFFER_SIZE, default_encoding='utf-8'):
        """Sniff the encoding of ``fobj`` from its leading bytes.

        ``fobj`` is read in pieces of ``buffer_size`` and every piece is fed
        to a ``UniversalDetector``.  Reading stops as soon as the detector
        reports that it is done, or a read returns fewer than ``buffer_size``
        items (a short read).

        Attributes set:
            encoding: the detector's ``result['encoding']`` if the detector
                finished while being fed, otherwise ``default_encoding``.
            _fobj: the file object that was passed in.
            _buffer: everything that was read from ``fobj`` while sniffing.
        """
        sniffer = UniversalDetector()
        consumed = b''
        keep_reading = True

        while keep_reading:
            piece = fobj.read(buffer_size)
            sniffer.feed(piece)
            consumed = consumed + piece
            # Stop on a confident detection or on a short read.
            keep_reading = not (sniffer.done or len(piece) < buffer_size)

        # The detector is intentionally not closed here: its answer is only
        # used when it became confident during feeding.
        self.encoding = sniffer.result['encoding'] if sniffer.done else default_encoding

        self._fobj = fobj
        self._buffer = consumed
Example #2
Source File: members.py    From byro with Apache License 2.0 5 votes vote down vote up
def get_encoding(form):
    """Return chardet's encoding guess for the file uploaded through ``form``.

    The object stored under ``form.cleaned_data["upload_file"]`` is consumed
    via its ``chunks()`` method; feeding stops early once the detector
    reports it is done.  The detector is then closed and the ``"encoding"``
    entry of its result is returned as-is.
    """
    sniffer = UniversalDetector()
    upload = form.cleaned_data["upload_file"]
    for piece in upload.chunks():
        sniffer.feed(piece)
        if sniffer.done:
            # Confident already; no need to read the rest of the upload.
            break
    sniffer.close()
    guess = sniffer.result
    return guess["encoding"]
Example #3
Source File: encoding.py    From ingestors with MIT License 5 votes vote down vote up
def detect_list_encoding(self, items, default=DEFAULT_ENCODING):
        """Guess one encoding for the byte strings found in ``items``.

        Entries of ``items`` that are not ``bytes`` are skipped.  The
        remaining entries are fed to a chardet detector until it reports it
        is done or the entries run out; the detector is then closed and its
        result is passed, together with ``default``, to ``normalize_result``.
        """
        sniffer = chardet.UniversalDetector()
        # Lazy filter: non-bytes entries are ignored, and nothing past the
        # point where the detector finishes is pulled from ``items``.
        byte_items = (item for item in items if isinstance(item, bytes))
        for chunk in byte_items:
            sniffer.feed(chunk)
            if sniffer.done:
                break

        sniffer.close()
        return normalize_result(sniffer.result, default)
Example #4
Source File: encoding.py    From android_universal with MIT License 5 votes vote down vote up
def guess_file_encoding(fh, default=DEFAULT_ENCODING):
    """Guess encoding from a file handle.

    Reads ``fh`` in 10 KiB pieces starting at its current position, feeding
    each piece to a chardet detector until the detector is done or the
    handle returns no more data.  The handle is then moved back to where it
    started and the detector's result is passed, with ``default``, to
    ``normalize_result``.
    """
    chunk_size = 1024 * 10
    origin = fh.tell()
    sniffer = chardet.UniversalDetector()

    data = fh.read(chunk_size)
    while data:
        sniffer.feed(data)
        if sniffer.done:
            break
        data = fh.read(chunk_size)
    else:
        # Ran out of data without an early exit: finalize the detector.
        sniffer.close()

    fh.seek(origin)
    return normalize_result(sniffer.result, default=default)
Example #5
Source File: utils.py    From CleverCSV with MIT License 5 votes vote down vote up
def get_encoding(filename):
    """Detect the encoding of a file with chardet.

    The file is opened in binary mode and read in 65536-byte blocks, each of
    which is fed to a chardet ``UniversalDetector``.  Reading stops when the
    detector reports it is done or when a block shorter than the block size
    is read.

    Parameters
    ----------
    filename: str
        Path to a file

    Returns
    -------
    encoding: str
        The ``"encoding"`` entry of the detector's result, or ``None`` if
        the result has no such entry.
    """
    blk_size = 65536
    sniffer = chardet.UniversalDetector()
    with open(filename, "rb") as handle:
        while not sniffer.done:
            block = handle.read(blk_size)
            sniffer.feed(block)
            if len(block) < blk_size:
                # Short read: this was the last block of the file.
                break
    sniffer.close()
    return sniffer.result.get("encoding")
Example #6
Source File: human.py    From CleverCSV with MIT License 5 votes vote down vote up
def get_encoding(filename):
    """Return chardet's encoding guess for the file at ``filename``.

    The file is read in binary mode, 65536 bytes at a time, and each block
    is fed to a ``UniversalDetector``.  The loop ends once the detector is
    done or a block shorter than the block size has been fed.  The detector
    is then closed and the ``"encoding"`` entry of its result is returned
    (``None`` if the entry is absent).
    """
    block_size = 65536
    sniffer = chardet.UniversalDetector()
    with open(filename, "rb") as stream:
        while True:
            block = stream.read(block_size)
            sniffer.feed(block)
            if sniffer.done or len(block) < block_size:
                break
    sniffer.close()
    return sniffer.result.get("encoding")