Python codecs.utf_8_decode() Examples

The following are 30 code examples of codecs.utf_8_decode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module codecs, or try the search function.
Example #1
Source File: test_codecs.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def test_decode_unicode(self):
        # Most decoders don't accept unicode input
        decoders = [
            codecs.utf_7_decode,
            codecs.utf_8_decode,
            codecs.utf_16_le_decode,
            codecs.utf_16_be_decode,
            codecs.utf_16_ex_decode,
            codecs.utf_32_decode,
            codecs.utf_32_le_decode,
            codecs.utf_32_be_decode,
            codecs.utf_32_ex_decode,
            codecs.latin_1_decode,
            codecs.ascii_decode,
            codecs.charmap_decode,
        ]
        if hasattr(codecs, "mbcs_decode"):
            decoders.append(codecs.mbcs_decode)
        for decoder in decoders:
            self.assertRaises(TypeError, decoder, "xxx") 
Example #2
Source File: utf_8_sig.py    From Fluid-Designer with GNU General Public License v3.0 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode a chunk of UTF-8 bytes, skipping a leading BOM if present.

        While ``self.first`` is truthy the start of the data has not been
        classified yet. Returns the ``(text, consumed)`` pair; when a BOM
        was skipped, ``consumed`` includes its three bytes.
        """
        if not self.first:
            # Start of data already handled: plain UTF-8 decoding.
            return codecs.utf_8_decode(input, errors, final)

        bom = codecs.BOM_UTF8
        if len(input) < 3:
            if bom.startswith(input):
                # Might still become a BOM; consume nothing and wait for
                # the next call.
                return ("", 0)
            self.first = 0
        else:
            self.first = 0
            if input[:3] == bom:
                text, used = codecs.utf_8_decode(input[3:], errors, final)
                return (text, used + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #3
Source File: __init__.py    From nocolon with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def decode(input, errors='strict', *args):
    """Finds indent and add a colon on previous line

    Decodes *input* as UTF-8. For every non-blank line that is indented
    deeper than the previous non-blank line, ``:`` is appended to the closest
    preceding non-blank line unless that line already ends with a colon.

    Returns ``(text, consumed)`` where *consumed* is the length reported by
    ``codecs.utf_8_decode``. Extra positional arguments are accepted and
    ignored.
    """
    text, consumed = codecs.utf_8_decode(input, errors, True)
    out = []
    offset = 0  # indentation of the last non-blank line seen
    for line in text.split('\n'):
        if line.strip():
            indent = len(line) - len(line.lstrip())
            if indent > offset:
                # Walk back over blank lines to the line that opens the block.
                opener = next(
                    (k for k in range(len(out) - 1, -1, -1) if out[k].strip()),
                    None,
                )
                # There is no opener when the very first non-blank line is
                # itself indented; indexing blindly raised IndexError there.
                if opener is not None and not out[opener].rstrip().endswith(':'):
                    out[opener] += ':'
            offset = indent
        out.append(line)
    return '\n'.join(out), consumed
Example #4
Source File: utf_8_sig.py    From kobo-predict with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode UTF-8 bytes, dropping one leading BOM at the start of the data.

        ``self.first`` stays truthy until enough bytes have been seen to
        decide whether the data begins with ``codecs.BOM_UTF8``. The result
        is the ``(text, consumed)`` pair from ``codecs.utf_8_decode``, with
        3 added to ``consumed`` when a BOM was skipped.
        """
        if self.first:
            too_short = len(input) < 3
            if too_short and codecs.BOM_UTF8.startswith(input):
                # Undecided: these bytes could be the beginning of a BOM,
                # so report nothing consumed and retry on the next call.
                return ("", 0)
            self.first = 0
            if not too_short and input[:3] == codecs.BOM_UTF8:
                decoded, length = codecs.utf_8_decode(input[3:], errors, final)
                return (decoded, length + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #5
Source File: utf_8_sig.py    From Imogen with MIT License 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode a chunk of UTF-8 bytes, skipping a leading BOM if present.

        While ``self.first`` is truthy the start of the data has not been
        classified yet. Returns the ``(text, consumed)`` pair; when a BOM
        was skipped, ``consumed`` includes its three bytes.
        """
        if not self.first:
            # Start of data already handled: plain UTF-8 decoding.
            return codecs.utf_8_decode(input, errors, final)

        bom = codecs.BOM_UTF8
        if len(input) < 3:
            if bom.startswith(input):
                # Might still become a BOM; consume nothing and wait for
                # the next call.
                return ("", 0)
            self.first = 0
        else:
            self.first = 0
            if input[:3] == bom:
                text, used = codecs.utf_8_decode(input[3:], errors, final)
                return (text, used + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #6
Source File: utf_8_sig.py    From scylla with Apache License 2.0 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode UTF-8 bytes, dropping one leading BOM at the start of the data.

        ``self.first`` stays truthy until enough bytes have been seen to
        decide whether the data begins with ``codecs.BOM_UTF8``. The result
        is the ``(text, consumed)`` pair from ``codecs.utf_8_decode``, with
        3 added to ``consumed`` when a BOM was skipped.
        """
        if self.first:
            too_short = len(input) < 3
            if too_short and codecs.BOM_UTF8.startswith(input):
                # Undecided: these bytes could be the beginning of a BOM,
                # so report nothing consumed and retry on the next call.
                return ("", 0)
            self.first = 0
            if not too_short and input[:3] == codecs.BOM_UTF8:
                decoded, length = codecs.utf_8_decode(input[3:], errors, final)
                return (decoded, length + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #7
Source File: utf_8_sig.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode a chunk of UTF-8 bytes, skipping a leading BOM if present.

        While ``self.first`` is truthy the start of the data has not been
        classified yet. Returns the ``(text, consumed)`` pair; when a BOM
        was skipped, ``consumed`` includes its three bytes.
        """
        if not self.first:
            # Start of data already handled: plain UTF-8 decoding.
            return codecs.utf_8_decode(input, errors, final)

        bom = codecs.BOM_UTF8
        if len(input) < 3:
            if bom.startswith(input):
                # Might still become a BOM; consume nothing and wait for
                # the next call.
                return ("", 0)
            self.first = 0
        else:
            self.first = 0
            if input[:3] == bom:
                text, used = codecs.utf_8_decode(input[3:], errors, final)
                return (text, used + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #8
Source File: utf_8_sig.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode UTF-8 bytes, dropping one leading BOM at the start of the data.

        ``self.first`` stays truthy until enough bytes have been seen to
        decide whether the data begins with ``codecs.BOM_UTF8``. The result
        is the ``(text, consumed)`` pair from ``codecs.utf_8_decode``, with
        3 added to ``consumed`` when a BOM was skipped.
        """
        if self.first:
            too_short = len(input) < 3
            if too_short and codecs.BOM_UTF8.startswith(input):
                # Undecided: these bytes could be the beginning of a BOM,
                # so report nothing consumed and retry on the next call.
                return ("", 0)
            self.first = 0
            if not too_short and input[:3] == codecs.BOM_UTF8:
                decoded, length = codecs.utf_8_decode(input[3:], errors, final)
                return (decoded, length + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #9
Source File: utf_8_sig.py    From telegram-robot-rss with Mozilla Public License 2.0 5 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode a chunk of UTF-8 bytes, skipping a leading BOM if present.

        While ``self.first`` is truthy the start of the data has not been
        classified yet; once it has, ``self.first`` is set to ``None``.
        Returns the ``(text, consumed)`` pair; when a BOM was skipped,
        ``consumed`` includes its three bytes.
        """
        if not self.first:
            # Start of data already handled: plain UTF-8 decoding.
            return codecs.utf_8_decode(input, errors, final)

        bom = codecs.BOM_UTF8
        if len(input) < 3:
            if bom.startswith(input):
                # Might still become a BOM; consume nothing and wait for
                # the next call.
                return (u"", 0)
            self.first = None
        else:
            self.first = None
            if input[:3] == bom:
                text, used = codecs.utf_8_decode(input[3:], errors, final)
                return (text, used + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #10
Source File: reader.py    From bash-lambda-layer with MIT License 5 votes vote down vote up
def determine_encoding(self):
        """Choose the raw decoder and encoding name from the buffered input.

        Calls ``self.update_raw()`` until ``self.eof`` is set or
        ``self.raw_buffer`` holds at least two items. If the buffer is
        ``bytes``, a UTF-16 BOM selects the matching UTF-16 decoder and
        anything else selects UTF-8. Finishes by calling ``self.update(1)``.
        """
        while not self.eof:
            if self.raw_buffer is not None and len(self.raw_buffer) >= 2:
                break
            self.update_raw()
        raw = self.raw_buffer
        if isinstance(raw, bytes):
            bom_table = (
                (codecs.BOM_UTF16_LE, codecs.utf_16_le_decode, 'utf-16-le'),
                (codecs.BOM_UTF16_BE, codecs.utf_16_be_decode, 'utf-16-be'),
            )
            for bom, decoder, label in bom_table:
                if raw.startswith(bom):
                    break
            else:
                # No UTF-16 BOM found: fall back to UTF-8.
                decoder, label = codecs.utf_8_decode, 'utf-8'
            self.raw_decode = decoder
            self.encoding = label
        self.update(1)
Example #11
Source File: utf_8.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Decode *input* as UTF-8 in a single call.

    Passes ``final=True`` to ``codecs.utf_8_decode`` and returns its
    ``(text, consumed)`` result unchanged.
    """
    result = codecs.utf_8_decode(input, errors, True)
    return result
Example #12
Source File: utf_8_sig.py    From telegram-robot-rss with Mozilla Public License 2.0 5 votes vote down vote up
def decode(self, input, errors='strict'):
        """Decode UTF-8 bytes, skipping a leading BOM on the first usable chunk.

        If *input* is shorter than three bytes and could be the start of a
        BOM, nothing is consumed. Otherwise ``self.decode`` is rebound to
        ``codecs.utf_8_decode`` and the data is decoded, with 3 added to the
        consumed count when a BOM was skipped. Returns ``(text, consumed)``.
        """
        bom = codecs.BOM_UTF8
        if len(input) < 3 and bom.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return (u"", 0)
        # The BOM question is settled one way or the other from here on.
        self.decode = codecs.utf_8_decode
        if len(input) >= 3 and input[:3] == bom:
            text, used = codecs.utf_8_decode(input[3:], errors)
            return (text, used + 3)
        # no BOM present
        return codecs.utf_8_decode(input, errors)

### encodings module API 
Example #13
Source File: utf_8.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Return the ``(text, consumed)`` pair for *input* decoded as UTF-8.

    Delegates to ``codecs.utf_8_decode`` with ``final=True``.
    """
    decoded = codecs.utf_8_decode(input, errors, True)
    return decoded
Example #14
Source File: utf_8_sig.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def decode(self, input, errors='strict'):
        """Decode UTF-8 bytes, dropping a BOM found at the start of *input*.

        Returns ``("", 0)`` while *input* is under three bytes and still a
        possible BOM prefix. In every other case ``self.decode`` is rebound
        to ``codecs.utf_8_decode`` before decoding. Returns
        ``(text, consumed)``; ``consumed`` counts the BOM when one was
        skipped.
        """
        long_enough = len(input) >= 3
        if not long_enough and codecs.BOM_UTF8.startswith(input):
            # Too little data to tell whether this is a BOM; wait for more.
            return ("", 0)
        self.decode = codecs.utf_8_decode
        if long_enough and input[:3] == codecs.BOM_UTF8:
            decoded, length = codecs.utf_8_decode(input[3:], errors)
            return (decoded, length + 3)
        # No BOM at the start of the data.
        return codecs.utf_8_decode(input, errors)

### encodings module API 
Example #15
Source File: utf_8.py    From telegram-robot-rss with Mozilla Public License 2.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Decode *input* as UTF-8 in a single call.

    Passes ``final=True`` to ``codecs.utf_8_decode`` and returns its
    ``(text, consumed)`` result unchanged.
    """
    outcome = codecs.utf_8_decode(input, errors, True)
    return outcome
Example #16
Source File: reader.py    From cronyo with MIT License 5 votes vote down vote up
def determine_encoding(self):
        """Select ``self.raw_decode`` and ``self.encoding`` from the buffered data.

        Keeps calling ``self.update_raw()`` while ``self.eof`` is false and
        ``self.raw_buffer`` is ``None`` or shorter than two items. For a
        ``bytes`` buffer, a UTF-16 LE/BE BOM picks the matching decoder and
        everything else is treated as UTF-8. Ends with ``self.update(1)``.
        """
        while not (
            self.eof
            or (self.raw_buffer is not None and len(self.raw_buffer) >= 2)
        ):
            self.update_raw()
        buf = self.raw_buffer
        if isinstance(buf, bytes):
            if buf.startswith(codecs.BOM_UTF16_LE):
                choice = (codecs.utf_16_le_decode, 'utf-16-le')
            elif buf.startswith(codecs.BOM_UTF16_BE):
                choice = (codecs.utf_16_be_decode, 'utf-16-be')
            else:
                choice = (codecs.utf_8_decode, 'utf-8')
            self.raw_decode, self.encoding = choice
        self.update(1)
Example #17
Source File: utf_8_sig.py    From ironpython3 with Apache License 2.0 5 votes vote down vote up
def decode(self, input, errors='strict'):
        """Decode UTF-8 bytes, skipping a leading BOM on the first usable chunk.

        If *input* is shorter than three bytes and could be the start of a
        BOM, nothing is consumed. Otherwise ``self.decode`` is rebound to
        ``codecs.utf_8_decode`` and the data is decoded, with 3 added to the
        consumed count when a BOM was skipped. Returns ``(text, consumed)``.
        """
        bom = codecs.BOM_UTF8
        if len(input) < 3 and bom.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return ("", 0)
        # The BOM question is settled one way or the other from here on.
        self.decode = codecs.utf_8_decode
        if len(input) >= 3 and input[:3] == bom:
            text, used = codecs.utf_8_decode(input[3:], errors)
            return (text, used + 3)
        # no BOM present
        return codecs.utf_8_decode(input, errors)

### encodings module API 
Example #18
Source File: utf_8_sig.py    From Imogen with MIT License 5 votes vote down vote up
def decode(input, errors='strict'):
    """Decode *input* as UTF-8, ignoring a leading UTF-8 BOM.

    Returns ``(text, consumed)``; ``consumed`` includes the three BOM bytes
    when they were stripped.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    skipped = 3 if has_bom else 0
    payload = input[3:] if has_bom else input
    text, used = codecs.utf_8_decode(payload, errors, True)
    return (text, used + skipped)
Example #19
Source File: utf_8_sig.py    From Fluid-Designer with GNU General Public License v3.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Strip a leading UTF-8 BOM from *input* (if any) and decode the rest.

    Returns ``(text, consumed)`` where ``consumed`` also counts the BOM
    bytes that were removed.
    """
    bom_len = 3 if input[:3] == codecs.BOM_UTF8 else 0
    if bom_len:
        input = input[3:]
    decoded, length = codecs.utf_8_decode(input, errors, True)
    return (decoded, length + bom_len)
Example #20
Source File: reader.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def determine_encoding(self):
        """Choose the raw decoder and encoding name from the buffered input.

        Calls ``self.update_raw()`` until ``self.eof`` is set or
        ``self.raw_buffer`` holds at least two items. If the buffer is
        ``bytes``, a UTF-16 BOM selects the matching UTF-16 decoder and
        anything else selects UTF-8. Finishes by calling ``self.update(1)``.
        """
        while not self.eof:
            if self.raw_buffer is not None and len(self.raw_buffer) >= 2:
                break
            self.update_raw()
        raw = self.raw_buffer
        if isinstance(raw, bytes):
            bom_table = (
                (codecs.BOM_UTF16_LE, codecs.utf_16_le_decode, 'utf-16-le'),
                (codecs.BOM_UTF16_BE, codecs.utf_16_be_decode, 'utf-16-be'),
            )
            for bom, decoder, label in bom_table:
                if raw.startswith(bom):
                    break
            else:
                # No UTF-16 BOM found: fall back to UTF-8.
                decoder, label = codecs.utf_8_decode, 'utf-8'
            self.raw_decode = decoder
            self.encoding = label
        self.update(1)
Example #21
Source File: utf_8.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def decode(input, errors='strict'):
    """Return the ``(text, consumed)`` pair for *input* decoded as UTF-8.

    Delegates to ``codecs.utf_8_decode`` with ``final=True``.
    """
    pair = codecs.utf_8_decode(input, errors, True)
    return pair
Example #22
Source File: utf_8_sig.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def decode(self, input, errors='strict'):
        """Decode UTF-8 bytes, dropping a BOM found at the start of *input*.

        Returns ``("", 0)`` while *input* is under three bytes and still a
        possible BOM prefix. In every other case ``self.decode`` is rebound
        to ``codecs.utf_8_decode`` before decoding. Returns
        ``(text, consumed)``; ``consumed`` counts the BOM when one was
        skipped.
        """
        long_enough = len(input) >= 3
        if not long_enough and codecs.BOM_UTF8.startswith(input):
            # Too little data to tell whether this is a BOM; wait for more.
            return ("", 0)
        self.decode = codecs.utf_8_decode
        if long_enough and input[:3] == codecs.BOM_UTF8:
            decoded, length = codecs.utf_8_decode(input[3:], errors)
            return (decoded, length + 3)
        # No BOM at the start of the data.
        return codecs.utf_8_decode(input, errors)

### encodings module API 
Example #23
Source File: utf_8_sig.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def decode(input, errors='strict'):
    """Decode *input* as UTF-8, ignoring a leading UTF-8 BOM.

    Returns ``(text, consumed)``; ``consumed`` includes the three BOM bytes
    when they were stripped.
    """
    has_bom = input[:3] == codecs.BOM_UTF8
    skipped = 3 if has_bom else 0
    payload = input[3:] if has_bom else input
    text, used = codecs.utf_8_decode(payload, errors, True)
    return (text, used + skipped)
Example #24
Source File: __init__.py    From mongo-mockup-db with Apache License 2.0 5 votes vote down vote up
def _get_c_string(data, position):
    """Decode a BSON 'C' string to python unicode string.

    Reads from *position* up to the next NUL byte in *data* and returns
    ``(text, next_position)``, where ``next_position`` is the index just
    past the NUL terminator.
    """
    terminator = data.index(b"\x00", position)
    raw = data[position:terminator]
    decoded = _utf_8_decode(raw, None, True)
    return decoded[0], terminator + 1
Example #25
Source File: utf_8.py    From pmatic with GNU General Public License v2.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Decode *input* as UTF-8 in a single call.

    Passes ``final=True`` to ``codecs.utf_8_decode`` and returns its
    ``(text, consumed)`` result unchanged.
    """
    result = codecs.utf_8_decode(input, errors, True)
    return result
Example #26
Source File: utf_8_sig.py    From pmatic with GNU General Public License v2.0 5 votes vote down vote up
def decode(self, input, errors='strict'):
        """Decode UTF-8 bytes, skipping a leading BOM on the first usable chunk.

        If *input* is shorter than three bytes and could be the start of a
        BOM, nothing is consumed. Otherwise ``self.decode`` is rebound to
        ``codecs.utf_8_decode`` and the data is decoded, with 3 added to the
        consumed count when a BOM was skipped. Returns ``(text, consumed)``.
        """
        bom = codecs.BOM_UTF8
        if len(input) < 3 and bom.startswith(input):
            # not enough data to decide if this is a BOM
            # => try again on the next call
            return (u"", 0)
        # The BOM question is settled one way or the other from here on.
        self.decode = codecs.utf_8_decode
        if len(input) >= 3 and input[:3] == bom:
            text, used = codecs.utf_8_decode(input[3:], errors)
            return (text, used + 3)
        # no BOM present
        return codecs.utf_8_decode(input, errors)

### encodings module API 
Example #27
Source File: utf_8_sig.py    From pmatic with GNU General Public License v2.0 5 votes vote down vote up
def _buffer_decode(self, input, errors, final):
        """Decode UTF-8 bytes, dropping one leading BOM at the start of the data.

        ``self.first`` stays truthy until enough bytes have been seen to
        decide whether the data begins with ``codecs.BOM_UTF8``; it is then
        set to ``None``. The result is the ``(text, consumed)`` pair from
        ``codecs.utf_8_decode``, with 3 added to ``consumed`` when a BOM was
        skipped.
        """
        if self.first:
            too_short = len(input) < 3
            if too_short and codecs.BOM_UTF8.startswith(input):
                # Undecided: these bytes could be the beginning of a BOM,
                # so report nothing consumed and retry on the next call.
                return (u"", 0)
            self.first = None
            if not too_short and input[:3] == codecs.BOM_UTF8:
                decoded, length = codecs.utf_8_decode(input[3:], errors, final)
                return (decoded, length + 3)
        return codecs.utf_8_decode(input, errors, final)
Example #28
Source File: utf_8_sig.py    From pmatic with GNU General Public License v2.0 5 votes vote down vote up
def decode(input, errors='strict'):
    """Strip a leading UTF-8 BOM from *input* (if any) and decode the rest.

    Returns ``(text, consumed)`` where ``consumed`` also counts the BOM
    bytes that were removed.
    """
    bom_len = 3 if input[:3] == codecs.BOM_UTF8 else 0
    if bom_len:
        input = input[3:]
    decoded, length = codecs.utf_8_decode(input, errors, True)
    return (decoded, length + bom_len)
Example #29
Source File: helper.py    From OpenDoor with GNU General Public License v3.0 5 votes vote down vote up
def decode(str, errors='strict'):
        """
        Decode strings

        Tries UTF-8 first (skipping a UTF-8 BOM), or UTF-16 when the data
        starts with a UTF-16 BOM. If that fails for any reason the data is
        decoded as cp1251, and if that fails too an empty string is returned,
        so this function never raises.

        :param bytes str: raw input data (the name shadows the builtin and is
            kept only for caller compatibility)
        :param str errors: error level passed to the UTF-8 decoder
        :return: str
        """

        # Work through an alias: the parameter hides the builtin ``str``, so
        # calling ``str(...)`` here would call the input bytes and fail.
        data = str
        try:
            if len(data) < 3 and codecs.BOM_UTF8.startswith(data):
                # Empty input or only a fragment of a BOM: nothing to decode.
                return ""
            if data[:3] == codecs.BOM_UTF8:
                output, _ = codecs.utf_8_decode(data[3:], errors)
            elif data[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
                # A UTF-16 BOM is two bytes long; the utf16 codec consumes it
                # and picks the byte order from it.
                output = data.decode('utf16')
            else:
                # (else) no BOM present
                output, _ = codecs.utf_8_decode(data, errors)
            return output
        except Exception:
            # Deliberately broad (best effort): the data does not seem to be
            # text content (images, files, etc).
            try:
                return data.decode('cp1251')
            except Exception:
                return ""
Example #30
Source File: reader.py    From pipenv with MIT License 5 votes vote down vote up
def determine_encoding(self):
        """Select ``self.raw_decode`` and ``self.encoding`` from the buffered data.

        Keeps calling ``self.update_raw()`` while ``self.eof`` is false and
        ``self.raw_buffer`` is ``None`` or shorter than two items. For a
        ``bytes`` buffer, a UTF-16 LE/BE BOM picks the matching decoder and
        everything else is treated as UTF-8. Ends with ``self.update(1)``.
        """
        while not (
            self.eof
            or (self.raw_buffer is not None and len(self.raw_buffer) >= 2)
        ):
            self.update_raw()
        buf = self.raw_buffer
        if isinstance(buf, bytes):
            if buf.startswith(codecs.BOM_UTF16_LE):
                choice = (codecs.utf_16_le_decode, 'utf-16-le')
            elif buf.startswith(codecs.BOM_UTF16_BE):
                choice = (codecs.utf_16_be_decode, 'utf-16-be')
            else:
                choice = (codecs.utf_8_decode, 'utf-8')
            self.raw_decode, self.encoding = choice
        self.update(1)