Python codecs.BOM_UTF32 Examples

The following are 8 code examples that use the codecs.BOM_UTF32 constant. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module codecs, or try the search function.

Example #1

Source File: main.py From srtmerger with GNU General Public License v3.0

6 votes

def _insert_bom(self, content, encoding):
        """Return *content* with the byte-order mark for *encoding* prepended.

        The encoding name is matched case-insensitively and ignoring the
        separators '-', '_' and ' ', so 'utf-16-le', 'UTF_16_LE' and
        'utf 16 le' are all treated alike.

        :param content: encoded payload; it is concatenated with the
            ``codecs.BOM_*`` constants, so it has to be ``bytes``.
        :param encoding: name of the encoding *content* is stored in.
        :return: ``bom + content`` for a recognised UTF encoding, otherwise
            *content* unchanged.
        """
        key = encoding.upper()
        for separator in ('-', '_', ' '):
            key = key.replace(separator, '')

        boms = {
            'UTF8': codecs.BOM_UTF8,
            # Bare 'UTF16' / 'UTF32' mean "native byte order".
            'UTF16': codecs.BOM_UTF16,
            'UTF16LE': codecs.BOM_UTF16_LE,
            'UTF16BE': codecs.BOM_UTF16_BE,
            'UTF32': codecs.BOM_UTF32,
            'UTF32LE': codecs.BOM_UTF32_LE,
            'UTF32BE': codecs.BOM_UTF32_BE,
            # There is no UTF-64.  Earlier revisions accepted this spelling
            # and handled it like UTF-16 LE, so it is kept for compatibility.
            # ('UTF64BE' used to raise AttributeError; it now falls through
            # and the content is returned untouched.)
            'UTF64LE': codecs.BOM_UTF16_LE,
        }

        bom = boms.get(key)
        if bom is None:
            return content
        return bom + content

Example #2

Source File: encoding.py From codimension with GNU General Public License v3.0

5 votes

def detectFileEncodingToRead(fName, text=None):
    """Detects the encoding to be used for reading a file.

    The candidates are tried in order and the first one found is returned:
    a BOM at the start of the data, the encoding returned by
    getFileEncoding(fName), the encoding found by getCodingFromBytes(),
    the loaded project's 'encoding' property, the 'encoding' setting and
    finally DEFAULT_ENCODING.

    fName -- path of the file
    text -- leading bytes of the file; when None the first 1024 bytes
            are read from fName
    """
    if text is None:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

    # Step 1: check for BOM
    # UTF-32 has to be tested before UTF-16: the little-endian UTF-32 BOM
    # (FF FE 00 00) starts with the little-endian UTF-16 BOM (FF FE), so
    # testing UTF-16 first would misreport every such UTF-32 file.
    if text.startswith(BOM_UTF8):
        return 'bom-utf-8'
    if text.startswith(BOM_UTF32):
        return 'bom-utf-32'
    if text.startswith(BOM_UTF16):
        return 'bom-utf-16'

    # Step 2: check if it was a user assigned encoding
    userAssignedEncoding = getFileEncoding(fName)
    if userAssignedEncoding:
        return userAssignedEncoding

    # Step 3: extract encoding from the file
    encFromFile = getCodingFromBytes(text)
    if encFromFile:
        return encFromFile

    # Step 4: check the project default encoding
    project = GlobalData().project
    if project.isLoaded():
        projectEncoding = project.props['encoding']
        if projectEncoding:
            return projectEncoding

    # Step 5: checks the IDE encoding
    ideEncoding = Settings()['encoding']
    if ideEncoding:
        return ideEncoding

    # Step 6: default
    return DEFAULT_ENCODING

Example #3

Source File: encoding.py From codimension with GNU General Public License v3.0

5 votes

def writeEncodedFile(fName, content, encoding):
    """Writes into a file taking care of encoding.

    fName -- path of the file to be (over)written
    content -- text to be encoded and saved
    encoding -- encoding name; it is passed through getNormalizedEncoding()
                and a normalized name starting with 'bom_' requests a
                leading byte order mark

    Raises Exception if the content cannot be encoded or the file cannot
    be written; the original error is attached as the cause.
    """
    normEnc = getNormalizedEncoding(encoding)
    try:
        if normEnc.startswith('bom_'):
            enc = normEnc[4:]
            encContent = content.encode(enc)
            if enc == 'utf_8':
                # The plain utf_8 codec never emits a BOM
                encContent = BOM_UTF8 + encContent
            else:
                bom = BOM_UTF16 if enc == 'utf_16' else BOM_UTF32
                # The utf_16 and utf_32 codecs already start their output
                # with a native order BOM, so add one only if it is missing;
                # otherwise the file would carry two BOMs and the second
                # one shows up as a stray U+FEFF character in other tools.
                if not encContent.startswith(bom):
                    encContent = bom + encContent
        else:
            encContent = content.encode(normEnc)

            # Workaround for empty files: if there is no visible content and
            # the file is saved then the editor reports precisely \n which is
            # saved on disk and then detected as octet-stream. If there are
            # more than one \n then the file is detected as plain text.
            # The octet stream files are not openable in Codimension
            if encContent == b'\n':
                encContent = b''
    except (UnicodeError, LookupError) as exc:
        raise Exception('Error encoding the buffer content with ' + encoding +
                        ': ' + str(exc)) from exc

    try:
        with open(fName, 'wb') as diskfile:
            diskfile.write(encContent)
    except Exception as exc:
        raise Exception('Error writing encoded buffer content into ' +
                        fName + ': ' + str(exc)) from exc

Example #4

Source File: test_codecs.py From ironpython3 with Apache License 2.0

5 votes

def test_utf_32_encode(self):
        # The expected payload below is spelled out in little-endian order
        # and is preceded by the native BOM the utf_32 encoder emits.
        encoded, consumed = codecs.utf_32_encode("abc")
        expected = codecs.BOM_UTF32 + b'a\0\0\0b\0\0\0c\0\0\0'
        self.assertEqual(encoded, expected)
        self.assertEqual(consumed, 3)

        # Only str input is accepted; bytes must be rejected.
        with self.assertRaises(TypeError):
            codecs.utf_32_encode(b"abc")

Example #5

Source File: test_surrogateescape.py From ironpython3 with Apache License 2.0

5 votes

def test_utf_32(self):
        # Round-trip eight bytes through utf_32 with surrogateescape.
        raw = b'\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf'
        escaped = raw.decode("utf_32", errors="surrogateescape")
        round_tripped = escaped.encode("utf_32", errors="surrogateescape")
        # The encoder puts a BOM in front of the restored bytes.
        expected = codecs.BOM_UTF32 + raw
        self.assertEqual(round_tripped, expected)

Example #6

Source File: encoding.py From conda-manager with MIT License

5 votes

def decode(text):
    """
    Function to decode a text.

    A leading BOM selects the encoding; otherwise the encoding reported by
    get_coding() is used, then UTF-8 and finally Latin-1 are tried.

    @param text text to decode (byte string)
    @return decoded text and encoding
    """
    try:
        if text.startswith(BOM_UTF8):
            # UTF-8 with BOM
            return to_text_string(text[len(BOM_UTF8):], 'utf-8'), 'utf-8-bom'
        # UTF-32 must be tested before UTF-16: the little-endian UTF-32 BOM
        # (FF FE 00 00) begins with the little-endian UTF-16 BOM (FF FE), so
        # the UTF-16 test would otherwise claim UTF-32 data.
        elif text.startswith(BOM_UTF32):
            # UTF-32 with BOM
            return to_text_string(text[len(BOM_UTF32):], 'utf-32'), 'utf-32'
        elif text.startswith(BOM_UTF16):
            # UTF-16 with BOM
            return to_text_string(text[len(BOM_UTF16):], 'utf-16'), 'utf-16'
        coding = get_coding(text)
        if coding:
            return to_text_string(text, coding), coding
    except (UnicodeError, LookupError):
        # Fall through to the guesses below
        pass
    # Assume UTF-8
    try:
        return to_text_string(text, 'utf-8'), 'utf-8-guessed'
    except (UnicodeError, LookupError):
        pass
    # Assume Latin-1 (behaviour before 3.7.1)
    return to_text_string(text, "latin-1"), 'latin-1-guessed'

Example #7

Source File: encoding.py From conda-manager with MIT License

5 votes

def is_text_file(filename):
    """
    Test if the given path is a text-like file.

    A file counts as text when it can be opened and either starts with a
    UTF-8/UTF-16/UTF-32 BOM or is valid UTF-8 without any null byte.
    A file that cannot be opened is reported as not text; an I/O error
    while reading is ignored and the file is reported as text.

    Adapted from: http://stackoverflow.com/a/3002505

    Original Authors: Trent Mick <TrentM@ActiveState.com>
                      Jorge Orpinel <jorge@orpinel.com>
    """
    import codecs

    CHUNKSIZE = 1024
    try:
        fid = open(filename, 'rb')
    except (OSError, TypeError, ValueError):
        return False
    with fid:
        try:
            chunk = fid.read(CHUNKSIZE)
            # check for a UTF BOM
            if chunk.startswith((BOM_UTF8, BOM_UTF16, BOM_UTF32)):
                return True
            # An incremental decoder keeps the tail of a multi-byte
            # character that is split across two chunks, so such a file
            # is not rejected as invalid UTF-8.
            decoder = codecs.getincrementaldecoder('utf-8')()
            while True:
                # A short read means the end of the file; the length is
                # measured in bytes, not in decoded characters.
                at_eof = len(chunk) < CHUNKSIZE
                decoder.decode(chunk, final=at_eof)
                # In UTF-8 only U+0000 is encoded with a zero byte, so the
                # raw bytes can be searched directly.
                if b'\0' in chunk:  # found null byte
                    return False
                if at_eof:
                    break  # done
                chunk = fid.read(CHUNKSIZE)
        except UnicodeDecodeError:
            return False
        except OSError:
            # Best effort: a read failure does not prove the file is binary
            pass
    return True

Example #8

Source File: encoding.py From codimension with GNU General Public License v3.0

4 votes

def detectEncodingOnClearExplicit(fName, content):
    """Provides the reading encoding as a file would be read.

    fName -- path of the file; its first 1024 bytes are checked for a BOM
    content -- current buffer text, passed to getCodingFromText()

    Returns the detected encoding name. Any error is logged as a warning
    and DEFAULT_ENCODING is returned instead.
    """
    # The function is used in case the user reset the explicit encoding
    # so the current encoding needs to be set as if the file would be
    # read again
    try:
        with open(fName, 'rb') as diskfile:
            text = diskfile.read(1024)

        # UTF-32 has to be tested before UTF-16: the little-endian UTF-32
        # BOM (FF FE 00 00) starts with the little-endian UTF-16 BOM
        # (FF FE), so the UTF-16 test would otherwise claim UTF-32 files.
        if text.startswith(BOM_UTF8):
            return 'bom-utf-8'
        if text.startswith(BOM_UTF32):
            return 'bom-utf-32'
        if text.startswith(BOM_UTF16):
            return 'bom-utf-16'

        # The function is called when an explicit encoding is reset so
        # there is no need to check for it

        encFromBuffer = getCodingFromText(content)
        if encFromBuffer and isValidEncoding(encFromBuffer):
            return encFromBuffer

        project = GlobalData().project
        if project.isLoaded():
            projectEncoding = project.props['encoding']
            if projectEncoding and isValidEncoding(projectEncoding):
                return projectEncoding

        ideEncoding = Settings()['encoding']
        if ideEncoding and isValidEncoding(ideEncoding):
            return ideEncoding

        return DEFAULT_ENCODING
    except Exception as exc:
        # Guessing is best effort: report the problem and use the default
        logging.warning('Error while guessing encoding for reading %s: %s',
                        fName, str(exc))
        logging.warning('The default encoding %s will be used',
                        DEFAULT_ENCODING)
        return DEFAULT_ENCODING