# Python source code of truetype.

import struct
# shorthand for the binary decoder used by all the parse functions.
unpack = struct.unpack

# sizes, in bytes, of the fixed-layout records read from a font file.
# each one is computed from a big-endian struct format (">" means no
# padding is added); the resulting value is given in the trailing
# comment.
OFFSET_TABLE_SIZE = struct.calcsize(">L4H")     # 12
TABLE_DIR_SIZE = struct.calcsize(">4s3L")       # 16
NAME_TABLE_SIZE = struct.calcsize(">3H")        # 6
NAME_RECORD_SIZE = struct.calcsize(">6H")       # 12

# exception raised when font data fails validation. the message given
# to the constructor is available as the 'msg' attribute, and its
# string form is what str() returns for the exception.
class ParseError(Exception):
    def __init__(self, msg):
        super(ParseError, self).__init__(msg)
        self.msg = msg

    def __str__(self):
        message = self.msg

        return str(message)

# raise ParseError unless val is true; return None otherwise. msg is an
# optional description of what was being verified and becomes the
# exception's message. it defaults to a generic, non-empty text so that
# a failed check is never reported as an empty string.
def check(val, msg="Invalid or corrupt font data"):
    if not val:
        raise ParseError(msg)

# A parser for TrueType/OpenType fonts.
# http://www.microsoft.com/typography/otspec/default.htm contained the
# specification at the time of writing.
class Font:

    # load font from s, which is the whole contents of a font file as a
    # byte string (str on Python 2, bytes on Python 3). parsing problems
    # do not propagate out of the constructor: the exception is stored
    # in self.error and isOK() returns False.
    def __init__(self, s):
        # is this a valid font
        self.ok = False

        # the exception that made parsing fail, or None if parsing has
        # not failed
        self.error = None

        # values filled in by the table parsers. they are only
        # meaningful once isOK() returns True.
        self.psName = None
        self.embeddingOK = False

        # parse functions for tables, and a flag for whether each has been
        # parsed successfully
        self.parseFuncs = {
            "head" : [self.parseHead, False],
            "name" : [self.parseName, False],
            "OS/2" : [self.parseOS2, False]
            }

        try:
            self.parse(s)
        except (struct.error, ParseError) as e:
            # struct.error is raised by unpack when the data is too
            # short for the record being read
            self.error = e

            return

        self.ok = True

    # check if font was parsed correctly. none of the other
    # (user-oriented) functions can be called if this returns False.
    def isOK(self):
        return self.ok

    # get font's Postscript name.
    def getPostscriptName(self):
        return self.psName

    # returns True if font allows embedding.
    def allowsEmbedding(self):
        return self.embeddingOK

    # parse whole file. raises ParseError if the version field is not
    # 0x00010000 or if any table listed in self.parseFuncs was not
    # parsed, and struct.error if the data is too short.
    def parse(self, s):
        version, self.tableCnt = unpack(">LH", s[:6])

        if version != 0x00010000:
            raise ParseError("Unsupported font version 0x%08X" % version)

        offset = OFFSET_TABLE_SIZE

        for i in range(self.tableCnt):
            self.parseTag(offset, s)
            offset += TABLE_DIR_SIZE

        # go through the names in sorted order so that the table named
        # in the error message does not depend on dict ordering
        for name in sorted(self.parseFuncs):
            if not self.parseFuncs[name][1]:
                raise ParseError("Table %s missing/invalid" % name)

    # parse a single tag, i.e. the table directory entry found at
    # 'offset' in s. if the entry is for a table that has a parse
    # function, that function is called with the table's data and the
    # table is flagged as parsed.
    def parseTag(self, offset, s):
        # checkSum is read only because it is part of the record; it is
        # not verified
        tag, checkSum, tagOffset, length = unpack(">4s3L",
            s[offset : offset + TABLE_DIR_SIZE])

        # table data can not start before the end of the table directory
        if tagOffset < (OFFSET_TABLE_SIZE +
                        self.tableCnt * TABLE_DIR_SIZE):
            raise ParseError("Table data overlaps the table directory")

        # on Python 3 the tag is a bytes object while the keys of
        # self.parseFuncs are text. ISO-8859-1 maps every byte value to
        # a character, so the conversion can not fail. on Python 2 the
        # tag already is a str and is left alone.
        if not isinstance(tag, str):
            tag = tag.decode("ISO-8859-1")

        func = self.parseFuncs.get(tag)
        if func:
            func[0](s[tagOffset : tagOffset + length])
            func[1] = True

    # parse head table. only the magic number is verified.
    def parseHead(self, s):
        magic = unpack(">L", s[12:16])[0]

        if magic != 0x5F0F3CF5:
            raise ParseError("Invalid magic number in head table")

    # parse name table. stores the first usable Postscript name in
    # self.psName, or raises ParseError if there isn't one.
    def parseName(self, s):
        fmt, nameCnt, storageOffset = unpack(">3H", s[:NAME_TABLE_SIZE])

        if fmt != 0:
            raise ParseError("Unsupported name table format %d" % fmt)

        storage = s[storageOffset:]
        offset = NAME_TABLE_SIZE

        for i in range(nameCnt):
            if self.parseNameRecord(s[offset : offset + NAME_RECORD_SIZE],
                                    storage):
                return

            offset += NAME_RECORD_SIZE

        raise ParseError("No Postscript name found")

    # parse a single name record. s2 is string storage. returns True if
    # this record is a valid Postscript name, in which case the name has
    # been stored in self.psName.
    def parseNameRecord(self, s, s2):
        platformID, encodingID, langID, nameID, strLen, strOffset = \
                    unpack(">6H", s)

        # name ID 6 is the only one accepted as a Postscript name
        if nameID != 6:
            return False

        if (platformID == 1) and (encodingID == 0) and (langID == 0):
            # Macintosh, 1-byte strings

            raw = self._readNameString(s2, strOffset, strLen)
            self.psName = self._toNativeStr(raw)

            return True

        elif (platformID == 3) and (encodingID == 1) and (langID == 0x409):
            # Windows, UTF-16BE

            tmp = self._readNameString(s2, strOffset, strLen)

            self.psName = self._toNativeStr(
                tmp.decode("UTF-16BE", "ignore").encode(
                    "ISO-8859-1", "ignore"))

            return True

        return False

    # return the 'length' bytes found at 'offset' in the name table's
    # string storage. going through unpack with an explicit size means
    # that a string extending past the end of the storage raises
    # struct.error instead of being silently cut short.
    def _readNameString(self, storage, offset, length):
        return unpack("%ds" % length,
                      storage[offset : offset + length])[0]

    # convert a byte string to the native str type. on Python 2 that is
    # what a byte string already is, so it is returned unchanged; on
    # Python 3 it is decoded as ISO-8859-1.
    def _toNativeStr(self, data):
        if isinstance(data, str):
            return data

        return data.decode("ISO-8859-1")

    # parse OS/2 table. only fsType, the 16-bit field at byte offset 8,
    # is looked at.
    def parseOS2(self, s):
        fsType = unpack(">H", s[8:10])[0]

        # the font embedding bits are a mess, the meanings have changed
        # over time in the TrueType/OpenType specs. this is the least
        # restrictive interpretation common to them all.
        self.embeddingOK = (fsType & 0xF) != 2