Python unicodedata.name() Examples

The following are 30 code examples of unicodedata.name(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module unicodedata, or try the search function.
Example #1
Source File: transliterator.py    From indic_transliteration with MIT License 6 votes vote down vote up
def _equivalent(self, char, prev, next, implicitA):
    """ Transliterate a Devanagari character to Latin.

    Returns a list holding ``char.equivalents[self.name]`` unless the
    character is the VIRAMA, in which case the list is empty.

    The implicit-A rule is switched off here: the *implicitA* argument is
    overwritten with False before it is consulted, so no implicit A is
    ever appended by this version.  *prev* is not used.

    """
    implicitA = False  # Force it!
    virama = DevanagariCharacter._VIRAMA
    output = []
    if char.chr != virama:
        output.append(char.equivalents[self.name])
    # Append implicit A to consonants if the next character isn't a vowel
    # (currently unreachable because implicitA was forced to False above).
    if implicitA and char.isConsonant:
        if next is None or (next.chr != virama and not next.isVowel):
            letter_a = characterBlocks['DEVANAGARI'][DevanagariCharacter._LETTER_A]
            output.append(letter_a.equivalents[self.name])
    return output
Example #2
Source File: test_ucn.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_hangul_syllables(self):
    # Sample of Hangul syllables: each name suffix is paired with its
    # character and handed to self.checkletter with the full name.
    samples = (
        ("GA", u"\uac00"),
        ("GGWEOSS", u"\uafe8"),
        ("DOLS", u"\ub3d0"),
        ("RYAN", u"\ub7b8"),
        ("MWIK", u"\ubba0"),
        ("BBWAEM", u"\ubf88"),
        ("SSEOL", u"\uc370"),
        ("YI", u"\uc758"),
        ("JJYOSS", u"\ucb40"),
        ("KYEOLS", u"\ucf28"),
        ("PAN", u"\ud310"),
        ("HWEOK", u"\ud6f8"),
        ("HIH", u"\ud7a3"),
    )
    for suffix, syllable in samples:
        self.checkletter("HANGUL SYLLABLE " + suffix, syllable)

    # U+D7A4 is the code point right after the last sample (U+D7A3);
    # looking up its name is expected to fail with ValueError.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #3
Source File: gftools-ttf2cp.py    From gftools with Apache License 2.0 6 votes vote down vote up
def main(argv):
  """Print every codepoint found in the font files named in argv[1:].

  Exits via sys.exit with a message when no font file is given or when a
  given path is not a file.  Each output line is '0x' plus the codepoint
  in hex, optionally followed by the character and its Unicode name
  (FLAGS.show_char) and by the subsets containing it (FLAGS.show_subsets).
  """
  font_paths = argv[1:]
  if not font_paths:
    sys.exit('Must specify one or more font files.')

  # Union of the codepoints of all fonts; stops at the first bad path.
  codepoints = set()
  for path in font_paths:
    if not os.path.isfile(path):
      sys.exit('%s is not a file' % path)
    codepoints |= fonts.CodepointsInFont(path)

  for cp in sorted(codepoints):
    char_part = ''
    if FLAGS.show_char:
      glyph = unichr(cp)
      # Characters without a Unicode name get an empty name.
      char_part = ' ' + glyph.strip() + ' ' + unicodedata.name(glyph, '')

    subset_part = ''
    if FLAGS.show_subsets:
      subset_part = ' subset:%s' % ','.join(fonts.SubsetsForCodepoint(cp))

    print(u'0x%04X%s%s' % (cp, char_part, subset_part))
Example #4
Source File: signs_server.py    From concurrency2017 with MIT License 6 votes vote down vote up
def main(global_delay, local_delay, concurrency):
    """Configure the module-level settings and run the web application.

    Stores the two delays and a semaphore sized by *concurrency* in module
    globals, builds the index via build_index(), registers the three GET
    routes and finally hands control to web.run_app on PORT.
    """
    global global_sleep, local_sleep, semaphore, index
    global_sleep, local_sleep = global_delay, local_delay
    semaphore = asyncio.Semaphore(concurrency)

    # Echo the effective configuration before the index build.
    settings = (
        ('Global delay =', global_delay),
        ('Local delay =', local_delay),
        ('Max. concurrency =', concurrency),
    )
    for label, value in settings:
        print(label, value)
    print('Building inverted index...')
    index = build_index()

    app = web.Application()
    routes = (
        ('/', usage),
        ('/index/{word}', index_for),
        ('/name/{char}', char_name),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)

    print('Listening on port', PORT)
    web.run_app(app, port=PORT)
Example #5
Source File: test_codeccallbacks.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_longstrings(self):
    # test long strings to check for memory overflow problems
    builtin_handlers = ["strict", "ignore", "replace", "xmlcharrefreplace",
                        "backslashreplace"]
    # register the handlers under different names,
    # to prevent the codec from recognizing the name
    aliases = []
    for handler in builtin_handlers:
        alias = "test." + handler
        codecs.register_error(alias, codecs.lookup_error(handler))
        aliases.append(alias)
    handlers = builtin_handlers + aliases

    repeat = 1000
    encodings = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                 "utf-8", "utf-7", "utf-16", "utf-32")
    for seed in (u"x", u"\u3042", u"a\xe4"):
        text = seed * repeat
        for encoding in encodings:
            for handler in handlers:
                # Encoding failures are acceptable here; the loop only
                # exercises the encoders on long input.
                try:
                    text.encode(encoding, handler)
                except UnicodeError:
                    pass
Example #6
Source File: test_ucn.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must raise UnicodeError when decoded
    # with 'unicode-escape' under the 'strict' error handler.
    malformed = (
        "\\N{blah}",                    # bogus character name
        "\\N{%s}" % ("x" * 100000),     # long bogus character name
        "\\N{SPACE",                    # missing closing brace
        "\\NSPACE",                     # missing opening brace
    )
    for escaped in malformed:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #7
Source File: test_regressions.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_ipy2_gh357(self):
    """https://github.com/IronLanguages/ironpython2/issues/357"""

    import unicodedata

    ideograph = u'\u4e2d'

    # The expected name depends on the is_cli flag.
    if is_cli:
        expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
    else:
        expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
    self.assertEqual(unicodedata.name(ideograph), expected_name)

    # Numeric lookups raise without a default and return the default
    # when one is supplied.
    for lookup in (unicodedata.decimal, unicodedata.digit, unicodedata.numeric):
        self.assertRaises(ValueError, lookup, ideograph)
        self.assertEqual(lookup(ideograph, 0), 0)

    expected_properties = (
        (unicodedata.category, 'Lo'),
        (unicodedata.bidirectional, 'L'),
        (unicodedata.combining, 0),
        (unicodedata.east_asian_width, 'W'),
        (unicodedata.mirrored, 0),
        (unicodedata.decomposition, ''),
    )
    for prop, expected in expected_properties:
        self.assertEqual(prop(ideograph), expected)
Example #8
Source File: test_codeccallbacks.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_longstrings(self):
    # test long strings to check for memory overflow problems
    builtin_handlers = ["strict", "ignore", "replace", "xmlcharrefreplace",
                        "backslashreplace"]
    # register the handlers under different names,
    # to prevent the codec from recognizing the name
    aliases = []
    for handler in builtin_handlers:
        alias = "test." + handler
        codecs.register_error(alias, codecs.lookup_error(handler))
        aliases.append(alias)
    handlers = builtin_handlers + aliases

    repeat = 1000
    encodings = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                 "utf-8", "utf-7", "utf-16", "utf-32")
    for seed in (u"x", u"\u3042", u"a\xe4"):
        text = seed * repeat
        for encoding in encodings:
            for handler in handlers:
                # Encoding failures are acceptable here; the loop only
                # exercises the encoders on long input.
                try:
                    text.encode(encoding, handler)
                except UnicodeError:
                    pass
Example #9
Source File: test_ucn.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_hangul_syllables(self):
    # Sample of Hangul syllables: each name suffix is paired with its
    # character and handed to self.checkletter with the full name.
    samples = (
        ("GA", u"\uac00"),
        ("GGWEOSS", u"\uafe8"),
        ("DOLS", u"\ub3d0"),
        ("RYAN", u"\ub7b8"),
        ("MWIK", u"\ubba0"),
        ("BBWAEM", u"\ubf88"),
        ("SSEOL", u"\uc370"),
        ("YI", u"\uc758"),
        ("JJYOSS", u"\ucb40"),
        ("KYEOLS", u"\ucf28"),
        ("PAN", u"\ud310"),
        ("HWEOK", u"\ud6f8"),
        ("HIH", u"\ud7a3"),
    )
    for suffix, syllable in samples:
        self.checkletter("HANGUL SYLLABLE " + suffix, syllable)

    # U+D7A4 is the code point right after the last sample (U+D7A3);
    # looking up its name is expected to fail with ValueError.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #10
Source File: test_ucn.py    From BinderFilter with MIT License 6 votes vote down vote up
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must raise UnicodeError when decoded
    # with 'unicode-escape' under the 'strict' error handler.
    malformed = (
        "\\N{blah}",                    # bogus character name
        "\\N{%s}" % ("x" * 100000),     # long bogus character name
        "\\N{SPACE",                    # missing closing brace
        "\\NSPACE",                     # missing opening brace
    )
    for escaped in malformed:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #11
Source File: mathtext.py    From Computable with MIT License 6 votes vote down vote up
def get_unicode_index(symbol):
    r"""get_unicode_index(symbol) -> integer

    Return the integer index (from the Unicode table) of symbol.  *symbol*
    can be a single unicode character, a TeX command (e.g. r'\pi'), or a
    Type1 symbol name (e.g. 'phi').

    Raises ValueError when *symbol* is neither a single character nor,
    after stripping backslashes from both ends, a key of ``tex2uni``.
    """
    # From UTF #25: U+2212 minus sign is the preferred
    # representation of the unary and binary minus sign rather than
    # the ASCII-derived U+002D hyphen-minus, because minus sign is
    # unambiguous and because it is rendered with a more desirable
    # length, usually longer than a hyphen.
    if symbol == '-':
        return 0x2212
    try:  # This will succeed if symbol is a single unicode char
        return ord(symbol)
    except TypeError:
        # Not a single character; fall through to the symbol-name lookup.
        pass
    try:  # Is symbol a TeX symbol (e.g. \alpha)
        return tex2uni[symbol.strip("\\")]
    except KeyError:
        # Explicit mapping instead of locals(), so the message does not
        # depend on the names of local variables.
        message = """'%(symbol)s' is not a valid Unicode character or
TeX/Type1 symbol""" % {'symbol': symbol}
        raise ValueError(message)
Example #12
Source File: test_ucn.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_hangul_syllables(self):
    # Sample of Hangul syllables: each name suffix is paired with its
    # character and handed to self.checkletter with the full name.
    samples = (
        ("GA", u"\uac00"),
        ("GGWEOSS", u"\uafe8"),
        ("DOLS", u"\ub3d0"),
        ("RYAN", u"\ub7b8"),
        ("MWIK", u"\ubba0"),
        ("BBWAEM", u"\ubf88"),
        ("SSEOL", u"\uc370"),
        ("YI", u"\uc758"),
        ("JJYOSS", u"\ucb40"),
        ("KYEOLS", u"\ucf28"),
        ("PAN", u"\ud310"),
        ("HWEOK", u"\ud6f8"),
        ("HIH", u"\ud7a3"),
    )
    for suffix, syllable in samples:
        self.checkletter("HANGUL SYLLABLE " + suffix, syllable)

    # U+D7A4 is the code point right after the last sample (U+D7A3);
    # looking up its name is expected to fail with ValueError.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #13
Source File: test_codeccallbacks.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_longstrings(self):
    # test long strings to check for memory overflow problems
    builtin_handlers = ["strict", "ignore", "replace", "xmlcharrefreplace",
                        "backslashreplace"]
    # register the handlers under different names,
    # to prevent the codec from recognizing the name
    aliases = []
    for handler in builtin_handlers:
        alias = "test." + handler
        codecs.register_error(alias, codecs.lookup_error(handler))
        aliases.append(alias)
    handlers = builtin_handlers + aliases

    repeat = 1000
    encodings = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15",
                 "utf-8", "utf-7", "utf-16", "utf-32")
    for seed in (u"x", u"\u3042", u"a\xe4"):
        text = seed * repeat
        for encoding in encodings:
            for handler in handlers:
                # Encoding failures are acceptable here; the loop only
                # exercises the encoders on long input.
                try:
                    text.encode(encoding, handler)
                except UnicodeError:
                    pass
Example #14
Source File: transliterator.py    From indic_transliteration with MIT License 6 votes vote down vote up
def __init__(self, unicodeHexValue, block):
    """ Set up a unicode character.

    Arguments:
    unicodeHexValue -- an integer that should correspond to a
                       Unicode code point.
    block -- the CharacterBlock this character belongs to.

    Raises:
    ValueError -- if unicodeHexValue is outside 0..0x10FFFF, or if
                  unicodedata.name() has no name for the character.

    """
    if unicodeHexValue < 0 or unicodeHexValue > 0x10FFFF:
        # Raise a real exception instance; raising a (class, message)
        # tuple is a TypeError on Python 3.
        raise ValueError("numeric value outside Unicode range")
    self.unicodeHexValue = unicodeHexValue
    # Use name check to filter out unused characters:
    # unicodedata.name() raises ValueError for these.
    self.chr = chr(self.unicodeHexValue)
    self.name = unicodedata.name(self.chr)
    self.equivalents = {}
    self._block = block
Example #15
Source File: itrans_transliterator.py    From cltk with MIT License 6 votes vote down vote up
def _equivalent(self, char, prev, next, implicitA):
    """ Transliterate a Devanagari character to Latin.

    Returns a list of equivalents looked up under ``self.name``: the
    character's own equivalent (skipped for VIRAMA), followed by the
    equivalent of the Devanagari LETTER A when *implicitA* is true, the
    character is a consonant, and the next character is either absent or
    neither a VIRAMA nor a vowel.  *prev* is not used.

    """
    virama = DevanagariCharacter._VIRAMA
    output = []
    if char.unichr != virama:
        output.append(char.equivalents[self.name])
    # Append implicit A to consonants if the next character isn't a vowel.
    if implicitA and char.isConsonant:
        if next is None or (next.unichr != virama and not next.isVowel):
            letter_a = characterBlocks['DEVANAGARI'][DevanagariCharacter._LETTER_A]
            output.append(letter_a.equivalents[self.name])
    return output
Example #16
Source File: itrans_transliterator.py    From cltk with MIT License 6 votes vote down vote up
def __init__(self, unicodeHexValue, block):
    """ Set up a unicode character.

    Arguments:
    unicodeHexValue -- an integer that should correspond to a
                       Unicode code point.
    block -- the CharacterBlock this character belongs to.

    Raises:
    ValueError -- if unicodeHexValue is outside 0..0x10FFFF, or if
                  unicodedata.name() has no name for the character.

    """
    out_of_range = unicodeHexValue < 0 or unicodeHexValue > 0x10FFFF
    if out_of_range:
        raise ValueError("numeric value outside Unicode range")
    self.unicodeHexValue = unicodeHexValue
    # Use name check to filter out unused characters:
    # unicodedata.name() raises ValueError for these.
    character = py23char(self.unicodeHexValue)
    self.unichr = character
    self.name = unicodedata.name(character)
    self.equivalents = {}
    self._block = block
Example #17
Source File: phonology.py    From cltk with MIT License 6 votes vote down vote up
def remove_diacritics(self):
    """
    :return: str: the input string stripped of its diacritics

    ``self.word`` is decomposed with NFKD and only the characters whose
    Unicode name starts with 'LATIN' are kept, so combining marks are
    dropped, and so is every other non-Latin character.  Characters that
    have no Unicode name (such as control characters) are dropped too
    instead of raising ValueError.

    Examples:
        >>> Word('ġelǣd').remove_diacritics()
        'gelæd'

    """
    decomposed = unicodedata.normalize('NFKD', self.word)
    # The '' default makes unnamed characters fail the prefix test rather
    # than abort the whole conversion.
    return ''.join(
        c for c in decomposed
        if unicodedata.name(c, '').startswith('LATIN')
    )
Example #18
Source File: mathtext.py    From matplotlib-4-abaqus with MIT License 6 votes vote down vote up
def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
    """
    Draw a glyph at

      - *ox*, *oy*: position

      - *facename*: One of the TeX face names

      - *font_class*: passed through to ``self._get_info``

      - *sym*: TeX symbol name or single character

      - *fontsize*: fontsize in points

      - *dpi*: The dpi to draw at.

    Besides forwarding the glyph to ``self.mathtext_backend``, the glyph's
    ``num`` is recorded in ``self.used_characters`` under the key returned
    for its font file by ``get_realpath_and_stat``.
    """
    glyph = self._get_info(facename, font_class, sym, fontsize, dpi)
    font_path, font_key = get_realpath_and_stat(glyph.font.fname)
    # One (path, set-of-nums) record per font key, created on first use.
    record = self.used_characters.setdefault(font_key, (font_path, set()))
    record[1].add(glyph.num)
    self.mathtext_backend.render_glyph(ox, oy, glyph)
Example #19
Source File: mathtext.py    From matplotlib-4-abaqus with MIT License 6 votes vote down vote up
def get_unicode_index(symbol):
    r"""get_unicode_index(symbol) -> integer

    Return the integer index (from the Unicode table) of symbol.  *symbol*
    can be a single unicode character, a TeX command (e.g. r'\pi'), or a
    Type1 symbol name (e.g. 'phi').

    Raises ValueError when *symbol* is neither a single character nor,
    after stripping backslashes from both ends, a key of ``tex2uni``.
    """
    # From UTF #25: U+2212 minus sign is the preferred
    # representation of the unary and binary minus sign rather than
    # the ASCII-derived U+002D hyphen-minus, because minus sign is
    # unambiguous and because it is rendered with a more desirable
    # length, usually longer than a hyphen.
    if symbol == '-':
        return 0x2212
    try:  # This will succeed if symbol is a single unicode char
        return ord(symbol)
    except TypeError:
        # Not a single character; fall through to the symbol-name lookup.
        pass
    try:  # Is symbol a TeX symbol (e.g. \alpha)
        return tex2uni[symbol.strip("\\")]
    except KeyError:
        # Explicit mapping instead of locals(), so the message does not
        # depend on the names of local variables.
        message = """'%(symbol)s' is not a valid Unicode character or
TeX/Type1 symbol""" % {'symbol': symbol}
        raise ValueError(message)
Example #20
Source File: plugin.py    From limnoria-plugins with Do What The F*ck You Want To Public License 6 votes vote down vote up
def unicode(self, irc, msg, args, query):
        """[character]
        Look up unicode character details
        """
        # Query unicodelookup.com for the requested text.
        lookup_url = "http://unicodelookup.com/lookup?" + urlencode({"q": query, "o": 0})
        raw = web.getUrl(lookup_url)
        try:
            parsed = json.loads(raw)
            entries = []
            for row in parsed["results"]:
                code_point = row[2].replace("0x", "U+")
                # NOTE(review): the name is looked up from the query text,
                # not from the current result row -- confirm this is intended.
                char_name = unicodedata.name("{0}".format(query))
                entries.append(
                    "%s (%s): %s [HTML: %s / Decimal: %s / Hex: %s]"
                    % (code_point, char_name, row[4], row[3], row[1], row[2])
                )
            irc.reply("; ".join(entries))
        except ValueError:
            # Raised by json.loads on undecodable data and by
            # unicodedata.name for a character without a name.
            irc.reply("No unicode characters matching /" + query + "/ found.")
Example #21
Source File: test_ucn.py    From oss-ftp with MIT License 6 votes vote down vote up
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must raise UnicodeError when decoded
    # with 'unicode-escape' under the 'strict' error handler.
    malformed = (
        "\\N{blah}",                    # bogus character name
        "\\N{%s}" % ("x" * 100000),     # long bogus character name
        "\\N{SPACE",                    # missing closing brace
        "\\NSPACE",                     # missing opening brace
    )
    for escaped in malformed:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #22
Source File: mathtext.py    From Computable with MIT License 6 votes vote down vote up
def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
    """
    Draw a glyph at

      - *ox*, *oy*: position

      - *facename*: One of the TeX face names

      - *font_class*: passed through to ``self._get_info``

      - *sym*: TeX symbol name or single character

      - *fontsize*: fontsize in points

      - *dpi*: The dpi to draw at.

    Besides forwarding the glyph to ``self.mathtext_backend``, the glyph's
    ``num`` is recorded in ``self.used_characters`` under the key returned
    for its font file by ``get_realpath_and_stat``.
    """
    glyph = self._get_info(facename, font_class, sym, fontsize, dpi)
    font_path, font_key = get_realpath_and_stat(glyph.font.fname)
    # One (path, set-of-nums) record per font key, created on first use.
    record = self.used_characters.setdefault(font_key, (font_path, set()))
    record[1].add(glyph.num)
    self.mathtext_backend.render_glyph(ox, oy, glyph)
Example #23
Source File: core.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def _combining_class(cp):
    """Return ``unicodedata.combining`` for the code point *cp*.

    When the combining value is 0 the character's name is looked up as
    well, and ValueError is raised if that name is empty.
    NOTE(review): ``unicodedata.name`` is called without a default, so it
    presumably raises ValueError itself for unnamed characters -- confirm.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #24
Source File: gftools-unicode-names.py    From gftools with Apache License 2.0 5 votes vote down vote up
def _ReformatLine(line):
  """Append the character and its Unicode name to a '0x....' line.

  Lines that do not start with '0x' are returned unchanged.  Otherwise the
  four characters after the prefix are parsed as a hexadecimal codepoint
  and the result is '0x<HEX>  <char> <name>', with an empty name for
  characters that have none.
  NOTE(review): only line[2:6] is parsed, so codepoints written with more
  than four hex digits would be truncated -- confirm the input format.
  """
  if not line.startswith('0x'):
    return line
  codepoint = int(line[2:6], 16)
  char = unichr(codepoint)
  described = char + ' ' + unicodedata.name(char, '')
  return '0x%04X  %s' % (codepoint, described)
Example #25
Source File: utils.py    From bot with MIT License 5 votes vote down vote up
async def charinfo(self, ctx: Context, *, characters: str) -> None:
    """Shows you information on up to 25 unicode characters."""
    # Declared ``async`` because the body awaits ``ctx.send``; ``await``
    # inside a plain ``def`` is a SyntaxError.

    # Input that starts with a custom Discord emoji (<name:id> or
    # <a:name:id>) is rejected with an error embed.
    match = re.match(r"<(a?):(\w+):(\d+)>", characters)
    if match:
        embed = Embed(
            title="Non-Character Detected",
            description=(
                "Only unicode characters can be processed, but a custom Discord emoji "
                "was found. Please remove it and try again."
            )
        )
        embed.colour = Colour.red()
        await ctx.send(embed=embed)
        return

    if len(characters) > 25:
        embed = Embed(title=f"Too many characters ({len(characters)}/25)")
        embed.colour = Colour.red()
        await ctx.send(embed=embed)
        return

    def get_info(char: str) -> Tuple[str, str]:
        """Return (markdown info line, escape sequence) for one character."""
        digit = f"{ord(char):x}"
        # Up to four hex digits fit a \uXXXX escape; longer code points
        # use the eight-digit \UXXXXXXXX form.
        if len(digit) <= 4:
            u_code = f"\\u{digit:>04}"
        else:
            u_code = f"\\U{digit:>08}"
        url = f"https://www.compart.com/en/unicode/U+{digit:>04}"
        # Characters without a Unicode name get an empty link text.
        name = f"[{unicodedata.name(char, '')}]({url})"
        info = f"`{u_code.ljust(10)}`: {name} - {utils.escape_markdown(char)}"
        return info, u_code

    charlist, rawlist = zip(*(get_info(c) for c in characters))

    embed = Embed(description="\n".join(charlist))
    embed.set_author(name="Character Info")

    # The raw escape sequences are only shown for multi-character input.
    if len(characters) > 1:
        embed.add_field(name='Raw', value=f"`{''.join(rawlist)}`", inline=False)

    await ctx.send(embed=embed)
Example #26
Source File: utils.py    From bot with MIT License 5 votes vote down vote up
async def send_pep_zero(self, ctx: Context) -> None:
    """Send information about PEP 0."""
    # Declared ``async`` because the body awaits ``ctx.send``; ``await``
    # inside a plain ``def`` is a SyntaxError.
    pep_embed = Embed(
        title="**PEP 0 - Index of Python Enhancement Proposals (PEPs)**",
        description="[Link](https://www.python.org/dev/peps/)"
    )
    pep_embed.set_thumbnail(url=ICON_URL)
    # The field values for PEP 0 are hard-coded here.
    pep_embed.add_field(name="Status", value="Active")
    pep_embed.add_field(name="Created", value="13-Jul-2000")
    pep_embed.add_field(name="Type", value="Informational")

    await ctx.send(embed=pep_embed)
Example #27
Source File: core.py    From pipenv with MIT License 5 votes vote down vote up
def _combining_class(cp):
    """Return ``unicodedata.combining`` for the code point *cp*.

    When the combining value is 0 the character's name is looked up as
    well, and ValueError is raised if that name is empty.
    NOTE(review): ``unicodedata.name`` is called without a default, so it
    presumably raises ValueError itself for unnamed characters -- confirm.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #28
Source File: core.py    From pipenv with MIT License 5 votes vote down vote up
def _combining_class(cp):
    """Return ``unicodedata.combining`` for the code point *cp*.

    When the combining value is 0 the character's name is looked up as
    well, and ValueError is raised if that name is empty.
    NOTE(review): ``unicodedata.name`` is called without a default, so it
    presumably raises ValueError itself for unnamed characters -- confirm.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #29
Source File: core.py    From deepWordBug with Apache License 2.0 5 votes vote down vote up
def _combining_class(cp):
    """Return ``unicodedata.combining`` for the code point *cp*.

    When the combining value is 0 the character's name is looked up as
    well, and ValueError is raised if that name is empty.
    NOTE(review): ``unicodedata.name`` is called without a default, so it
    presumably raises ValueError itself for unnamed characters -- confirm.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #30
Source File: mathtext.py    From matplotlib-4-abaqus with MIT License 5 votes vote down vote up
def __init__(self, glue_type, copy=False):
        # Initialise the Node base, then resolve *glue_type* to a GlueSpec:
        # a string-like value goes through GlueSpec.factory, a GlueSpec
        # instance is used directly, anything else is rejected.
        Node.__init__(self)
        self.glue_subtype = 'normal'
        if is_string_like(glue_type):
            spec = GlueSpec.factory(glue_type)
        elif isinstance(glue_type, GlueSpec):
            spec = glue_type
        else:
            raise ArgumentError("glue_type must be a glue spec name or instance.")
        # With copy=True the stored spec is a copy of the resolved one.
        self.glue_spec = spec.copy() if copy else spec