Python unicodedata.name() Examples
The following are 30 code examples of unicodedata.name().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module unicodedata, or try the search function.
Example #1
Source File: transliterator.py From indic_transliteration with MIT License | 6 votes |
def _equivalent(self, char, prev, next, implicitA):
    """ Map one Devanagari character to its Latin equivalent(s).

    The implicitA argument is overridden to False on entry, so the
    implicit-A branch below never runs; a VIRAMA contributes nothing.

    Returns the list of equivalents collected for this character.
    """
    implicitA = False  # Force it!
    collected = []
    if char.chr != DevanagariCharacter._VIRAMA:
        collected.append(char.equivalents[self.name])

    # Implicit A follows a consonant unless the next character is a
    # VIRAMA or a vowel (end of input also counts as "no vowel").
    if implicitA and char.isConsonant:
        needs_implicit_a = next is None or (
            next.chr != DevanagariCharacter._VIRAMA and not next.isVowel
        )
        if needs_implicit_a:
            letter_a = characterBlocks['DEVANAGARI'][
                DevanagariCharacter._LETTER_A]
            collected.append(letter_a.equivalents[self.name])
    return collected
Example #2
Source File: test_ucn.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_hangul_syllables(self):
    # (character name, character) pairs, checked in this order.
    samples = (
        ("HANGUL SYLLABLE GA", u"\uac00"),
        ("HANGUL SYLLABLE GGWEOSS", u"\uafe8"),
        ("HANGUL SYLLABLE DOLS", u"\ub3d0"),
        ("HANGUL SYLLABLE RYAN", u"\ub7b8"),
        ("HANGUL SYLLABLE MWIK", u"\ubba0"),
        ("HANGUL SYLLABLE BBWAEM", u"\ubf88"),
        ("HANGUL SYLLABLE SSEOL", u"\uc370"),
        ("HANGUL SYLLABLE YI", u"\uc758"),
        ("HANGUL SYLLABLE JJYOSS", u"\ucb40"),
        ("HANGUL SYLLABLE KYEOLS", u"\ucf28"),
        ("HANGUL SYLLABLE PAN", u"\ud310"),
        ("HANGUL SYLLABLE HWEOK", u"\ud6f8"),
        ("HANGUL SYLLABLE HIH", u"\ud7a3"),
    )
    for syllable_name, syllable in samples:
        self.checkletter(syllable_name, syllable)

    # The code point right after the last sample must have no name.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #3
Source File: gftools-ttf2cp.py From gftools with Apache License 2.0 | 6 votes |
def main(argv):
    """Print every code point found in the font files named in argv[1:].

    Exits via sys.exit with a message when no file is given or when an
    argument is not an existing file. FLAGS.show_char and
    FLAGS.show_subsets add the character/name and the subset list to
    each printed line.
    """
    font_paths = argv[1:]
    if not font_paths:
        sys.exit('Must specify one or more font files.')

    codepoints = set()
    for font_path in font_paths:
        if not os.path.isfile(font_path):
            sys.exit('%s is not a file' % font_path)
        codepoints |= fonts.CodepointsInFont(font_path)

    for codepoint in sorted(codepoints):
        char_part = ''
        subset_part = ''
        if FLAGS.show_char:
            char = unichr(codepoint)
            char_part = ' ' + char.strip() + ' ' + unicodedata.name(char, '')
        if FLAGS.show_subsets:
            subset_part = ' subset:%s' % ','.join(
                fonts.SubsetsForCodepoint(codepoint))
        print(u'0x%04X%s%s' % (codepoint, char_part, subset_part))
Example #4
Source File: signs_server.py From concurrency2017 with MIT License | 6 votes |
def main(global_delay, local_delay, concurrency):
    """Configure the module-level settings, build the index and serve.

    Stores the two delays and a semaphore sized by concurrency in module
    globals, builds the inverted index, registers the three GET routes
    and runs the web application on PORT.
    """
    global global_sleep, local_sleep, semaphore, index

    global_sleep, local_sleep = global_delay, local_delay
    semaphore = asyncio.Semaphore(concurrency)

    print('Global delay =', global_delay)
    print('Local delay =', local_delay)
    print('Max. concurrency =', concurrency)

    print('Building inverted index...')
    index = build_index()

    app = web.Application()
    routes = (
        ('/', usage),
        ('/index/{word}', index_for),
        ('/name/{char}', char_name),
    )
    for path, handler in routes:
        app.router.add_get(path, handler)

    print('Listening on port', PORT)
    web.run_app(app, port=PORT)
Example #5
Source File: test_codeccallbacks.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: codecs.register_error("test." + err, codecs.lookup_error(err)) l = 1000 errors += [ "test." + err for err in errors ] for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16", "utf-32"): for err in errors: try: uni.encode(enc, err) except UnicodeError: pass
Example #6
Source File: test_ucn.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must be rejected in strict mode.
    malformed_escapes = (
        "\\N{blah}",                  # bogus character name
        "\\N{%s}" % ("x" * 100000),   # long bogus character name
        "\\N{SPACE",                  # missing closing brace
        "\\NSPACE",                   # missing opening brace
    )
    for escaped in malformed_escapes:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #7
Source File: test_regressions.py From ironpython2 with Apache License 2.0 | 6 votes |
def test_ipy2_gh357(self):
    """https://github.com/IronLanguages/ironpython2/issues/357

    Checks every unicodedata lookup for the single ideograph U+4E2D.
    """
    import unicodedata

    ideograph = u'\u4e2d'

    # The expected name depends on the is_cli flag.
    if is_cli:
        expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
    else:
        expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
    self.assertEqual(unicodedata.name(ideograph), expected_name)

    # No numeric value: raises without a default, returns the default
    # when one is supplied.
    for numeric_lookup in (unicodedata.decimal,
                           unicodedata.digit,
                           unicodedata.numeric):
        self.assertRaises(ValueError, numeric_lookup, ideograph)
        self.assertEqual(numeric_lookup(ideograph, 0), 0)

    self.assertEqual(unicodedata.category(ideograph), 'Lo')
    self.assertEqual(unicodedata.bidirectional(ideograph), 'L')
    self.assertEqual(unicodedata.combining(ideograph), 0)
    self.assertEqual(unicodedata.east_asian_width(ideograph), 'W')
    self.assertEqual(unicodedata.mirrored(ideograph), 0)
    self.assertEqual(unicodedata.decomposition(ideograph), '')
Example #8
Source File: test_codeccallbacks.py From BinderFilter with MIT License | 6 votes |
def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: codecs.register_error("test." + err, codecs.lookup_error(err)) l = 1000 errors += [ "test." + err for err in errors ] for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16", "utf-32"): for err in errors: try: uni.encode(enc, err) except UnicodeError: pass
Example #9
Source File: test_ucn.py From BinderFilter with MIT License | 6 votes |
def test_hangul_syllables(self):
    # (character name, character) pairs, checked in this order.
    samples = (
        ("HANGUL SYLLABLE GA", u"\uac00"),
        ("HANGUL SYLLABLE GGWEOSS", u"\uafe8"),
        ("HANGUL SYLLABLE DOLS", u"\ub3d0"),
        ("HANGUL SYLLABLE RYAN", u"\ub7b8"),
        ("HANGUL SYLLABLE MWIK", u"\ubba0"),
        ("HANGUL SYLLABLE BBWAEM", u"\ubf88"),
        ("HANGUL SYLLABLE SSEOL", u"\uc370"),
        ("HANGUL SYLLABLE YI", u"\uc758"),
        ("HANGUL SYLLABLE JJYOSS", u"\ucb40"),
        ("HANGUL SYLLABLE KYEOLS", u"\ucf28"),
        ("HANGUL SYLLABLE PAN", u"\ud310"),
        ("HANGUL SYLLABLE HWEOK", u"\ud6f8"),
        ("HANGUL SYLLABLE HIH", u"\ud7a3"),
    )
    for syllable_name, syllable in samples:
        self.checkletter(syllable_name, syllable)

    # The code point right after the last sample must have no name.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #10
Source File: test_ucn.py From BinderFilter with MIT License | 6 votes |
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must be rejected in strict mode.
    malformed_escapes = (
        "\\N{blah}",                  # bogus character name
        "\\N{%s}" % ("x" * 100000),   # long bogus character name
        "\\N{SPACE",                  # missing closing brace
        "\\NSPACE",                   # missing opening brace
    )
    for escaped in malformed_escapes:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #11
Source File: mathtext.py From Computable with MIT License | 6 votes |
def get_unicode_index(symbol):
    r"""get_unicode_index(symbol) -> integer

    Resolve *symbol* to its integer index in the Unicode table.

    *symbol* may be a single unicode character, a TeX command
    (i.e. r'\pi'), or a Type1 symbol name (i.e. 'phi').

    Raises ValueError when the symbol is neither a single character nor
    a key of the tex2uni table.
    """
    # The original comment cites "UTF #25": U+2212 minus sign is
    # preferred over the ASCII-derived U+002D hyphen-minus for unary and
    # binary minus, being unambiguous and usually rendered longer than a
    # hyphen.
    if symbol == '-':
        return 0x2212

    # A single unicode character converts directly.
    try:
        return ord(symbol)
    except TypeError:
        pass

    # Otherwise treat it as a TeX symbol (i.e. \alpha) with any
    # surrounding backslashes removed.
    tex_name = symbol.strip("\\")
    try:
        return tex2uni[tex_name]
    except KeyError:
        raise ValueError(
            "'%(symbol)s' is not a valid Unicode character or TeX/Type1 symbol"
            % {'symbol': symbol})
Example #12
Source File: test_ucn.py From oss-ftp with MIT License | 6 votes |
def test_hangul_syllables(self):
    # (character name, character) pairs, checked in this order.
    samples = (
        ("HANGUL SYLLABLE GA", u"\uac00"),
        ("HANGUL SYLLABLE GGWEOSS", u"\uafe8"),
        ("HANGUL SYLLABLE DOLS", u"\ub3d0"),
        ("HANGUL SYLLABLE RYAN", u"\ub7b8"),
        ("HANGUL SYLLABLE MWIK", u"\ubba0"),
        ("HANGUL SYLLABLE BBWAEM", u"\ubf88"),
        ("HANGUL SYLLABLE SSEOL", u"\uc370"),
        ("HANGUL SYLLABLE YI", u"\uc758"),
        ("HANGUL SYLLABLE JJYOSS", u"\ucb40"),
        ("HANGUL SYLLABLE KYEOLS", u"\ucf28"),
        ("HANGUL SYLLABLE PAN", u"\ud310"),
        ("HANGUL SYLLABLE HWEOK", u"\ud6f8"),
        ("HANGUL SYLLABLE HIH", u"\ud7a3"),
    )
    for syllable_name, syllable in samples:
        self.checkletter(syllable_name, syllable)

    # The code point right after the last sample must have no name.
    import unicodedata
    self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
Example #13
Source File: test_codeccallbacks.py From oss-ftp with MIT License | 6 votes |
def test_longstrings(self): # test long strings to check for memory overflow problems errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"] # register the handlers under different names, # to prevent the codec from recognizing the name for err in errors: codecs.register_error("test." + err, codecs.lookup_error(err)) l = 1000 errors += [ "test." + err for err in errors ] for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]: for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16", "utf-32"): for err in errors: try: uni.encode(enc, err) except UnicodeError: pass
Example #14
Source File: transliterator.py From indic_transliteration with MIT License | 6 votes |
def __init__(self, unicodeHexValue, block):
    """ Set up a unicode character.

    Arguments:
    unicodeHexValue -- an integer that should correspond to a
                       Unicode code point.
    block -- the CharacterBlock this character belongs to.

    Raises:
    ValueError -- if unicodeHexValue is not a valid code point, or if
                  the code point has no Unicode name.

    """
    if unicodeHexValue < 0 or unicodeHexValue > 0x10FFFF:
        # Raise an exception instance: raising the tuple
        # (ValueError, "...") is itself a TypeError on Python 3.
        raise ValueError("numeric value outside Unicode range")
    self.unicodeHexValue = unicodeHexValue
    # The name lookup doubles as a filter for unused characters:
    # unicodedata.name() raises ValueError for these.
    self.chr = chr(self.unicodeHexValue)
    self.name = unicodedata.name(self.chr)
    self.equivalents = {}
    self._block = block
Example #15
Source File: itrans_transliterator.py From cltk with MIT License | 6 votes |
def _equivalent(self, char, prev, next, implicitA):
    """ Map one Devanagari character to its Latin equivalent(s).

    A VIRAMA contributes nothing of its own. When implicitA is true, a
    consonant also gets an implicit A unless the next character is a
    VIRAMA or a vowel.

    Returns the list of equivalents collected for this character.
    """
    collected = []
    if char.unichr != DevanagariCharacter._VIRAMA:
        collected.append(char.equivalents[self.name])

    if implicitA and char.isConsonant:
        # End of input counts as "not followed by a vowel".
        needs_implicit_a = next is None or (
            next.unichr != DevanagariCharacter._VIRAMA and not next.isVowel
        )
        if needs_implicit_a:
            letter_a = characterBlocks['DEVANAGARI'][
                DevanagariCharacter._LETTER_A]
            collected.append(letter_a.equivalents[self.name])
    return collected
Example #16
Source File: itrans_transliterator.py From cltk with MIT License | 6 votes |
def __init__(self, unicodeHexValue, block):
    """ Initialise a character from its code point.

    Arguments:
    unicodeHexValue -- integer expected to be a Unicode code point.
    block -- the CharacterBlock that owns this character.

    Raises:
    ValueError -- when unicodeHexValue is outside 0..0x10FFFF, or when
                  the name lookup rejects the character.

    """
    too_large = unicodeHexValue > 0x10FFFF
    if too_large or unicodeHexValue < 0:
        raise ValueError("numeric value outside Unicode range")

    self.unicodeHexValue = unicodeHexValue
    # Looking up the name filters out unused characters, because
    # unicodedata.name() raises ValueError for them.
    self.unichr = py23char(self.unicodeHexValue)
    self.name = unicodedata.name(self.unichr)
    self.equivalents = {}
    self._block = block
Example #17
Source File: phonology.py From cltk with MIT License | 6 votes |
def remove_diacritics(self):
    """
    Decompose self.word with NFKD and keep only the characters whose
    Unicode name starts with 'LATIN'. Combining marks are dropped, and
    so is every other non-Latin character, including characters that
    have no Unicode name at all.

    :return: str: the input string stripped of its diacritics

    Examples:

    >>> Word('ġelǣd').remove_diacritics()
    'gelæd'

    """
    decomposed = unicodedata.normalize('NFKD', self.word)
    # The '' default keeps unnamed characters (e.g. control characters)
    # from raising ValueError; they simply fail the LATIN test.
    return ''.join(
        c for c in decomposed
        if unicodedata.name(c, '').startswith('LATIN')
    )
Example #18
Source File: mathtext.py From matplotlib-4-abaqus with MIT License | 6 votes |
def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
    """
    Record the glyph as used and hand it to the mathtext backend.

    - *ox*, *oy*: position
    - *facename*: One of the TeX face names
    - *font_class*:
    - *sym*: TeX symbol name or single character
    - *fontsize*: fontsize in points
    - *dpi*: The dpi to draw at.
    """
    glyph_info = self._get_info(facename, font_class, sym, fontsize, dpi)
    font_path, font_key = get_realpath_and_stat(glyph_info.font.fname)

    # One (path, set-of-glyph-numbers) entry per font key; created on
    # first use.
    self.used_characters.setdefault(
        font_key, (font_path, set()))[1].add(glyph_info.num)

    self.mathtext_backend.render_glyph(ox, oy, glyph_info)
Example #19
Source File: mathtext.py From matplotlib-4-abaqus with MIT License | 6 votes |
def get_unicode_index(symbol):
    r"""get_unicode_index(symbol) -> integer

    Resolve *symbol* to its integer index in the Unicode table.

    *symbol* may be a single unicode character, a TeX command
    (i.e. r'\pi'), or a Type1 symbol name (i.e. 'phi').

    Raises ValueError when the symbol is neither a single character nor
    a key of the tex2uni table.
    """
    # The original comment cites "UTF #25": U+2212 minus sign is
    # preferred over the ASCII-derived U+002D hyphen-minus for unary and
    # binary minus, being unambiguous and usually rendered longer than a
    # hyphen.
    if symbol == '-':
        return 0x2212

    # A single unicode character converts directly.
    try:
        return ord(symbol)
    except TypeError:
        pass

    # Otherwise treat it as a TeX symbol (i.e. \alpha) with any
    # surrounding backslashes removed.
    tex_name = symbol.strip("\\")
    try:
        return tex2uni[tex_name]
    except KeyError:
        raise ValueError(
            "'%(symbol)s' is not a valid Unicode character or TeX/Type1 symbol"
            % {'symbol': symbol})
Example #20
Source File: plugin.py From limnoria-plugins with Do What The F*ck You Want To Public License | 6 votes |
def unicode(self, irc, msg, args, query):
    """[character]

    Look up unicode character details
    """
    url = "http://unicodelookup.com/lookup?" + urlencode({"q": query, "o": 0})
    data = web.getUrl(url)
    try:
        data = json.loads(data)
        responses = []
        for result in data["results"]:
            ucode = result[2].replace("0x", "U+")
            # unicodedata.name() raises ValueError for an unnamed
            # character and TypeError when the query is not exactly one
            # character long; both are handled below.
            name = unicodedata.name("{0}".format(query))
            responses.append(
                "%s (%s): %s [HTML: %s / Decimal: %s / Hex: %s]"
                % (ucode, name, result[4], result[3], result[1], result[2])
            )
    except (TypeError, ValueError):
        # Covers undecodable JSON as well as the name-lookup failures
        # above; a multi-character query used to escape as an uncaught
        # TypeError.
        irc.reply("No unicode characters matching /" + query + "/ found.")
    else:
        irc.reply("; ".join(responses))
Example #21
Source File: test_ucn.py From oss-ftp with MIT License | 6 votes |
def test_strict_eror_handling(self):
    # Each malformed \N{...} escape must be rejected in strict mode.
    malformed_escapes = (
        "\\N{blah}",                  # bogus character name
        "\\N{%s}" % ("x" * 100000),   # long bogus character name
        "\\N{SPACE",                  # missing closing brace
        "\\NSPACE",                   # missing opening brace
    )
    for escaped in malformed_escapes:
        self.assertRaises(
            UnicodeError,
            unicode, escaped, 'unicode-escape', 'strict'
        )
Example #22
Source File: mathtext.py From Computable with MIT License | 6 votes |
def render_glyph(self, ox, oy, facename, font_class, sym, fontsize, dpi):
    """
    Record the glyph as used and hand it to the mathtext backend.

    - *ox*, *oy*: position
    - *facename*: One of the TeX face names
    - *font_class*:
    - *sym*: TeX symbol name or single character
    - *fontsize*: fontsize in points
    - *dpi*: The dpi to draw at.
    """
    glyph_info = self._get_info(facename, font_class, sym, fontsize, dpi)
    font_path, font_key = get_realpath_and_stat(glyph_info.font.fname)

    # One (path, set-of-glyph-numbers) entry per font key; created on
    # first use.
    self.used_characters.setdefault(
        font_key, (font_path, set()))[1].add(glyph_info.num)

    self.mathtext_backend.render_glyph(ox, oy, glyph_info)
Example #23
Source File: core.py From Mastering-Elasticsearch-7.0 with MIT License | 5 votes |
def _combining_class(cp):
    """Return the canonical combining class of code point *cp*.

    For a class of 0 the character's name is looked up as well, and
    ValueError is raised if that name is empty.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #24
Source File: gftools-unicode-names.py From gftools with Apache License 2.0 | 5 votes |
def _ReformatLine(line):
    """Annotate a '0x....' line with its character and Unicode name.

    Lines that do not start with '0x' are returned unchanged. Otherwise
    the four characters after the prefix are parsed as a hexadecimal
    code point and the line is rebuilt from it.
    """
    if not line.startswith('0x'):
        return line

    codepoint = int(line[2:6], 16)
    char = unichr(codepoint)
    described = char + ' ' + unicodedata.name(char, '')
    return '0x%04X %s' % (codepoint, described)
Example #25
Source File: utils.py From bot with MIT License | 5 votes |
# Declared ``async`` because the body awaits ctx.send(); ``await`` inside
# a plain ``def`` is a SyntaxError.
async def charinfo(self, ctx: Context, *, characters: str) -> None:
    """Shows you information on up to 25 unicode characters."""
    # Reject input that starts with custom Discord emoji markup
    # (<:name:id> or <a:name:id>).
    match = re.match(r"<(a?):(\w+):(\d+)>", characters)
    if match:
        embed = Embed(
            title="Non-Character Detected",
            description=(
                "Only unicode characters can be processed, but a custom Discord emoji "
                "was found. Please remove it and try again."
            )
        )
        embed.colour = Colour.red()
        await ctx.send(embed=embed)
        return

    if len(characters) > 25:
        embed = Embed(title=f"Too many characters ({len(characters)}/25)")
        embed.colour = Colour.red()
        await ctx.send(embed=embed)
        return

    def get_info(char: str) -> Tuple[str, str]:
        """Return (display line, escape sequence) for one character."""
        digit = f"{ord(char):x}"
        # Up to four hex digits fit a \uXXXX escape; longer values need
        # the eight-digit \UXXXXXXXX form.
        if len(digit) <= 4:
            u_code = f"\\u{digit:>04}"
        else:
            u_code = f"\\U{digit:>08}"
        url = f"https://www.compart.com/en/unicode/U+{digit:>04}"
        # Characters without a Unicode name get an empty link text.
        name = f"[{unicodedata.name(char, '')}]({url})"
        info = f"`{u_code.ljust(10)}`: {name} - {utils.escape_markdown(char)}"
        return info, u_code

    charlist, rawlist = zip(*(get_info(c) for c in characters))

    embed = Embed(description="\n".join(charlist))
    embed.set_author(name="Character Info")

    # The combined raw escape string is only added for multi-character
    # input.
    if len(characters) > 1:
        embed.add_field(name='Raw', value=f"`{''.join(rawlist)}`", inline=False)

    await ctx.send(embed=embed)
Example #26
Source File: utils.py From bot with MIT License | 5 votes |
# Declared ``async`` because the body awaits ctx.send(); ``await`` inside
# a plain ``def`` is a SyntaxError.
async def send_pep_zero(self, ctx: Context) -> None:
    """Send information about PEP 0."""
    # The embed contents are hard-coded here rather than fetched.
    pep_embed = Embed(
        title="**PEP 0 - Index of Python Enhancement Proposals (PEPs)**",
        description="[Link](https://www.python.org/dev/peps/)"
    )
    pep_embed.set_thumbnail(url=ICON_URL)
    pep_embed.add_field(name="Status", value="Active")
    pep_embed.add_field(name="Created", value="13-Jul-2000")
    pep_embed.add_field(name="Type", value="Informational")

    await ctx.send(embed=pep_embed)
Example #27
Source File: core.py From pipenv with MIT License | 5 votes |
def _combining_class(cp):
    """Return the canonical combining class of code point *cp*.

    For a class of 0 the character's name is looked up as well, and
    ValueError is raised if that name is empty.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #28
Source File: core.py From pipenv with MIT License | 5 votes |
def _combining_class(cp):
    """Return the canonical combining class of code point *cp*.

    For a class of 0 the character's name is looked up as well, and
    ValueError is raised if that name is empty.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #29
Source File: core.py From deepWordBug with Apache License 2.0 | 5 votes |
def _combining_class(cp):
    """Return the canonical combining class of code point *cp*.

    For a class of 0 the character's name is looked up as well, and
    ValueError is raised if that name is empty.
    """
    char = unichr(cp)
    combining = unicodedata.combining(char)
    if combining == 0 and not unicodedata.name(char):
        raise ValueError("Unknown character in unicodedata")
    return combining
Example #30
Source File: mathtext.py From matplotlib-4-abaqus with MIT License | 5 votes |
def __init__(self, glue_type, copy=False):
    """Create a glue node from a glue spec name or a GlueSpec instance.

    *glue_type* is resolved through GlueSpec.factory when it is
    string-like and used directly when it is already a GlueSpec; any
    other value raises ArgumentError. With *copy* true the node stores
    a copy of the spec instead of the resolved object itself.
    """
    Node.__init__(self)
    self.glue_subtype = 'normal'

    if is_string_like(glue_type):
        resolved = GlueSpec.factory(glue_type)
    elif isinstance(glue_type, GlueSpec):
        resolved = glue_type
    else:
        raise ArgumentError("glue_type must be a glue spec name or instance.")

    self.glue_spec = resolved.copy() if copy else resolved