Python unicodedata.lookup() Examples
The following are 30 code examples of unicodedata.lookup().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module unicodedata, or try the search function.
Example #1
Source File: paraparser.py From stdm with GNU General Public License v2.0 | 6 votes |
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag and emit the character it designates.

    The character is selected either by its Unicode name (``name``
    attribute) or by a numeric expression (``code`` attribute).  The
    resulting character is UTF-8 encoded and passed to ``self.handle_data``;
    problems are reported through ``self._syntax_error`` and a NUL
    placeholder is emitted instead.

    :param attr: mapping of the tag's attribute names to their values.
    """
    if 'name' in attr:
        if 'code' in attr:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            v = unicodedata.lookup(attr['name']).encode('utf8')
        except KeyError:
            # The message used to interpolate an undefined local called
            # ``name``, which raised NameError instead of reporting the
            # unknown character name.
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % attr['name'])
            v = '\0'
    elif 'code' in attr:
        try:
            # NOTE(review): eval() runs arbitrary expressions taken from the
            # markup; this is unsafe if the markup can come from an
            # untrusted source.  Left unchanged to keep accepted inputs
            # identical.
            v = unichr(int(eval(attr['code']))).encode('utf8')
        except:
            # Deliberately broad: anything the evaluated expression raises
            # is reported as an invalid code attribute.
            self._syntax_error('<unichar/> invalid code attribute %s' % attr['code'])
            v = '\0'
    else:
        v = None
        if attr:
            # Neither name nor code was given, but some other attribute was.
            self._syntax_error('<unichar/> invalid attribute %s' % attr.keys()[0])
    if v is not None:
        self.handle_data(v)
    self._push(_selfClosingTag='unichar')
Example #2
Source File: python.py From pyspelling with MIT License | 6 votes |
def replace_unicode(self, m):
    """Translate one matched escape sequence into its replacement text.

    The first named group of ``m`` that captured something decides how the
    matched text is converted.  Hex and named escapes that cannot be
    converted are returned verbatim.  NUL characters in the result are
    turned into newlines.
    """
    captured = m.groupdict()
    esc = m.group(0)
    # Same precedence as a chain of if/elif tests over these group names.
    branch_order = ('fesc', 'format', 'special', 'char', 'oct', 'name')
    branch = next((key for key in branch_order if captured.get(key)), None)

    if branch == 'fesc':
        value = m.group(0)
    elif branch == 'format':
        value = ' '
    elif branch == 'special':
        value = BACK_SLASH_TRANSLATION[esc]
    elif branch == 'char':
        # Skip the two-character prefix and parse the rest as hexadecimal.
        try:
            value = chr(int(esc[2:], 16))
        except Exception:
            value = esc
    elif branch == 'oct':
        # Skip the leading backslash and parse the rest as octal.
        value = chr(int(esc[1:], 8))
    elif branch == 'name':
        # Strip the three-character opener and the closing brace.
        try:
            value = unicodedata.lookup(esc[3:-1])
        except Exception:
            value = esc

    return value.replace('\x00', '\n')
Example #3
Source File: paraparser.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def start_unichar(self, attr):
    """Handle a ``<unichar/>`` tag and emit the character it designates.

    ``attr`` maps attribute names to values.  A ``name`` attribute is
    resolved with ``unicodedata.lookup``; otherwise a ``code`` attribute is
    evaluated to a code point.  Problems are reported through
    ``self._syntax_error`` and a NUL placeholder is emitted instead.
    """
    has_name = 'name' in attr
    has_code = 'code' in attr
    char = None

    if has_name:
        if has_code:
            self._syntax_error('<unichar/> invalid with both name and code attributes')
        try:
            char = unicodedata.lookup(attr['name'])
        except KeyError:
            self._syntax_error('<unichar/> invalid name attribute\n"%s"' % ascii(attr['name']))
            char = '\0'
    elif has_code:
        try:
            # NOTE(review): eval() runs arbitrary expressions taken from the
            # markup; unsafe if the markup can come from an untrusted source.
            codepoint = int(eval(attr['code']))
            char = chr(codepoint) if isPy3 else unichr(codepoint)
        except:
            # Deliberately broad: anything the evaluated expression raises
            # is reported as an invalid code attribute.
            self._syntax_error('<unichar/> invalid code attribute %s' % ascii(attr['code']))
            char = '\0'
    elif attr:
        # Neither name nor code was given, but some other attribute was.
        self._syntax_error('<unichar/> invalid attribute %s' % list(attr.keys())[0])

    if char is not None:
        self.handle_data(char)
    self._push('unichar',_selfClosingTag='unichar')
Example #4
Source File: utilities.py From particle with BSD 3-Clause "New" or "Revised" License | 6 votes |
def greek_letter_name_to_unicode(letter):
    # type: (str) -> str
    """
    Convert the name of a Greek letter into the matching Unicode character.

    An all-lowercase name selects the small letter; any other casing
    selects the capital letter.

    Examples
    --------
    Lamda -> Λ    (the Unicode character name spells it "LAMDA", not "LAMBDA")
    Omega -> Ω
    omega -> ω
    """
    is_lowercase = letter.lower() == letter
    case = "SMALL" if is_lowercase else "CAPITAL"
    character_name = "GREEK {case} LETTER {name}".format(case=case, name=letter.upper())
    return unicodedata.lookup(character_name)
Example #5
Source File: test_re_jy.py From CTFCrackTools with GNU General Public License v3.0 | 6 votes |
def test_unicode_whitespace(self):
    # Test for http://bugs.jython.org/issue2226
    # Each checked code point must match exactly one of \s and \S, and which
    # one is decided by the whitespace set assembled below.
    whitespace_pattern = re.compile(r'\s', re.UNICODE)
    non_whitespace_pattern = re.compile(r'\S', re.UNICODE)

    separator_categories = {'Zl', 'Zp', 'Zs'}
    control_separators = set(map(chr, (28, 29, 30, 31)))
    extra_whitespace = {
        unicodedata.lookup('MONGOLIAN VOWEL SEPARATOR'),
        u'\u0085',  # NEXT LINE (NEL)
    }
    cpython_whitespace = set(' \t\n\r\f\v') | control_separators | extra_whitespace

    # could test to sys.maxunicode, but does not appear to be necessary
    for codepoint in xrange(0xFFFF):
        if 0xD800 <= codepoint <= 0xDFFF:
            # The surrogate range is not checked.
            continue
        c = unichr(codepoint)
        expect_whitespace = (c in cpython_whitespace
                             or category(c) in separator_categories)
        if not expect_whitespace:
            self.assertNotRegexpMatches(c, whitespace_pattern)
            self.assertRegexpMatches(c, non_whitespace_pattern)
        else:
            self.assertRegexpMatches(c, whitespace_pattern)
            self.assertNotRegexpMatches(c, non_whitespace_pattern)
Example #6
Source File: latex2text.py From Menotexport with GNU General Public License v3.0 | 6 votes |
def _greekletters(letterlist):
    """Register small and capital Greek-letter macros for each given name.

    For every name in ``letterlist`` two ``(macro_name, character)`` pairs
    are appended to ``_default_macro_list``: the name as given with the
    small letter, and the name with its first character upper-cased with
    the capital letter.
    """
    # Small-letter names that do not follow "GREEK SMALL LETTER <NAME>".
    small_name_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for macro_name in letterlist:
        unicode_letter = macro_name.upper()
        if unicode_letter == 'LAMBDA':
            # The Unicode character names spell this letter "LAMDA".
            unicode_letter = 'LAMDA'

        small_name = small_name_overrides.get(
            unicode_letter, "GREEK SMALL LETTER " + unicode_letter)
        capital_name = "GREEK CAPITAL LETTER " + unicode_letter

        _default_macro_list.append((macro_name, unicodedata.lookup(small_name)))
        capitalized_macro = macro_name[0].upper() + macro_name[1:]
        _default_macro_list.append((capitalized_macro, unicodedata.lookup(capital_name)))
Example #7
Source File: test_ucn.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 6 votes |
def test_aliases(self):
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    alias_table = (
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    )
    for alias, codepoint in alias_table:
        character = chr(codepoint)
        self.checkletter(alias, character)
        # The alias must differ from the primary name yet resolve to the
        # same character.
        primary_name = unicodedata.name(character)
        self.assertNotEqual(primary_name, alias)
        self.assertEqual(unicodedata.lookup(alias),
                         unicodedata.lookup(primary_name))
        # The frozen 3.2.0 database is expected to reject the alias.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias)
Example #8
Source File: test_ucn.py From Fluid-Designer with GNU General Public License v3.0 | 6 votes |
def test_aliases(self):
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    alias_table = (
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    )
    for alias, codepoint in alias_table:
        character = chr(codepoint)
        self.checkletter(alias, character)
        # The alias must differ from the primary name yet resolve to the
        # same character.
        primary_name = unicodedata.name(character)
        self.assertNotEqual(primary_name, alias)
        self.assertEqual(unicodedata.lookup(alias),
                         unicodedata.lookup(primary_name))
        # The frozen 3.2.0 database is expected to reject the alias.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias)
Example #9
Source File: test_ucn.py From ironpython3 with Apache License 2.0 | 6 votes |
def test_named_sequences_full(self):
    # Check all the named sequences
    url = "http://www.pythontest.net/unicode/%s/NamedSequences.txt" % (
        unicodedata.unidata_version,)
    try:
        testdata = support.open_urlresource(
            url, encoding="utf-8", check=check_version)
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for raw_line in testdata:
        entry = raw_line.strip()
        # Skip blank lines and comment lines.
        if entry and not entry.startswith('#'):
            # Each entry is "<sequence name>;<space-separated hex code points>".
            seqname, hex_fields = entry.split(';')
            expected = ''.join([chr(int(field, 16))
                                for field in hex_fields.split()])
            self.assertEqual(unicodedata.lookup(seqname), expected)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            # The frozen 3.2.0 database is expected to reject the name.
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
Example #10
Source File: test_ucn.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 6 votes |
def test_named_sequences_full(self):
    # Check all the named sequences
    url = "http://www.pythontest.net/unicode/%s/NamedSequences.txt" % (
        unicodedata.unidata_version,)
    try:
        testdata = support.open_urlresource(
            url, encoding="utf-8", check=check_version)
    except (OSError, HTTPException):
        self.skipTest("Could not retrieve " + url)
    self.addCleanup(testdata.close)

    for raw_line in testdata:
        entry = raw_line.strip()
        # Skip blank lines and comment lines.
        if entry and not entry.startswith('#'):
            # Each entry is "<sequence name>;<space-separated hex code points>".
            seqname, hex_fields = entry.split(';')
            expected = ''.join([chr(int(field, 16))
                                for field in hex_fields.split()])
            self.assertEqual(unicodedata.lookup(seqname), expected)
            with self.assertRaises(SyntaxError):
                self.checkletter(seqname, None)
            # The frozen 3.2.0 database is expected to reject the name.
            with self.assertRaises(KeyError):
                unicodedata.ucd_3_2_0.lookup(seqname)
Example #11
Source File: test_re_jy.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 6 votes |
def test_unicode_whitespace(self):
    # Test for http://bugs.jython.org/issue2226
    # Each checked code point must match exactly one of \s and \S, and which
    # one is decided by the whitespace set assembled below.
    whitespace_pattern = re.compile(r'\s', re.UNICODE)
    non_whitespace_pattern = re.compile(r'\S', re.UNICODE)

    separator_categories = {'Zl', 'Zp', 'Zs'}
    control_separators = set(map(chr, (28, 29, 30, 31)))
    extra_whitespace = {
        unicodedata.lookup('MONGOLIAN VOWEL SEPARATOR'),
        u'\u0085',  # NEXT LINE (NEL)
    }
    cpython_whitespace = set(' \t\n\r\f\v') | control_separators | extra_whitespace

    # could test to sys.maxunicode, but does not appear to be necessary
    for codepoint in xrange(0xFFFF):
        if 0xD800 <= codepoint <= 0xDFFF:
            # The surrogate range is not checked.
            continue
        c = unichr(codepoint)
        expect_whitespace = (c in cpython_whitespace
                             or category(c) in separator_categories)
        if not expect_whitespace:
            self.assertNotRegexpMatches(c, whitespace_pattern)
            self.assertRegexpMatches(c, non_whitespace_pattern)
        else:
            self.assertRegexpMatches(c, whitespace_pattern)
            self.assertNotRegexpMatches(c, non_whitespace_pattern)
Example #12
Source File: _defaultspecs.py From pylatexenc with MIT License | 6 votes |
def _greekletters(letterlist):
    """Register small and capital Greek-letter macro specs for each name.

    For every name in ``letterlist`` two ``MacroTextSpec`` entries are
    appended to ``_latex_specs_base['macros']``: the name as given with the
    small letter, and the name with its first character upper-cased with
    the capital letter.
    """
    # Small-letter names that do not follow "GREEK SMALL LETTER <NAME>".
    small_name_overrides = {
        'EPSILON': "GREEK LUNATE EPSILON SYMBOL",
        'PHI': "GREEK PHI SYMBOL",
    }
    for macro_name in letterlist:
        unicode_letter = macro_name.upper()
        if unicode_letter == 'LAMBDA':
            # The Unicode character names spell this letter "LAMDA".
            unicode_letter = 'LAMDA'

        small_name = small_name_overrides.get(
            unicode_letter, "GREEK SMALL LETTER " + unicode_letter)
        capital_name = "GREEK CAPITAL LETTER " + unicode_letter

        _latex_specs_base['macros'].append(
            MacroTextSpec(macro_name, unicodedata.lookup(small_name))
        )
        capitalized_macro = macro_name[0].upper() + macro_name[1:]
        _latex_specs_base['macros'].append(
            MacroTextSpec(capitalized_macro, unicodedata.lookup(capital_name))
        )
Example #13
Source File: test_ucn.py From ironpython3 with Apache License 2.0 | 6 votes |
def test_aliases(self):
    # Check that the aliases defined in the NameAliases.txt file work.
    # This should be updated when new aliases are added or the file
    # should be downloaded and parsed instead.  See #12753.
    alias_table = (
        ('LATIN CAPITAL LETTER GHA', 0x01A2),
        ('LATIN SMALL LETTER GHA', 0x01A3),
        ('KANNADA LETTER LLLA', 0x0CDE),
        ('LAO LETTER FO FON', 0x0E9D),
        ('LAO LETTER FO FAY', 0x0E9F),
        ('LAO LETTER RO', 0x0EA3),
        ('LAO LETTER LO', 0x0EA5),
        ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0),
        ('YI SYLLABLE ITERATION MARK', 0xA015),
        ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18),
        ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5),
    )
    for alias, codepoint in alias_table:
        character = chr(codepoint)
        self.checkletter(alias, character)
        # The alias must differ from the primary name yet resolve to the
        # same character.
        primary_name = unicodedata.name(character)
        self.assertNotEqual(primary_name, alias)
        self.assertEqual(unicodedata.lookup(alias),
                         unicodedata.lookup(primary_name))
        # The frozen 3.2.0 database is expected to reject the alias.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(alias)
Example #14
Source File: test_ucn.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def test_bmp_characters(self):
    import unicodedata
    # Every code point below 0x10000 that has a name must be found again
    # when that name is looked up.
    # Tally of named characters seen; nothing in this block reads it.
    count = 0
    for codepoint in xrange(0x10000):
        character = unichr(codepoint)
        character_name = unicodedata.name(character, None)
        if character_name is None:
            # No name for this code point; nothing to round-trip.
            continue
        self.assertEqual(unicodedata.lookup(character_name), character)
        count += 1
Example #15
Source File: test_reshape.py From twitter-stock-recommendation with MIT License | 5 votes |
def test_unicode(self, sparse):
    # See GH 6885 - get_dummies chokes on unicode values
    import unicodedata
    plain_e = 'e'
    accented_e = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, accented_e, accented_e]

    result = get_dummies(values, prefix='letter', sparse=sparse)

    # One indicator column per distinct value, named "letter_<value>".
    expected_columns = {
        'letter_e': [1, 0, 0],
        u('letter_%s') % accented_e: [0, 1, 1],
    }
    expected = DataFrame(expected_columns, dtype=np.uint8)
    assert_frame_equal(result, expected)
Example #16
Source File: test_ucn.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def test_errors(self):
    import unicodedata
    # (expected exception, callable, positional arguments), checked in order:
    # missing arguments and a two-character string must raise TypeError, an
    # unknown name must raise KeyError.
    expectations = (
        (TypeError, unicodedata.name, ()),
        (TypeError, unicodedata.name, (u'xx',)),
        (TypeError, unicodedata.lookup, ()),
        (KeyError, unicodedata.lookup, (u'unknown',)),
    )
    for expected_error, function, arguments in expectations:
        self.assertRaises(expected_error, function, *arguments)
Example #17
Source File: _base.py From MIA-Dictionary-Addon with GNU General Public License v3.0 | 5 votes |
def __init__(self, vk=None, char=None, is_dead=False):
    """Store the key description and, for dead keys, the combining character.

    :param vk: stored unchanged as ``self.vk``.
    :param char: converted with ``six.text_type`` unless it is ``None``.
    :param is_dead: when true, ``self.combining`` is set to the character
        whose Unicode name is ``'COMBINING '`` followed by the name of
        ``char``; otherwise ``self.combining`` is ``None``.
    :raises KeyError: if the combining character lookup yields nothing.
    """
    self.vk = vk
    self.char = None if char is None else six.text_type(char)
    self.is_dead = is_dead

    if not self.is_dead:
        self.combining = None
        return

    combining_name = 'COMBINING ' + unicodedata.name(self.char)
    self.combining = unicodedata.lookup(combining_name)
    if not self.combining:
        raise KeyError(char)
Example #18
Source File: test_ucn.py From CTFCrackTools with GNU General Public License v3.0 | 5 votes |
def test_ascii_letters(self):
    import unicodedata
    # Every lowercase ASCII letter must round-trip: looking up
    # "LATIN SMALL LETTER <X>" and asking for the name of the result has to
    # give the same name back.
    # xrange() excludes its stop value, so the bound is ord("z") + 1;
    # stopping at ord("z") silently left the letter "z" unchecked.
    for codepoint in xrange(ord("a"), ord("z") + 1):
        letter = chr(codepoint)
        name = "LATIN SMALL LETTER %s" % letter.upper()
        character = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(character), name)
Example #19
Source File: test_reshape.py From coffeegrindsize with MIT License | 5 votes |
def test_unicode(self, sparse):
    # See GH 6885 - get_dummies chokes on unicode values
    import unicodedata
    plain_e = 'e'
    accented_e = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, accented_e, accented_e]

    result = get_dummies(values, prefix='letter', sparse=sparse)

    # One indicator column per distinct value, named "letter_<value>".
    expected_columns = {
        'letter_e': [1, 0, 0],
        u('letter_%s') % accented_e: [0, 1, 1],
    }
    expected = DataFrame(expected_columns, dtype=np.uint8)
    if sparse:
        # Convert the expectation column by column for the sparse case.
        expected = expected.apply(pd.SparseArray, fill_value=0)
    assert_frame_equal(result, expected)
Example #20
Source File: glyphIgo.py From glyphIgo with MIT License | 5 votes |
def __lookup_character(self):
    """Resolve the ``character`` argument into a list of Unicode characters.

    When ``"heuristic"`` is present in the parsed arguments, every code
    point whose name contains all words of the query is returned.
    Otherwise the query is read as a single character, a hexadecimal or
    decimal code point, or an exact Unicode character name.
    """
    query = self.__args.character

    if "heuristic" in self.__args:
        # try fuzzy match
        wanted_words = [word for word in query.upper().split(" ") if len(word) > 0]
        matches = []
        # Unicode codepoints range from 0 to 0x10FFFF = 1114111
        for codepoint in range(1114112):
            character = unichr(codepoint)
            name_words = unicodedata.name(character, "UNKNOWN").split(" ")
            if all(word in name_words for word in wanted_words):
                matches.append(character)
        return matches

    # try char, codepoint or exact name lookup
    if len(query) == 1:
        # Unicode char
        return [u"" + query]
    if re.match(self.PATTERN_HEX_0x, query) is not None:
        # hex: the first two characters of the query are skipped
        return [unichr(int(query[2:], 16))]
    if re.match(self.PATTERN_HEX_x, query) is not None:
        # hex: the first character of the query is skipped
        return [unichr(int(query[1:], 16))]
    if re.match(self.PATTERN_DEC, query) is not None:
        # decimal
        return [unichr(int(query))]
    # exact name
    return [unicodedata.lookup(query)]
Example #21
Source File: util.py From odoo13-x64 with GNU General Public License v3.0 | 5 votes |
def _mk_char_map(mapping):
    """Transform a dictionary with comma-separated Unicode character names
    into ``(character, value)`` tuples, one per name.

    Names that ``unicodedata.lookup`` does not know are skipped.
    """
    for names, value in mapping.items():
        for character_name in names.split(','):
            try:
                character = unicodedata.lookup(character_name)
            except KeyError:  # pragma: no cover (does not happen on Python3)
                continue
            yield (character, value)


# build mapping of Unicode characters to equivalent ASCII characters
Example #22
Source File: test_reshape.py From elasticintel with GNU General Public License v3.0 | 5 votes |
def test_unicode(self):
    # See GH 6885 - get_dummies chokes on unicode values
    import unicodedata
    plain_e = 'e'
    accented_e = unicodedata.lookup('LATIN SMALL LETTER E WITH ACUTE')
    values = [plain_e, accented_e, accented_e]

    result = get_dummies(values, prefix='letter', sparse=self.sparse)

    # One indicator column per distinct value, named "letter_<value>" and
    # keyed by row position.
    expected_columns = {
        'letter_e': {0: 1, 1: 0, 2: 0},
        u('letter_%s') % accented_e: {0: 0, 1: 1, 2: 1},
    }
    expected = DataFrame(expected_columns, dtype=np.uint8)
    assert_frame_equal(result, expected)
Example #23
Source File: util.py From odoo12-x64 with GNU General Public License v3.0 | 5 votes |
def _mk_char_map(mapping):
    """Transform a dictionary with comma-separated Unicode character names
    into ``(character, value)`` tuples, one per name.

    Names that ``unicodedata.lookup`` does not know are skipped.
    """
    for names, value in mapping.items():
        for character_name in names.split(','):
            try:
                character = unicodedata.lookup(character_name)
            except KeyError:  # pragma: no cover (does not happen on Python3)
                continue
            yield (character, value)


# build mapping of Unicode characters to equivalent ASCII characters
Example #24
Source File: test_ucn.py From CTFCrackTools-V2 with GNU General Public License v3.0 | 5 votes |
def test_errors(self):
    import unicodedata
    # (expected exception, callable, positional arguments), checked in order:
    # missing arguments and a two-character string must raise TypeError, an
    # unknown name must raise KeyError.
    expectations = (
        (TypeError, unicodedata.name, ()),
        (TypeError, unicodedata.name, (u'xx',)),
        (TypeError, unicodedata.lookup, ()),
        (KeyError, unicodedata.lookup, (u'unknown',)),
    )
    for expected_error, function, arguments in expectations:
        self.assertRaises(expected_error, function, *arguments)
Example #25
Source File: test_ucn.py From medicare-demo with Apache License 2.0 | 5 votes |
def test_ascii_letters(self):
    import unicodedata
    # Every lowercase ASCII letter must round-trip: looking up
    # "LATIN SMALL LETTER <X>" and asking for the name of the result has to
    # give the same name back.
    # xrange() excludes its stop value, so the bound is ord("z") + 1;
    # stopping at ord("z") silently left the letter "z" unchecked.
    for codepoint in xrange(ord("a"), ord("z") + 1):
        letter = chr(codepoint)
        name = "LATIN SMALL LETTER %s" % letter.upper()
        character = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(character), name)
Example #26
Source File: test_util.py From magic-wormhole-mailbox-server with MIT License | 5 votes |
def test_to_bytes(self):
    bytes_type = type(b"")

    # Plain ASCII text comes back as the same bytes.
    encoded = util.to_bytes("abc")
    self.assertIsInstance(encoded, bytes_type)
    self.assertEqual(encoded, b"abc")

    # A non-ASCII character comes back as its two-byte UTF-8 sequence.
    a_diaeresis = unicodedata.lookup("LATIN SMALL LETTER A WITH DIAERESIS")
    encoded = util.to_bytes(a_diaeresis + "bc")
    self.assertIsInstance(encoded, bytes_type)
    self.assertEqual(encoded, b"\xc3\xa4\x62\x63")
Example #27
Source File: test_ucn.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_named_sequences_sample(self):
    # Check a few named sequences.  See #12753.
    samples = (
        ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'),
        ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'),
        ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'),
        ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'),
        ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'),
    )
    for sequence_name, expected in samples:
        self.assertEqual(unicodedata.lookup(sequence_name), expected)
        with self.assertRaises(SyntaxError):
            self.checkletter(sequence_name, None)
        # The frozen 3.2.0 database is expected to reject the name.
        with self.assertRaises(KeyError):
            unicodedata.ucd_3_2_0.lookup(sequence_name)
Example #28
Source File: test_ucn.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_bmp_characters(self):
    # Every code point below 0x10000 that has a name must be found again
    # when that name is looked up.
    for codepoint in range(0x10000):
        character = chr(codepoint)
        character_name = unicodedata.name(character, None)
        if character_name is None:
            # No name for this code point; nothing to round-trip.
            continue
        self.assertEqual(unicodedata.lookup(character_name), character)
Example #29
Source File: test_ucn.py From Project-New-Reign---Nemesis-Main with GNU General Public License v3.0 | 5 votes |
def test_ascii_letters(self):
    # Every lowercase ASCII letter must round-trip: looking up
    # "LATIN SMALL LETTER <X>" and asking for the name of the result has to
    # give the same name back.
    # range() excludes its stop value, so the bound is ord("z") + 1;
    # stopping at ord("z") silently left the letter "z" unchecked.
    for codepoint in range(ord("a"), ord("z") + 1):
        letter = chr(codepoint)
        name = "LATIN SMALL LETTER %s" % letter.upper()
        character = unicodedata.lookup(name)
        self.assertEqual(unicodedata.name(character), name)
Example #30
Source File: test_ucn.py From gcblue with BSD 3-Clause "New" or "Revised" License | 5 votes |
def test_errors(self):
    import unicodedata
    # (expected exception, callable, positional arguments), checked in order:
    # missing arguments and a two-character string must raise TypeError, an
    # unknown name must raise KeyError.
    expectations = (
        (TypeError, unicodedata.name, ()),
        (TypeError, unicodedata.name, (u'xx',)),
        (TypeError, unicodedata.lookup, ()),
        (KeyError, unicodedata.lookup, (u'unknown',)),
    )
    for expected_error, function, arguments in expectations:
        self.assertRaises(expected_error, function, *arguments)