Python unicodedata.east_asian_width() Examples

The following are 30 code examples of unicodedata.east_asian_width(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module unicodedata, or try the search function.
Example #1
Source File: statemachine.py    From AWS-Transit-Gateway-Demo-MultiAccount with MIT License 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Append `pad_char` after every double-width character in self.

        Only the entries of ``self.data`` that are `str` instances are
        rewritten (in place); other entries are left untouched.
        For East Asian language support.
        """
        width_of = getattr(unicodedata, 'east_asian_width', None)
        if width_of is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            # 'W'ide & 'F'ull-width characters are followed by the pad.
            self.data[index] = ''.join(
                char + pad_char if width_of(char) in 'WF' else char
                for char in line)
Example #2
Source File: terminal.py    From TerminalView with MIT License 6 votes vote down vote up
def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)"""
        #logging.debug('cursor_right(%s)' % n)
        # A missing/empty/zero parameter means "move by one".
        n = int(n) if n else 1
        # Double-width handling: the previous call landed on a wide character
        # and asked for this call to be swallowed.
        if self.double_width_right:
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            row = self.screen[self.cursorY]
            char = row[self.cursorX]
        except IndexError:
            # Cursor is past the right-edge of the screen; fall back to a
            # plain space, which is never wide.
            char = u' '
        landed_on_wide = unicodedata.east_asian_width(char) == 'W'
        if landed_on_wide:
            # We get called twice for a 2x-width character; skip the next call.
            self.double_width_right = True
        try:
            for notify in self.callbacks[CALLBACK_CURSOR_POS].values():
                notify()
        except TypeError:
            pass
Example #3
Source File: terminal.py    From TerminalView with MIT License 6 votes vote down vote up
def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)

        Moves the cursor `n` columns to the left, never past column 0.  A
        missing, empty or zero `n` is treated as 1, matching how the
        cursor-forward handler treats its parameter.
        """
        # Commented out to save CPU (and the others below too)
        #logging.debug('cursor_left(%s)' % n)
        if not n:
            # Without this guard int(None)/int('') would raise.
            n = 1
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_left:
            self.double_width_left = False
            return
        self.cursorX = max(0, self.cursorX - n) # Ensures positive value
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is outside the stored screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_left = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass
Example #4
Source File: roberteldersoftwarediff.py    From roberteldersoftwarediff with Apache License 2.0 6 votes vote down vote up
def get_east_asian_width(unicode_str):
    """Return the number of display columns taken by a single character.

    `unicode_str` must be a one-character Unicode string.  Characters whose
    East Asian Width class is Wide ('W') or Fullwidth ('F') take two columns;
    every other class (Halfwidth, Narrow, Neutral, and Ambiguous, which is
    treated as narrow here) takes one.
    """
    if unicodedata.east_asian_width(unicode_str) in ("W", "F"):
        return 2
    return 1
Example #5
Source File: ncm2.py    From ncm2 with MIT License 6 votes vote down vote up
def strdisplaywidth(self, s):
        """Return the number of display columns needed to show `s`.

        The string is NFC-normalized first, then each character contributes
        two columns if its East Asian Width class is Wide ('W') or
        Fullwidth ('F') and one column otherwise (Ambiguous is treated as
        narrow).
        """
        def get_char_display_width(unicode_str):
            # Wide and Fullwidth characters occupy two columns.
            if unicodedata.east_asian_width(unicode_str) in ("W", "F"):
                return 2
            return 1

        s = unicodedata.normalize('NFC', s)
        return sum(get_char_display_width(c) for c in s)
Example #6
Source File: test_regressions.py    From ironpython2 with Apache License 2.0 6 votes vote down vote up
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        # Every lookup below targets the same code point, U+4E2D.
        han = u'\u4e2d'

        if is_cli:
            expected_name = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        else:
            expected_name = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(han), expected_name)

        # The numeric lookups raise without a default and return the default
        # when one is supplied.
        for lookup in (unicodedata.decimal, unicodedata.digit, unicodedata.numeric):
            self.assertRaises(ValueError, lookup, han)
            self.assertEqual(lookup(han, 0), 0)

        self.assertEqual(unicodedata.category(han), 'Lo')
        self.assertEqual(unicodedata.bidirectional(han), 'L')
        self.assertEqual(unicodedata.combining(han), 0)
        self.assertEqual(unicodedata.east_asian_width(han), 'W')
        self.assertEqual(unicodedata.mirrored(han), 0)
        self.assertEqual(unicodedata.decomposition(han), '')
Example #7
Source File: terminalui.py    From imaginary with MIT License 6 votes vote down vote up
def width(ch):
    """
    Look up the display width of a single character.

    The character's East Asian Width class is used as the key into the
    module's C{_widths} table.  Handy when repositioning a cursor, but not
    fully dependable: terminal emulators disagree on how wide some
    characters are.

    @see: U{http://unicode.org/reports/tr11/}

    @return: The width in 1/2 ems of the given single-length unicode string.
    @rtype: C{int}

    @raise KeyError: If the character's width class is not in the table.
    """
    spec = unicodedata.east_asian_width(ch)
    try:
        result = _widths[spec]
    except KeyError:
        # Re-raise with a message naming the offending character and class.
        raise KeyError("%r has a width that is not supported: %s"
                       % (ch, spec))
    else:
        return result
Example #8
Source File: statemachine.py    From faces with GNU General Public License v2.0 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Follow each double-width character in self with `pad_char`.

        Entries of ``self.data`` that are `str` instances are replaced in
        place; all other entries are skipped.
        For East Asian language support.
        """
        if not hasattr(unicodedata, 'east_asian_width'):
            return                      # new in Python 2.4
        char_width = unicodedata.east_asian_width

        def padded(text):
            # Emit every character, plus the pad after 'W'ide & 'F'ull-width.
            pieces = []
            for char in text:
                pieces.append(char)
                if char_width(char) in 'WF':
                    pieces.append(pad_char)
            return ''.join(pieces)

        for position, line in enumerate(self.data):
            if isinstance(line, str):
                self.data[position] = padded(line)
Example #9
Source File: statemachine.py    From faces with GNU General Public License v2.0 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after each double-width character in self.

        Only entries of ``self.data`` that are `unicode` instances are
        rewritten (in place).
        For East Asian language support.
        """
        try:
            east_asian_width = unicodedata.east_asian_width
        except AttributeError:
            return                      # new in Python 2.4
        for index, line in enumerate(self.data):
            if isinstance(line, unicode):
                # 'W'ide & 'F'ull-width characters get the pad appended.
                chunks = [char + pad_char
                          if east_asian_width(char) in 'WF' else char
                          for char in line]
                self.data[index] = ''.join(chunks)
Example #10
Source File: cpp_lint.py    From Deep-Exemplar-based-Colorization with MIT License 6 votes vote down vote up
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Works on both Python 2 (where only `unicode` objects are text) and
  Python 3 (where the `unicode` builtin is gone and every `str` is text).

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Non-text input (e.g. a byte
    string) is measured with len().
  """
  try:
    text_type = unicode  # Python 2
  except NameError:
    text_type = str  # Python 3
  if not isinstance(line, text_type):
    return len(line)
  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2  # Wide and Fullwidth characters span two columns.
    elif not unicodedata.combining(uc):
      width += 1  # Combining marks take no column of their own.
  return width
Example #11
Source File: cpp_lint.py    From Deep-Learning-Based-Structural-Damage-Detection with MIT License 6 votes vote down vote up
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  The Unicode-aware measurement is applied to text on every Python version:
  `unicode` objects on Python 2 and `str` objects on Python 3.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Non-text input (e.g. a byte
    string) is measured with len().
  """
  try:
    text_type = unicode  # Python 2
  except NameError:
    text_type = str  # Python 3
  if isinstance(line, text_type):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2  # Wide and Fullwidth characters span two columns.
      elif not unicodedata.combining(uc):
        width += 1  # Combining marks take no column of their own.
    return width
  return len(line)
Example #12
Source File: cpplint.py    From ecs with MIT License 6 votes vote down vote up
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Works on both Python 2 (where only `unicode` objects are text) and
  Python 3 (where the `unicode` builtin is gone and every `str` is text).

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Non-text input (e.g. a byte
    string) is measured with len().
  """
  try:
    text_type = unicode  # Python 2
  except NameError:
    text_type = str  # Python 3
  if not isinstance(line, text_type):
    return len(line)
  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2  # Wide and Fullwidth characters span two columns.
    elif not unicodedata.combining(uc):
      width += 1  # Combining marks take no column of their own.
  return width
Example #13
Source File: statemachine.py    From deepWordBug with Apache License 2.0 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after each double-width character in self.

        Only entries of ``self.data`` that are `str` instances are rewritten
        (in place).
        For East Asian language support.
        """
        try:
            east_asian_width = unicodedata.east_asian_width
        except AttributeError:
            return                      # new in Python 2.4
        for index, line in enumerate(self.data):
            if isinstance(line, str):
                # 'W'ide & 'F'ull-width characters get the pad appended.
                chunks = [char + pad_char
                          if east_asian_width(char) in 'WF' else char
                          for char in line]
                self.data[index] = ''.join(chunks)
Example #14
Source File: statemachine.py    From AWS-Transit-Gateway-Demo-MultiAccount with MIT License 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Follow each double-width character in self with `pad_char`.

        Entries of ``self.data`` that are `str` instances are replaced in
        place; all other entries are skipped.
        For East Asian language support.
        """
        if not hasattr(unicodedata, 'east_asian_width'):
            return                      # new in Python 2.4
        char_width = unicodedata.east_asian_width

        def padded(text):
            # Emit every character, plus the pad after 'W'ide & 'F'ull-width.
            pieces = []
            for char in text:
                pieces.append(char)
                if char_width(char) in 'WF':
                    pieces.append(pad_char)
            return ''.join(pieces)

        for position, line in enumerate(self.data):
            if isinstance(line, str):
                self.data[position] = padded(line)
Example #15
Source File: statemachine.py    From cadquery-freecad-module with GNU Lesser General Public License v3.0 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Append `pad_char` after every double-width character in self.

        Only the entries of ``self.data`` that are `unicode` instances are
        rewritten (in place); other entries are left untouched.
        For East Asian language support.
        """
        width_of = getattr(unicodedata, 'east_asian_width', None)
        if width_of is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, unicode):
                continue
            # 'W'ide & 'F'ull-width characters are followed by the pad.
            self.data[index] = ''.join(
                char + pad_char if width_of(char) in 'WF' else char
                for char in line)
Example #16
Source File: test_regressions.py    From ironpython3 with Apache License 2.0 6 votes vote down vote up
def test_ipy2_gh357(self):
        """https://github.com/IronLanguages/ironpython2/issues/357"""

        import unicodedata

        # The single code point under test: U+4E2D.
        ideograph = u'\u4e2d'

        name_on_cli = '<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>'
        name_elsewhere = 'CJK UNIFIED IDEOGRAPH-4E2D'
        self.assertEqual(unicodedata.name(ideograph),
                         name_on_cli if is_cli else name_elsewhere)

        # Numeric lookups: ValueError without a default, the default with one.
        for numeric_lookup in (unicodedata.decimal,
                               unicodedata.digit,
                               unicodedata.numeric):
            self.assertRaises(ValueError, numeric_lookup, ideograph)
            self.assertEqual(numeric_lookup(ideograph, 0), 0)

        self.assertEqual(unicodedata.category(ideograph), 'Lo')
        self.assertEqual(unicodedata.bidirectional(ideograph), 'L')
        self.assertEqual(unicodedata.combining(ideograph), 0)
        self.assertEqual(unicodedata.east_asian_width(ideograph), 'W')
        self.assertEqual(unicodedata.mirrored(ideograph), 0)
        self.assertEqual(unicodedata.decomposition(ideograph), '')
Example #17
Source File: statemachine.py    From blackmamba with MIT License 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Append `pad_char` after every double-width character in self.

        Only the entries of ``self.data`` that are `str` instances are
        rewritten (in place); other entries are left untouched.
        For East Asian language support.
        """
        width_of = getattr(unicodedata, 'east_asian_width', None)
        if width_of is None:
            return                      # east_asian_width is new in Python 2.4
        for index, line in enumerate(self.data):
            if not isinstance(line, str):
                continue
            # 'W'ide & 'F'ull-width characters are followed by the pad.
            self.data[index] = ''.join(
                char + pad_char if width_of(char) in 'WF' else char
                for char in line)
Example #18
Source File: terminal.py    From django-gateone with GNU General Public License v3.0 6 votes vote down vote up
def cursor_right(self, n=1):
        """ESCnC CUF (Cursor Forward)"""
        #logging.debug('cursor_right(%s)' % n)
        # An empty/zero/None parameter falls back to a single step.
        n = int(n) if n else 1
        # Double-width handling: a pending skip consumes this call entirely.
        if self.double_width_right:
            self.double_width_right = False
            return
        self.cursorX += n
        try:
            line = self.screen[self.cursorY]
            char = line[self.cursorX]
        except IndexError:
            # Cursor is past the right-edge of the screen; a space is a safe
            # default/fallback.
            char = u' '
        is_wide = unicodedata.east_asian_width(char) == 'W'
        if is_wide:
            # Called 2x for a 2x-width character, so skip the next call.
            self.double_width_right = True
        try:
            for on_move in self.callbacks[CALLBACK_CURSOR_POS].values():
                on_move()
        except TypeError:
            pass
Example #19
Source File: terminal.py    From django-gateone with GNU General Public License v3.0 6 votes vote down vote up
def cursor_left(self, n=1):
        """ESCnD CUB (Cursor Back)

        Moves the cursor `n` columns to the left, never past column 0.  A
        missing, empty or zero `n` is treated as 1, matching how the
        cursor-forward handler treats its parameter.
        """
        # Commented out to save CPU (and the others below too)
        #logging.debug('cursor_left(%s)' % n)
        if not n:
            # Without this guard int(None)/int('') would raise.
            n = 1
        n = int(n)
        # This logic takes care of double-width unicode characters
        if self.double_width_left:
            self.double_width_left = False
            return
        self.cursorX = max(0, self.cursorX - n) # Ensures positive value
        try:
            char = self.screen[self.cursorY][self.cursorX]
        except IndexError: # Cursor is outside the stored screen; ignore
            char = u' ' # This is a safe default/fallback
        if unicodedata.east_asian_width(char) == 'W':
            # This lets us skip the next call (get called 2x for 2x width)
            self.double_width_left = True
        try:
            for callback in self.callbacks[CALLBACK_CURSOR_POS].values():
                callback()
        except TypeError:
            pass
Example #20
Source File: statemachine.py    From aws-extender with MIT License 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Follow each double-width character in self with `pad_char`.

        Entries of ``self.data`` that are `unicode` instances are replaced in
        place; all other entries are skipped.
        For East Asian language support.
        """
        if not hasattr(unicodedata, 'east_asian_width'):
            return                      # new in Python 2.4
        char_width = unicodedata.east_asian_width

        def padded(text):
            # Emit every character, plus the pad after 'W'ide & 'F'ull-width.
            pieces = []
            for char in text:
                pieces.append(char)
                if char_width(char) in 'WF':
                    pieces.append(pad_char)
            return ''.join(pieces)

        for position, line in enumerate(self.data):
            if isinstance(line, unicode):
                self.data[position] = padded(line)
Example #21
Source File: statemachine.py    From bash-lambda-layer with MIT License 6 votes vote down vote up
def pad_double_width(self, pad_char):
        """
        Insert `pad_char` after each double-width character in self.

        Only entries of ``self.data`` that are `str` instances are rewritten
        (in place).
        For East Asian language support.
        """
        try:
            east_asian_width = unicodedata.east_asian_width
        except AttributeError:
            return                      # new in Python 2.4
        for index, line in enumerate(self.data):
            if isinstance(line, str):
                # 'W'ide & 'F'ull-width characters get the pad appended.
                chunks = [char + pad_char
                          if east_asian_width(char) in 'WF' else char
                          for char in line]
                self.data[index] = ''.join(chunks)
Example #22
Source File: cpp_lint.py    From mix-and-match with MIT License 6 votes vote down vote up
def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Works on both Python 2 (where only `unicode` objects are text) and
  Python 3 (where the `unicode` builtin is gone and every `str` is text).

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters. Non-text input (e.g. a byte
    string) is measured with len().
  """
  try:
    text_type = unicode  # Python 2
  except NameError:
    text_type = str  # Python 3
  if not isinstance(line, text_type):
    return len(line)
  width = 0
  for uc in unicodedata.normalize('NFC', line):
    if unicodedata.east_asian_width(uc) in ('W', 'F'):
      width += 2  # Wide and Fullwidth characters span two columns.
    elif not unicodedata.combining(uc):
      width += 1  # Combining marks take no column of their own.
  return width
Example #23
Source File: __init__.py    From coffeegrindsize with MIT License 5 votes vote down vote up
def east_asian_len(data, encoding=None, ambiguous_width=1):
        """
        Calculate display width considering unicode East Asian Width

        Input that is not a `text_type` instance is measured with ``len()``.
        For text, a decode with `encoding` is attempted first (a
        UnicodeError is ignored), then each character's width is looked up
        in ``_EAW_MAP`` by its East Asian Width class, with
        `ambiguous_width` used for classes missing from the map.
        """
        if not isinstance(data, text_type):
            return len(data)
        try:
            data = data.decode(encoding)
        except UnicodeError:
            pass
        total = 0
        for c in data:
            total += _EAW_MAP.get(east_asian_width(c), ambiguous_width)
        return total
Example #24
Source File: curses_util.py    From ci_edit with Apache License 2.0 5 votes vote down vote up
def charWidth(ch, column, tabWidth=8):
        """Return how many columns `ch` takes when drawn at `column`.

        A tab runs to the next multiple of `tabWidth`; the empty string and
        characters sorting before space take 0; Fullwidth and Wide East
        Asian characters take 2; anything else takes 1.
        """
        if ch == u"\t":
            return tabWidth - (column % tabWidth)
        if ch == u"" or ch < u" ":
            return 0
        if ch < u"ᄀ":
            # Optimization: below this code point the answer is always 1
            # here, so the unicodedata lookup is skipped.
            return 1
        isWide = unicodedata.east_asian_width(ch) in (u"F", r"W")
        return 2 if isWide else 1
Example #25
Source File: curses_util.py    From ci_edit with Apache License 2.0 5 votes vote down vote up
def isDoubleWidth(ch):
        """Return True when `ch` is a Fullwidth or Wide East Asian character.

        The empty string and every character sorting before u"ᄀ" are
        reported as False without consulting unicodedata.
        """
        needsLookup = not (ch == u"" or ch < u"ᄀ")
        if needsLookup:
            return unicodedata.east_asian_width(ch) in (u"F", u"W")
        # Optimization: low code points are never double width here.
        return False
Example #26
Source File: curses_util.py    From ci_edit with Apache License 2.0 5 votes vote down vote up
def isZeroWidth(ch):
        """Return True when `ch` is empty or sorts before the space character.

        East Asian Width classes are not consulted.
        """
        if ch == u"":
            return True
        return ch < u" "
Example #27
Source File: curses_util.py    From ci_edit with Apache License 2.0 5 votes vote down vote up
def charWidth(ch, column, tabWidth=8):
        """Return how many columns `ch` takes when drawn at `column`.

        A tab runs to the next multiple of `tabWidth`; the empty string and
        characters sorting before space take 0; Fullwidth ('F') and Wide
        ('W') East Asian characters take 2; anything else takes 1.
        """
        if ch == u"\t":
            return tabWidth - (column % tabWidth)
        elif ch == u"" or ch < u" ":
            return 0
        elif ch < u"ᄀ":
            # Optimization.
            return 1
        elif unicodedata.east_asian_width(ch) in (u"F", u"W"):
            # Fullwidth forms are drawn as wide as Wide characters.
            return 2
        return 1
Example #28
Source File: __init__.py    From AWS-Transit-Gateway-Demo-MultiAccount with MIT License 5 votes vote down vote up
def column_width(text):
    """Return the column width of text.

    Starts from the per-character widths in ``east_asian_widths`` (or plain
    ``len(text)`` when the lookup raises AttributeError) and subtracts one
    column for every combining character found in `text`.
    """
    # Python 2 byte strings are measured as-is.
    if isinstance(text, str) and sys.version_info < (3,0):
        return len(text)
    try:
        lookup = unicodedata.east_asian_width
        width = sum(east_asian_widths[lookup(c)] for c in text)
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    # correction for combining chars:
    combining_count = len(find_combining_chars(text))
    return width - combining_count
Example #29
Source File: string.py    From stig with GNU General Public License v3.0 5 votes vote down vote up
def strwidth(string):
    """Return displayed width of `string`, considering wide characters"""
    # Each Fullwidth/Wide character costs one column on top of its length-1.
    wide_chars = [char for char in string
                  if _east_asian_width(char) in 'FW']
    return len(string) + len(wide_chars)
Example #30
Source File: string.py    From stig with GNU General Public License v3.0 5 votes vote down vote up
def strcrop(string, width, tail=None):
    """Return `string` cropped to `width`, considering wide characters

    If `tail` is not None, it must be a string that is appended to the cropped
    string; its displayed width is reserved out of `width` first.

    A string that already fits in `width` is returned unchanged, without
    `tail`.  When no room is left for `string` at all (for example because
    `tail` alone is wider than `width`), nothing of `string` is kept and the
    result is just `tail`, or '' when there is no tail.
    """
    def widechar_indexes(s):
        for i,c in enumerate(s):
            if _east_asian_width(c) in 'FW':
                yield i

    if strwidth(string) <= width:
        return string  # string is already short enough

    if tail is not None:
        width -= strwidth(tail)  # Account for tail in final width

    # Cut points: the index of every wide character plus the end of the
    # string.  Every chunk between two cut points therefore holds at most one
    # wide character, and that character is the chunk's first.
    indexes = list(widechar_indexes(string)) + [len(string)]

    parts = []
    start = 0
    currwidth = 0
    for end in indexes:
        if currwidth >= width:
            break
        if end > start:
            chunk = string[start:end]
            parts.append(chunk)
            # Widths add up chunk by chunk, so keep a running total.
            currwidth += strwidth(chunk)
            start = end

    excess = currwidth - width
    # `parts` is empty when `width` is zero or negative; there is nothing to
    # trim then.
    if excess > 0 and parts:
        # Only the last chunk can overshoot.  Dropping `excess` trailing
        # characters removes narrow characters first and the leading wide
        # character last.
        parts[-1] = parts[-1][:-excess]

    if tail is not None:
        parts.append(tail)

    return ''.join(parts)