Python numpy.unicode() Examples

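The following are 30 code examples showing how to use numpy.unicode(). Note that numpy.unicode was an alias for Python's built-in text type (str on Python 3); it was deprecated in NumPy 1.20 and removed in NumPy 1.24, so on current releases use str or numpy.str_ instead. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.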

You may check out the related API usage on the sidebar.

You may also want to check out all available functions/classes of the module numpy, or try the search function.

Example 1
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 6 votes vote down vote up
def test_unicode_roundtrip(self):
    """Round-trip a non-ASCII string through savetxt/loadtxt as UTF-16-LE.

    The one-element array is written to, and read back from, a file for
    every suffix collected below; each read must equal the original array.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    utf8 = b'\xcf\x96'.decode('UTF-8')
    a = np.array([utf8], dtype=np.str_)
    # our gz wrapper supports encoding
    suffixes = ['', '.gz']
    # stdlib 2 versions do not support encoding
    if MAJVER > 2:
        if HAS_BZ2:
            suffixes.append('.bz2')
        if HAS_LZMA:
            suffixes.extend(['.xz', '.lzma'])
    with tempdir() as tmpdir:
        for suffix in suffixes:
            # Build the path once so the write and the read cannot diverge.
            target = os.path.join(tmpdir, 'test.csv' + suffix)
            np.savetxt(target, a, fmt=['%s'], encoding='UTF-16-LE')
            b = np.loadtxt(target, encoding='UTF-16-LE', dtype=np.str_)
            assert_array_equal(a, b)
Example 2
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example 3
Project: recruit   Author: Frank-qlu   File: test_nditer.py    License: Apache License 2.0 6 votes vote down vote up
def test_iter_buffering_string():
    """Check buffered nditer casting rules for byte and text strings.

    For both a bytes array (``S4``) and a text array (``U4``), requesting a
    narrower ``op_dtypes`` must raise ``TypeError``, while a wider one
    yields elements of the wider dtype.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.str_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 4
Project: auto-alt-text-lambda-api   Author: abhisuri97   File: test_nditer.py    License: MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    """Check buffered nditer casting rules for byte and text strings.

    For both a bytes array (``S4``) and a text array (``U4``), requesting a
    narrower ``op_dtypes`` must raise ``TypeError``, while a wider one
    yields elements of the wider dtype.

    ``np.str_`` is used instead of ``np.unicode`` (deprecated in NumPy
    1.20, removed in 1.24; Python 3 spelling).  The expected values are
    written as ``b''``/``u''`` literals rather than through the
    ``asbytes``/``sixu`` helpers, matching the newer copies of this test.
    """
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.str_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 5
Project: vnpy_crypto   Author: birforce   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
    """Round-trip a non-ASCII string through savetxt/loadtxt as UTF-16-LE.

    The one-element array is written to, and read back from, a file for
    every suffix collected below; each read must equal the original array.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    utf8 = b'\xcf\x96'.decode('UTF-8')
    a = np.array([utf8], dtype=np.str_)
    # our gz wrapper supports encoding
    suffixes = ['', '.gz']
    # stdlib 2 versions do not support encoding
    if MAJVER > 2:
        if HAS_BZ2:
            suffixes.append('.bz2')
        if HAS_LZMA:
            suffixes.extend(['.xz', '.lzma'])
    with tempdir() as tmpdir:
        for suffix in suffixes:
            # Build the path once so the write and the read cannot diverge.
            target = os.path.join(tmpdir, 'test.csv' + suffix)
            np.savetxt(target, a, fmt=['%s'], encoding='UTF-16-LE')
            b = np.loadtxt(target, encoding='UTF-16-LE', dtype=np.str_)
            assert_array_equal(a, b)
Example 6
Project: vnpy_crypto   Author: birforce   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example 7
Project: vnpy_crypto   Author: birforce   File: test_nditer.py    License: MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    """Check buffered nditer casting rules for byte and text strings.

    For both a bytes array (``S4``) and a text array (``U4``), requesting a
    narrower ``op_dtypes`` must raise ``TypeError``, while a wider one
    yields elements of the wider dtype.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.str_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 8
Project: Computable   Author: ktraunmueller   File: test_nditer.py    License: MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    """Check buffered nditer casting rules for byte and text strings.

    For both a bytes array (``S4``) and a text array (``U4``), requesting a
    narrower ``op_dtypes`` must raise ``TypeError``, while a wider one
    yields elements of the wider dtype.

    ``np.str_`` is used instead of ``np.unicode`` (deprecated in NumPy
    1.20, removed in 1.24; Python 3 spelling).  The expected values are
    written as ``b''``/``u''`` literals rather than through the
    ``asbytes``/``sixu`` helpers, matching the newer copies of this test,
    and the stray trailing semicolons are gone.
    """
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.str_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 9
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
    """Round-trip a non-ASCII string through savetxt/loadtxt as UTF-16-LE.

    The one-element array is written to, and read back from, a file for
    every suffix collected below; each read must equal the original array.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    utf8 = b'\xcf\x96'.decode('UTF-8')
    a = np.array([utf8], dtype=np.str_)
    # our gz wrapper supports encoding
    suffixes = ['', '.gz']
    # stdlib 2 versions do not support encoding
    if MAJVER > 2:
        if HAS_BZ2:
            suffixes.append('.bz2')
        if HAS_LZMA:
            suffixes.extend(['.xz', '.lzma'])
    with tempdir() as tmpdir:
        for suffix in suffixes:
            # Build the path once so the write and the read cannot diverge.
            target = os.path.join(tmpdir, 'test.csv' + suffix)
            np.savetxt(target, a, fmt=['%s'], encoding='UTF-16-LE')
            b = np.loadtxt(target, encoding='UTF-16-LE', dtype=np.str_)
            assert_array_equal(a, b)
Example 10
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example 11
Project: Mastering-Elasticsearch-7.0   Author: PacktPublishing   File: test_nditer.py    License: MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    """Check buffered nditer casting rules for byte and text strings.

    For both a bytes array (``S4``) and a text array (``U4``), requesting a
    narrower ``op_dtypes`` must raise ``TypeError``, while a wider one
    yields elements of the wider dtype.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.str_)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6'))
Example 12
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
    """Round-trip a non-ASCII string through savetxt/loadtxt as UTF-16-LE.

    The one-element array is written to, and read back from, a file for
    every suffix collected below; each read must equal the original array.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    utf8 = b'\xcf\x96'.decode('UTF-8')
    a = np.array([utf8], dtype=np.str_)
    # our gz wrapper supports encoding
    suffixes = ['', '.gz']
    # stdlib 2 versions do not support encoding
    if MAJVER > 2:
        if HAS_BZ2:
            suffixes.append('.bz2')
        if HAS_LZMA:
            suffixes.extend(['.xz', '.lzma'])
    with tempdir() as tmpdir:
        for suffix in suffixes:
            # Build the path once so the write and the read cannot diverge.
            target = os.path.join(tmpdir, 'test.csv' + suffix)
            np.savetxt(target, a, fmt=['%s'], encoding='UTF-16-LE')
            b = np.loadtxt(target, encoding='UTF-16-LE', dtype=np.str_)
            assert_array_equal(a, b)
Example 13
Project: GraphicDesignPatternByPython   Author: Relph1119   File: test_io.py    License: MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example 14
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_unicode(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        with tempdir() as tmpdir:
            # set encoding as on windows it may not be unicode even on py3
            np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
                       encoding='UTF-8') 
Example 15
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_unicode_bytestream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = BytesIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read().decode('UTF-8'), utf8 + '\n') 
Example 16
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_unicode_stringstream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = StringIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read(), utf8 + '\n') 
Example 17
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_stringload(self):
        # umlaute
        nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(nonascii.encode("UTF-16"))
            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
            assert_array_equal(x, nonascii) 
Example 18
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_converters_decode(self):
    """Check a user converter that decodes the raw field itself.

    Two UTF-8 bytes are written to a ``TextIO`` object (a helper defined
    outside this block) and loaded through ``self.loadfunc`` with a
    column-0 converter that calls ``.decode('UTF-8')``; the result must
    equal the decoded character.

    ``np.str_`` is used instead of ``np.unicode``: that alias was
    deprecated in NumPy 1.20 and removed in 1.24 (Python 3 spelling).
    """
    # test converters that decode strings
    c = TextIO()
    c.write(b'\xcf\x96')
    c.seek(0)
    x = self.loadfunc(c, dtype=np.str_,
                      converters={0: lambda raw: raw.decode('UTF-8')})
    a = np.array([b'\xcf\x96'.decode('UTF-8')])
    assert_array_equal(x, a)
Example 19
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_converters_nodecode(self):
        # test native string converters enabled by setting an encoding
        utf8 = b'\xcf\x96'.decode('UTF-8')
        with temppath() as path:
            with io.open(path, 'wt', encoding='UTF-8') as f:
                f.write(utf8)
            x = self.loadfunc(path, dtype=np.unicode,
                              converters={0: lambda x: x + 't'},
                              encoding='UTF-8')
            a = np.array([utf8 + 't'])
            assert_array_equal(x, a) 
Example 20
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_binary_load(self):
        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                b"20,2,3,\xc3\x95scar\n\r"
        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(butf8)
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
            assert_array_equal(x, sutf8)
            # test broken latin1 conversion people now rely on
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
            assert_array_equal(x, np.array(x, dtype="S")) 
Example 21
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_utf8_userconverters_with_explicit_dtype(self):
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as f:
                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
                                 usecols=(2, 3), converters={2: np.unicode},
                                 encoding='UTF-8')
        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                           dtype=[('', '|U11'), ('', float)])
        assert_equal(test, control) 
Example 22
Project: recruit   Author: Frank-qlu   File: test_io.py    License: Apache License 2.0 5 votes vote down vote up
def test_gft_using_generator(self):
    """Feed genfromtxt from a generator instead of a file.

    The generator yields the digits 0..9, one per item, each passed through
    ``asbytes``; the parsed result must equal ``np.arange(10)``.
    """
    # gft doesn't work with unicode.
    digit_lines = (asbytes("%d" % number) for number in range(10))
    parsed = np.genfromtxt(digit_lines)
    assert_array_equal(parsed, np.arange(10))
Example 23
Project: recruit   Author: Frank-qlu   File: test_regression.py    License: Apache License 2.0 5 votes vote down vote up
def test_unaligned_unicode_access(self):
        # Ticket #825
        for i in range(1, 9):
            msg = 'unicode offset: %d chars' % i
            t = np.dtype([('a', 'S%d' % i), ('b', 'U2')])
            x = np.array([(b'a', u'b')], dtype=t)
            if sys.version_info[0] >= 3:
                assert_equal(str(x), "[(b'a', 'b')]", err_msg=msg)
            else:
                assert_equal(str(x), "[('a', u'b')]", err_msg=msg) 
Example 24
Project: recruit   Author: Frank-qlu   File: test_regression.py    License: Apache License 2.0 5 votes vote down vote up
def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for NumPy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([u'abcd'])
        assert_equal(a.dtype.itemsize, 16) 
Example 25
Project: recruit   Author: Frank-qlu   File: test_regression.py    License: Apache License 2.0 5 votes vote down vote up
def test_unicode_alloc_dealloc_match(self):
        # Ticket #1578, the mismatch only showed up when running
        # python-debug for python versions >= 2.7, and then as
        # a core dump and error message.
        a = np.array(['abc'], dtype=np.unicode)[0]
        del a 
Example 26
Project: dgl   Author: dmlc   File: knowledge_graph.py    License: Apache License 2.0 5 votes vote down vote up
def to_unicode(input):
    """Return ``str(input)``.

    No decoding is performed: on Python 3 a ``bytes`` argument is rendered
    in its repr form (``str(b"x") == "b'x'"``), not decoded as UTF-8.

    The unreachable string literal that held a commented-out Python 2
    implementation after the ``return`` has been removed.
    """
    # FIXME (lingfan): not sure about python 2 and 3 str compatibility
    return str(input)
Example 27
Project: auto-alt-text-lambda-api   Author: abhisuri97   File: test_dtype.py    License: MIT License 5 votes vote down vote up
def test_run(self):
        """Only test hash runs at all."""
        for t in [np.int, np.float, np.complex, np.int32, np.str, np.object,
                np.unicode]:
            dt = np.dtype(t)
            hash(dt) 
Example 28
Project: auto-alt-text-lambda-api   Author: abhisuri97   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_unaligned_unicode_access(self, level=rlevel):
    """Check ``str()`` of a record whose text field follows an ``S<n>`` field.

    The bytes field width runs from 1 to 8 so the following ``U2`` field
    starts at each of those byte offsets; the printed record must be the
    same every time.  ``level`` is accepted but not used in the body.
    """
    # Ticket #825
    # The expected repr depends only on the interpreter's major version.
    if sys.version_info[0] >= 3:
        expected = "[(b'a', 'b')]"
    else:
        expected = "[('a', u'b')]"
    for offset in range(1, 9):
        record_type = np.dtype([('a', 'S%d' % offset), ('b', 'U2')])
        record = np.array([(asbytes('a'), sixu('b'))], dtype=record_type)
        assert_equal(str(record), expected,
                     err_msg='unicode offset: %d chars' % offset)
Example 29
Project: auto-alt-text-lambda-api   Author: abhisuri97   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for Numpy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([sixu('abcd')])
        assert_equal(a.dtype.itemsize, 16) 
Example 30
Project: auto-alt-text-lambda-api   Author: abhisuri97   File: test_regression.py    License: MIT License 5 votes vote down vote up
def test_unicode_alloc_dealloc_match(self):
        # Ticket #1578, the mismatch only showed up when running
        # python-debug for python versions >= 2.7, and then as
        # a core dump and error message.
        a = np.array(['abc'], dtype=np.unicode)[0]
        del a