Python numpy.unicode() Examples

The following are 30 code examples of numpy.unicode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy , or try the search function .
Example #1
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b) 
Example #2
Source File: test_nditer.py    From auto-alt-text-lambda-api with MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6')) 
Example #3
Source File: test_nditer.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6')) 
Example #4
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b) 
Example #5
Source File: test_io.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example #6
Source File: test_io.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b) 
Example #7
Source File: test_nditer.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6')) 
Example #8
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example #9
Source File: test_io.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example #10
Source File: test_nditer.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                  op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], b'abc')
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'))
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], u'abc')
    assert_equal(i[0].dtype, np.dtype('U6')) 
Example #11
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 6 votes vote down vote up
def test_utf8_file(self):
        utf8 = b"\xcf\x96"
        latin1 = b"\xf6\xfc\xf6"
        with temppath() as path:
            with open(path, "wb") as f:
                f.write((b"test1,testNonethe" + utf8 + b",test3\n") * 2)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            ctl = np.array([
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"],
                     ["test1", "testNonethe" + utf8.decode("UTF-8"), "test3"]],
                     dtype=np.unicode)
            assert_array_equal(test, ctl)

            # test a mixed dtype
            with open(path, "wb") as f:
                f.write(b"0,testNonethe" + utf8)
            test = np.genfromtxt(path, dtype=None, comments=None,
                                 delimiter=',', encoding="UTF-8")
            assert_equal(test['f0'], 0)
            assert_equal(test['f1'], "testNonethe" + utf8.decode("UTF-8")) 
Example #12
Source File: test_nditer.py    From Computable with MIT License 6 votes vote down vote up
def test_iter_buffering_string():
    # Safe casting disallows shrinking strings
    a = np.array(['abc', 'a', 'abcd'], dtype=np.bytes_)
    assert_equal(a.dtype, np.dtype('S4'));
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='S2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='S6')
    assert_equal(i[0], asbytes('abc'))
    assert_equal(i[0].dtype, np.dtype('S6'))

    a = np.array(['abc', 'a', 'abcd'], dtype=np.unicode)
    assert_equal(a.dtype, np.dtype('U4'));
    assert_raises(TypeError, nditer, a, ['buffered'], ['readonly'],
                    op_dtypes='U2')
    i = nditer(a, ['buffered'], ['readonly'], op_dtypes='U6')
    assert_equal(i[0], sixu('abc'))
    assert_equal(i[0].dtype, np.dtype('U6')) 
Example #13
Source File: test_io.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_unicode_roundtrip(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        # our gz wrapper support encoding
        suffixes = ['', '.gz']
        # stdlib 2 versions do not support encoding
        if MAJVER > 2:
            if HAS_BZ2:
                suffixes.append('.bz2')
            if HAS_LZMA:
                suffixes.extend(['.xz', '.lzma'])
        with tempdir() as tmpdir:
            for suffix in suffixes:
                np.savetxt(os.path.join(tmpdir, 'test.csv' + suffix), a,
                           fmt=['%s'], encoding='UTF-16-LE')
                b = np.loadtxt(os.path.join(tmpdir, 'test.csv' + suffix),
                               encoding='UTF-16-LE', dtype=np.unicode)
                assert_array_equal(a, b) 
Example #14
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_converters_nodecode(self):
        # test native string converters enabled by setting an encoding
        utf8 = b'\xcf\x96'.decode('UTF-8')
        with temppath() as path:
            with io.open(path, 'wt', encoding='UTF-8') as f:
                f.write(utf8)
            x = self.loadfunc(path, dtype=np.unicode,
                              converters={0: lambda x: x + 't'},
                              encoding='UTF-8')
            a = np.array([utf8 + 't'])
            assert_array_equal(x, a) 
Example #15
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_binary_load(self):
        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                b"20,2,3,\xc3\x95scar\n\r"
        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(butf8)
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
            assert_array_equal(x, sutf8)
            # test broken latin1 conversion people now rely on
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
            assert_array_equal(x, np.array(x, dtype="S")) 
Example #16
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_utf8_userconverters_with_explicit_dtype(self):
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as f:
                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
                                 usecols=(2, 3), converters={2: np.unicode},
                                 encoding='UTF-8')
        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                           dtype=[('', '|U11'), ('', float)])
        assert_equal(test, control) 
Example #17
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_gft_using_generator(self):
        # gft doesn't work with unicode.
        def count():
            for i in range(10):
                yield asbytes("%d" % i)

        res = np.genfromtxt(count())
        assert_array_equal(res, np.arange(10)) 
Example #18
Source File: test_io.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_utf8_userconverters_with_explicit_dtype(self):
        utf8 = b'\xcf\x96'
        with temppath() as path:
            with open(path, 'wb') as f:
                f.write(b'skip,skip,2001-01-01' + utf8 + b',1.0,skip')
            test = np.genfromtxt(path, delimiter=",", names=None, dtype=float,
                                 usecols=(2, 3), converters={2: np.unicode},
                                 encoding='UTF-8')
        control = np.array([('2001-01-01' + utf8.decode('UTF-8'), 1.)],
                           dtype=[('', '|U11'), ('', float)])
        assert_equal(test, control) 
Example #19
Source File: test_dtype.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_run(self):
        """Only test hash runs at all."""
        for t in [int, float, complex, np.int32, str, object,
                np.unicode]:
            dt = np.dtype(t)
            hash(dt) 
Example #20
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_binary_load(self):
        butf8 = b"5,6,7,\xc3\x95scarscar\n\r15,2,3,hello\n\r"\
                b"20,2,3,\xc3\x95scar\n\r"
        sutf8 = butf8.decode("UTF-8").replace("\r", "").splitlines()
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(butf8)
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype=np.unicode)
            assert_array_equal(x, sutf8)
            # test broken latin1 conversion people now rely on
            with open(path, "rb") as f:
                x = np.loadtxt(f, encoding="UTF-8", dtype="S")
            x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
            assert_array_equal(x, np.array(x, dtype="S")) 
Example #21
Source File: test_regression.py    From Computable with MIT License 5 votes vote down vote up
def test_unaligned_unicode_access(self, level=rlevel) :
        """Ticket #825"""
        for i in range(1, 9) :
            msg = 'unicode offset: %d chars'%i
            t = np.dtype([('a', 'S%d'%i), ('b', 'U2')])
            x = np.array([(asbytes('a'), sixu('b'))], dtype=t)
            if sys.version_info[0] >= 3:
                assert_equal(str(x), "[(b'a', 'b')]", err_msg=msg)
            else:
                assert_equal(str(x), "[('a', u'b')]", err_msg=msg) 
Example #22
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_converters_nodecode(self):
        # test native string converters enabled by setting an encoding
        utf8 = b'\xcf\x96'.decode('UTF-8')
        with temppath() as path:
            with io.open(path, 'wt', encoding='UTF-8') as f:
                f.write(utf8)
            x = self.loadfunc(path, dtype=np.unicode,
                              converters={0: lambda x: x + 't'},
                              encoding='UTF-8')
            a = np.array([utf8 + 't'])
            assert_array_equal(x, a) 
Example #23
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_converters_decode(self):
        # test converters that decode strings
        c = TextIO()
        c.write(b'\xcf\x96')
        c.seek(0)
        x = self.loadfunc(c, dtype=np.unicode,
                          converters={0: lambda x: x.decode('UTF-8')})
        a = np.array([b'\xcf\x96'.decode('UTF-8')])
        assert_array_equal(x, a) 
Example #24
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_stringload(self):
        # umlaute
        nonascii = b'\xc3\xb6\xc3\xbc\xc3\xb6'.decode("UTF-8")
        with temppath() as path:
            with open(path, "wb") as f:
                f.write(nonascii.encode("UTF-16"))
            x = self.loadfunc(path, encoding="UTF-16", dtype=np.unicode)
            assert_array_equal(x, nonascii) 
Example #25
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unicode_stringstream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = StringIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read(), utf8 + '\n') 
Example #26
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unicode_bytestream(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        s = BytesIO()
        np.savetxt(s, a, fmt=['%s'], encoding='UTF-8')
        s.seek(0)
        assert_equal(s.read().decode('UTF-8'), utf8 + '\n') 
Example #27
Source File: test_io.py    From Mastering-Elasticsearch-7.0 with MIT License 5 votes vote down vote up
def test_unicode(self):
        utf8 = b'\xcf\x96'.decode('UTF-8')
        a = np.array([utf8], dtype=np.unicode)
        with tempdir() as tmpdir:
            # set encoding as on windows it may not be unicode even on py3
            np.savetxt(os.path.join(tmpdir, 'test.csv'), a, fmt=['%s'],
                       encoding='UTF-8') 
Example #28
Source File: test_regression.py    From Computable with MIT License 5 votes vote down vote up
def test_unicode_alloc_dealloc_match(self):
        # Ticket #1578, the mismatch only showed up when running
        # python-debug for python versions >= 2.7, and then as
        # a core dump and error message.
        a = np.array(['abc'], dtype=np.unicode)[0]
        del a 
Example #29
Source File: test_regression.py    From Computable with MIT License 5 votes vote down vote up
def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for Numpy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([sixu('abcd')])
        assert_equal(a.dtype.itemsize, 16) 
Example #30
Source File: test_regression.py    From GraphicDesignPatternByPython with MIT License 5 votes vote down vote up
def test_string_truncation_ucs2(self):
        # Ticket #2081. Python compiled with two byte unicode
        # can lead to truncation if itemsize is not properly
        # adjusted for NumPy's four byte unicode.
        if sys.version_info[0] >= 3:
            a = np.array(['abcd'])
        else:
            a = np.array([u'abcd'])
        assert_equal(a.dtype.itemsize, 16)