Python cupy.int32() Examples

The following are 30 code examples of cupy.int32(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module cupy, or try the search function.
Example #1
Source File: measurements.py    From cupy with MIT License 6 votes vote down vote up
def _kernel_finalize():
    """Build the elementwise kernel that rewrites ``y`` with final labels.

    Per element, a negative ``y[i]`` is replaced by 0.  Any other value is
    searched for in the raw ``labels`` array by repeatedly halving the index
    range ``0 .. maxlabel - 1``; ``y[i]`` is then overwritten with the index
    at which the search stopped, plus one.

    NOTE(review): the halving search presumably relies on ``labels`` being
    sorted in ascending order -- confirm against the caller.

    Returns:
        The ``cupy.ElementwiseKernel`` named ``cupyx_nd_label_finalize``.
    """
    in_params = 'int32 maxlabel'
    out_params = 'raw int32 labels, raw Y y'
    operation = '''
        if (y[i] < 0) {
            y[i] = 0;
            continue;
        }
        int yi = y[i];
        int j_min = 0;
        int j_max = maxlabel - 1;
        int j = (j_min + j_max) / 2;
        while (j_min < j_max) {
            if (yi == labels[j]) break;
            if (yi < labels[j]) j_max = j - 1;
            else j_min = j + 1;
            j = (j_min + j_max) / 2;
        }
        y[i] = j + 1;
        '''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_finalize')
Example #2
Source File: non_maximum_suppression.py    From chainercv with MIT License 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    """Run the raw ``nms_kernel`` over ``bbox`` and post-process its mask.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is the number of boxes.  It
            is converted to a contiguous float32 CuPy array before launch.
        thresh: Threshold forwarded to the kernel as a float32 scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    box_count = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(box_count / threads_per_block).astype(np.int32)

    # Square launch grid of column blocks, 64 threads in each block.
    grid = (col_blocks, col_blocks, 1)
    block = (threads_per_block, 1, 1)

    # One uint64 word per (box, column block) pair, zero-initialised.
    mask_dev = cp.zeros((box_count * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kernel = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, mask_dev)
    kernel(grid, block, args=kernel_args)

    # Copy the mask back to the host for the post-processing step.
    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, box_count, threads_per_block, col_blocks)
    return selection, n_selec
Example #3
Source File: non_maximum_suppression.py    From chainercv with MIT License 6 votes vote down vote up
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    """Select box indices on the GPU via ``_call_nms_kernel``.

    Args:
        bbox: Array of boxes indexed along its first axis.
        thresh: Threshold forwarded to ``_call_nms_kernel``.
        score: Optional per-box scores; when given, boxes are processed in
            descending score order.
        limit: Optional cap on the number of returned indices.

    Returns:
        Array of selected indices into the original ``bbox`` (an empty int32
        CuPy array when ``bbox`` is empty).
    """
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]
    if score is None:
        order = cp.arange(n_bbox, dtype=np.int32)
    else:
        order = score.argsort()[::-1].astype(np.int32)

    picked, n_picked = _call_nms_kernel(bbox[order, :], thresh)
    # Map positions in the sorted list back to indices of the input boxes.
    kept = order[picked[:n_picked]]
    if limit is None:
        return kept
    return kept[:limit]
Example #4
Source File: test_raw.py    From cupy with MIT License 6 votes vote down vote up
def test_template_specialization(self):
        """Each specialised kernel is expected to leave its input squared.

        Skipped when the backend is ``nvcc``.
        """
        if self.backend == 'nvcc':
            self.skipTest('nvcc does not support template specialization')

        # Compile all requested name expressions into one module.
        name_expressions = [
            'my_sqrt<int>',
            'my_sqrt<float>',
            'my_sqrt<complex<double>>',
            'my_func',
        ]
        module = cupy.RawModule(
            code=test_cxx_template,
            options=('--std=c++11',),
            name_expressions=name_expressions)

        element_types = (
            cupy.int32, cupy.float32, cupy.complex128, cupy.float64)
        for kernel_name, element_type in zip(name_expressions, element_types):
            kernel = module.get_function(kernel_name)

            # The expectation is computed before the launch because the
            # check below compares the input array itself against it.
            data = cupy.testing.shaped_random((10,), dtype=element_type)
            expected = data**2

            kernel((1,), (10,), (data, 10))

            assert cupy.allclose(data, expected)
Example #5
Source File: measurements.py    From cupy with MIT License 6 votes vote down vote up
def _label(x, structure, y):
    """Label ``x`` into ``y`` by running the labelling kernels in sequence.

    The non-zero entries of ``structure`` are turned into per-axis offsets
    (index minus one).  Only the offsets whose base-3 combined value is
    negative are passed to the connect kernel as directions.

    Args:
        x: Input array; only ``x.ndim`` is read here, the data goes to
            ``_kernel_init``.
        structure: Host array whose non-zero entries define the directions.
        y: Output array, modified in place by the kernels.

    Returns:
        ``int(count[0])`` as accumulated by ``_kernel_count``.
    """
    ndim = x.ndim
    nonzero = numpy.where(structure != 0)
    vecs = [nonzero[axis] - 1 for axis in range(ndim)]

    # Fold the per-axis offsets into a single base-3 number per entry.
    offset = vecs[0]
    for vec in vecs[1:]:
        offset = offset * 3 + vec
    indxs = numpy.where(offset < 0)[0]

    dirs = cupy.array(
        [[vec[dr] for vec in vecs] for dr in indxs], dtype=numpy.int32)
    ndirs = indxs.shape[0]
    y_shape = cupy.array(y.shape, dtype=numpy.int32)
    count = cupy.zeros(2, dtype=numpy.int32)

    _kernel_init()(x, y)
    _kernel_connect()(y_shape, dirs, ndirs, ndim, y, size=y.size)
    _kernel_count()(y, count, size=y.size)

    maxlabel = int(count[0])
    labels = cupy.empty(maxlabel, dtype=numpy.int32)
    _kernel_labels()(y, count, labels, size=y.size)
    _kernel_finalize()(maxlabel, cupy.sort(labels), y, size=y.size)
    return maxlabel
Example #6
Source File: non_maximum_suppression.py    From FATE with Apache License 2.0 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    """Run the ``nms_kernel`` over ``bbox`` and post-process its mask.

    Args:
        bbox: Array of boxes; ``bbox.shape[0]`` is the number of boxes.  It
            is converted to a contiguous float32 CuPy array before launch.
        thresh: Threshold forwarded to the kernel as a float32 scalar.

    Returns:
        The ``(selection, n_selec)`` pair produced by ``_nms_gpu_post``.
    """
    # NOTE: PyTorch has no unsigned-long tensor type.  That does not matter
    # here because an ndarray is returned in the end, so the CuPy-based
    # implementation is kept unmodified.
    box_count = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(box_count / threads_per_block).astype(np.int32)

    # Square launch grid of column blocks, 64 threads in each block.
    grid = (col_blocks, col_blocks, 1)
    block = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((box_count * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kernel = _load_kernel('nms_kernel', _nms_gpu_code)
    kernel_args = (cp.int32(box_count), cp.float32(thresh), bbox, mask_dev)
    kernel(grid, block, args=kernel_args)

    # Copy the mask back to the host for the post-processing step.
    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, box_count, threads_per_block, col_blocks)
    return selection, n_selec
Example #7
Source File: non_maximum_suppression.py    From FATE with Apache License 2.0 6 votes vote down vote up
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    """Select box indices on the GPU and return them as a host array.

    Args:
        bbox: Array of boxes indexed along its first axis.
        thresh: Threshold forwarded to ``_call_nms_kernel``.
        score: Optional per-box scores; when given, boxes are processed in
            descending score order.
        limit: Optional cap on the number of returned indices.

    Returns:
        Selected indices into the original ``bbox``, converted with
        ``cp.asnumpy``.  For an empty ``bbox`` the empty int32 CuPy array is
        returned directly, without that conversion.
    """
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]
    if score is None:
        order = cp.arange(n_bbox, dtype=np.int32)
    else:
        order = score.argsort()[::-1].astype(np.int32)

    picked, n_picked = _call_nms_kernel(bbox[order, :], thresh)
    # Map positions in the sorted list back to indices of the input boxes.
    kept = order[picked[:n_picked]]
    if limit is not None:
        kept = kept[:limit]
    return cp.asnumpy(kept)
Example #8
Source File: non_maximum_suppression.py    From chainer-compiler with MIT License 6 votes vote down vote up
def _call_nms_kernel(bbox, thresh):
    assert False, "Not supported."
    n_bbox = bbox.shape[0]
    threads_per_block = 64
    col_blocks = np.ceil(n_bbox / threads_per_block).astype(np.int32)
    blocks = (col_blocks, col_blocks, 1)
    threads = (threads_per_block, 1, 1)

    mask_dev = cp.zeros((n_bbox * col_blocks,), dtype=np.uint64)
    bbox = cp.ascontiguousarray(bbox, dtype=np.float32)
    kern = cp.RawKernel(_nms_gpu_code, 'nms_kernel')
    kern(blocks, threads, args=(cp.int32(n_bbox), cp.float32(thresh),
                                bbox, mask_dev))

    mask_host = mask_dev.get()
    selection, n_selec = _nms_gpu_post(
        mask_host, n_bbox, threads_per_block, col_blocks)
    return selection, n_selec 
Example #9
Source File: non_maximum_suppression.py    From chainer-compiler with MIT License 6 votes vote down vote up
def _non_maximum_suppression_gpu(bbox, thresh, score=None, limit=None):
    """Select box indices on the GPU via ``_call_nms_kernel``.

    Args:
        bbox: Array of boxes indexed along its first axis.
        thresh: Threshold forwarded to ``_call_nms_kernel``.
        score: Optional per-box scores; when given, boxes are processed in
            descending score order.
        limit: Optional cap on the number of returned indices.

    Returns:
        Array of selected indices into the original ``bbox`` (an empty int32
        CuPy array when ``bbox`` is empty).
    """
    if len(bbox) == 0:
        return cp.zeros((0,), dtype=np.int32)

    n_bbox = bbox.shape[0]
    order = (
        cp.arange(n_bbox, dtype=np.int32)
        if score is None
        else score.argsort()[::-1].astype(np.int32)
    )

    picked, n_picked = _call_nms_kernel(bbox[order, :], thresh)
    # Translate positions in the sorted list back to original box indices.
    kept = order[picked[:n_picked]]
    return kept if limit is None else kept[:limit]
Example #10
Source File: char_encdec.py    From knmt with GNU General Public License v3.0 6 votes vote down vote up
def do_eval(args):
    """Load a character encoder-decoder and drop into an IPython shell.

    Reads ``args.config``, ``args.model`` and ``args.gpu``.  When
    ``args.gpu`` is not None the model is moved to that device and CuPy is
    used as the array module; otherwise ``np`` is used.

    The local names defined here (``ced``, ``charlist``, ``chardict``,
    ``xp``, ``enc``, ``dec``) are what the embedded shell exposes, so they
    are part of the interactive interface.
    """
    ced, charlist, chardict = load_encdec_from_config(args.config, args.model)

    if args.gpu is not None:
        chainer.cuda.Device(args.gpu).use()
        # Imported lazily so CPU-only runs never import cupy.
        import cupy
        ced = ced.to_gpu(args.gpu)
        xp = cupy
    else:
        xp = np

    def enc(word):
        """Map ``word`` to character ids and return the encoder state."""
        w_array=xp.array([chardict[c] for c in word], dtype=xp.int32)
        hx=ced.enc.compute_h((w_array,), train=False)
        return hx

    def dec(hx):
        """Decode ``hx`` (at most 40 steps) into a string via ``charlist``."""
        decoded = ced.dec.decode(hx, length = 40, train = False)
        # Only the first decoded sequence is rendered.
        return "".join([charlist[int(idx)] for idx in decoded[0]])

    # Hand control to the user; enc/dec above are meant to be called there.
    IPython.embed()
Example #11
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_single_dim(self, xp, scp, dtype):
        """Apply ``self.op`` to a 100-element image with an array index."""
        shape = (100,)
        image = self._make_image(shape, xp, dtype)
        label = testing.shaped_random(shape, xp, dtype=xp.int32, scale=3)
        index = xp.array([0, 1, 2])
        measure = getattr(scp.ndimage, self.op)
        return measure(image, label, index)
Example #12
Source File: char_encdec.py    From knmt with GNU General Public License v3.0 5 votes vote down vote up
def append_eos_id(self, a):
        """Return ``a`` with ``self.eos_id`` appended along axis 0.

        The appended element is built as a one-element int32 array using the
        array module ``self.xp``.
        """
        xp = self.xp
        eos = xp.array([self.eos_id], dtype = xp.int32)
        return xp.concatenate((a, eos), axis = 0)
Example #13
Source File: char_encdec.py    From knmt with GNU General Public License v3.0 5 votes vote down vote up
def decode(self, hx, length = 10, verbose = False, train = False):
        """Decode greedily from ``hx`` for at most ``length`` steps.

        At every step the argmax of the output logits is taken for each
        input.  Once an input produces ``self.eos_id`` it is marked finished
        and no further ids are recorded for it.

        Args:
            hx: Initial hidden state; ``hx.data.shape[1]`` is used as the
                number of inputs decoded in parallel.
            length: Maximum number of decoding steps.
            verbose: When true, print the logits and chosen ids per step.
            train: Forwarded to ``self.nstep_dec``.

        Returns:
            A list with one list of predicted ids per input; the end-of-
            sequence id itself is not included.
        """
        hx_dec = hx
        cx_dec = None
#         prev_word = xp.array([self.start_id], dtype = xp.float32)
        nb_inpt = hx.data.shape[1]
        result = [[] for _ in xrange(nb_inpt)]
        finished = [False] * nb_inpt
        for i in xrange(length):
            logits = self.lin_out(hx_dec.reshape(-1, self.H))
            if verbose:
                print "logits", i
                print logits.data
            # Greedy choice: most likely id for every input.
            prev_word = self.xp.argmax(logits.data, axis = 1).astype(self.xp.int32)
            for num_inpt in xrange(nb_inpt):
                if prev_word[num_inpt] == self.eos_id:
                    finished[num_inpt] = True
                if not finished[num_inpt]:
                    result[num_inpt].append(prev_word[num_inpt])
                if finished[num_inpt]:
                    # Finished inputs are fed id 0 from now on.
                    prev_word[num_inpt] = 0
                    
            if verbose:
                print "prev_word", prev_word
#             print prev_word
            # Embed the chosen ids and split them into one piece per input
            # before feeding the recurrent decoder.
            prev_word_emb = F.split_axis(self.c_emb_dec(prev_word), len(prev_word), axis = 0, force_tuple = True)
            hx_dec, cx_dec, xs_dec = self.nstep_dec(hx_dec, cx_dec, prev_word_emb, train = train)
        return result
Example #14
Source File: non_maximum_suppression.py    From chainercv with MIT License 5 votes vote down vote up
def _non_maximum_suppression_cpu(bbox, thresh, score=None, limit=None):
    if len(bbox) == 0:
        return np.zeros((0,), dtype=np.int32)

    if score is not None:
        order = score.argsort()[::-1]
        bbox = bbox[order]
    bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)

    selec = np.zeros(bbox.shape[0], dtype=bool)
    for i, b in enumerate(bbox):
        tl = np.maximum(b[:2], bbox[selec, :2])
        br = np.minimum(b[2:], bbox[selec, 2:])
        area = np.prod(br - tl, axis=1) * (tl < br).all(axis=1)

        iou = area / (bbox_area[i] + bbox_area[selec] - area)
        if (iou >= thresh).any():
            continue

        selec[i] = True
        if limit is not None and np.count_nonzero(selec) >= limit:
            break

    selec = np.where(selec)[0]
    if score is not None:
        selec = order[selec]
    return selec.astype(np.int32) 
Example #15
Source File: test_tiling.py    From cupy with MIT License 5 votes vote down vote up
def test_method(self):
        """``ndarray.repeat`` with a CuPy array as ``repeats`` must raise.

        Expects a ``ValueError`` whose message matches ``repeats``.
        """
        arr = testing.shaped_arange((2, 3, 4), cupy)
        repeat_counts = cupy.array([2, 3], dtype=cupy.int32)
        with pytest.raises(ValueError, match=r'repeats'):
            arr.repeat(repeat_counts)
Example #16
Source File: test_tiling.py    From cupy with MIT License 5 votes vote down vote up
def test_func(self):
        """``cupy.repeat`` with a CuPy array as ``repeats`` must raise.

        Expects a ``ValueError`` whose message matches ``repeats``.
        """
        arr = testing.shaped_arange((2, 3, 4), cupy)
        repeat_counts = cupy.array([2, 3], dtype=cupy.int32)
        with pytest.raises(ValueError, match=r'repeats'):
            cupy.repeat(arr, repeat_counts)
Example #17
Source File: char_encdec.py    From knmt with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, V, Ec, H):
        """Register the decoder links and remember ``H`` and the EOS id.

        Args:
            V: Number of rows of the embedding table; the output layer has
                ``V + 1`` units and ``V`` itself is used as the
                end-of-sequence id.
            Ec: Embedding size, also the LSTM input size.
            H: LSTM hidden size, also the input size of the output layer.
        """
        super(CharDec, self).__init__(
            lin_out = L.Linear(H, V + 1),
            c_emb_dec = L.EmbedID(V, Ec),
            nstep_dec = L.NStepLSTM(1, Ec, H, dropout = 0.5)
        )
#         self.start_id = V
        self.H = H
        # Kept as a plain Python value rather than an int32 array.
        self.eos_id = V #self.xp.array([V], dtype = self.xp.int32)
Example #18
Source File: test_raw.py    From cupy with MIT License 5 votes vote down vote up
def test_const_memory(self):
        """Fill the module's ``some_array`` and run ``multiply_by_const``.

        The output starts as all ones and is expected to equal the data
        written into ``some_array`` after the launch.
        """
        size = 100
        module = cupy.RawModule(code=test_const_mem, backend=self.backend)
        kernel = module.get_function('multiply_by_const')

        # Wrap the module-level symbol in an ndarray so it can be assigned.
        symbol_ptr = module.get_global('some_array')
        const_view = cupy.ndarray((size,), cupy.float32, symbol_ptr)
        data = cupy.arange(size, dtype=cupy.float32)
        const_view[...] = data

        output = cupy.ones(size, dtype=cupy.float32)
        kernel((1,), (size,), (output, cupy.int32(size)))
        assert (data == output).all()
Example #19
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_wrong_index_type(self):
        """A plain Python list as ``index`` must raise ``TypeError``."""
        image = self._make_image((100,), cupy, cupy.int32)
        label = cupy.random.randint(1, 3, dtype=cupy.int32, size=100)
        bad_index = [1, 2, 3]
        measure = getattr(cupyx.scipy.ndimage, self.op)
        with pytest.raises(TypeError):
            measure(image, label, bad_index)
Example #20
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_wrong_image_type(self):
        """A plain Python list as ``image`` must raise ``TypeError``."""
        bad_image = list(range(100))
        label = cupy.random.randint(1, 3, dtype=cupy.int32, size=100)
        index = cupy.array([1, 2, 3])
        measure = getattr(cupyx.scipy.ndimage, self.op)
        with pytest.raises(TypeError):
            measure(bad_image, label, index)
Example #21
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_wrong_label_shape(self):
        """A 50-element label for a 100-element image must raise ValueError."""
        image = self._make_image((100,), cupy, cupy.int32)
        short_label = cupy.random.randint(1, 3, dtype=cupy.int32, size=50)
        index = cupy.array([1, 2, 3])
        measure = getattr(cupyx.scipy.ndimage, self.op)
        with pytest.raises(ValueError):
            measure(image, short_label, index)
Example #22
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_wrong_dtype(self, dtype):
        """An image built with the parametrised ``dtype`` must raise TypeError."""
        image = self._make_image((100,), cupy, dtype)
        # No ``size`` is given, so a single random value is drawn.
        label = cupy.random.randint(1, 4, dtype=cupy.int32)
        index = cupy.array([1, 2, 3])
        measure = getattr(cupyx.scipy.ndimage, self.op)
        with pytest.raises(TypeError):
            measure(image, label, index)
Example #23
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_scalar_index(self, xp, scp, dtype):
        """Apply ``self.op`` to a 100-element image with the scalar index 1."""
        shape = (100,)
        image = self._make_image(shape, xp, dtype)
        label = testing.shaped_random(shape, xp, dtype=xp.int32, scale=3)
        measure = getattr(scp.ndimage, self.op)
        return measure(image, label, 1)
Example #24
Source File: test_measurements.py    From cupy with MIT License 5 votes vote down vote up
def test_ndimage_no_index(self, xp, scp, dtype):
        """Apply ``self.op`` to a 100-element image without any index."""
        shape = (100,)
        image = self._make_image(shape, xp, dtype)
        label = testing.shaped_random(shape, xp, dtype=xp.int32, scale=3)
        measure = getattr(scp.ndimage, self.op)
        return measure(image, label)
Example #25
Source File: non_maximum_suppression.py    From chainer-compiler with MIT License 5 votes vote down vote up
def _non_maximum_suppression_cpu(bbox, thresh, score=None, limit=None):
    if len(bbox) == 0:
        return np.zeros((0,), dtype=np.int32)

    if score is not None:
        order = score.argsort()[::-1]
        bbox = bbox[order]
    bbox_area = np.prod(bbox[:, 2:] - bbox[:, :2], axis=1)

    selec = np.zeros(bbox.shape[0], dtype=bool)
    for i, b in enumerate(bbox):
        tl = np.maximum(b[:2], bbox[selec, :2])
        br = np.minimum(b[2:], bbox[selec, 2:])
        area = np.prod(br - tl, axis=1) * (tl < br).all(axis=1)

        iou = area / (bbox_area[i] + bbox_area[selec] - area)
        if (iou >= thresh).any():
            continue

        selec[i] = True
        if limit is not None and np.count_nonzero(selec) >= limit:
            break

    selec = np.where(selec)[0]
    if score is not None:
        selec = order[selec]
    return selec.astype(np.int32) 
Example #26
Source File: test_construct.py    From cupy with MIT License 5 votes vote down vote up
def test_csc_with_dtype(self):
        """``hstack`` of CSC blocks honours ``dtype`` and keeps int32 indices.

        Checks the data dtype of the result against the requested
        ``self.dtype`` (the same check ``test_csr_with_dtype`` makes for
        ``vstack``) and that ``indices`` and ``indptr`` are int32.
        """

        A, B = self.data()

        actual = construct.hstack([A.tocsc(), B.tocsc()],
                                  dtype=self.dtype)
        # The test is about the ``dtype`` argument, so verify it took effect.
        self.assertEqual(actual.dtype, self.dtype)
        self.assertEqual(actual.indices.dtype, cupy.int32)
        self.assertEqual(actual.indptr.dtype, cupy.int32)
Example #27
Source File: test_construct.py    From cupy with MIT License 5 votes vote down vote up
def test_csr_with_dtype(self):
        """``vstack`` of CSR blocks honours ``dtype`` and keeps int32 indices."""
        A, B = self.data()
        stacked = construct.vstack([A.tocsr(), B.tocsr()], dtype=self.dtype)

        self.assertEqual(stacked.dtype, self.dtype)
        for attr in ('indices', 'indptr'):
            self.assertEqual(getattr(stacked, attr).dtype, cupy.int32)
Example #28
Source File: char_encdec.py    From knmt with GNU General Public License v3.0 5 votes vote down vote up
def __init__(self, V, Hw, Hs):
        """Register the output layer and the one-layer LSTM decoder.

        Args:
            V: Not used by the active code (only by the commented-out lines
                below).
            Hw: Output size of ``lin_out`` and input size of the LSTM.
            Hs: LSTM hidden size and input size of ``lin_out``.
        """
        super(CharDec, self).__init__(
            lin_out = L.Linear(Hs, Hw),
            nstep_dec = L.NStepLSTM(1, Hw, Hs, dropout = 0.5)
        )
#         self.start_id = V
#         self.H = H
#         self.eos_id = V #self.xp.array([V], dtype = self.xp.int32)
Example #29
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_count():
    """Build the elementwise kernel that counts root elements of ``y``.

    Elements with a negative ``y[i]`` are skipped.  For every other element
    the chain ``j = y[j]`` is followed until ``j == y[j]``.  If that end
    point differs from ``i`` it is stored into ``y[i]``; otherwise ``i`` is
    its own root and ``count[0]`` is incremented atomically.

    Returns:
        The ``cupy.ElementwiseKernel`` named ``cupyx_nd_label_count``.
    """
    in_params = ''
    out_params = 'raw Y y, raw int32 count'
    operation = '''
        if (y[i] < 0) continue;
        int j = i;
        while (j != y[j]) { j = y[j]; }
        if (j != i) y[i] = j;
        else atomicAdd(&count[0], 1);
        '''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_count')
Example #30
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _kernel_connect():
    """Build the elementwise kernel that links neighbouring elements of ``y``.

    Elements with a negative ``y[i]`` are skipped.  For each of the
    ``ndirs`` directions the flat index ``i`` is decomposed using ``shape``,
    shifted by the matching row of ``dirs`` and recomposed into a neighbour
    index ``k``; neighbours that fall outside ``shape`` or have a negative
    ``y[k]`` are ignored.  The chains starting at ``i`` and ``k`` are then
    followed to their end points, and the larger end point is redirected to
    the smaller one with ``atomicCAS``, retrying from the observed value
    when the swap did not take effect.

    Returns:
        The ``cupy.ElementwiseKernel`` named ``cupyx_nd_label_connect``.
    """
    in_params = 'raw int32 shape, raw int32 dirs, int32 ndirs, int32 ndim'
    out_params = 'raw Y y'
    operation = '''
        if (y[i] < 0) continue;
        for (int dr = 0; dr < ndirs; dr++) {
            int j = i;
            int rest = j;
            int stride = 1;
            int k = 0;
            for (int dm = ndim-1; dm >= 0; dm--) {
                int pos = rest % shape[dm] + dirs[dm + dr * ndim];
                if (pos < 0 || pos >= shape[dm]) {
                    k = -1;
                    break;
                }
                k += pos * stride;
                rest /= shape[dm];
                stride *= shape[dm];
            }
            if (k < 0) continue;
            if (y[k] < 0) continue;
            while (1) {
                while (j != y[j]) { j = y[j]; }
                while (k != y[k]) { k = y[k]; }
                if (j == k) break;
                if (j < k) {
                    int old = atomicCAS( &y[k], k, j );
                    if (old == k) break;
                    k = old;
                }
                else {
                    int old = atomicCAS( &y[j], j, k );
                    if (old == j) break;
                    j = old;
                }
            }
        }
        '''
    return cupy.ElementwiseKernel(
        in_params, out_params, operation, 'cupyx_nd_label_connect')