Python cupy.zeros_like() Examples

Example #1
Source File:    From cupy with MIT License 6 votes vote down vote up
def _correlate_or_convolve(input, weights, output, mode, cval, origin,
                           convolution=False):
    """Run an n-D correlation, or a convolution when ``convolution`` is set.

    Args:
        input: Array to filter.
        weights: Filter weights. An empty array short-circuits to zeros.
        output: Output specification forwarded to ``_call_kernel``.
        mode: Boundary mode forwarded to the kernel builder.
        cval: Fill value forwarded to the kernel builder.
        origin: Filter origin, validated by ``_check_nd_args``.
        convolution: When true, the weights are reversed along every axis
            and the origins are mirrored before the correlation kernel is
            built.

    Returns:
        ``cupy.zeros_like(input)`` when ``weights`` is empty, otherwise the
        result of ``_call_kernel``.
    """
    origins, int_type = _check_nd_args(input, weights, mode, origin)
    if weights.size == 0:
        return cupy.zeros_like(input)
    if convolution:
        # A convolution is a correlation with the weights reversed on
        # every axis.
        weights = weights[(slice(None, None, -1),) * weights.ndim]
        origins = list(origins)
        for i, wsize in enumerate(weights.shape):
            origins[i] = -origins[i]
            # Even-sized axes have no exact center, so the mirrored origin
            # is shifted by one more.
            if wsize % 2 == 0:
                origins[i] -= 1
        origins = tuple(origins)
    kernel = _get_correlate_kernel(mode, weights.shape, int_type,
                                   origins, cval)
    return _call_kernel(kernel, input, weights, output)
Example #2
Source File:    From cupy with MIT License 6 votes vote down vote up
def _rank_filter(input, get_rank, size=None, footprint=None, output=None,
                 mode="reflect", cval=0.0, origin=0):
    """Apply a rank filter whose rank is derived from the footprint size.

    Args:
        input: Array to filter.
        get_rank: Callable that receives the number of set footprint
            elements and returns the rank to select.
        size: Forwarded to ``_check_size_footprint_structure``.
        footprint: Forwarded to ``_check_size_footprint_structure``.
        output: Output specification forwarded to the kernel call.
        mode: Boundary mode.
        cval: Fill value; converted to ``float`` for the rank kernel.
        origin: Filter origin, validated by ``_check_nd_args``.

    Returns:
        ``cupy.zeros_like(input)`` for an empty footprint, otherwise the
        filtered array.

    Raises:
        RuntimeError: If the rank returned by ``get_rank`` lies outside
            ``[0, filter_size)``.
    """
    _, footprint, _ = _check_size_footprint_structure(
        input.ndim, size, footprint, None, force_footprint=True)
    origins, int_type = _check_nd_args(input, footprint, mode, origin,
                                       'footprint')
    if footprint.size == 0:
        return cupy.zeros_like(input)
    filter_size = int(footprint.sum())
    rank = get_rank(filter_size)
    if rank < 0 or rank >= filter_size:
        raise RuntimeError('rank not within filter footprint size')
    # The lowest and highest ranks are plain min/max filters, so delegate
    # to the dedicated implementation.
    if rank == 0:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, 'min')
    if rank == filter_size - 1:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, 'max')
    kernel = _get_rank_kernel(filter_size, rank, mode, footprint.shape,
                              origins, float(cval), int_type)
    return _call_kernel(kernel, input, footprint, output, None, bool)
Example #3
Source File:    From neural_renderer with MIT License 6 votes vote down vote up
def forward_gpu(self, inputs):
        """Compute the cross product of consecutive 3-vectors on the GPU.

        Args:
            inputs: Pair ``(a, b)`` of arrays. The kernel reads them as flat
                runs of three floats per vector.
                NOTE(review): presumably both are float32 with a trailing
                axis of length 3 -- confirm against the caller.

        Returns:
            One-element tuple holding the float32 result array, which has
            the same shape as ``a``.
        """
        a, b = inputs
        c = cp.zeros_like(a, 'float32')
        # One kernel work item per 3-vector; j indexes the vector.
        chainer.cuda.elementwise(
            'int32 j, raw T a, raw T b',
            'raw T c',
            '''
                float* ap = (float*)&a[j * 3];
                float* bp = (float*)&b[j * 3];
                float* cp = (float*)&c[j * 3];
                cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
                cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
                cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
            ''',
            'function',
        )(
            cp.arange(a.size / 3).astype('int32'), a, b, c,
        )
        return c,
Example #4
Source File:    From cupy with MIT License 5 votes vote down vote up
def _mean_driver(input, labels, index, return_count=False, use_kern=False):
    """Compute per-label means of ``input`` for the labels in ``index``.

    Args:
        input: Data array.
        labels: Label array forwarded to the kernel.
        index: Array of labels to report; the outputs take its shape.
        return_count: When true, also return the per-label element counts.
        use_kern: When true, delegate entirely to
            ``_ndimage_mean_kernel_2``.

    Returns:
        The per-label mean, or a ``(mean, count)`` tuple when
        ``return_count`` is true.
    """
    if use_kern:
        return _ndimage_mean_kernel_2(input, labels, index,
                                      return_count=return_count)

    out = cupy.zeros_like(index, dtype=cupy.float64)
    count = cupy.zeros_like(index, dtype=cupy.uint64)
    # The kernel returns the per-label totals and counts.
    total, count = _ndimage_mean_kernel(input,
                                        labels, index, index.size, out, count)
    mean = total / count
    if return_count:
        return mean, count
    return mean
Example #5
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_single_proc_single_dev(self):
        """All-reduce with a single communicator must reproduce the input."""
        comms = cuda.nccl.NcclCommunicator.initAll(1)
        for comm in comms:
            sendbuf = cupy.arange(10)
            recvbuf = cupy.zeros_like(sendbuf)
            # NCCL takes raw device pointers and a raw stream handle.
            comm.allReduce(sendbuf.data.ptr, recvbuf.data.ptr, 10,
                           cuda.nccl.NCCL_INT64, cuda.nccl.NCCL_SUM,
                           cuda.Stream.null.ptr)
        # With one rank, the summed result equals the buffer that was sent.
        assert cupy.allclose(sendbuf, recvbuf)
Example #6
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like(self, xp, dtype, order):
        a = xp.ndarray((2, 3, 4), dtype=dtype)
        return xp.zeros_like(a, order=order) 
Example #7
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like_subok(self):
        """Passing ``subok=True`` to ``cupy.zeros_like`` raises TypeError."""
        template = cupy.ndarray((2, 3, 4))
        expect_type_error = pytest.raises(TypeError)
        with expect_type_error:
            cupy.zeros_like(template, subok=True)
Example #8
Source File:    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like_reshape(self, xp, dtype, order):
        a = xp.ndarray((2, 3, 4), dtype=dtype)
        return xp.zeros_like(a, order=order, shape=self.shape) 
Example #9
Source File:    From DeepRL with MIT License 5 votes vote down vote up
def grad(self, _cur_output, _next_output, _next_action,
             _batch_tuples, _err_list, _err_count, _k):
        """Fill ``_cur_output.grad`` with the gradient of a squared TD error.

        For every tuple in ``_batch_tuples`` the target is the tuple's reward,
        plus ``self.config.gamma * next_action_value`` when the next state is
        still in game. The loss is ``cur_action_value - target_value`` and
        ``2 * loss`` is written at the taken action's slot in the gradient.
        ``abs(loss)`` is accumulated into ``_err_list[i]`` and
        ``_err_count[i]`` is incremented.

        NOTE(review): several expressions are missing from this excerpt
        (flagged inline below), so the block is not runnable as shown.
        """
        # alloc
        # NOTE(review): the argument lists of both zeros_like calls and the
        # `else:` separating them are missing here; presumably they mirror
        # `_cur_output`'s data array -- confirm against the original source.
        if self.config.gpu:
            _cur_output.grad = cupy.zeros_like(
            _cur_output.grad = np.zeros_like(

        # compute grad from each tuples
        for i in range(len(_batch_tuples)):
            # if use bootstrap and masked
            # NOTE(review): the body of this `if` is missing; presumably it
            # skips the masked tuple -- confirm.
            if not _batch_tuples[i].mask[_k]:

            # NOTE(review): the right-hand side of the continued assignment
            # below is missing; presumably the current output's value for
            # the taken action -- confirm.
            cur_action_value = \
            reward = _batch_tuples[i].reward
            target_value = reward
            # if not empty position, not terminal state
            if _batch_tuples[i].next_state.in_game:
                # NOTE(review): the right-hand side of `next_action_value`
                # is missing; presumably read from `_next_output` at
                # `_next_action[i]` -- confirm.
                next_action_value = \
                target_value += self.config.gamma * next_action_value
            loss = cur_action_value - target_value
            # d(loss ** 2) / d(cur_action_value) = 2 * loss
            _cur_output.grad[i][_batch_tuples[i].action] = 2 * loss

            _err_list[i] += abs(loss)
            _err_count[i] += 1 
Example #10
Source File:    From cupy with MIT License 4 votes vote down vote up
def _min_or_max_filter(input, size, footprint, structure, output, mode, cval,
                       origin, func):
    """Apply an n-D minimum or maximum filter.

    ``structure`` is used by morphology.grey_erosion() and grey_dilation()
    and not by the regular min/max filters.

    Args:
        input: Array to filter.
        size: Forwarded to ``_check_size_footprint_structure``.
        footprint: Forwarded to ``_check_size_footprint_structure``.
        structure: Optional structuring element; must have the same number
            of dimensions as ``input`` when given.
        output: Output array or specification.
        mode: Boundary mode, or one mode per axis on the separable path.
        cval: Fill value.
        origin: Filter origin, or one origin per axis.
        func: ``'min'`` for a minimum filter; on the separable path any
            other value selects the maximum filter.

    Returns:
        The filtered array.

    Raises:
        RuntimeError: If ``structure`` has a different number of dimensions
            than ``input``.
    """
    sizes, footprint, structure = _check_size_footprint_structure(
        input.ndim, size, footprint, structure)

    if sizes is not None:
        # Separable filter, run as a series of 1D filters
        fltr = minimum_filter1d if func == 'min' else maximum_filter1d
        output_orig = output
        output = _get_output(output, input)
        sizes = _fix_sequence_arg(sizes, input.ndim, 'size', int)
        modes = _fix_sequence_arg(mode, input.ndim, 'mode', _check_mode)
        origins = _fix_sequence_arg(origin, input.ndim, 'origin', int)
        n_filters = sum(size > 1 for size in sizes)
        if n_filters == 0:
            # Every axis has size <= 1, so the filter is the identity.
            output[...] = input[...]
            return output
        # We can't operate in-place efficiently, so use a 2-buffer system
        temp = _get_output(output.dtype, input) if n_filters > 1 else None
        first = True
        iterator = zip(sizes, modes, origins)
        for axis, (size, mode, origin) in enumerate(iterator):
            if size <= 1:
                continue
            fltr(input, size, axis, output, mode, cval, origin)
            # Swap buffers. After the first pass the caller's input must
            # not be written to, so the scratch buffer takes its place.
            input, output = output, temp if first else input
            first = False
        # The final result may have landed in the scratch buffer; copy it
        # into the array the caller supplied.
        if isinstance(output_orig, cupy.ndarray) and input is not output_orig:
            output_orig[...] = input
            input = output_orig
        return input

    origins, int_type = _check_nd_args(input, footprint, mode, origin,
                                       'footprint')
    if structure is not None and structure.ndim != input.ndim:
        raise RuntimeError('structure array has incorrect shape')

    if footprint.size == 0:
        return cupy.zeros_like(input)
    # Index of the footprint element that sits over the output pixel.
    center = tuple(x//2 + origin
                   for x, origin in zip(footprint.shape, origins))
    kernel = _get_min_or_max_kernel(mode, footprint.shape, func,
                                    origins, float(cval), int_type,
                                    has_structure=structure is not None,
                                    has_central_value=bool(footprint[center]))
    return _call_kernel(kernel, input, footprint, output, structure,
                        weights_dtype=bool)
Example #11
Source File:    From cupy with MIT License 4 votes vote down vote up
def sum(input, labels=None, index=None):
    """Calculates the sum of the values of an n-D image array, optionally
       at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, must be same shape as `input`.
        index (cupy.ndarray or None): `labels` to include in output. If None
            (default), all values where `labels` is non-zero are used.

    Returns:
       sum (cupy.ndarray): sum of values, for each sub-region if
       `labels` and `index` are specified.

    Raises:
        TypeError: If `input` is not a cupy.ndarray or is complex, if a
            non-empty `labels` is not a cupy.ndarray, or if `index` is
            neither a cupy.ndarray nor an int.

    .. seealso:: :func:`scipy.ndimage.sum`
    """
    import warnings

    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        raise TypeError("cupyx.scipy.ndimage.sum doesnt support {}".format(
            input.dtype))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [cupy.int32, cupy.float16, cupy.float32,
                           cupy.float64, cupy.uint32, cupy.uint64]:
        warnings.warn(
            'Using the slower implmentation as '
            'cupyx.scipy.ndimage.sum supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implmentation', util.PerformanceWarning)
        use_kern = True

    if labels is None:
        return input.sum()
    if len(labels) == 0:
        return cupy.array([], dtype=cupy.int64)

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    if index is None:
        return input[labels != 0].sum()

    input, labels = cupy.broadcast_arrays(input, labels)

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        # A scalar label needs no kernel: mask and reduce directly.
        return (input[labels == index]).sum()

    out = cupy.zeros_like(index, dtype=cupy.float64)

    # The following parameters for sum were determined using a Tesla P100.
    if (input.size >= 262144 and index.size <= 4) or use_kern:
        return _ndimage_sum_kernel_2(input, labels, index, out)
    return _ndimage_sum_kernel(input, labels, index, index.size, out)
Example #12
Source File:    From cupy with MIT License 4 votes vote down vote up
def test_array_gen_cpy(self):
        """Round-trip an array through a CUDA array and expect it unchanged.

        Reads ``self.xp``, ``self.stream``, ``self.dimensions``,
        ``self.n_channels`` and ``self.dtype`` to pick the array module, the
        optional stream, the CUDA array extents and the element type.
        """
        xp = numpy if self.xp == 'numpy' else cupy
        stream = None if not self.stream else cupy.cuda.Stream()
        width, height, depth = self.dimensions
        n_channel = self.n_channels

        # A zero extent means that dimension is absent.
        dim = 3 if depth != 0 else 2 if height != 0 else 1
        if dim == 3:
            shape = (depth, height, n_channel * width)
        elif dim == 2:
            shape = (height, n_channel * width)
        else:
            shape = (n_channel * width,)

        # generate input data and allocate output buffer
        if self.dtype in (numpy.float16, numpy.float32):
            arr = xp.random.random(shape).astype(self.dtype)
            kind = runtime.cudaChannelFormatKindFloat
        else:  # int
            # randint() in NumPy <= 1.10 does not have the dtype argument...
            arr = xp.random.randint(100, size=shape).astype(self.dtype)
            if self.dtype in (numpy.int8, numpy.int16, numpy.int32):
                kind = runtime.cudaChannelFormatKindSigned
            else:
                kind = runtime.cudaChannelFormatKindUnsigned
        arr2 = xp.zeros_like(arr)

        assert arr.flags['C_CONTIGUOUS']
        assert arr2.flags['C_CONTIGUOUS']

        # create a CUDA array
        # Only the first n_channel channels get a non-zero bit width.
        ch_bits = [0, 0, 0, 0]
        for i in range(n_channel):
            ch_bits[i] = arr.dtype.itemsize*8
        # unpacking arguments using *ch_bits is not supported before PY35...
        ch = ChannelFormatDescriptor(ch_bits[0], ch_bits[1], ch_bits[2],
                                     ch_bits[3], kind)
        cu_arr = CUDAarray(ch, width, height, depth)

        # copy from input to CUDA array, and back to output
        cu_arr.copy_from(arr, stream)
        cu_arr.copy_to(arr2, stream)

        # check input and output are identical
        if stream is not None:
            # Wait for the copies queued on the stream before comparing.
            stream.synchronize()
        assert (arr == arr2).all()