Python cupy.zeros_like() Examples

The following are 12 code examples of cupy.zeros_like(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module cupy, or try the search function.
Example #1
Source File: filters.py    From cupy with MIT License 6 votes vote down vote up
def _correlate_or_convolve(input, weights, output, mode, cval, origin,
                           convolution=False):
    """Correlate `input` with `weights`, or convolve when requested.

    An empty `weights` array short-circuits to a zero array shaped like
    `input`. For a convolution the weights are reversed along every axis
    and each origin is negated (and shifted by one more on even-sized
    axes) before the kernel is requested from `_get_correlate_kernel`
    and run through `_call_kernel`.
    """
    origins, int_type = _check_nd_args(input, weights, mode, origin)
    if weights.size == 0:
        return cupy.zeros_like(input)

    if convolution:
        # Reverse the weights along all axes at once.
        reverse_all = (slice(None, None, -1),) * weights.ndim
        weights = weights[reverse_all]
        mirrored = list(origins)
        for axis, extent in enumerate(weights.shape):
            # Even-sized axes need one extra step to stay centred.
            shift = 1 if extent % 2 == 0 else 0
            mirrored[axis] = -mirrored[axis] - shift
        origins = tuple(mirrored)

    kernel = _get_correlate_kernel(
        mode, weights.shape, int_type, origins, cval)
    return _call_kernel(kernel, input, weights, output)
Example #2
Source File: filters.py    From cupy with MIT License 6 votes vote down vote up
def _rank_filter(input, get_rank, size=None, footprint=None, output=None,
                 mode="reflect", cval=0.0, origin=0):
    """Apply a rank filter whose rank is derived from the footprint size.

    Args:
        input: array to filter.
        get_rank: callable receiving the number of set footprint elements
            and returning the rank to select.
        size, footprint: forwarded to `_check_size_footprint_structure`
            (with ``force_footprint=True``) to obtain the footprint.
        output, mode, cval, origin: forwarded to the underlying filter.

    Returns:
        The filtered result; a zero array shaped like `input` when the
        footprint is empty.

    Raises:
        RuntimeError: if the rank is negative or not smaller than the
            footprint size.
    """
    _, footprint, _ = _check_size_footprint_structure(
        input.ndim, size, footprint, None, force_footprint=True)
    origins, int_type = _check_nd_args(input, footprint, mode, origin,
                                       'footprint')
    if footprint.size == 0:
        return cupy.zeros_like(input)

    filter_size = int(footprint.sum())
    rank = get_rank(filter_size)
    if rank < 0 or filter_size <= rank:
        raise RuntimeError('rank not within filter footprint size')

    # The lowest and highest ranks are plain min/max filters.
    if rank == 0:
        extreme = 'min'
    elif rank == filter_size - 1:
        extreme = 'max'
    else:
        extreme = None
    if extreme is not None:
        return _min_or_max_filter(input, None, footprint, None, output, mode,
                                  cval, origins, extreme)

    kernel = _get_rank_kernel(filter_size, rank, mode, footprint.shape,
                              origins, float(cval), int_type)
    return _call_kernel(kernel, input, footprint, output, None, bool)
Example #3
Source File: cross.py    From neural_renderer with MIT License 6 votes vote down vote up
def forward_gpu(self, inputs):
        """Compute cross products of consecutive 3-vectors on the GPU.

        Args:
            inputs: pair ``(a, b)`` of arrays. The kernel reads each
                consecutive group of three elements as one 3-vector through
                a ``float*`` cast (presumably float32 data whose last axis
                has length 3 -- confirm against callers).

        Returns:
            tuple: one-element tuple holding the float32 result array, which
            has the same shape as ``a``.
        """
        a, b = inputs
        c = cp.zeros_like(a, 'float32')
        # Integer division: one work item per complete 3-vector. True
        # division would hand arange a float and, when a.size is not a
        # multiple of 3, round the count up so the last work item would
        # index past the end of the buffers.
        n_vectors = a.size // 3
        chainer.cuda.elementwise(
            'int32 j, raw T a, raw T b',
            'raw T c',
            '''
                float* ap = (float*)&a[j * 3];
                float* bp = (float*)&b[j * 3];
                float* cp = (float*)&c[j * 3];
                cp[0] = ap[1] * bp[2] - ap[2] * bp[1];
                cp[1] = ap[2] * bp[0] - ap[0] * bp[2];
                cp[2] = ap[0] * bp[1] - ap[1] * bp[0];
            ''',
            'function',
        )(
            cp.arange(n_vectors, dtype='int32'), a, b, c,
        )
        return c,
Example #4
Source File: measurements.py    From cupy with MIT License 5 votes vote down vote up
def _mean_driver(input, labels, index, return_count=False, use_kern=False):
    """Compute per-label means of `input`, optionally with the counts.

    With `use_kern` set, the work is delegated entirely to
    `_ndimage_mean_kernel_2`. Otherwise zero-initialised float64 sum and
    uint64 count buffers shaped like `index` are passed to
    `_ndimage_mean_kernel`, and the returned sums are divided by the
    returned counts.

    Returns:
        The means, or a ``(means, counts)`` pair when `return_count` is
        true.
    """
    if use_kern:
        return _ndimage_mean_kernel_2(input, labels, index,
                                      return_count=return_count)

    sum_buffer = cupy.zeros_like(index, cupy.float64)
    count_buffer = cupy.zeros_like(index, dtype=cupy.uint64)
    totals, counts = _ndimage_mean_kernel(
        input, labels, index, index.size, sum_buffer, count_buffer)

    means = totals / counts
    return (means, counts) if return_count else means
Example #5
Source File: test_nccl.py    From cupy with MIT License 5 votes vote down vote up
def test_single_proc_single_dev(self):
        """Sum-allReduce a buffer through communicators from ``initAll(1)``.

        The reduction is issued inside an NCCL group and the test asserts
        that the receive buffer matches the send buffer afterwards.
        """
        nccl = cuda.nccl
        n_elements = 10

        communicators = nccl.NcclCommunicator.initAll(1)
        nccl.groupStart()
        for comm in communicators:
            cuda.Device(comm.device_id()).use()
            sendbuf = cupy.arange(n_elements)
            recvbuf = cupy.zeros_like(sendbuf)
            comm.allReduce(
                sendbuf.data.ptr, recvbuf.data.ptr, n_elements,
                nccl.NCCL_INT64, nccl.NCCL_SUM,
                cuda.Stream.null.ptr)
        nccl.groupEnd()

        assert cupy.allclose(sendbuf, recvbuf)
Example #6
Source File: test_basic.py    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like(self, xp, dtype, order):
        a = xp.ndarray((2, 3, 4), dtype=dtype)
        return xp.zeros_like(a, order=order) 
Example #7
Source File: test_basic.py    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like_subok(self):
        """``cupy.zeros_like`` is expected to reject ``subok=True``."""
        template = cupy.ndarray((2, 3, 4))
        with pytest.raises(TypeError):
            cupy.zeros_like(template, subok=True)
Example #8
Source File: test_basic.py    From cupy with MIT License 5 votes vote down vote up
def test_zeros_like_reshape(self, xp, dtype, order):
        a = xp.ndarray((2, 3, 4), dtype=dtype)
        return xp.zeros_like(a, order=order, shape=self.shape) 
Example #9
Source File: BootQAgent.py    From DeepRL with MIT License 5 votes vote down vote up
def grad(self, _cur_output, _next_output, _next_action,
             _batch_tuples, _err_list, _err_count, _k):
        """Fill ``_cur_output.grad`` for the batch samples enabled in head `_k`.

        For every sample whose ``mask[_k]`` is truthy, the target is the
        sample reward plus, when ``next_state.in_game`` is truthy, the
        discounted value of the chosen next action. The gradient entry for
        the taken action is set to twice the difference between the current
        action value and that target. `_err_list` and `_err_count` are
        updated in place with the absolute error and a hit counter.
        """
        # Allocate the gradient with the array module matching the device.
        xp = cupy if self.config.gpu else np
        _cur_output.grad = xp.zeros_like(_cur_output.data)

        for idx, sample in enumerate(_batch_tuples):
            # Samples masked out for this bootstrap head contribute nothing.
            if not sample.mask[_k]:
                continue

            action = sample.action
            cur_action_value = _cur_output.data[idx][action].tolist()

            target_value = sample.reward
            # Bootstrap from the next state only when it is not terminal.
            if sample.next_state.in_game:
                next_action_value = \
                    _next_output.data[idx][_next_action[idx]].tolist()
                target_value += self.config.gamma * next_action_value

            loss = cur_action_value - target_value
            _cur_output.grad[idx][action] = 2 * loss

            _err_list[idx] += abs(loss)
            _err_count[idx] += 1
Example #10
Source File: filters.py    From cupy with MIT License 4 votes vote down vote up
def _min_or_max_filter(input, size, footprint, structure, output, mode, cval,
                       origin, func):
    """Run a minimum or maximum filter over `input`.

    Args:
        input: array to filter.
        size, footprint, structure: validated together by
            `_check_size_footprint_structure`. `structure` is used by
            morphology.grey_erosion() and grey_dilation() and not by the
            regular min/max filters.
        output: output array or specification, resolved by `_get_output`.
        mode, cval, origin: boundary handling and filter placement; scalar
            or per-axis values in the separable path.
        func: ``'min'`` selects the minimum filter, anything else the
            maximum filter.

    Returns:
        The filtered array; a zero array shaped like `input` when the
        footprint is empty.

    Raises:
        RuntimeError: if `structure` is given with a different number of
            dimensions than `input`.
    """
    sizes, footprint, structure = _check_size_footprint_structure(
        input.ndim, size, footprint, structure)

    if sizes is not None:
        # Separable filter, run as a series of 1D filters
        fltr = minimum_filter1d if func == 'min' else maximum_filter1d
        output_orig = output
        output = _get_output(output, input)
        sizes = _fix_sequence_arg(sizes, input.ndim, 'size', int)
        modes = _fix_sequence_arg(mode, input.ndim, 'mode', _check_mode)
        origins = _fix_sequence_arg(origin, input.ndim, 'origin', int)
        n_filters = sum(size > 1 for size in sizes)
        if n_filters == 0:
            output[...] = input[...]
            return output
        # We can't operate in-place efficiently, so use a 2-buffer system
        temp = _get_output(output.dtype, input) if n_filters > 1 else None
        first = True
        iterator = zip(sizes, modes, origins)
        for axis, (size, mode, origin) in enumerate(iterator):
            if size <= 1:
                continue
            fltr(input, size, axis, output, mode, cval, origin)
            if first:
                # After the first pass, write into temp so the caller's
                # input array is never used as a destination.
                input, output = output, temp
                # Without clearing this flag every later swap would pick
                # temp again, making the third and later passes run with
                # the same buffer as both source and destination.
                first = False
            else:
                # Ping-pong between the two scratch buffers.
                input, output = output, input
        if isinstance(output_orig, cupy.ndarray) and input is not output_orig:
            # The last pass landed in the other buffer; copy it into the
            # array the caller supplied.
            output_orig[...] = input
            input = output_orig
        return input

    origins, int_type = _check_nd_args(input, footprint, mode, origin,
                                       'footprint')
    if structure is not None and structure.ndim != input.ndim:
        raise RuntimeError('structure array has incorrect shape')

    if footprint.size == 0:
        return cupy.zeros_like(input)
    # Index of the footprint element at the (origin-shifted) centre.
    center = tuple(x//2 + origin
                   for x, origin in zip(footprint.shape, origins))
    kernel = _get_min_or_max_kernel(mode, footprint.shape, func,
                                    origins, float(cval), int_type,
                                    has_structure=structure is not None,
                                    has_central_value=bool(footprint[center]))
    return _call_kernel(kernel, input, footprint, output, structure,
                        weights_dtype=bool)
Example #11
Source File: measurements.py    From cupy with MIT License 4 votes vote down vote up
def sum(input, labels=None, index=None):
    """Calculates the sum of the values of an n-D image array, optionally
       at specified sub-regions.

    Args:
        input (cupy.ndarray): Nd-image data to process.
        labels (cupy.ndarray or None): Labels defining sub-regions in `input`.
            If not None, must be same shape as `input`.
        index (cupy.ndarray or None): `labels` to include in output. If None
            (default), all values where `labels` is non-zero are used.

    Returns:
       sum (cupy.ndarray): sum of values, for each sub-region if
       `labels` and `index` are specified.

    Raises:
        TypeError: if `input` is not a cupy.ndarray or has a complex dtype,
            if `labels` is neither None nor a cupy.ndarray, or if `index`
            is neither a cupy.ndarray nor an int.

    .. seealso:: :func:`scipy.ndimage.sum`
    """
    if not isinstance(input, cupy.ndarray):
        raise TypeError('input must be cupy.ndarray')

    if input.dtype in (cupy.complex64, cupy.complex128):
        # The placeholder previously carried a stray '%' that leaked into
        # the message.
        raise TypeError("cupyx.scipy.ndimage.sum doesn't support {}".format(
            input.dtype.type))

    use_kern = False
    # There are constraints on types because of atomicAdd() in CUDA.
    if input.dtype not in [cupy.int32, cupy.float16, cupy.float32,
                           cupy.float64, cupy.uint32, cupy.uint64,
                           cupy.ulonglong]:
        warnings.warn(
            'Using the slower implementation as '
            'cupyx.scipy.ndimage.sum supports int32, float16, '
            'float32, float64, uint32, uint64 as data types '
            'for the fast implementation', util.PerformanceWarning)
        use_kern = True

    if labels is None:
        return input.sum()
    if len(labels) == 0:
        return cupy.array([], dtype=cupy.int64)

    if not isinstance(labels, cupy.ndarray):
        raise TypeError('label must be cupy.ndarray')

    if index is None:
        return input[labels != 0].sum()

    input, labels = cupy.broadcast_arrays(input, labels)

    if not isinstance(index, cupy.ndarray):
        if not isinstance(index, int):
            raise TypeError('index must be cupy.ndarray or a scalar int')
        else:
            return (input[labels == index]).sum()

    out = cupy.zeros_like(index, dtype=cupy.float64)

    # The following parameters for sum were determined using a Tesla P100.
    if (input.size >= 262144 and index.size <= 4) or use_kern:
        return _ndimage_sum_kernel_2(input, labels, index, out)
    return _ndimage_sum_kernel(input, labels, index, index.size, out)
Example #12
Source File: test_texture.py    From cupy with MIT License 4 votes vote down vote up
def test_array_gen_cpy(self):
        """Copy random data into a CUDAarray and back, then compare.

        The array module, stream usage, dimensions, channel count and dtype
        are all read from attributes on ``self``.
        """
        xp = numpy if self.xp == 'numpy' else cupy
        stream = cupy.cuda.Stream() if self.stream else None
        width, height, depth = self.dimensions
        n_channel = self.n_channels

        # Channels are interleaved along the innermost axis; a zero height
        # or depth means that dimension is absent.
        row_length = n_channel*width
        if depth != 0:
            shape = (depth, height, row_length)
        elif height != 0:
            shape = (height, row_length)
        else:
            shape = (row_length,)

        # generate input data and allocate output buffer
        if self.dtype in (numpy.float16, numpy.float32):
            arr = xp.random.random(shape).astype(self.dtype)
            kind = runtime.cudaChannelFormatKindFloat
        else:  # int
            # randint() in NumPy <= 1.10 does not have the dtype argument...
            arr = xp.random.randint(100, size=shape).astype(self.dtype)
            is_signed = self.dtype in (numpy.int8, numpy.int16, numpy.int32)
            if is_signed:
                kind = runtime.cudaChannelFormatKindSigned
            else:
                kind = runtime.cudaChannelFormatKindUnsigned
        arr2 = xp.zeros_like(arr)

        assert arr.flags['C_CONTIGUOUS']
        assert arr2.flags['C_CONTIGUOUS']

        # create a CUDA array
        bits_per_channel = arr.dtype.itemsize*8
        ch_bits = [0, 0, 0, 0]
        for channel in range(n_channel):
            ch_bits[channel] = bits_per_channel
        # unpacking arguments using *ch_bits is not supported before PY35...
        bits_x, bits_y, bits_z, bits_w = ch_bits
        ch = ChannelFormatDescriptor(bits_x, bits_y, bits_z, bits_w, kind)
        cu_arr = CUDAarray(ch, width, height, depth)

        # copy from input to CUDA array, and back to output
        cu_arr.copy_from(arr, stream)
        cu_arr.copy_to(arr2, stream)

        # check input and output are identical
        if stream is not None:
            dev.synchronize()
        assert (arr == arr2).all()