Python pycuda.driver.Out() Examples

The following are 4 code examples of pycuda.driver.Out(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pycuda.driver , or try the search function .
Example #1
Source File: gpu.py    From Jamais-Vu with MIT License 6 votes vote down vote up
def maximum_filter_2d(arr2D, footprint): ## Make sure arr2D is our datatype float32 and footprint of int32
    arr2DMaxed = numpy.empty_like(arr2D)
    head, tail = os.path.split(os.path.abspath(__file__)) # Used so that we can always get the kernel which should be in the same directory as this file

    maxFunction = open(head + "/2DSlidingMaxFootprintKernel.c", "rt")
    maxFunction = SourceModule(maxFunction.read())
    slidingMaxKernel = maxFunction.get_function("slidingMaxiumum2D")

    blockSize = [16, 16] # To-do: Add a variable to this, can affect performance based on GPU
    gridSize = getGridSize(blockSize, arr2D.shape) # Get the size of our grid based on the size of a grid (blocksize)


    slidingMaxKernel(cuda.In(arr2D),                   # Input
                    cuda.Out(arr2DMaxed),              # Output
                    numpy.int32(footprint.shape[1]),   # Kernel Size
                    numpy.int32(arr2D.shape[1]),       # Row Stride
                    numpy.int32(1),                    # Column Stride
                    numpy.int32(int(arr2D.shape[1])),  # Array Column Count
                    numpy.int32(int(arr2D.shape[0])),  # Array Row Count
                    cuda.In(footprint),
                    block=(blockSize[0],blockSize[1],1),
                    grid=(gridSize[0],gridSize[1],1)
    )

    return arr2DMaxed 
Example #2
Source File: gpu_render.py    From Pix2Pose with MIT License 6 votes vote down vote up
def cuda_render(self,pts,face_set):
        pts = pts.astype(np.float32)
        v = ((np.round(self.fy*pts[:,1]/pts[:,2]+self.cy)).astype(np.int)).astype(np.float32)
        u = ((np.round(self.fx*pts[:,0]/pts[:,2]+self.cx)).astype(np.int)).astype(np.float32)
        depth_b = gpuarray.zeros((self.res_y*self.res_x), dtype=np.float32)+100#+90000
        depth_mask = np.zeros((self.res_y*self.res_x),dtype=np.float32)
        bbox = gpuarray.zeros((4),dtype=np.float32)
        bbox[0:2]=np.array([9999,9999],dtype=np.float32)


        max_idx = np.ones((face_set.shape[0]), dtype=np.float32)
        grid_n= int((face_set.shape[0]/self.n_block))+1
        self.rendering(drv.In(v[face_set[:,0]]), drv.In(v[face_set[:,1]]),drv.In(v[face_set[:,2]]),
                          drv.In(u[face_set[:,0]]), drv.In(u[face_set[:,1]]),drv.In(u[face_set[:,2]]),
                          drv.In(pts[face_set[:,0],2]), drv.In(pts[face_set[:,1],2]),drv.In(pts[face_set[:,2],2]),
                          depth_b,drv.In(max_idx), drv.Out(depth_mask),bbox,
                          block=(self.n_block, 1, 1), grid=(grid_n, 1, 1))
        img = depth_b.get()
        img[img==100]=0
        img= np.reshape(img,(self.res_y,self.res_x))
        mask = np.reshape(depth_mask,(self.res_y,self.res_x)).astype(bool)
        bbox_final = bbox.get()
        return img,mask,bbox_final.astype(np.int) 
Example #3
Source File: test_pycuda_theano_simple.py    From D-VAE with MIT License 6 votes vote down vote up
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(400, 1, 1), grid=(1, 1))
    assert (dest == a * b).all() 
Example #4
Source File: test_pycuda_theano_simple.py    From attention-lvcsr with MIT License 6 votes vote down vote up
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works."""
    from pycuda.compiler import SourceModule
    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(400, 1, 1), grid=(1, 1))
    assert (dest == a * b).all()