Python pycuda.driver.Out() Examples
The following are 4 code examples of pycuda.driver.Out().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module pycuda.driver, or try the search function.
Example #1
Source File: gpu.py From Jamais-Vu with MIT License | 6 votes |
def maximum_filter_2d(arr2D, footprint):
    """Run the GPU sliding-maximum kernel over ``arr2D`` and return the result.

    The CUDA source is read from ``2DSlidingMaxFootprintKernel.c`` in the
    directory containing this file, compiled with ``SourceModule``, and its
    ``slidingMaxiumum2D`` entry point is launched with 16x16 thread blocks.

    Args:
        arr2D: Input array, indexed as 2-D (``shape[0]`` rows, ``shape[1]``
            columns). The caller must make sure it is float32.
        footprint: Footprint array passed to the kernel. The caller must make
            sure it is int32. Only ``footprint.shape[1]`` is handed to the
            kernel as its size.
            NOTE(review): presumably the footprint is square -- confirm
            against the kernel source.

    Returns:
        A new array allocated with ``numpy.empty_like(arr2D)`` that receives
        the kernel output.
    """
    arr2DMaxed = numpy.empty_like(arr2D)

    # Resolve the kernel relative to this file so it is found regardless of
    # the process's current working directory.
    kernelDir = os.path.dirname(os.path.abspath(__file__))
    kernelPath = os.path.join(kernelDir, "2DSlidingMaxFootprintKernel.c")

    # Read through a context manager so the file handle is always closed,
    # even if reading fails.
    with open(kernelPath, "rt") as kernelFile:
        kernelSource = kernelFile.read()

    maxFunction = SourceModule(kernelSource)
    slidingMaxKernel = maxFunction.get_function("slidingMaxiumum2D")

    # To-do: make this configurable; it can affect performance based on GPU.
    blockSize = [16, 16]
    # Grid dimensions derived from the block size and the array shape.
    gridSize = getGridSize(blockSize, arr2D.shape)

    slidingMaxKernel(
        cuda.In(arr2D),                    # Input
        cuda.Out(arr2DMaxed),              # Output
        numpy.int32(footprint.shape[1]),   # Kernel Size
        numpy.int32(arr2D.shape[1]),       # Row Stride
        numpy.int32(1),                    # Column Stride
        numpy.int32(int(arr2D.shape[1])),  # Array Column Count
        numpy.int32(int(arr2D.shape[0])),  # Array Row Count
        cuda.In(footprint),
        block=(blockSize[0], blockSize[1], 1),
        grid=(gridSize[0], gridSize[1], 1),
    )

    return arr2DMaxed
Example #2
Source File: gpu_render.py From Pix2Pose with MIT License | 6 votes |
def cuda_render(self, pts, face_set):
    """Render a depth image, mask and bounding box with ``self.rendering``.

    Each point is mapped to pixel coordinates with
    ``u = round(fx * x / z + cx)`` and ``v = round(fy * y / z + cy)``; the
    per-face vertex coordinates and depths are then passed to the
    ``self.rendering`` kernel.

    Args:
        pts: Array of points indexed as ``pts[:, 0]``, ``pts[:, 1]``,
            ``pts[:, 2]`` (x, y, z); converted to float32 here.
        face_set: Index array with (at least) three columns; each column
            selects one vertex of every face.
            NOTE(review): presumably an integer array of shape (n_faces, 3)
            -- confirm against the caller.

    Returns:
        Tuple ``(img, mask, bbox)``: ``img`` is the depth buffer reshaped to
        ``(res_y, res_x)`` with untouched pixels set to 0, ``mask`` is the
        kernel's mask output reshaped to the same size and cast to bool, and
        ``bbox`` is the 4-element bounding-box buffer cast to integers.
    """
    pts = pts.astype(np.float32)

    # The builtin ``int`` replaces the ``np.int`` alias, which was removed in
    # NumPy 1.24; the two were the same type, so results are unchanged.
    v = np.round(self.fy * pts[:, 1] / pts[:, 2] + self.cy).astype(int).astype(np.float32)
    u = np.round(self.fx * pts[:, 0] / pts[:, 2] + self.cx).astype(int).astype(np.float32)

    n_pixels = self.res_y * self.res_x
    # The depth buffer starts at the sentinel value 100; pixels still equal
    # to 100 after the kernel runs are reset to 0 below.
    depth_b = gpuarray.zeros((n_pixels), dtype=np.float32) + 100
    depth_mask = np.zeros((n_pixels), dtype=np.float32)

    bbox = gpuarray.zeros((4), dtype=np.float32)
    bbox[0:2] = np.array([9999, 9999], dtype=np.float32)

    max_idx = np.ones((face_set.shape[0]), dtype=np.float32)
    # One extra block so the grid holds at least one thread per face.
    grid_n = int((face_set.shape[0] / self.n_block)) + 1

    self.rendering(
        drv.In(v[face_set[:, 0]]), drv.In(v[face_set[:, 1]]), drv.In(v[face_set[:, 2]]),
        drv.In(u[face_set[:, 0]]), drv.In(u[face_set[:, 1]]), drv.In(u[face_set[:, 2]]),
        drv.In(pts[face_set[:, 0], 2]), drv.In(pts[face_set[:, 1], 2]), drv.In(pts[face_set[:, 2], 2]),
        depth_b, drv.In(max_idx),
        drv.Out(depth_mask), bbox,
        block=(self.n_block, 1, 1), grid=(grid_n, 1, 1))

    img = depth_b.get()
    img[img == 100] = 0
    img = np.reshape(img, (self.res_y, self.res_x))
    mask = np.reshape(depth_mask, (self.res_y, self.res_x)).astype(bool)
    bbox_final = bbox.get()
    return img, mask, bbox_final.astype(int)
Example #3
Source File: test_pycuda_theano_simple.py From D-VAE with MIT License | 6 votes |
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works.

    Compiles a small element-wise multiply kernel, runs it on two random
    float32 vectors transferred with ``drv.In``/``drv.Out``, and checks the
    result against the same product computed by numpy.
    """
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)

    # The kernel indexes by threadIdx.x without a bounds check, so launch
    # exactly one thread per element; a larger block would read and write
    # past the end of the device buffers.
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(a.size, 1, 1), grid=(1, 1))
    assert (dest == a * b).all()
Example #4
Source File: test_pycuda_theano_simple.py From attention-lvcsr with MIT License | 6 votes |
def test_pycuda_only():
    """Run pycuda only example to test that pycuda works.

    Builds an element-wise multiply kernel with ``SourceModule``, feeds it
    two random float32 vectors through ``drv.In`` and collects the output
    through ``drv.Out``, then compares it with the product computed by numpy.
    """
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
__global__ void multiply_them(float *dest, float *a, float *b)
{
  const int i = threadIdx.x;
  dest[i] = a[i] * b[i];
}
""")

    multiply_them = mod.get_function("multiply_them")

    # Test with pycuda in/out of numpy.ndarray
    a = numpy.random.randn(100).astype(numpy.float32)
    b = numpy.random.randn(100).astype(numpy.float32)
    dest = numpy.zeros_like(a)

    # One thread per array element: the kernel has no bounds check on
    # threadIdx.x, so any extra threads would access memory beyond the
    # buffers allocated for a, b and dest.
    multiply_them(
        drv.Out(dest), drv.In(a), drv.In(b),
        block=(a.size, 1, 1), grid=(1, 1))
    assert (dest == a * b).all()