jcuda.driver.JCudaDriver Java Examples

The following examples show how to use jcuda.driver.JCudaDriver. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JCudaDriverSimpleLWJGL.java    From jcuda-samples with MIT License 6 votes vote down vote up
/**
 * Prepares the JCuda driver API for this sample: enables exceptions,
 * creates a context on device 0, loads the PTX module prepared from the
 * kernel source file and stores the handle of the "simple_vbo_kernel"
 * function in {@code function}.
 * <p>
 * This must run on the same thread that will later use the
 * JCudaDriver API.
 */
private void initJCuda()
{
    // Enable exceptions before the first driver call
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);

    // Device with ordinal 0 and a context created on it
    CUdevice firstDevice = new CUdevice();
    CUcontext deviceContext = new CUcontext();
    cuDeviceGet(firstDevice, 0);
    cuCtxCreate(deviceContext, 0, firstDevice);

    // Prepare the PTX file for the kernel source and load it as a module
    final String kernelSourcePath =
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu";
    final String ptxPath = JCudaSamplesUtils.preparePtxFile(kernelSourcePath);
    CUmodule kernelModule = new CUmodule();
    cuModuleLoad(kernelModule, ptxPath);

    // Keep the kernel handle in the field so that it can be launched
    // later, during the animation
    function = new CUfunction();
    cuModuleGetFunction(function, kernelModule, "simple_vbo_kernel");
}
 
Example #2
Source File: JCudaDriverSimpleJOGL.java    From jcuda-samples with MIT License 6 votes vote down vote up
/**
 * Performs the one-time JCuda driver setup: exceptions are enabled, a
 * context is created for the device with ordinal 0, the PTX module built
 * from the kernel source is loaded, and the "simple_vbo_kernel" function
 * handle is stored in {@code function}.
 * <p>
 * Call this from the thread that will later use the JCudaDriver API.
 */
private void initJCuda()
{
    JCudaDriver.setExceptionsEnabled(true);

    // Driver initialization, then device 0 and its context
    cuInit(0);
    CUdevice gpu = new CUdevice();
    cuDeviceGet(gpu, 0);
    CUcontext gpuContext = new CUcontext();
    cuCtxCreate(gpuContext, 0, gpu);

    // The kernel source is turned into a PTX file, which is then loaded
    final String ptxPath = JCudaSamplesUtils.preparePtxFile(
        "src/main/resources/kernels/JCudaDriverSimpleGLKernel.cu");
    CUmodule ptxModule = new CUmodule();
    cuModuleLoad(ptxModule, ptxPath);

    // The function handle is stored for later launches from the
    // display method of this GLEventListener
    function = new CUfunction();
    cuModuleGetFunction(function, ptxModule, "simple_vbo_kernel");
}
 
Example #3
Source File: CUDAInnerLoop.java    From ocular with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Uploads the zero-extended white and black observations to the device,
 * launches the "compute_emissions_&lt;width&gt;" kernel for every template
 * width that has at least one template index, and copies the scores back
 * to the host.
 *
 * @param scores host array that receives
 *        {@code sequenceLength*totalTemplateNumIndices} floats from the device
 * @param whiteObservations host observations, extended via
 *        {@code CudaUtil.extendWithZeros} before the upload
 * @param blackObservations host observations, extended the same way
 * @param sequenceLength sequence length passed to the kernels and used to
 *        size the grid in x
 */
public void compute(final float[] scores, final float[] whiteObservations, final float[] blackObservations, final int sequenceLength) {
	// Round the sequence length up to a whole number of x-blocks
	final int columnsPerBlock = BLOCK_SIZE_X*ROLL_X;
	final int gridSizeX = (int) Math.ceil(((double) sequenceLength) / columnsPerBlock);
	final int extendedSeqLength = gridSizeX * columnsPerBlock;

	// Both observation buffers are uploaded with the same padded length
	final int paddedLength = (extendedSeqLength+maxTemplateWidth-1)*CharacterTemplate.LINE_HEIGHT;
	final int paddedBytes = paddedLength * Sizeof.FLOAT;
	cuMemcpyHtoD(d_Ow, Pointer.to(CudaUtil.extendWithZeros(whiteObservations, paddedLength)), paddedBytes);
	cuMemcpyHtoD(d_Ob, Pointer.to(CudaUtil.extendWithZeros(blackObservations, paddedLength)), paddedBytes);

	for (int tw=minTemplateWidth; tw<=maxTemplateWidth; ++tw) {
		final int widthIdx = tw-minTemplateWidth;
		if (templateNumIndices[widthIdx] <= 0) {
			// No templates of this width: nothing to launch
			continue;
		}

		// Look up and configure the kernel specialized for this width
		CUfunction kernel = new CUfunction();
		cuModuleGetFunction(kernel, cudaModule, "compute_emissions_"+tw);
		JCudaDriver.cuFuncSetCacheConfig(kernel, CUfunc_cache.CU_FUNC_CACHE_PREFER_SHARED);
		JCudaDriver.cuFuncSetSharedMemConfig(kernel, CUsharedconfig.CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE);

		Pointer kernelArgs = Pointer.to(
				Pointer.to(new int[] {templateIndicesOffsets[widthIdx]*sequenceLength}),
				Pointer.to(new int[] {sequenceLength}),
				Pointer.to(new int[] {templateNumIndices[widthIdx]}),
				Pointer.to(d_Tw[widthIdx]),
				Pointer.to(d_Tb[widthIdx]),
				Pointer.to(d_Ow),
				Pointer.to(d_Ob),
				Pointer.to(d_scores));

		// Enough y-blocks to cover all template indices of this width
		final int gridSizeY = (int) Math.ceil(((double) templateNumIndices[widthIdx]) / BLOCK_SIZE_Y);
		cuLaunchKernel(kernel,
				gridSizeX, gridSizeY, 1,       // grid
				BLOCK_SIZE_X, BLOCK_SIZE_Y, 1, // block
				0, null,                       // no dynamic shared memory, null stream
				kernelArgs, null               // kernel arguments, no extra options
				);
	}

	cuMemcpyDtoH(Pointer.to(scores), d_scores, sequenceLength*totalTemplateNumIndices * Sizeof.FLOAT);
}
 
Example #4
Source File: JCudaDriverPrimaryContextTest.java    From jcuda with MIT License 6 votes vote down vote up
/**
 * Checks that retaining the primary context of device 0 yields a context
 * object that is not equal to a freshly constructed, unused one.
 */
@Test
public void testPrimaryContextCreation()
{
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);

    // Device with ordinal 0
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);

    // Retain the primary context of that device
    CUcontext primaryContext = new CUcontext();
    cuDevicePrimaryCtxRetain(primaryContext, firstDevice);

    // A context object that was never passed to the driver serves as
    // the reference for "nothing was created"
    CUcontext untouchedContext = new CUcontext();
    assertFalse(primaryContext.equals(untouchedContext));
}
 
Example #5
Source File: JCudaAbstractKernelTest.java    From jcuda with MIT License 6 votes vote down vote up
/**
 * Prepares a PTX file from the given .CU file, loads it as a module and
 * returns the handle of the requested kernel function. The driver is
 * initialized and a context is created for device 0 along the way.
 * 
 * @param cuFileName The .CU file name
 * @param functionName The kernel function name
 * @return The function
 * @throws CudaException If an error occurs
 */
protected final CUfunction initialize(
    String cuFileName, String functionName)
{
    // With exceptions enabled, no explicit error checks are made below
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);

    // Context for the device with ordinal 0
    CUdevice firstDevice = new CUdevice();
    CUcontext deviceContext = new CUcontext();
    cuDeviceGet(firstDevice, 0);
    cuCtxCreate(deviceContext, 0, firstDevice);

    // Prepare the PTX file and load it as a module
    final String ptxPath = JCudaTestUtils.preparePtxFile(cuFileName);
    CUmodule ptxModule = new CUmodule();
    cuModuleLoad(ptxModule, ptxPath);

    // Resolve the kernel by name inside the loaded module
    CUfunction kernel = new CUfunction();
    cuModuleGetFunction(kernel, ptxModule, functionName);
    return kernel;
}
 
Example #6
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorScalarSet" with A's device
 * buffer, {@code alpha} and the element count {@code A.rows*A.cols} as
 * arguments.
 */
private static void scalarSet(Matrix A, float alpha) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorScalarSet");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(new float[] {alpha}),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #7
Source File: Kernel.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a handle for the kernel function called {@code name} in this
 * kernel's module, with no arguments set, a launch size of one block of
 * one thread, and a {@code SharedMemCalculator.None} shared-memory
 * calculator.
 *
 * @param name name of the function to look up in the module
 */
public Function(String name) {
	// Launch defaults
	pArgs = null;
	numBlocks = 1;
	blockThreads = 1;
	sharedMemCalc = new SharedMemCalculator.None();

	// Resolve the function handle from the module
	func = new CUfunction();
	JCudaDriver.cuModuleGetFunction(func, module, name);
}
 
Example #8
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorMul" with the device buffers
 * of A, B and C and the element count {@code A.rows*A.cols} as arguments.
 */
private static void mul(Matrix A, Matrix B, Matrix C) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorMul");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(C.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #9
Source File: JCudaDriverBasicGraphExample.java    From jcuda-samples with MIT License 5 votes vote down vote up
/**
 * Default CUDA setup for this example: enables exceptions for the driver
 * API and for NVRTC, initializes the driver and creates a context on the
 * device with ordinal 0.
 */
private static void initialize()
{
    // Exceptions for both libraries used by the example
    JCudaDriver.setExceptionsEnabled(true);
    JNvrtc.setExceptionsEnabled(true);

    cuInit(0);

    // Context on the first device
    CUdevice firstDevice = new CUdevice();
    CUcontext deviceContext = new CUcontext();
    cuDeviceGet(firstDevice, 0);
    cuCtxCreate(deviceContext, 0, firstDevice);
}
 
Example #10
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code cublasSgemmBatched} (no transposition) over the matrices in
 * A, B and C. The device addresses of the individual matrices are
 * collected into three pointer tables, which are uploaded to temporary
 * device buffers for the duration of the call.
 * <p>
 * The dimensions and leading dimensions are taken from the first matrix
 * of each list, and the batch count is {@code A.size()}.
 * The temporary device buffers are released in a {@code finally} block,
 * so they are not leaked when an allocation, a copy or the cuBLAS call
 * throws.
 *
 * @param alpha scalar passed to cuBLAS as alpha
 * @param A left-hand matrices
 * @param B right-hand matrices; indexed with the same indices as A
 * @param beta scalar passed to cuBLAS as beta
 * @param C output matrices; indexed with the same indices as A
 */
private static void gemmBatched(float alpha, List<Matrix> A, List<Matrix> B, float beta, List<Matrix> C) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	Pointer[] Cpointers = new Pointer[C.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
		Cpointers[i] = C.get(i).data_d;
	}

	// All three start out as null pointers, so the cleanup below is valid
	// no matter how far the setup got (freeing a null pointer is a no-op).
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer Cpointers_d = new Pointer();
	try {
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Cpointers_d, C.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Cpointers_d, Pointer.to(Cpointers), C.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgemmBatched(cublasHandle, cublasOperation.CUBLAS_OP_N, cublasOperation.CUBLAS_OP_N, C.get(0).rows, C.get(0).cols, B.get(0).rows, Pointer.to(new float[] {alpha}), Apointers_d, A.get(0).rows, Bpointers_d, B.get(0).rows, Pointer.to(new float[] {beta}), Cpointers_d, C.get(0).rows, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		// Release the temporary pointer tables on success and on failure
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(Cpointers_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example #11
Source File: GpuStream.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Releases the buffers and the CUDA stream owned by this object. Does
 * nothing when the stream has already been cleared. A failure while
 * destroying the stream is printed to {@code System.err} and otherwise
 * ignored; the stream reference is cleared in either case.
 */
public void cleanup() {
	if (stream == null) {
		// Already cleaned up
		return;
	}

	byteBuffers.cleanup();
	intBuffers.cleanup();
	longBuffers.cleanup();
	doubleBuffers.cleanup();

	try {
		JCudaDriver.cuStreamDestroy(stream);
	} catch (Throwable t) {
		t.printStackTrace(System.err);
	}

	stream = null;
}
 
Example #12
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code cublasSgetrfBatched} on the matrices in A, followed by
 * {@code cublasSgetriBatched} with A as input and B as output. The device
 * addresses of the matrices are uploaded as pointer tables, and temporary
 * device buffers are allocated for the pivots and the info values.
 * <p>
 * The matrix size and leading dimensions are taken from the first matrix
 * of each list, and the batch count is {@code A.size()}.
 * All temporary device buffers are released in a {@code finally} block,
 * so they are not leaked when an allocation, a copy or a cuBLAS call
 * throws.
 * <p>
 * NOTE(review): the info buffer is never copied back to the host, so
 * whatever the cuBLAS calls report through it is not inspected here.
 *
 * @param A input matrices; passed to the factorization as the matrix array
 * @param B output matrices; indexed with the same indices as A
 */
private static void getrfGetriBatched(List<Matrix> A, List<Matrix> B) {
	Pointer[] Apointers = new Pointer[A.size()];
	Pointer[] Bpointers = new Pointer[B.size()];
	for (int i=0; i<A.size(); ++i) {
		Apointers[i] = A.get(i).data_d;
		Bpointers[i] = B.get(i).data_d;
	}

	// All four start out as null pointers, so the cleanup below is valid
	// no matter how far the setup got (freeing a null pointer is a no-op).
	Pointer Apointers_d = new Pointer();
	Pointer Bpointers_d = new Pointer();
	Pointer info_d = new Pointer();
	Pointer pivots_d = new Pointer();
	try {
		JCuda.cudaMalloc(Apointers_d, A.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Apointers_d, Pointer.to(Apointers), A.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(Bpointers_d, B.size() * Sizeof.POINTER);
		JCuda.cudaMemcpy(Bpointers_d, Pointer.to(Bpointers), B.size() * Sizeof.POINTER, cudaMemcpyKind.cudaMemcpyHostToDevice);
		JCuda.cudaMalloc(info_d, A.size() * Sizeof.INT);
		JCuda.cudaMalloc(pivots_d, A.get(0).rows * A.size() * Sizeof.INT);
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetrfBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, info_d, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();

		JCublas2.cublasSgetriBatched(cublasHandle, A.get(0).rows, Apointers_d, A.get(0).rows, pivots_d, Bpointers_d, B.get(0).rows, info_d, A.size());
		if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
	} finally {
		// Release the temporary device buffers on success and on failure
		JCuda.cudaFree(Apointers_d);
		JCuda.cudaFree(Bpointers_d);
		JCuda.cudaFree(info_d);
		JCuda.cudaFree(pivots_d);
	}
	if (DEBUG_SYNC) JCudaDriver.cuCtxSynchronize();
}
 
Example #13
Source File: JCudaDriverHostFunction.java    From jcuda-samples with MIT License 5 votes vote down vote up
/**
 * Entry point of the sample: creates a context on device 0, enqueues a
 * host function into a newly created stream, waits for the stream to
 * finish and destroys the context.
 * 
 * @param args Not used
 */
public static void main(String[] args)
{
    // Driver setup with a context on the first device
    JCudaDriver.setExceptionsEnabled(true);
    cuInit(0);
    CUcontext deviceContext = new CUcontext();
    CUdevice firstDevice = new CUdevice();
    cuDeviceGet(firstDevice, 0);
    cuCtxCreate(deviceContext, 0, firstDevice);

    // Stream that will carry the host function
    CUstream hostFnStream = new CUstream();
    cuStreamCreate(hostFnStream, 0);

    // The host function only prints the user object it is given
    CUhostFn printingCallback = new CUhostFn()
    {
        @Override
        public void call(Object userData)
        {
            System.out.println("Called with " + userData);
        }
    };
    cuLaunchHostFunc(hostFnStream, printingCallback, "Example user object");

    // Block until everything enqueued in the stream has completed
    cuStreamSynchronize(hostFnStream);

    // Tear down the context
    cuCtxDestroy(deviceContext);

    System.out.println("Done");
}
 
Example #14
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorMax" with the device buffers
 * of A and B, {@code val} and the element count {@code A.rows*A.cols} as
 * arguments.
 */
private static void max(Matrix A, Matrix B, float val) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorMax");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new float[] {val}),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #15
Source File: CudaUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the CUDA driver API and creates a context on the requested
 * device. The resulting device and context handles are stored in the
 * {@code device} and {@code context} fields.
 *
 * @param deviceId ordinal of the device to use, as passed to cuDeviceGet
 */
public static void startup(int deviceId) {
       // Enable exceptions before the first driver call
       JCudaDriver.setExceptionsEnabled(true);
       JCudaDriver.cuInit(0);
       // Look up the requested device
       device = new CUdevice();
       cuDeviceGet(device, deviceId);
       // Create a context on that device (flags = 0)
       context = new CUcontext();
       cuCtxCreate(context, 0, device);
}
 
Example #16
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code Matrix} with the dimensions of the given 2-D array and
 * uploads the array contents, converted by {@code toColMajor}, into the
 * matrix's device buffer via {@code cublasSetMatrix}.
 *
 * @param mat host values; the column count is taken from {@code mat[0]}
 * @return the newly created matrix
 */
public static Matrix build(float[][] mat) {
	final int rowCount = mat.length;
	final int colCount = mat[0].length;
	Matrix result = new Matrix(rowCount, colCount);

	// Flatten on the host, then copy with the row count as leading dimension on both sides
	float[] flattened = toColMajor(mat);
	Pointer hostData = Pointer.to(flattened);
	JCublas2.cublasSetMatrix(result.rows, result.cols, Sizeof.FLOAT, hostData, result.rows, result.data_d, result.rows);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
	return result;
}
 
Example #17
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Frees every matrix in {@code allocated} except those contained in
 * {@code keep} and those flagged {@code dontFree}. The matrices that were
 * not freed become the new {@code allocated} list, in their original
 * order.
 *
 * @param keep matrices that must survive this call
 */
public static void freeAllBut(Collection<Matrix> keep) {
	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}

	LinkedList<Matrix> survivors = new LinkedList<Matrix>();
	while (!allocated.isEmpty()) {
		// Each matrix is taken off the list before it is examined
		Matrix candidate = allocated.poll();
		boolean retain = keep.contains(candidate) || candidate.dontFree;
		if (retain) {
			survivors.add(candidate);
		} else {
			candidate.free();
		}
	}
	allocated = survivors;
}
 
Example #18
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorScalarAdd" with the device
 * buffers of A and B, {@code alpha} and the element count
 * {@code A.rows*A.cols} as arguments.
 */
private static void scalarAdd(Matrix A, float alpha, Matrix B) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorScalarAdd");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new float[] {alpha}),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #19
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorMin" with the device buffers
 * of A and B, {@code val} and the element count {@code A.rows*A.cols} as
 * arguments.
 */
private static void min(Matrix A, Matrix B, float val) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorMin");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new float[] {val}),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #20
Source File: Gpu.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects descriptive information about one CUDA device: its name, its
 * free and total memory, and the compute-capability, warp-size and
 * max-block-dim-x attributes.
 * <p>
 * The memory query needs a context, so a temporary one is created and
 * destroyed again. If any step of that query fails, both memory values
 * are set to 0.
 *
 * @param device the CUDA device to describe
 */
public Gpu(CUdevice device) {
	
	this.device = device;
	
	// get name: the driver writes a C string, so take the bytes before the
	// first NUL. The scan is bounded so that a buffer without a terminator
	// cannot run past the end of the array, and only the name bytes are decoded.
	byte[] bytes = new byte[1024];
	JCudaDriver.cuDeviceGetName(bytes, bytes.length, device);
	int len = 0;
	while (len < bytes.length && bytes[len] != 0) {
		len++;
	}
	name = new String(bytes, 0, len);
	
	// get total and free memory
	// (if it's even possible... if a GPU is out of memory, we can't even query it)
	try {
		CUcontext cuCtx = new CUcontext();
		JCudaDriver.cuCtxCreate(cuCtx, 0, device);
		try {
			long[][] longs = new long[2][1];
			JCudaDriver.cuMemGetInfo(longs[0], longs[1]);
			freeMemory = longs[0][0];
			totalMemory = longs[1][0];
		} finally {
			// the temporary context must not outlive this query, even when the query throws
			JCudaDriver.cuCtxDestroy(cuCtx);
		}
	} catch (Throwable t) {
		// assume out of memory
		freeMemory = 0;
		totalMemory = 0;
	}
	
	// get attributes
	computeVersion = new int[] {
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR),
		getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR)
	};
	warpThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE);
	maxBlockThreads = getAttribute(CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X);
}
 
Example #21
Source File: Context.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Creates a CUDA context on the device of the given GPU, using the
 * {@code CU_CTX_SCHED_BLOCKING_SYNC} flag, and starts with an empty
 * kernel map.
 *
 * @param gpu the GPU whose device the context is created on
 */
public Context(Gpu gpu) {
	this.gpu = gpu;
	kernels = new HashMap<>();

	// Blocking sync is the scheduling flag in use; CU_CTX_SCHED_YIELD and
	// CU_CTX_SCHED_SPIN were the alternatives noted here before.
	final int schedulingFlags = CUctx_flags.CU_CTX_SCHED_BLOCKING_SYNC;

	context = new CUcontext();
	JCudaDriver.cuCtxCreate(context, schedulingFlags, gpu.getDevice());
}
 
Example #22
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorExp" with the device buffers
 * of A and B and the element count {@code A.rows*A.cols} as arguments.
 */
private static void exp(Matrix A, Matrix B) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorExp");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #23
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Copies this matrix from its device buffer into a new host array of
 * {@code rows*cols} floats via {@code cublasGetMatrix}, using the row
 * count as leading dimension on both sides.
 *
 * @return the host copy, in the element order of the device buffer
 */
public float[] toArray() {
			final float[] hostCopy = new float[rows*cols];
			final Pointer hostPointer = Pointer.to(hostCopy);
			JCublas2.cublasGetMatrix(rows, cols, Sizeof.FLOAT, data_d, rows, hostPointer, rows);
			if (DEBUG_SYNC) {
				JCudaDriver.cuCtxSynchronize();
			}
			return hostCopy;
		}
 
Example #24
Source File: Gpus.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Enumerates the CUDA devices and keeps those whose {@code Gpu} reports
 * support for doubles. Progress is reported through {@code print}. An
 * {@code UnsatisfiedLinkError} during discovery is caught and its stack
 * trace is printed; in every case a summary of how many GPUs were kept
 * is printed at the end.
 */
private Gpus() {
	print("Discovering CUDA GPUs...");
	gpus = new ArrayList<>();

	try {
		JCudaDriver.setExceptionsEnabled(true);
		// according to docs, init flags must always be zero
		JCudaDriver.cuInit(0);

		// ask the driver for the number of devices
		int[] countHolder = new int[1];
		JCudaDriver.cuDeviceGetCount(countHolder);
		final int deviceCount = countHolder[0];

		// keep only the devices that support doubles
		for (int ordinal = 0; ordinal < deviceCount; ordinal++) {
			CUdevice candidateDevice = new CUdevice();
			JCudaDriver.cuDeviceGet(candidateDevice, ordinal);
			Gpu candidate = new Gpu(candidateDevice);
			if (!candidate.supportsDoubles()) {
				continue;
			}
			gpus.add(candidate);
		}
	} catch (UnsatisfiedLinkError ex) {
		// render the stack trace into a string so it goes through print()
		StringWriter trace = new StringWriter();
		ex.printStackTrace(new PrintWriter(trace));
		print(trace.toString());
	} finally {
		print(gpus.isEmpty() ? " none found\n" : " found " + gpus.size() + "\n");
	}
}
 
Example #25
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorSign" with the device buffers
 * of A and B and the element count {@code A.rows*A.cols} as arguments.
 */
private static void sign(Matrix A, Matrix B) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorSign");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #26
Source File: Context.java    From OSPREY3 with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Unloads all kernel modules held in {@code kernels}, clears the map and
 * destroys the CUDA context.
 * <p>
 * Cleanup is best-effort: failures are printed to {@code System.err} and
 * not propagated. The two stages are handled independently, so a failing
 * module unload no longer prevents the map from being cleared or the
 * context from being destroyed.
 */
public synchronized void cleanup() {
	try {
		for (CUmodule kernel : kernels.values()) {
			JCudaDriver.cuModuleUnload(kernel);
		}
	} catch (Throwable t) {
		t.printStackTrace(System.err);
	} finally {
		// the module handles are of no use once the context is torn down below
		kernels.clear();
	}
	
	try {
		JCudaDriver.cuCtxDestroy(context);
	} catch (Throwable t) {
		t.printStackTrace(System.err);
	}
}
 
Example #27
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorDiv" with the device buffers
 * of A, B and C and the element count {@code A.rows*A.cols} as arguments.
 */
private static void div(Matrix A, Matrix B, Matrix C) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorDiv");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(C.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #28
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorSqrt" with the device buffers
 * of A and B and the element count {@code A.rows*A.cols} as arguments.
 */
private static void sqrt(Matrix A, Matrix B) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorSqrt");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #29
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorSqr" with the device buffers
 * of A and B and the element count {@code A.rows*A.cols} as arguments.
 */
private static void sqr(Matrix A, Matrix B) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorSqr");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}
 
Example #30
Source File: CublasUtil.java    From murphy with Apache License 2.0 5 votes vote down vote up
/**
 * Launches the helper-module kernel "vectorPow" with the device buffers
 * of A and B, {@code val} and the element count {@code A.rows*A.cols} as
 * arguments.
 */
private static void pow(Matrix A, Matrix B, float val) {
	final int elementCount = A.rows*A.cols;

	// 1-D launch: at most BLOCK_SIZE threads per block, enough blocks to cover every element
	final int threadsPerBlock = Math.min(elementCount, BLOCK_SIZE);
	final int blockCount = (int) Math.ceil((double) elementCount / threadsPerBlock);

	CUfunction kernel = new CUfunction();
	cuModuleGetFunction(kernel, helperModule, "vectorPow");

	Pointer kernelArgs = Pointer.to(
			Pointer.to(A.data_d),
			Pointer.to(B.data_d),
			Pointer.to(new float[] {val}),
			Pointer.to(new int[] {elementCount}));

	cuLaunchKernel(kernel,
			blockCount, 1, 1,      // grid
			threadsPerBlock, 1, 1, // block
			0, null,               // no dynamic shared memory, null stream
			kernelArgs, null       // kernel arguments, no extra options
			);

	if (DEBUG_SYNC) {
		JCudaDriver.cuCtxSynchronize();
	}
}