GopherProxy

	The number of memory chunks increased to 95% of total memory - cuda-memscrub -…
	git clone git://src.adamsgaard.dk/cuda-memscrub
	Log
	Files
	Refs
	README
	LICENSE
	---
	commit 4bd9658bceab5d4dc01e41b8410802fd3fef3859
	parent 1cf91a1710beee16200b48487cacf9bc45bb4938
	Author: Anders Damsgaard <[email protected]>
	Date: Mon, 13 Jan 2014 14:21:07 +0100

	The number of memory chunks increased to 95% of total memory

	Diffstat:
	M scrub.cu | 43 +++++++++++++++++++----------…
	M utility.cu | 11 +++++++++++
	M utility.cuh | 1 +

	3 files changed, 38 insertions(+), 17 deletions(-)
	---
	diff --git a/scrub.cu b/scrub.cu
	@@ -6,6 +6,7 @@

	#define VERSION "0.1"
	#define VALUETOWRITE 1234
	+#define MEMCHUNKS 19

	__global__ void write_value(int* d_mem, long unsigned int n_ints,
	unsigned int nx, unsigned int ny)
	@@ -78,19 +79,24 @@ int main(int argc, char** argv)
	size_t mem_size = prop.totalGlobalMem;
	printf("global memory size: %lu bytes\n", mem_size);

	- long unsigned int n_ints = mem_size/sizeof(int)/20;
	+ long unsigned int n_ints = mem_size/sizeof(int)/(MEMCHUNKS+1);
	printf("overwriting the first %ld bytes, corresponding to %ld int values "
	"or the first %.1f%% of the global device memory.\n",
	- n_ints*sizeof(int), n_ints,
	- (float)100*n_ints*sizeof(int)/prop.totalGlobalMem);
	- int* d_mem;
	- if (cudaMalloc((void**)&d_mem, n_ints*sizeof(int))
	- == cudaErrorMemoryAllocation) {
	- fprintf(stderr, "Error: Could not allocate the requested amount of "
	- "global memory on the device.\n");
	- cudaDeviceReset();
	- exit(EXIT_FAILURE);
	+ n_ints*sizeof(int)*MEMCHUNKS, n_ints*MEMCHUNKS,
	+ (float)100*n_ints*sizeof(int)*MEMCHUNKS/prop.totalGlobalMem);
	+
	+ int* d[MEMCHUNKS]; // array of device pointers
	+ int i;
	+ for (i=0; i<MEMCHUNKS; i++) {
	+ if (cudaMalloc((void**)&d[i], n_ints*sizeof(int))
	+ == cudaErrorMemoryAllocation) {
	+ fprintf(stderr, "Error: Could not allocate the requested amount of "
	+ "global memory on the device.\n");
	+ cudaDeviceReset();
	+ exit(EXIT_FAILURE);
	+ }
	}
	+ checkForCudaErrors("After memory allocation");

	dim3 dimBlock(prop.maxThreadsPerBlock, 1, 1);
	unsigned int grid_size = iDivUp(n_ints, prop.maxThreadsPerBlock);
	@@ -101,15 +107,18 @@ int main(int argc, char** argv)
	exit(EXIT_FAILURE);
	}
	dim3 dimGrid(grid_size, 1, 1);
	- //printf("dimBlock = %d,%d,%d\n", dimBlock.x, dimBlock.y, dimBlock.z);
	- //printf("dimGrid = %d,%d,%d\n", dimGrid.x, dimGrid.y, dimGrid.z);

	- write_value<<<dimGrid, dimBlock>>>(d_mem, n_ints, 1, 1);
	- cudaThreadSynchronize();
	- checkForCudaErrors("After write_value");
	+ for (i=0; i<MEMCHUNKS; i++) {
	+ write_value<<<dimGrid, dimBlock>>>(d[i], n_ints, 1, 1);
	+ cudaThreadSynchronize();
	+ checkForCudaErrors("After write_value", i);
	+ }

	- cudaFree(d_mem);
	- checkForCudaErrors("After cudaFree(d_mem)");
	+
	+ for (i=0; i<MEMCHUNKS; i++) {
	+ cudaFree(d[i]);
	+ checkForCudaErrors("After cudaFree(d[i])", i);
	+ }

	cudaDeviceReset();

	diff --git a/utility.cu b/utility.cu
	@@ -17,6 +17,17 @@ void checkForCudaErrors(const char* checkpoint_description)
	}
	}

	+void checkForCudaErrors(const char* checkpoint_description, int iteration)
	+{
	+ cudaError_t err = cudaGetLastError();
	+ if (err != cudaSuccess) {
	+ fprintf(stderr, "CUDA error detected at: %s at iteration %d.\n"
	+ "System error string: %s\n", checkpoint_description, iteration,
	+ cudaGetErrorString(err));
	+ exit(EXIT_FAILURE);
	+ }
	+}
	+
	//Round a / b to nearest higher integer value
	unsigned int iDivUp(unsigned int a, unsigned int b)
	{
	diff --git a/utility.cuh b/utility.cuh
	@@ -3,6 +3,7 @@
	#define UTILITY_CUH_

	void checkForCudaErrors(const char* checkpoint_description);
	+void checkForCudaErrors(const char* checkpoint_description, int iteration);
	unsigned int iDivUp(unsigned int a, unsigned int b);

	#endif