GopherProxy

	Fixed threadsPerBlock value - sphere - GPU-based 3D discrete element method al…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 5c10dece10e2b7eb6aebc93258c423f586a83dd6
	parent 18da38f2435da18e108b208f2054eb4c11107207
	Author: Anders Damsgaard <[email protected]>
	Date: Thu, 2 May 2013 09:58:15 +0200

	Fixed threadsPerBlock value

	Diffstat:
	M CMakeLists.txt | 10 +++++-----
	M python/sphere.py | 37 ++++++++++++++++++++++-------…
	M src/device.cu | 4 ++--
	M src/latticeboltzmann.cuh | 1 -

	4 files changed, 33 insertions(+), 19 deletions(-)
	---
	diff --git a/CMakeLists.txt b/CMakeLists.txt
	@@ -24,13 +24,13 @@ find_package(OpenMP)
	enable_testing()

	# Set build type = Debug
	-#set(CMAKE_BUILD_TYPE Debug)
	-#if (CUDA_FOUND)
	-# set(CUDA_NVCC_FLAGS -g;-G)
	-#endif()
	+set(CMAKE_BUILD_TYPE Debug)
	+if (CUDA_FOUND)
	+ set(CUDA_NVCC_FLAGS -g;-G)
	+endif()

	# Set build type = Release
	-set(CMAKE_BUILD_TYPE Release)
	+#set(CMAKE_BUILD_TYPE Release)

	# Add source directory to project.
	add_subdirectory(src)
	diff --git a/python/sphere.py b/python/sphere.py
	@@ -116,9 +116,9 @@ class Spherebin:

	self.nu = numpy.zeros(1, dtype=numpy.float64)
	self.f_v = numpy.zeros(
	- (self.num[0] * self.num[1] * self.num[2], self.nd),
	+ (self.num[0], self.num[1], self.num[2], self.nd),
	dtype=numpy.float64)
	- self.f_rho = numpy.zeros(self.num[0] * self.num[1] * self.num[2],
	+ self.f_rho = numpy.zeros((self.num[0], self.num[1], self.num[2]),
	dtype=numpy.float64)

	def __cmp__(self, other):
	@@ -308,22 +308,34 @@ class Spherebin:
	self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1)
	self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32)
	for i in range(self.nb0):
	- self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, c…
	- self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, c…
	- self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64, c…
	- self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, c…
	- self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64, c…
	- self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, c…
	+ self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32,
	+ count=1)
	+ self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32,
	+ count=1)
	+ self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=self.nb0)
	+ self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=self.nb0*self.nd).reshape(self.nb0, self.nd)
	+ self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=self.nb0)
	+ self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=self.nb0*self.nd).reshape(self.nb0, self.nd)
	else:
	self.nb0 = numpy.zeros(1, dtype=numpy.uint32)

	if (fluid == True):
	ncells = self.num[0]*self.num[1]*self.num[2]
	self.nu = numpy.fromfile(fh, dtype=numpy.float64, count=1)
	- self.f_v = numpy.empty(ncells*self.nd, dtype=numpy.float64)
	+ self.f_v = numpy.empty(
	+ (self.num[0], self.num[1], self.num[2], self.nd),
	+ dtype=numpy.float64)
	self.f_rho = numpy.empty(ncells, dtype=numpy.float64)
	- self.f_v = numpy.fromfile(fh, dtype=numpy.float64, count=ncell…
	- self.f_rho = numpy.fromfile(fh, dtype=numpy.float64, count=nce…
	+ self.f_v = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=ncells*self.nd).reshape(
	+ self.num[0], self.num[1], self.num[2], self.nd)
	+ self.f_rho = numpy.fromfile(fh, dtype=numpy.float64,
	+ count=ncells).reshape(
	+ self.num[0], self.num[1], self.num[2])

	finally:
	if fh is not None:
	@@ -1771,6 +1783,9 @@ class Spherebin:
	fig.savefig('../img_out/' + self.sid + '-ts-x1x3-slipangles.png')
	fig.clf()

	+ def plotRho(self):
	+ x=2
	+

	def convert(graphicsformat = "png",
	folder = "../img_out"):
	diff --git a/src/device.cu b/src/device.cu
	@@ -562,8 +562,8 @@ __host__ void DEM::startTime()
	tic = clock();

	//// GPU workload configuration
	- //unsigned int threadsPerBlock = 256;
	- unsigned int threadsPerBlock = 512;
	+ unsigned int threadsPerBlock = 256;
	+ //unsigned int threadsPerBlock = 512;

	// Create enough blocks to accomodate the particles
	unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
	diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh
	@@ -124,7 +124,6 @@ __global__ void latticeBoltzmannD3Q19(

	//printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx);

	-
	// Load the fluid distribution into local registers
	__syncthreads();
	Float f_0 = dev_f[grid2index(x,y,z,0)];