Fixed threadsPerBlock value - sphere - GPU-based 3D discrete element method al… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 5c10dece10e2b7eb6aebc93258c423f586a83dd6 | |
parent 18da38f2435da18e108b208f2054eb4c11107207 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 2 May 2013 09:58:15 +0200 | |
Fixed threadsPerBlock value | |
Diffstat: | |
M CMakeLists.txt | 10 +++++----- | |
M python/sphere.py | 37 ++++++++++++++++++++++-------… | |
M src/device.cu | 4 ++-- | |
M src/latticeboltzmann.cuh | 1 - | |
4 files changed, 33 insertions(+), 19 deletions(-) | |
--- | |
diff --git a/CMakeLists.txt b/CMakeLists.txt | |
@@ -24,13 +24,13 @@ find_package(OpenMP) | |
enable_testing() | |
# Set build type = Debug | |
-#set(CMAKE_BUILD_TYPE Debug) | |
-#if (CUDA_FOUND) | |
-# set(CUDA_NVCC_FLAGS -g;-G) | |
-#endif() | |
+set(CMAKE_BUILD_TYPE Debug) | |
+if (CUDA_FOUND) | |
+ set(CUDA_NVCC_FLAGS -g;-G) | |
+endif() | |
# Set build type = Release | |
-set(CMAKE_BUILD_TYPE Release) | |
+#set(CMAKE_BUILD_TYPE Release) | |
# Add source directory to project. | |
add_subdirectory(src) | |
diff --git a/python/sphere.py b/python/sphere.py | |
@@ -116,9 +116,9 @@ class Spherebin: | |
self.nu = numpy.zeros(1, dtype=numpy.float64) | |
self.f_v = numpy.zeros( | |
- (self.num[0] * self.num[1] * self.num[2], self.nd), | |
+ (self.num[0], self.num[1], self.num[2], self.nd), | |
dtype=numpy.float64) | |
- self.f_rho = numpy.zeros(self.num[0] * self.num[1] * self.num[2], | |
+ self.f_rho = numpy.zeros((self.num[0], self.num[1], self.num[2]), | |
dtype=numpy.float64) | |
def __cmp__(self, other): | |
@@ -308,22 +308,34 @@ class Spherebin: | |
self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32) | |
for i in range(self.nb0): | |
- self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, c… | |
- self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, c… | |
- self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64, c… | |
- self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
- self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64, c… | |
- self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
+ self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, | |
+ count=1) | |
+ self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, | |
+ count=1) | |
+ self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=self.nb0) | |
+ self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=self.nb0*self.nd).reshape(self.nb0, self.nd) | |
+ self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=self.nb0) | |
+ self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=self.nb0*self.nd).reshape(self.nb0, self.nd) | |
else: | |
self.nb0 = numpy.zeros(1, dtype=numpy.uint32) | |
if (fluid == True): | |
ncells = self.num[0]*self.num[1]*self.num[2] | |
self.nu = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
- self.f_v = numpy.empty(ncells*self.nd, dtype=numpy.float64) | |
+ self.f_v = numpy.empty( | |
+ (self.num[0], self.num[1], self.num[2], self.nd), | |
+ dtype=numpy.float64) | |
self.f_rho = numpy.empty(ncells, dtype=numpy.float64) | |
- self.f_v = numpy.fromfile(fh, dtype=numpy.float64, count=ncell… | |
- self.f_rho = numpy.fromfile(fh, dtype=numpy.float64, count=nce… | |
+ self.f_v = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=ncells*self.nd).reshape( | |
+ self.num[0], self.num[1], self.num[2], self.nd) | |
+ self.f_rho = numpy.fromfile(fh, dtype=numpy.float64, | |
+ count=ncells).reshape( | |
+ self.num[0], self.num[1], self.num[2]) | |
finally: | |
if fh is not None: | |
@@ -1771,6 +1783,9 @@ class Spherebin: | |
fig.savefig('../img_out/' + self.sid + '-ts-x1x3-slipangles.png') | |
fig.clf() | |
+ def plotRho(self): | |
+ x=2 | |
+ | |
def convert(graphicsformat = "png", | |
folder = "../img_out"): | |
diff --git a/src/device.cu b/src/device.cu | |
@@ -562,8 +562,8 @@ __host__ void DEM::startTime() | |
tic = clock(); | |
//// GPU workload configuration | |
- //unsigned int threadsPerBlock = 256; | |
- unsigned int threadsPerBlock = 512; | |
+ unsigned int threadsPerBlock = 256; | |
+ //unsigned int threadsPerBlock = 512; | |
// Create enough blocks to accomodate the particles | |
unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock); | |
diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh | |
@@ -124,7 +124,6 @@ __global__ void latticeBoltzmannD3Q19( | |
//printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx); | |
- | |
// Load the fluid distribution into local registers | |
__syncthreads(); | |
Float f_0 = dev_f[grid2index(x,y,z,0)]; |