tinvestigating segfaultm small fixes - sphere - GPU-based 3D discrete element m… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 0285d52a7249a194ceed526710d22e6a2c473c9a | |
parent 3c22af0f8ea4b02887e75dbb9e2c05796cbd14a5 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 5 Jun 2014 07:31:38 +0200 | |
investigating segfaultm small fixes | |
Diffstat: | |
M src/device.cu | 10 +++++----- | |
M src/navierstokes.cuh | 18 ++++++++++-------- | |
M tests/io_tests_fluid.py | 1 + | |
3 files changed, 16 insertions(+), 13 deletions(-) | |
--- | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -571,11 +571,11 @@ __host__ void DEM::startTime() | |
// Use 3D block and grid layout for cell-face fluid calculations | |
dim3 dimBlockFluidFace(8, 8, 8); // 512 threads per block | |
dim3 dimGridFluidFace( | |
- iDivUp(grid.num[0]+1, dimBlockFluid.x), | |
- iDivUp(grid.num[1]+1, dimBlockFluid.y), | |
- iDivUp(grid.num[2]+1, dimBlockFluid.z)); | |
- if (dimGridFluid.z > 64 && navierstokes == 1) { | |
- cerr << "Error: dimGridFluid.z > 64" << endl; | |
+ iDivUp(grid.num[0]+1, dimBlockFluidFace.x), | |
+ iDivUp(grid.num[1]+1, dimBlockFluidFace.y), | |
+ iDivUp(grid.num[2]+1, dimBlockFluidFace.z)); | |
+ if (dimGridFluidFace.z > 64 && navierstokes == 1) { | |
+ cerr << "Error: dimGridFluidFace.z > 64" << endl; | |
exit(1); | |
} | |
diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh | |
t@@ -59,7 +59,7 @@ __device__ int checkFiniteFloat3( | |
void DEM::initNSmemDev(void) | |
{ | |
// size of scalar field | |
- unsigned int memSizeF = sizeof(Float)*NScells(); | |
+ unsigned int memSizeF = sizeof(Float)*NScells(); | |
// size of velocity arrays in staggered grid discretization | |
unsigned int memSizeFvel = sizeof(Float)*NScellsVelocity(); | |
t@@ -3204,6 +3204,7 @@ __global__ void interpolateCenterToFace( | |
const Float z_val = (center.z - zn.z)/2.0; | |
__syncthreads(); | |
+ //printf("c2f [%d,%d,%d] = %f,%f,%f\n", x,y,z, x_val, y_val, z_val); | |
dev_out_x[faceidx] = x_val; | |
dev_out_y[faceidx] = y_val; | |
dev_out_z[faceidx] = z_val; | |
t@@ -3225,15 +3226,13 @@ __global__ void interpolateFaceToCenter( | |
// Check that we are not outside the fluid grid | |
if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
- const unsigned int cellidx = idx(x,y,z); | |
- | |
__syncthreads(); | |
const Float x_n = dev_in_x[vidx(x,y,z)]; | |
const Float x_p = dev_in_x[vidx(x+1,y,z)]; | |
- const Float y_n = dev_in_x[vidx(x,y,z)]; | |
- const Float y_p = dev_in_x[vidx(x,y+1,z)]; | |
- const Float z_n = dev_in_x[vidx(x,y,z)]; | |
- const Float z_p = dev_in_x[vidx(x,y,z+1)]; | |
+ const Float y_n = dev_in_y[vidx(x,y,z)]; | |
+ const Float y_p = dev_in_y[vidx(x,y+1,z)]; | |
+ const Float z_n = dev_in_z[vidx(x,y,z)]; | |
+ const Float z_p = dev_in_z[vidx(x,y,z+1)]; | |
const Float3 val = MAKE_FLOAT3( | |
(x_n + x_p)/2.0, | |
t@@ -3241,7 +3240,8 @@ __global__ void interpolateFaceToCenter( | |
(z_n + z_p)/2.0); | |
__syncthreads(); | |
- dev_out[cellidx] = val; | |
+ //printf("[%d,%d,%d] = %f\n", x,y,z, val); | |
+ dev_out[idx(x,y,z)] = val; | |
} | |
} | |
t@@ -3317,6 +3317,8 @@ __global__ void findFaceDivTau( | |
(v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz)); | |
__syncthreads(); | |
+ printf("div_tau [%d,%d,%d] = %f, %f, %f\n", x,y,z, | |
+ div_tau_x, div_tau_y, div_tau_z); | |
dev_ns_div_tau_x[faceidx] = div_tau_x; | |
dev_ns_div_tau_y[faceidx] = div_tau_y; | |
dev_ns_div_tau_z[faceidx] = div_tau_z; | |
diff --git a/tests/io_tests_fluid.py b/tests/io_tests_fluid.py | |
t@@ -23,6 +23,7 @@ compare(orig, py, "Python IO:") | |
# Test C++ IO routines | |
#orig.run(verbose=True, hideinputfile=True) | |
+orig.run(dry=True) | |
orig.run(verbose=True, hideinputfile=False, cudamemcheck=True) | |
cpp = sphere.sim(fluid=True) | |
cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False) |