fix various minor issues related to memory initialization, suggest clang-3.8 t… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit f892592dcad73329dce16040772caa240511678b | |
parent 103a8713606c2aa6257b0d5bcf4ae4b6562fea66 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 7 Sep 2017 12:54:25 -0700 | |
fix various minor issues related to memory initialization, suggest clang-3.8 to… | |
Diffstat: | |
M src/CMakeLists.txt | 11 +++++++---- | |
M src/device.cu | 74 ++++++++++++++++-------------… | |
M src/sphere.cpp | 4 ++-- | |
M tests/io_tests.py | 1 + | |
4 files changed, 49 insertions(+), 41 deletions(-) | |
--- | |
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt | |
@@ -3,7 +3,7 @@ | |
#LINK_LIBRARIES("-L${CUDA_SDK_ROOT_DIR}/lib -lcutil_x86_64") # For 64 bit syst… | |
# Ohter folders to include | |
-SET(CUDA_SDK_ROOT_DIR "/usr/local/cuda-5.0/samples") | |
+#SET(CUDA_SDK_ROOT_DIR "/usr/local/cuda-5.0/samples") | |
INCLUDE_DIRECTORIES("${CUDA_SDK_ROOT_DIR}/common/inc") | |
INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/src") | |
SET(EXECUTABLE_OUTPUT_PATH "../") | |
@@ -16,13 +16,16 @@ INCLUDE(FindCUDA) | |
IF (GPU_GENERATION EQUAL 1) # Kepler | |
SET(CUDA_NVCC_FLAGS | |
#"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35… | |
- "--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35\… | |
+ #"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35… | |
+ #"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35… | |
+ "--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35\… | |
ELSE() # Fermi | |
SET(CUDA_NVCC_FLAGS | |
#"--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20… | |
- "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\… | |
+ #"--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20… | |
+ "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\… | |
ENDIF (GPU_GENERATION EQUAL 1) | |
-SET(CMAKE_CXX_FLAGS "-fPIC ${CMAKE_CXX_FLAGS}") | |
+#SET(CMAKE_CXX_FLAGS "-fPIC ${CMAKE_CXX_FLAGS}") | |
# Rule to build executable program | |
CUDA_ADD_EXECUTABLE(sphere | |
diff --git a/src/device.cu b/src/device.cu | |
@@ -48,15 +48,14 @@ int cudaCoresPerSM(int major, int minor) | |
else if (major == 6 && minor == 1) | |
return 128; | |
else | |
- printf("Error in cudaCoresPerSM", | |
- "Device compute capability value (%d.%d) not recognized.", | |
- major, minor); | |
+ printf("Error in cudaCoresPerSM Device compute capability value " | |
+ "(%d.%d) not recognized.", major, minor); | |
return -1; | |
} | |
// Wrapper function for initializing the CUDA components. | |
// Called from main.cpp | |
-__host__ void DEM::initializeGPU(void) | |
+void DEM::initializeGPU(void) | |
{ | |
using std::cout; // stdout | |
@@ -149,13 +148,13 @@ __host__ void DEM::initializeGPU(void) | |
} | |
// Start timer for kernel profiling | |
-__host__ void startTimer(cudaEvent_t* kernel_tic) | |
+void startTimer(cudaEvent_t* kernel_tic) | |
{ | |
cudaEventRecord(*kernel_tic); | |
} | |
// Stop timer for kernel profiling and time to function sum | |
-__host__ void stopTimer(cudaEvent_t *kernel_tic, | |
+void stopTimer(cudaEvent_t *kernel_tic, | |
cudaEvent_t *kernel_toc, | |
float *kernel_elapsed, | |
double* sum) | |
@@ -280,7 +279,7 @@ __global__ void checkParticlePositions( | |
// Copy the constant data components to device memory, | |
// and check whether the values correspond to the | |
// values in constant memory. | |
-__host__ void DEM::checkConstantMemory() | |
+void DEM::checkConstantMemory() | |
{ | |
// Allocate space in global device memory | |
Grid* dev_grid; | |
@@ -322,7 +321,7 @@ __host__ void DEM::checkConstantMemory() | |
} | |
// Copy selected constant components to constant device memory. | |
-__host__ void DEM::transferToConstantDeviceMemory() | |
+void DEM::transferToConstantDeviceMemory() | |
{ | |
using std::cout; | |
@@ -361,7 +360,7 @@ __global__ void printWorldSize(Float4* dev_walls_nx) | |
dev_walls_nx[0].w); | |
} | |
-__host__ void DEM::updateGridSize() | |
+void DEM::updateGridSize() | |
{ | |
//printf("\nDEM::updateGridSize() start\n"); | |
Float* Lz = new Float; | |
@@ -396,7 +395,7 @@ __host__ void DEM::updateGridSize() | |
// Allocate device memory for particle variables, | |
// tied to previously declared pointers in structures | |
-__host__ void DEM::allocateGlobalDeviceMemory(void) | |
+void DEM::allocateGlobalDeviceMemory(void) | |
{ | |
// Particle memory size | |
unsigned int memSizeF = sizeof(Float) * np; | |
@@ -482,7 +481,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
// Allocate global memory on other devices required for "interact" function. | |
// The values of domain_size[ndevices] must be set beforehand. | |
-__host__ void DEM::allocateHelperDeviceMemory(void) | |
+void DEM::allocateHelperDeviceMemory(void) | |
{ | |
// Particle memory size | |
unsigned int memSizeF4 = sizeof(Float4) * np; | |
@@ -555,7 +554,7 @@ __host__ void DEM::allocateHelperDeviceMemory(void) | |
cudaSetDevice(device); // select main GPU | |
} | |
-__host__ void DEM::freeHelperDeviceMemory() | |
+void DEM::freeHelperDeviceMemory() | |
{ | |
for (int d=0; d<ndevices; d++) { | |
@@ -593,7 +592,7 @@ __host__ void DEM::freeHelperDeviceMemory() | |
cudaSetDevice(device); // select primary GPU | |
} | |
-__host__ void DEM::freeGlobalDeviceMemory() | |
+void DEM::freeGlobalDeviceMemory() | |
{ | |
if (verbose == 1) | |
printf("\nFreeing device memory: "); | |
@@ -658,7 +657,7 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
} | |
-__host__ void DEM::transferToGlobalDeviceMemory(int statusmsg) | |
+void DEM::transferToGlobalDeviceMemory(int statusmsg) | |
{ | |
if (verbose == 1 && statusmsg == 1) | |
std::cout << " Transfering data to the device: "; | |
@@ -745,7 +744,7 @@ __host__ void DEM::transferToGlobalDeviceMemory(int status… | |
std::cout << "Done" << std::endl; | |
} | |
-__host__ void DEM::transferFromGlobalDeviceMemory() | |
+void DEM::transferFromGlobalDeviceMemory() | |
{ | |
//std::cout << " Transfering data from the device: "; | |
@@ -824,7 +823,7 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
// Iterate through time by explicit time integration | |
-__host__ void DEM::startTime() | |
+void DEM::startTime() | |
{ | |
using std::cout; | |
using std::cerr; | |
@@ -1002,16 +1001,18 @@ __host__ void DEM::startTime() | |
unsigned int wall0_iz = 10000000; | |
// weight of fluid between two cells in z direction | |
Float dp_dz; | |
- if (cfd_solver == 0) | |
- dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
- else if (cfd_solver == 1) { | |
- dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
- | |
- // determine pressure at top wall at t=0 | |
- darcy.p_top_orig = darcy.p[d_idx(0,0,darcy.nz-1)] | |
- - darcy.p_mod_A | |
- *sin(2.0*M_PI*darcy.p_mod_f*time.current | |
- + darcy.p_mod_phi); | |
+ if (fluid == 1) { | |
+ if (cfd_solver == 0) | |
+ dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
+ else if (cfd_solver == 1) { | |
+ dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
+ | |
+ // determine pressure at top wall at t=0 | |
+ darcy.p_top_orig = darcy.p[d_idx(0,0,darcy.nz-1)] | |
+ - darcy.p_mod_A | |
+ *sin(2.0*M_PI*darcy.p_mod_f*time.current | |
+ + darcy.p_mod_phi); | |
+ } | |
} | |
//std::cout << "dp_dz = " << dp_dz << std::endl; | |
@@ -2589,13 +2590,15 @@ __host__ void DEM::startTime() | |
iter); | |
// Empty the dphi values after device to host transfer | |
- if (fluid == 1 && cfd_solver == 1) { | |
- setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>> | |
- (dev_darcy_dphi); | |
- cudaThreadSynchronize(); | |
- checkForCudaErrorsIter( | |
- "After setDarcyZeros(dev_darcy_dphi) after transfer", | |
- iter); | |
+ if (fluid == 1) { | |
+ if (cfd_solver == 1) { | |
+ setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>> | |
+ (dev_darcy_dphi); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrorsIter( | |
+ "After setDarcyZeros(dev_darcy_dphi) after transfe… | |
+ iter); | |
+ } | |
} | |
// Pause the CPU thread until all CUDA calls previously issued are | |
@@ -2603,8 +2606,9 @@ __host__ void DEM::startTime() | |
cudaThreadSynchronize(); | |
// Check the numerical stability of the NS solver | |
- if (fluid == 1 && cfd_solver == 0) | |
- checkNSstability(); | |
+ if (fluid == 1) | |
+ if (cfd_solver == 0) | |
+ checkNSstability(); | |
// Write binary output file | |
time.step_count += 1; | |
diff --git a/src/sphere.cpp b/src/sphere.cpp | |
@@ -21,7 +21,7 @@ DEM::DEM(const std::string inputbin, | |
const int transferConstMem, | |
const int fluidFlow, | |
const int device) | |
-: verbose(verbosity), fluid(fluidFlow), device(device) | |
+: verbose(verbosity), device(device), fluid(fluidFlow) | |
{ | |
using std::cout; | |
using std::cerr; | |
@@ -874,7 +874,7 @@ void DEM::forcechains(const std::string format, const int … | |
cout << k.x[i].z; | |
cout << " to " << k.x[j].x << ','; | |
if (threedim == 1) | |
- cout << k.x[j].y, ','; | |
+ cout << k.x[j].y << ','; | |
cout << k.x[j].z; | |
cout << " nohead " | |
<< "lw " << ratio * thickness_scaling | |
diff --git a/tests/io_tests.py b/tests/io_tests.py | |
@@ -27,6 +27,7 @@ compare(orig, py, "Python IO:") | |
# Test C++ IO routines | |
#orig.run(verbose=True, hideinputfile=True) | |
orig.run(dry=True) | |
+#orig.run(valgrind=True) | |
orig.run() | |
cpp = sphere.sim() | |
cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False) |