Introduction
Introduction Statistics Contact Development Disclaimer Help
fix various minor issues related to memory initialization, suggest clang-3.8 t…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit f892592dcad73329dce16040772caa240511678b
parent 103a8713606c2aa6257b0d5bcf4ae4b6562fea66
Author: Anders Damsgaard <[email protected]>
Date: Thu, 7 Sep 2017 12:54:25 -0700
fix various minor issues related to memory initialization, suggest clang-3.8 to…
Diffstat:
M src/CMakeLists.txt | 11 +++++++----
M src/device.cu | 74 ++++++++++++++++-------------…
M src/sphere.cpp | 4 ++--
M tests/io_tests.py | 1 +
4 files changed, 49 insertions(+), 41 deletions(-)
---
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -3,7 +3,7 @@
#LINK_LIBRARIES("-L${CUDA_SDK_ROOT_DIR}/lib -lcutil_x86_64") # For 64 bit syst…
# Ohter folders to include
-SET(CUDA_SDK_ROOT_DIR "/usr/local/cuda-5.0/samples")
+#SET(CUDA_SDK_ROOT_DIR "/usr/local/cuda-5.0/samples")
INCLUDE_DIRECTORIES("${CUDA_SDK_ROOT_DIR}/common/inc")
INCLUDE_DIRECTORIES("${CMAKE_BINARY_DIR}/src")
SET(EXECUTABLE_OUTPUT_PATH "../")
@@ -16,13 +16,16 @@ INCLUDE(FindCUDA)
IF (GPU_GENERATION EQUAL 1) # Kepler
SET(CUDA_NVCC_FLAGS
#"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35…
- "--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35\…
+ #"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35…
+ #"--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35…
+ "--use_fast_math;-O3;-gencode=arch=compute_35,code=\"sm_35,compute_35\…
ELSE() # Fermi
SET(CUDA_NVCC_FLAGS
#"--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20…
- "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\…
+ #"--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20…
+ "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\…
ENDIF (GPU_GENERATION EQUAL 1)
-SET(CMAKE_CXX_FLAGS "-fPIC ${CMAKE_CXX_FLAGS}")
+#SET(CMAKE_CXX_FLAGS "-fPIC ${CMAKE_CXX_FLAGS}")
# Rule to build executable program
CUDA_ADD_EXECUTABLE(sphere
diff --git a/src/device.cu b/src/device.cu
@@ -48,15 +48,14 @@ int cudaCoresPerSM(int major, int minor)
else if (major == 6 && minor == 1)
return 128;
else
- printf("Error in cudaCoresPerSM",
- "Device compute capability value (%d.%d) not recognized.",
- major, minor);
+ printf("Error in cudaCoresPerSM Device compute capability value "
+ "(%d.%d) not recognized.", major, minor);
return -1;
}
// Wrapper function for initializing the CUDA components.
// Called from main.cpp
-__host__ void DEM::initializeGPU(void)
+void DEM::initializeGPU(void)
{
using std::cout; // stdout
@@ -149,13 +148,13 @@ __host__ void DEM::initializeGPU(void)
}
// Start timer for kernel profiling
-__host__ void startTimer(cudaEvent_t* kernel_tic)
+void startTimer(cudaEvent_t* kernel_tic)
{
cudaEventRecord(*kernel_tic);
}
// Stop timer for kernel profiling and time to function sum
-__host__ void stopTimer(cudaEvent_t *kernel_tic,
+void stopTimer(cudaEvent_t *kernel_tic,
cudaEvent_t *kernel_toc,
float *kernel_elapsed,
double* sum)
@@ -280,7 +279,7 @@ __global__ void checkParticlePositions(
// Copy the constant data components to device memory,
// and check whether the values correspond to the
// values in constant memory.
-__host__ void DEM::checkConstantMemory()
+void DEM::checkConstantMemory()
{
// Allocate space in global device memory
Grid* dev_grid;
@@ -322,7 +321,7 @@ __host__ void DEM::checkConstantMemory()
}
// Copy selected constant components to constant device memory.
-__host__ void DEM::transferToConstantDeviceMemory()
+void DEM::transferToConstantDeviceMemory()
{
using std::cout;
@@ -361,7 +360,7 @@ __global__ void printWorldSize(Float4* dev_walls_nx)
dev_walls_nx[0].w);
}
-__host__ void DEM::updateGridSize()
+void DEM::updateGridSize()
{
//printf("\nDEM::updateGridSize() start\n");
Float* Lz = new Float;
@@ -396,7 +395,7 @@ __host__ void DEM::updateGridSize()
// Allocate device memory for particle variables,
// tied to previously declared pointers in structures
-__host__ void DEM::allocateGlobalDeviceMemory(void)
+void DEM::allocateGlobalDeviceMemory(void)
{
// Particle memory size
unsigned int memSizeF = sizeof(Float) * np;
@@ -482,7 +481,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
// Allocate global memory on other devices required for "interact" function.
// The values of domain_size[ndevices] must be set beforehand.
-__host__ void DEM::allocateHelperDeviceMemory(void)
+void DEM::allocateHelperDeviceMemory(void)
{
// Particle memory size
unsigned int memSizeF4 = sizeof(Float4) * np;
@@ -555,7 +554,7 @@ __host__ void DEM::allocateHelperDeviceMemory(void)
cudaSetDevice(device); // select main GPU
}
-__host__ void DEM::freeHelperDeviceMemory()
+void DEM::freeHelperDeviceMemory()
{
for (int d=0; d<ndevices; d++) {
@@ -593,7 +592,7 @@ __host__ void DEM::freeHelperDeviceMemory()
cudaSetDevice(device); // select primary GPU
}
-__host__ void DEM::freeGlobalDeviceMemory()
+void DEM::freeGlobalDeviceMemory()
{
if (verbose == 1)
printf("\nFreeing device memory: ");
@@ -658,7 +657,7 @@ __host__ void DEM::freeGlobalDeviceMemory()
}
-__host__ void DEM::transferToGlobalDeviceMemory(int statusmsg)
+void DEM::transferToGlobalDeviceMemory(int statusmsg)
{
if (verbose == 1 && statusmsg == 1)
std::cout << " Transfering data to the device: ";
@@ -745,7 +744,7 @@ __host__ void DEM::transferToGlobalDeviceMemory(int status…
std::cout << "Done" << std::endl;
}
-__host__ void DEM::transferFromGlobalDeviceMemory()
+void DEM::transferFromGlobalDeviceMemory()
{
//std::cout << " Transfering data from the device: ";
@@ -824,7 +823,7 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
// Iterate through time by explicit time integration
-__host__ void DEM::startTime()
+void DEM::startTime()
{
using std::cout;
using std::cerr;
@@ -1002,16 +1001,18 @@ __host__ void DEM::startTime()
unsigned int wall0_iz = 10000000;
// weight of fluid between two cells in z direction
Float dp_dz;
- if (cfd_solver == 0)
- dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
- else if (cfd_solver == 1) {
- dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
-
- // determine pressure at top wall at t=0
- darcy.p_top_orig = darcy.p[d_idx(0,0,darcy.nz-1)]
- - darcy.p_mod_A
- *sin(2.0*M_PI*darcy.p_mod_f*time.current
- + darcy.p_mod_phi);
+ if (fluid == 1) {
+ if (cfd_solver == 0)
+ dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
+ else if (cfd_solver == 1) {
+ dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]);
+
+ // determine pressure at top wall at t=0
+ darcy.p_top_orig = darcy.p[d_idx(0,0,darcy.nz-1)]
+ - darcy.p_mod_A
+ *sin(2.0*M_PI*darcy.p_mod_f*time.current
+ + darcy.p_mod_phi);
+ }
}
//std::cout << "dp_dz = " << dp_dz << std::endl;
@@ -2589,13 +2590,15 @@ __host__ void DEM::startTime()
iter);
// Empty the dphi values after device to host transfer
- if (fluid == 1 && cfd_solver == 1) {
- setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>>
- (dev_darcy_dphi);
- cudaThreadSynchronize();
- checkForCudaErrorsIter(
- "After setDarcyZeros(dev_darcy_dphi) after transfer",
- iter);
+ if (fluid == 1) {
+ if (cfd_solver == 1) {
+ setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>>
+ (dev_darcy_dphi);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter(
+ "After setDarcyZeros(dev_darcy_dphi) after transfe…
+ iter);
+ }
}
// Pause the CPU thread until all CUDA calls previously issued are
@@ -2603,8 +2606,9 @@ __host__ void DEM::startTime()
cudaThreadSynchronize();
// Check the numerical stability of the NS solver
- if (fluid == 1 && cfd_solver == 0)
- checkNSstability();
+ if (fluid == 1)
+ if (cfd_solver == 0)
+ checkNSstability();
// Write binary output file
time.step_count += 1;
diff --git a/src/sphere.cpp b/src/sphere.cpp
@@ -21,7 +21,7 @@ DEM::DEM(const std::string inputbin,
const int transferConstMem,
const int fluidFlow,
const int device)
-: verbose(verbosity), fluid(fluidFlow), device(device)
+: verbose(verbosity), device(device), fluid(fluidFlow)
{
using std::cout;
using std::cerr;
@@ -874,7 +874,7 @@ void DEM::forcechains(const std::string format, const int …
cout << k.x[i].z;
cout << " to " << k.x[j].x << ',';
if (threedim == 1)
- cout << k.x[j].y, ',';
+ cout << k.x[j].y << ',';
cout << k.x[j].z;
cout << " nohead "
<< "lw " << ratio * thickness_scaling
diff --git a/tests/io_tests.py b/tests/io_tests.py
@@ -27,6 +27,7 @@ compare(orig, py, "Python IO:")
# Test C++ IO routines
#orig.run(verbose=True, hideinputfile=True)
orig.run(dry=True)
+#orig.run(valgrind=True)
orig.run()
cpp = sphere.sim()
cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False)
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.