timplemented ndem functionality into Darcy solver, not tested when ndem!=1 - sp… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit ea8067eeef0eaeb962695192e3b978fa6b413fc5 | |
parent 629b0c945feb44b5d12abbd98e5e47f67cbab702 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 6 Nov 2014 15:45:59 +0100 | |
implemented ndem functionality into Darcy solver, not tested when ndem!=1 | |
Diffstat: | |
M python/halfshear-darcy-starter.py | 6 +++--- | |
M src/darcy.cuh | 36 +++++++++++++++++++++++------… | |
M src/device.cu | 414 ++++++++++++++++-------------… | |
3 files changed, 242 insertions(+), 214 deletions(-) | |
--- | |
diff --git a/python/halfshear-darcy-starter.py b/python/halfshear-darcy-starter… | |
t@@ -34,8 +34,8 @@ sim.cleanup() | |
sim.adjustUpperWall() | |
sim.zeroKinematics() | |
-#sim.shear(0.0/20.0) | |
-sim.shear(1.0/20.0) | |
+sim.shear(0.0/20.0) | |
+#sim.shear(1.0/20.0) | |
if fluid: | |
#sim.num[2] *= 2 | |
t@@ -63,7 +63,7 @@ sim.setDampingTangential(0.0) | |
sim.fixvel[:] = -1.0 | |
sim.initTemporal(total = 20.0, file_dt = 0.01, epsilon=0.07) | |
-sim.time_dt[0] *= 1.0e-2 | |
+#sim.time_dt[0] *= 1.0e-2 | |
#sim.initTemporal(total = 1.0e-4, file_dt = 1.0e-5, epsilon=0.07) | |
# Fix lowermost particles | |
diff --git a/src/darcy.cuh b/src/darcy.cuh | |
t@@ -170,6 +170,23 @@ __global__ void setDarcyNormZero( | |
} | |
} | |
+// Set an array of scalars to 0.0 inside devC_grid | |
+ template<typename T> | |
+__global__ void setDarcyZeros(T* __restrict__ dev_scalarfield) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // check that we are not outside the fluid grid | |
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
+ __syncthreads(); | |
+ dev_scalarfield[d_idx(x,y,z)] = 0.0; | |
+ } | |
+} | |
+ | |
+ | |
// Update a field in the ghost nodes from their parent cell values. The edge | |
// (diagonal) cells are not written since they are not read. Launch this kernel | |
// for all cells in the grid using | |
t@@ -234,7 +251,7 @@ __global__ void findDarcyPorosities( | |
const unsigned int np, // in | |
const Float c_phi, // in | |
Float* __restrict__ dev_darcy_phi, // in + out | |
- Float* __restrict__ dev_darcy_dphi) // out | |
+ Float* __restrict__ dev_darcy_dphi) // in + out | |
{ | |
// 3D thread index | |
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
t@@ -395,8 +412,8 @@ __global__ void findDarcyPorosities( | |
__syncthreads(); | |
//phi = 0.5; dphi = 0.0; // disable porosity effects | |
const unsigned int cellidx = d_idx(x,y,z); | |
- dev_darcy_phi[cellidx] = phi*c_phi; | |
- dev_darcy_dphi[cellidx] = dphi*c_phi; | |
+ dev_darcy_phi[cellidx] = phi*c_phi; | |
+ dev_darcy_dphi[cellidx] += dphi*c_phi; | |
//dev_darcy_vp_avg[cellidx] = v_avg; | |
//dev_darcy_d_avg[cellidx] = d_avg; | |
t@@ -719,10 +736,11 @@ __global__ void findDarcyPermeabilityGradients( | |
// Find the temporal gradient in pressure using the backwards Euler method | |
__global__ void findDarcyPressureChange( | |
- const Float* __restrict__ dev_darcy_p_old, | |
- const Float* __restrict__ dev_darcy_p, | |
- const unsigned int iter, | |
- Float* __restrict__ dev_darcy_dpdt) | |
+ const Float* __restrict__ dev_darcy_p_old, // in | |
+ const Float* __restrict__ dev_darcy_p, // in | |
+ const unsigned int iter, // in | |
+ const unsigned int ndem, // in | |
+ Float* __restrict__ dev_darcy_dpdt) // out | |
{ | |
// 3D thread index | |
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
t@@ -739,7 +757,7 @@ __global__ void findDarcyPressureChange( | |
const Float p_old = dev_darcy_p_old[cellidx]; | |
const Float p = dev_darcy_p[cellidx]; | |
- Float dpdt = (p - p_old)/devC_dt; | |
+ Float dpdt = (p - p_old)/(ndem*devC_dt); | |
// Ignore the large initial pressure gradients caused by solver "warm | |
// up" towards hydrostatic pressure distribution | |
t@@ -862,7 +880,7 @@ __global__ void updateDarcySolution( | |
/(2.0*(dxdx*dydy + dxdx*dzdz + dydy*dzdz));*/ | |
Float p_new = p_old | |
- + devC_dt/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p)) | |
+ + (ndem*devC_dt)/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p… | |
- dphi/(beta_f*phi*(1.0 - phi)); | |
// Dirichlet BC at dynamic top wall. wall0_iz will be larger than the | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -1745,8 +1745,7 @@ __host__ void DEM::startTime() | |
else if (cfd_solver == 1) { | |
#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS) | |
- std::cout | |
- << "\n\n@@@@@@ TIME STEP " << iter << " @@@" | |
+ std::cout << "\n\n@@@@@@ TIME STEP " << iter << " @@@" | |
<< std::endl; | |
#endif | |
t@@ -1796,116 +1795,77 @@ __host__ void DEM::startTime() | |
iter); | |
} | |
- // Modulate the pressures at the upper boundary cells | |
- if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) && | |
- darcy.p_mod_f > 1.0e-7) { | |
- // original pressure | |
- Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //orig… | |
- + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.current | |
- + darcy.p_mod_phi); | |
+ if ((iter % ns.ndem) == 0) { | |
+ | |
+ // Modulate the pressures at the upper boundary cells | |
+ if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) && | |
+ darcy.p_mod_f > 1.0e-7) { | |
+ // original pressure | |
+ Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //… | |
+ + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.cu… | |
+ + darcy.p_mod_phi); | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>( | |
+ new_pressure, | |
+ dev_darcy_p); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
+ &t_setDarcyTopPressure); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrorsIter("Post setUpperPressureNS", iter… | |
+ } | |
+ | |
+ if (walls.nw > 0 && walls.wmode[0] == 1) { | |
+ wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]); | |
+ /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFlui… | |
+ new_pressure, | |
+ wall0_iz, | |
+ dev_darcy_p); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrorsIter("Post setDarcyTopWallPressure… | |
+ iter);*/ | |
+ } | |
+ | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>( | |
- new_pressure, | |
- dev_darcy_p); | |
+ findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>( | |
+ darcy.k_c, dev_darcy_phi, dev_darcy_k); | |
+ cudaThreadSynchronize(); | |
if (PROFILING == 1) | |
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_setDarcyTopPressure); | |
- cudaThreadSynchronize(); | |
- checkForCudaErrorsIter("Post setUpperPressureNS", iter); | |
- } | |
- | |
- if (walls.nw > 0 && walls.wmode[0] == 1) { | |
- wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]); | |
- /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFluid>>>( | |
- new_pressure, | |
- wall0_iz, | |
- dev_darcy_p); | |
- cudaThreadSynchronize(); | |
- checkForCudaErrorsIter("Post setDarcyTopWallPressure", | |
- iter);*/ | |
- } | |
- | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>( | |
- darcy.k_c, dev_darcy_phi, dev_darcy_k); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_findDarcyPermeabilities); | |
- checkForCudaErrorsIter("Post findDarcyPermeabilities", | |
- iter); | |
- | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_k, darcy.bc_bot, darcy.bc_top); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_setDarcyGhostNodes); | |
- checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_k)", | |
- iter); | |
- | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFluid>>… | |
- dev_darcy_k, dev_darcy_grad_k); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_findDarcyPermeabilityGradients); | |
- checkForCudaErrorsIter("Post findDarcyPermeabilityGradients", | |
- iter); | |
+ &t_findDarcyPermeabilities); | |
+ checkForCudaErrorsIter("Post findDarcyPermeabilities", | |
+ iter); | |
- if (iter == 0) { | |
- setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_norm); | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_k, darcy.bc_bot, darcy.bc_top); | |
cudaThreadSynchronize(); | |
- checkForCudaErrorsIter("Post setDarcyNormZero", iter); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_setDarcyGhostNodes); | |
+ checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_… | |
+ iter); | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p, | |
- dev_darcy_p_old); | |
+ findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFlu… | |
+ dev_darcy_k, dev_darcy_grad_k); | |
cudaThreadSynchronize(); | |
if (PROFILING == 1) | |
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_copyValues); | |
- checkForCudaErrorsIter("Post copyValues(p -> p_old)", | |
+ &t_findDarcyPermeabilityGradients); | |
+ checkForCudaErrorsIter("Post findDarcyPermeabilityGradient… | |
iter); | |
- } | |
- | |
- /*if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p_old, | |
- dev_darcy_p, | |
- iter, | |
- dev_darcy_dpdt); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_findDarcyPressureChange); | |
- checkForCudaErrorsIter("Post findDarcyPressureChange", iter);*/ | |
- | |
- // Solve the system of epsilon using a Jacobi iterative solver. | |
- // The average normalized residual is initialized to a large | |
- // value. | |
- //double avg_norm_res; | |
- double max_norm_res; | |
- | |
- // Write a log file of the normalized residuals during the | |
- // Jacobi iterations | |
- std::ofstream reslog; | |
- if (write_res_log == 1) | |
- reslog.open("max_res_norm.dat"); | |
- for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac) { | |
+ if (iter == 0) { | |
+ setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_norm); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrorsIter("Post setDarcyNormZero", iter); | |
- if (nijac == 0) { | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
t@@ -1919,130 +1879,180 @@ __host__ void DEM::startTime() | |
iter); | |
} | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_setDarcyGhostNodes); | |
- checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
- "dev_darcy_p) in Jacobi loop", iter); | |
+ /*if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_p_old, | |
+ dev_darcy_p, | |
+ iter, | |
+ dev_darcy_dpdt); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findDarcyPressureChange); | |
+ checkForCudaErrorsIter("Post findDarcyPressureChange", i… | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p_old, | |
- //dev_darcy_dpdt, | |
- dev_darcy_p, | |
- dev_darcy_k, | |
- dev_darcy_phi, | |
- dev_darcy_dphi, | |
- dev_darcy_grad_k, | |
- darcy.beta_f, | |
- darcy.mu, | |
- darcy.bc_bot, | |
- darcy.bc_top, | |
- darcy.ndem, | |
- wall0_iz, | |
- dev_darcy_p_new, | |
- dev_darcy_norm); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_updateDarcySolution); | |
- checkForCudaErrorsIter("Post updateDarcySolution", iter); | |
+ // Solve the system of epsilon using a Jacobi iterative so… | |
+ // The average normalized residual is initialized to a lar… | |
+ // value. | |
+ //double avg_norm_res; | |
+ double max_norm_res; | |
- // Copy new values to current values | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p_new, | |
- dev_darcy_p); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_copyValues); | |
- checkForCudaErrorsIter("Post copyValues(p_new -> p)", iter… | |
+ // Write a log file of the normalized residuals during the | |
+ // Jacobi iterations | |
+ std::ofstream reslog; | |
+ if (write_res_log == 1) | |
+ reslog.open("max_res_norm.dat"); | |
+ | |
+ for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac)… | |
+ | |
+ if (nijac == 0) { | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_p, | |
+ dev_darcy_p_old); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_el… | |
+ &t_copyValues); | |
+ checkForCudaErrorsIter("Post copyValues(p -> p_old… | |
+ iter); | |
+ } | |
+ | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFlui… | |
+ dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
+ &t_setDarcyGhostNodes); | |
+ checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
+ "dev_darcy_p) in Jacobi loop", iter); | |
+ | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_p_old, | |
+ //dev_darcy_dpdt, | |
+ dev_darcy_p, | |
+ dev_darcy_k, | |
+ dev_darcy_phi, | |
+ dev_darcy_dphi, | |
+ dev_darcy_grad_k, | |
+ darcy.beta_f, | |
+ darcy.mu, | |
+ darcy.bc_bot, | |
+ darcy.bc_top, | |
+ darcy.ndem, | |
+ wall0_iz, | |
+ dev_darcy_p_new, | |
+ dev_darcy_norm); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
+ &t_updateDarcySolution); | |
+ checkForCudaErrorsIter("Post updateDarcySolution", ite… | |
+ | |
+ // Copy new values to current values | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_p_new, | |
+ dev_darcy_p); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
+ &t_copyValues); | |
+ checkForCudaErrorsIter("Post copyValues(p_new -> p)", … | |
#ifdef REPORT_EPSILON | |
- std::cout << "\n###### JACOBI ITERATION " | |
- << nijac << " after copyValues ######" << std::endl; | |
- transferDarcyPressuresFromGlobalDeviceMemory(); | |
- printDarcyArray(stdout, darcy.p, "p"); | |
+ std::cout << "\n###### JACOBI ITERATION " | |
+ << nijac << " after copyValues ######" << std::end… | |
+ transferDarcyPressuresFromGlobalDeviceMemory(); | |
+ printDarcyArray(stdout, darcy.p, "p"); | |
#endif | |
- if (nijac % nijacnorm == 0) { | |
- // Read the normalized residuals from the device | |
- transferDarcyNormFromGlobalDeviceMemory(); | |
+ if (nijac % nijacnorm == 0) { | |
+ // Read the normalized residuals from the device | |
+ transferDarcyNormFromGlobalDeviceMemory(); | |
- // Write the normalized residuals to the terminal | |
- //printDarcyArray(stdout, darcy.norm, "norm"); | |
+ // Write the normalized residuals to the terminal | |
+ //printDarcyArray(stdout, darcy.norm, "norm"); | |
- // Find the maximum value of the normalized residuals | |
- max_norm_res = maxNormResDarcy(); | |
+ // Find the maximum value of the normalized residu… | |
+ max_norm_res = maxNormResDarcy(); | |
- // Write the Jacobi iteration number and maximum value | |
- // of the normalized residual to the log file | |
- if (write_res_log == 1) | |
- reslog << nijac << '\t' << max_norm_res | |
- << std::endl; | |
+ // Write the Jacobi iteration number and maximum v… | |
+ // of the normalized residual to the log file | |
+ if (write_res_log == 1) | |
+ reslog << nijac << '\t' << max_norm_res | |
+ << std::endl; | |
- if (max_norm_res <= darcy.tolerance) { | |
- if (write_conv_log == 1 | |
- && iter % conv_log_interval == 0) | |
- convlog << iter+1 << '\t' << nijac << std::end… | |
+ if (max_norm_res <= darcy.tolerance) { | |
+ if (write_conv_log == 1 | |
+ && iter % conv_log_interval == 0) | |
+ convlog << iter+1 << '\t' << nijac << std:… | |
- break; // solution has converged, exit Jacobi loop | |
+ break; // solution has converged, exit Jacobi… | |
+ } | |
} | |
- } | |
- if (nijac == darcy.maxiter-1) { | |
+ if (nijac == darcy.maxiter-1) { | |
- if (write_conv_log == 1) | |
- convlog << iter+1 << '\t' << nijac << std::endl; | |
+ if (write_conv_log == 1) | |
+ convlog << iter+1 << '\t' << nijac << std::end… | |
- std::cerr << "\nIteration " << iter << ", time " | |
- << iter*time.dt << " s: " | |
- "Error, the pressure solution in the fluid " | |
- "calculations did not converge. Try increasing " | |
- "the value of 'darcy.maxiter' (" | |
- << darcy.maxiter | |
- << ") or increase 'darcy.tolerance' (" | |
- << darcy.tolerance << ")." << std::endl; | |
- } | |
+ std::cerr << "\nIteration " << iter << ", time " | |
+ << iter*time.dt << " s: " | |
+ "Error, the pressure solution in the fluid " | |
+ "calculations did not converge. Try increasing… | |
+ "the value of 'darcy.maxiter' (" | |
+ << darcy.maxiter | |
+ << ") or increase 'darcy.tolerance' (" | |
+ << darcy.tolerance << ")." << std::endl; | |
+ } | |
- if (write_res_log == 1) | |
- reslog.close(); | |
+ if (write_res_log == 1) | |
+ reslog.close(); | |
- //break; // end after first iteration | |
- } | |
+ //break; // end after first iteration | |
+ } | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>> | |
- (dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_setDarcyGhostNodes); | |
- checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
- "dev_darcy_p) after Jacobi loop", iter); | |
+ // Zero all dphi values right after they are used in fluid | |
+ // solution | |
+ setDarcyZeros<Float> <<<dimGridFluid, dimBlockFluid>>> | |
+ (dev_darcy_dphi); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrorsIter("After setDarcyZeros(dev_darcy_dphi… | |
+ iter); | |
- if (PROFILING == 1) | |
- startTimer(&kernel_tic); | |
- findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>( | |
- dev_darcy_p, | |
- dev_darcy_phi, | |
- dev_darcy_k, | |
- darcy.mu, | |
- dev_darcy_v); | |
- cudaThreadSynchronize(); | |
- if (PROFILING == 1) | |
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
- &t_findDarcyVelocities); | |
- checkForCudaErrorsIter("Post findDarcyVelocities", iter); | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>> | |
+ (dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_setDarcyGhostNodes); | |
+ checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
+ "dev_darcy_p) after Jacobi loop", iter); | |
+ | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_darcy_p, | |
+ dev_darcy_phi, | |
+ dev_darcy_k, | |
+ darcy.mu, | |
+ dev_darcy_v); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findDarcyVelocities); | |
+ checkForCudaErrorsIter("Post findDarcyVelocities", iter); | |
+ } | |
} | |
} | |
//break; // end after first iteration |