darcy.cuh: remove synchronization calls for coalesced darcy calls - sphere - G… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 7da84647e15a6ce230d9c2f0fcbbe1111fea6bf8 | |
parent e6637c4d721e810b55d400e45a2ad74a0d7cf239 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Tue, 21 Feb 2023 15:01:35 +0100 | |
darcy.cuh: remove synchronization calls for coalesced darcy calls | |
Diffstat: | |
M src/darcy.cuh | 48 ++---------------------------… | |
1 file changed, 2 insertions(+), 46 deletions(-) | |
--- | |
diff --git a/src/darcy.cuh b/src/darcy.cuh | |
@@ -164,10 +164,8 @@ __global__ void setDarcyNormZero( | |
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
// check that we are not outside the fluid grid | |
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
- __syncthreads(); | |
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) | |
dev_darcy_norm[d_idx(x,y,z)] = 0.0; | |
- } | |
} | |
// Set an array of scalars to 0.0 inside devC_grid | |
@@ -180,10 +178,8 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarf… | |
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
// check that we are not outside the fluid grid | |
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
- __syncthreads(); | |
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) | |
dev_scalarfield[d_idx(x,y,z)] = 0.0; | |
- } | |
} | |
@@ -455,21 +451,18 @@ __global__ void findDarcyPorositiesLinear( | |
* targetCell.z; | |
// Lowest particle index in cell | |
- __syncthreads(); | |
startIdx = dev_cellStart[cellID]; | |
// Make sure cell is not empty | |
if (startIdx != 0xffffffff) { | |
// Highest particle index in cell | |
- __syncthreads(); | |
endIdx = dev_cellEnd[cellID]; | |
// Iterate over cell particles | |
for (i=startIdx; i<endIdx; ++i) { | |
// Read particle position and radius | |
- __syncthreads(); | |
xr = dev_x_sorted[i]; | |
v = dev_vel_sorted[i]; | |
//x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z); | |
@@ -582,7 +575,6 @@ __global__ void copyDarcyPorositiesToEdges( | |
const unsigned int readidx = d_idx(x, y_read, z); | |
const unsigned int writeidx = d_idx(x, y, z); | |
- __syncthreads(); | |
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx]; | |
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx]; | |
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx]; | |
@@ -616,7 +608,6 @@ __global__ void copyDarcyPorositiesToBottom( | |
const unsigned int readidx = d_idx(x, y, 1); | |
const unsigned int writeidx = d_idx(x, y, z); | |
- __syncthreads(); | |
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx]; | |
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx]; | |
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx]; | |
@@ -679,7 +670,6 @@ __global__ void findDarcyPorosities( | |
//unsigned int n = 0; | |
// Read old porosity | |
- __syncthreads(); | |
Float phi_0 = dev_darcy_phi[d_idx(x,y,z)]; | |
// The cell 3d index | |
@@ -713,21 +703,18 @@ __global__ void findDarcyPorosities( | |
* targetCell.z; | |
// Lowest particle index in cell | |
- __syncthreads(); | |
startIdx = dev_cellStart[cellID]; | |
// Make sure cell is not empty | |
if (startIdx != 0xffffffff) { | |
// Highest particle index in cell | |
- __syncthreads(); | |
endIdx = dev_cellEnd[cellID]; | |
// Iterate over cell particles | |
for (i=startIdx; i<endIdx; ++i) { | |
// Read particle position and radius | |
- __syncthreads(); | |
xr = dev_x_sorted[i]; | |
v = dev_vel_sorted[i]; | |
r = xr.w; | |
@@ -796,7 +783,6 @@ __global__ void findDarcyPorosities( | |
dphi = 0.5*(phi_new - phi_0); | |
// Save porosity and porosity change | |
- __syncthreads(); | |
//phi = 0.5; dphi = 0.0; // disable porosity effects | |
const unsigned int cellidx = d_idx(x,y,z); | |
dev_darcy_phi[cellidx] = phi*c_phi; | |
@@ -823,10 +809,7 @@ __global__ void findDarcyPorosities( | |
(void)checkFiniteFloat("dphi", x, y, z, dphi); | |
#endif | |
} else { | |
- | |
- __syncthreads(); | |
const unsigned int cellidx = d_idx(x,y,z); | |
- | |
dev_darcy_phi[cellidx] = 0.999; | |
dev_darcy_dphi[cellidx] = 0.0; | |
} | |
@@ -854,7 +837,6 @@ __global__ void findDarcyParticleVelocityDivergence( | |
if (x < nx && y < ny && z < nz) { | |
// read values | |
- __syncthreads(); | |
Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)]; | |
Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)]; | |
Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)]; | |
@@ -873,7 +855,6 @@ __global__ void findDarcyParticleVelocityDivergence( | |
(v_p_yp - v_p_yn)/dy + | |
(v_p_zp - v_p_zn)/dz; | |
- __syncthreads(); | |
dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p; | |
#ifdef CHECK_FLUID_FINITE | |
@@ -900,7 +881,6 @@ __global__ void findDarcyPressureGradient( | |
if (x < nx && y < ny && z < nz) { | |
// read values | |
- __syncthreads(); | |
Float p_xn = dev_darcy_p[d_idx(x-1,y,z)]; | |
Float p_xp = dev_darcy_p[d_idx(x+1,y,z)]; | |
Float p_yn = dev_darcy_p[d_idx(x,y-1,z)]; | |
@@ -919,7 +899,6 @@ __global__ void findDarcyPressureGradient( | |
(p_yp - p_yn)/(dy + dy), | |
(p_zp - p_zn)/(dz + dz)); | |
- __syncthreads(); | |
dev_darcy_grad_p[d_idx(x,y,z)] = grad_p; | |
//if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0) | |
@@ -957,7 +936,6 @@ __global__ void findDarcyPressureForceLinear( | |
if (i < devC_np) { | |
// read particle information | |
- __syncthreads(); | |
const Float4 x = dev_x[i]; | |
const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z); | |
@@ -979,7 +957,6 @@ __global__ void findDarcyPressureForceLinear( | |
const Float dz = devC_grid.L[2]/nz; | |
// read fluid information | |
- __syncthreads(); | |
//const Float phi = dev_darcy_phi[cellidx]; | |
// Cell center position | |
@@ -1002,7 +979,6 @@ __global__ void findDarcyPressureForceLinear( | |
iy_n = i_y + d_iy; | |
iz_n = i_z + d_iz; | |
- __syncthreads(); | |
grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)]; | |
// make sure edge and corner ghost nodes aren't read | |
@@ -1092,7 +1068,6 @@ __global__ void findDarcyPressureForceLinear( | |
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p); | |
#endif | |
// save force | |
- __syncthreads(); | |
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
} | |
@@ -1115,7 +1090,6 @@ __global__ void findDarcyPressureForce( | |
if (i < devC_np) { | |
// read particle information | |
- __syncthreads(); | |
const Float4 x = dev_x[i]; | |
// determine fluid cell containing the particle | |
@@ -1133,7 +1107,6 @@ __global__ void findDarcyPressureForce( | |
const Float dz = devC_grid.L[2]/devC_grid.num[2]; | |
// read fluid information | |
- __syncthreads(); | |
//const Float phi = dev_darcy_phi[cellidx]; | |
const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)]; | |
const Float p = dev_darcy_p[cellidx]; | |
@@ -1182,7 +1155,6 @@ __global__ void findDarcyPressureForce( | |
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p); | |
#endif | |
// save force | |
- __syncthreads(); | |
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
} | |
@@ -1207,7 +1179,6 @@ __global__ void setDarcyTopPressure( | |
const unsigned int cellidx = idx(x,y,z); | |
// Write the new pressure the top boundary cells | |
- __syncthreads(); | |
dev_darcy_p[cellidx] = new_pressure; | |
} | |
} | |
@@ -1231,7 +1202,6 @@ __global__ void setDarcyTopWallPressure( | |
const unsigned int cellidx = idx(x,y,z); | |
// Write the new pressure the top boundary cells | |
- __syncthreads(); | |
dev_darcy_p[cellidx] = new_pressure; | |
} | |
} | |
@@ -1252,7 +1222,6 @@ __global__ void setDarcyTopWallFixedFlow( | |
z == wall0_iz+1) { | |
// Write the new pressure the top boundary cells | |
- __syncthreads(); | |
const Float new_pressure = dev_darcy_p[idx(x,y,z-2)]; | |
dev_darcy_p[idx(x,y,z)] = new_pressure; | |
} | |
@@ -1280,7 +1249,6 @@ __global__ void findDarcyPermeabilities( | |
// 1D thread index | |
const unsigned int cellidx = d_idx(x,y,z); | |
- __syncthreads(); | |
Float phi = dev_darcy_phi[cellidx]; | |
// avoid division by zero | |
@@ -1300,7 +1268,6 @@ __global__ void findDarcyPermeabilities( | |
//k = fmin(2.7e-9, k); | |
k = fmin(2.7e-10, k); | |
- __syncthreads(); | |
dev_darcy_k[cellidx] = k; | |
#ifdef CHECK_FLUID_FINITE | |
@@ -1335,7 +1302,6 @@ __global__ void findDarcyPermeabilityGradients( | |
const unsigned int cellidx = d_idx(x,y,z); | |
// read values | |
- __syncthreads(); | |
const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)]; | |
const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)]; | |
const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)]; | |
@@ -1350,7 +1316,6 @@ __global__ void findDarcyPermeabilityGradients( | |
(k_zp - k_zn)/(dz+dz)); | |
// write result | |
- __syncthreads(); | |
dev_darcy_grad_k[cellidx] = grad_k; | |
/*printf("%d,%d,%d findK:\n" | |
@@ -1389,7 +1354,6 @@ __global__ void findDarcyPressureChange( | |
const unsigned int cellidx = d_idx(x,y,z); | |
// read values | |
- __syncthreads(); | |
const Float p_old = dev_darcy_p_old[cellidx]; | |
const Float p = dev_darcy_p[cellidx]; | |
@@ -1401,7 +1365,6 @@ __global__ void findDarcyPressureChange( | |
dpdt = 0.0; | |
// write result | |
- __syncthreads(); | |
dev_darcy_dpdt[cellidx] = dpdt; | |
/*printf("%d,%d,%d\n" | |
@@ -1461,7 +1424,6 @@ __global__ void firstDarcySolution( | |
const unsigned int cellidx = d_idx(x,y,z); | |
// read values | |
- __syncthreads(); | |
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)]; | |
const Float phi = dev_darcy_phi[cellidx]; | |
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)]; | |
@@ -1585,7 +1547,6 @@ __global__ void firstDarcySolution( | |
#endif | |
// save explicit integrated pressure change | |
- __syncthreads(); | |
dev_darcy_dp_expl[cellidx] = dp_expl; | |
#ifdef CHECK_FLUID_FINITE | |
@@ -1641,7 +1602,6 @@ __global__ void updateDarcySolution( | |
const unsigned int cellidx = d_idx(x,y,z); | |
// read values | |
- __syncthreads(); | |
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)]; | |
const Float phi = dev_darcy_phi[cellidx]; | |
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)]; | |
@@ -1778,7 +1738,6 @@ __global__ void updateDarcySolution( | |
#endif | |
// save new pressure and the residual | |
- __syncthreads(); | |
dev_darcy_p_new[cellidx] = p_new; | |
dev_darcy_norm[cellidx] = res_norm; | |
@@ -1817,7 +1776,6 @@ __global__ void findNewPressure( | |
const Float dp = dev_darcy_dp[cellidx]; | |
// save new pressure | |
- __syncthreads(); | |
dev_darcy_p[cellidx] += dp; | |
/*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n", | |
@@ -1860,7 +1818,6 @@ __global__ void findDarcyVelocities( | |
const unsigned int cellidx = d_idx(x,y,z); | |
- __syncthreads(); | |
const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)]; | |
const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)]; | |
const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)]; | |
@@ -1888,7 +1845,6 @@ __global__ void findDarcyVelocities( | |
const Float3 v = (-k/mu * grad_p)/phi; | |
// Save velocity | |
- __syncthreads(); | |
dev_darcy_v[cellidx] = v; | |
} | |
} |