| darcy.cuh: remove synchronization calls for coalesced darcy calls - sphere - G… | |
| git clone git://src.adamsgaard.dk/sphere | |
| Log | |
| Files | |
| Refs | |
| LICENSE | |
| --- | |
| commit 7da84647e15a6ce230d9c2f0fcbbe1111fea6bf8 | |
| parent e6637c4d721e810b55d400e45a2ad74a0d7cf239 | |
| Author: Anders Damsgaard <[email protected]> | |
| Date: Tue, 21 Feb 2023 15:01:35 +0100 | |
| darcy.cuh: remove synchronization calls for coalesced darcy calls | |
| Diffstat: | |
| M src/darcy.cuh | 48 ++---------------------------… | |
| 1 file changed, 2 insertions(+), 46 deletions(-) | |
| --- | |
| diff --git a/src/darcy.cuh b/src/darcy.cuh | |
| @@ -164,10 +164,8 @@ __global__ void setDarcyNormZero( | |
| const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| // check that we are not outside the fluid grid | |
| - if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
| - __syncthreads(); | |
| + if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) | |
| dev_darcy_norm[d_idx(x,y,z)] = 0.0; | |
| - } | |
| } | |
| // Set an array of scalars to 0.0 inside devC_grid | |
| @@ -180,10 +178,8 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarf… | |
| const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| // check that we are not outside the fluid grid | |
| - if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
| - __syncthreads(); | |
| + if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) | |
| dev_scalarfield[d_idx(x,y,z)] = 0.0; | |
| - } | |
| } | |
| @@ -455,21 +451,18 @@ __global__ void findDarcyPorositiesLinear( | |
| * targetCell.z; | |
| // Lowest particle index in cell | |
| - __syncthreads(); | |
| startIdx = dev_cellStart[cellID]; | |
| // Make sure cell is not empty | |
| if (startIdx != 0xffffffff) { | |
| // Highest particle index in cell | |
| - __syncthreads(); | |
| endIdx = dev_cellEnd[cellID]; | |
| // Iterate over cell particles | |
| for (i=startIdx; i<endIdx; ++i) { | |
| // Read particle position and radius | |
| - __syncthreads(); | |
| xr = dev_x_sorted[i]; | |
| v = dev_vel_sorted[i]; | |
| //x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z); | |
| @@ -582,7 +575,6 @@ __global__ void copyDarcyPorositiesToEdges( | |
| const unsigned int readidx = d_idx(x, y_read, z); | |
| const unsigned int writeidx = d_idx(x, y, z); | |
| - __syncthreads(); | |
| dev_darcy_phi[writeidx] = dev_darcy_phi[readidx]; | |
| dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx]; | |
| dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx]; | |
| @@ -616,7 +608,6 @@ __global__ void copyDarcyPorositiesToBottom( | |
| const unsigned int readidx = d_idx(x, y, 1); | |
| const unsigned int writeidx = d_idx(x, y, z); | |
| - __syncthreads(); | |
| dev_darcy_phi[writeidx] = dev_darcy_phi[readidx]; | |
| dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx]; | |
| dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx]; | |
| @@ -679,7 +670,6 @@ __global__ void findDarcyPorosities( | |
| //unsigned int n = 0; | |
| // Read old porosity | |
| - __syncthreads(); | |
| Float phi_0 = dev_darcy_phi[d_idx(x,y,z)]; | |
| // The cell 3d index | |
| @@ -713,21 +703,18 @@ __global__ void findDarcyPorosities( | |
| * targetCell.z; | |
| // Lowest particle index in cell | |
| - __syncthreads(); | |
| startIdx = dev_cellStart[cellID]; | |
| // Make sure cell is not empty | |
| if (startIdx != 0xffffffff) { | |
| // Highest particle index in cell | |
| - __syncthreads(); | |
| endIdx = dev_cellEnd[cellID]; | |
| // Iterate over cell particles | |
| for (i=startIdx; i<endIdx; ++i) { | |
| // Read particle position and radius | |
| - __syncthreads(); | |
| xr = dev_x_sorted[i]; | |
| v = dev_vel_sorted[i]; | |
| r = xr.w; | |
| @@ -796,7 +783,6 @@ __global__ void findDarcyPorosities( | |
| dphi = 0.5*(phi_new - phi_0); | |
| // Save porosity and porosity change | |
| - __syncthreads(); | |
| //phi = 0.5; dphi = 0.0; // disable porosity effects | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| dev_darcy_phi[cellidx] = phi*c_phi; | |
| @@ -823,10 +809,7 @@ __global__ void findDarcyPorosities( | |
| (void)checkFiniteFloat("dphi", x, y, z, dphi); | |
| #endif | |
| } else { | |
| - | |
| - __syncthreads(); | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| - | |
| dev_darcy_phi[cellidx] = 0.999; | |
| dev_darcy_dphi[cellidx] = 0.0; | |
| } | |
| @@ -854,7 +837,6 @@ __global__ void findDarcyParticleVelocityDivergence( | |
| if (x < nx && y < ny && z < nz) { | |
| // read values | |
| - __syncthreads(); | |
| Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)]; | |
| Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)]; | |
| Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)]; | |
| @@ -873,7 +855,6 @@ __global__ void findDarcyParticleVelocityDivergence( | |
| (v_p_yp - v_p_yn)/dy + | |
| (v_p_zp - v_p_zn)/dz; | |
| - __syncthreads(); | |
| dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p; | |
| #ifdef CHECK_FLUID_FINITE | |
| @@ -900,7 +881,6 @@ __global__ void findDarcyPressureGradient( | |
| if (x < nx && y < ny && z < nz) { | |
| // read values | |
| - __syncthreads(); | |
| Float p_xn = dev_darcy_p[d_idx(x-1,y,z)]; | |
| Float p_xp = dev_darcy_p[d_idx(x+1,y,z)]; | |
| Float p_yn = dev_darcy_p[d_idx(x,y-1,z)]; | |
| @@ -919,7 +899,6 @@ __global__ void findDarcyPressureGradient( | |
| (p_yp - p_yn)/(dy + dy), | |
| (p_zp - p_zn)/(dz + dz)); | |
| - __syncthreads(); | |
| dev_darcy_grad_p[d_idx(x,y,z)] = grad_p; | |
| //if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0) | |
| @@ -957,7 +936,6 @@ __global__ void findDarcyPressureForceLinear( | |
| if (i < devC_np) { | |
| // read particle information | |
| - __syncthreads(); | |
| const Float4 x = dev_x[i]; | |
| const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z); | |
| @@ -979,7 +957,6 @@ __global__ void findDarcyPressureForceLinear( | |
| const Float dz = devC_grid.L[2]/nz; | |
| // read fluid information | |
| - __syncthreads(); | |
| //const Float phi = dev_darcy_phi[cellidx]; | |
| // Cell center position | |
| @@ -1002,7 +979,6 @@ __global__ void findDarcyPressureForceLinear( | |
| iy_n = i_y + d_iy; | |
| iz_n = i_z + d_iz; | |
| - __syncthreads(); | |
| grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)]; | |
| // make sure edge and corner ghost nodes aren't read | |
| @@ -1092,7 +1068,6 @@ __global__ void findDarcyPressureForceLinear( | |
| checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p); | |
| #endif | |
| // save force | |
| - __syncthreads(); | |
| dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
| dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
| } | |
| @@ -1115,7 +1090,6 @@ __global__ void findDarcyPressureForce( | |
| if (i < devC_np) { | |
| // read particle information | |
| - __syncthreads(); | |
| const Float4 x = dev_x[i]; | |
| // determine fluid cell containing the particle | |
| @@ -1133,7 +1107,6 @@ __global__ void findDarcyPressureForce( | |
| const Float dz = devC_grid.L[2]/devC_grid.num[2]; | |
| // read fluid information | |
| - __syncthreads(); | |
| //const Float phi = dev_darcy_phi[cellidx]; | |
| const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)]; | |
| const Float p = dev_darcy_p[cellidx]; | |
| @@ -1182,7 +1155,6 @@ __global__ void findDarcyPressureForce( | |
| checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p); | |
| #endif | |
| // save force | |
| - __syncthreads(); | |
| dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
| dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0); | |
| } | |
| @@ -1207,7 +1179,6 @@ __global__ void setDarcyTopPressure( | |
| const unsigned int cellidx = idx(x,y,z); | |
| // Write the new pressure the top boundary cells | |
| - __syncthreads(); | |
| dev_darcy_p[cellidx] = new_pressure; | |
| } | |
| } | |
| @@ -1231,7 +1202,6 @@ __global__ void setDarcyTopWallPressure( | |
| const unsigned int cellidx = idx(x,y,z); | |
| // Write the new pressure the top boundary cells | |
| - __syncthreads(); | |
| dev_darcy_p[cellidx] = new_pressure; | |
| } | |
| } | |
| @@ -1252,7 +1222,6 @@ __global__ void setDarcyTopWallFixedFlow( | |
| z == wall0_iz+1) { | |
| // Write the new pressure the top boundary cells | |
| - __syncthreads(); | |
| const Float new_pressure = dev_darcy_p[idx(x,y,z-2)]; | |
| dev_darcy_p[idx(x,y,z)] = new_pressure; | |
| } | |
| @@ -1280,7 +1249,6 @@ __global__ void findDarcyPermeabilities( | |
| // 1D thread index | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| - __syncthreads(); | |
| Float phi = dev_darcy_phi[cellidx]; | |
| // avoid division by zero | |
| @@ -1300,7 +1268,6 @@ __global__ void findDarcyPermeabilities( | |
| //k = fmin(2.7e-9, k); | |
| k = fmin(2.7e-10, k); | |
| - __syncthreads(); | |
| dev_darcy_k[cellidx] = k; | |
| #ifdef CHECK_FLUID_FINITE | |
| @@ -1335,7 +1302,6 @@ __global__ void findDarcyPermeabilityGradients( | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| // read values | |
| - __syncthreads(); | |
| const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)]; | |
| const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)]; | |
| const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)]; | |
| @@ -1350,7 +1316,6 @@ __global__ void findDarcyPermeabilityGradients( | |
| (k_zp - k_zn)/(dz+dz)); | |
| // write result | |
| - __syncthreads(); | |
| dev_darcy_grad_k[cellidx] = grad_k; | |
| /*printf("%d,%d,%d findK:\n" | |
| @@ -1389,7 +1354,6 @@ __global__ void findDarcyPressureChange( | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| // read values | |
| - __syncthreads(); | |
| const Float p_old = dev_darcy_p_old[cellidx]; | |
| const Float p = dev_darcy_p[cellidx]; | |
| @@ -1401,7 +1365,6 @@ __global__ void findDarcyPressureChange( | |
| dpdt = 0.0; | |
| // write result | |
| - __syncthreads(); | |
| dev_darcy_dpdt[cellidx] = dpdt; | |
| /*printf("%d,%d,%d\n" | |
| @@ -1461,7 +1424,6 @@ __global__ void firstDarcySolution( | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| // read values | |
| - __syncthreads(); | |
| const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)]; | |
| const Float phi = dev_darcy_phi[cellidx]; | |
| const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)]; | |
| @@ -1585,7 +1547,6 @@ __global__ void firstDarcySolution( | |
| #endif | |
| // save explicit integrated pressure change | |
| - __syncthreads(); | |
| dev_darcy_dp_expl[cellidx] = dp_expl; | |
| #ifdef CHECK_FLUID_FINITE | |
| @@ -1641,7 +1602,6 @@ __global__ void updateDarcySolution( | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| // read values | |
| - __syncthreads(); | |
| const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)]; | |
| const Float phi = dev_darcy_phi[cellidx]; | |
| const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)]; | |
| @@ -1778,7 +1738,6 @@ __global__ void updateDarcySolution( | |
| #endif | |
| // save new pressure and the residual | |
| - __syncthreads(); | |
| dev_darcy_p_new[cellidx] = p_new; | |
| dev_darcy_norm[cellidx] = res_norm; | |
| @@ -1817,7 +1776,6 @@ __global__ void findNewPressure( | |
| const Float dp = dev_darcy_dp[cellidx]; | |
| // save new pressure | |
| - __syncthreads(); | |
| dev_darcy_p[cellidx] += dp; | |
| /*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n", | |
| @@ -1860,7 +1818,6 @@ __global__ void findDarcyVelocities( | |
| const unsigned int cellidx = d_idx(x,y,z); | |
| - __syncthreads(); | |
| const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)]; | |
| const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)]; | |
| const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)]; | |
| @@ -1888,7 +1845,6 @@ __global__ void findDarcyVelocities( | |
| const Float3 v = (-k/mu * grad_p)/phi; | |
| // Save velocity | |
| - __syncthreads(); | |
| dev_darcy_v[cellidx] = v; | |
| } | |
| } |