Introduction
Introduction Statistics Contact Development Disclaimer Help
darcy.cuh: remove synchronization calls for coalesced darcy calls - sphere - G…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit 7da84647e15a6ce230d9c2f0fcbbe1111fea6bf8
parent e6637c4d721e810b55d400e45a2ad74a0d7cf239
Author: Anders Damsgaard <[email protected]>
Date: Tue, 21 Feb 2023 15:01:35 +0100
darcy.cuh: remove synchronization calls for coalesced darcy calls
Diffstat:
M src/darcy.cuh | 48 ++---------------------------…
1 file changed, 2 insertions(+), 46 deletions(-)
---
diff --git a/src/darcy.cuh b/src/darcy.cuh
@@ -164,10 +164,8 @@ __global__ void setDarcyNormZero(
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// check that we are not outside the fluid grid
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
- __syncthreads();
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
dev_darcy_norm[d_idx(x,y,z)] = 0.0;
- }
}
// Set an array of scalars to 0.0 inside devC_grid
@@ -180,10 +178,8 @@ __global__ void setDarcyZeros(T* __restrict__ dev_scalarf…
const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// check that we are not outside the fluid grid
- if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
- __syncthreads();
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2])
dev_scalarfield[d_idx(x,y,z)] = 0.0;
- }
}
@@ -455,21 +451,18 @@ __global__ void findDarcyPorositiesLinear(
* targetCell.z;
// Lowest particle index in cell
- __syncthreads();
startIdx = dev_cellStart[cellID];
// Make sure cell is not empty
if (startIdx != 0xffffffff) {
// Highest particle index in cell
- __syncthreads();
endIdx = dev_cellEnd[cellID];
// Iterate over cell particles
for (i=startIdx; i<endIdx; ++i) {
// Read particle position and radius
- __syncthreads();
xr = dev_x_sorted[i];
v = dev_vel_sorted[i];
//x3 = MAKE_FLOAT3(xr.x, xr.y, xr.z);
@@ -582,7 +575,6 @@ __global__ void copyDarcyPorositiesToEdges(
const unsigned int readidx = d_idx(x, y_read, z);
const unsigned int writeidx = d_idx(x, y, z);
- __syncthreads();
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
@@ -616,7 +608,6 @@ __global__ void copyDarcyPorositiesToBottom(
const unsigned int readidx = d_idx(x, y, 1);
const unsigned int writeidx = d_idx(x, y, z);
- __syncthreads();
dev_darcy_phi[writeidx] = dev_darcy_phi[readidx];
dev_darcy_dphi[writeidx] = dev_darcy_dphi[readidx];
dev_darcy_div_v_p[writeidx] = dev_darcy_div_v_p[readidx];
@@ -679,7 +670,6 @@ __global__ void findDarcyPorosities(
//unsigned int n = 0;
// Read old porosity
- __syncthreads();
Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];
// The cell 3d index
@@ -713,21 +703,18 @@ __global__ void findDarcyPorosities(
* targetCell.z;
// Lowest particle index in cell
- __syncthreads();
startIdx = dev_cellStart[cellID];
// Make sure cell is not empty
if (startIdx != 0xffffffff) {
// Highest particle index in cell
- __syncthreads();
endIdx = dev_cellEnd[cellID];
// Iterate over cell particles
for (i=startIdx; i<endIdx; ++i) {
// Read particle position and radius
- __syncthreads();
xr = dev_x_sorted[i];
v = dev_vel_sorted[i];
r = xr.w;
@@ -796,7 +783,6 @@ __global__ void findDarcyPorosities(
dphi = 0.5*(phi_new - phi_0);
// Save porosity and porosity change
- __syncthreads();
//phi = 0.5; dphi = 0.0; // disable porosity effects
const unsigned int cellidx = d_idx(x,y,z);
dev_darcy_phi[cellidx] = phi*c_phi;
@@ -823,10 +809,7 @@ __global__ void findDarcyPorosities(
(void)checkFiniteFloat("dphi", x, y, z, dphi);
#endif
} else {
-
- __syncthreads();
const unsigned int cellidx = d_idx(x,y,z);
-
dev_darcy_phi[cellidx] = 0.999;
dev_darcy_dphi[cellidx] = 0.0;
}
@@ -854,7 +837,6 @@ __global__ void findDarcyParticleVelocityDivergence(
if (x < nx && y < ny && z < nz) {
// read values
- __syncthreads();
Float v_p_xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
Float v_p_xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
Float v_p_yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
@@ -873,7 +855,6 @@ __global__ void findDarcyParticleVelocityDivergence(
(v_p_yp - v_p_yn)/dy +
(v_p_zp - v_p_zn)/dz;
- __syncthreads();
dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;
#ifdef CHECK_FLUID_FINITE
@@ -900,7 +881,6 @@ __global__ void findDarcyPressureGradient(
if (x < nx && y < ny && z < nz) {
// read values
- __syncthreads();
Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
@@ -919,7 +899,6 @@ __global__ void findDarcyPressureGradient(
(p_yp - p_yn)/(dy + dy),
(p_zp - p_zn)/(dz + dz));
- __syncthreads();
dev_darcy_grad_p[d_idx(x,y,z)] = grad_p;
//if (grad_p.x != 0.0 || grad_p.y != 0.0 || grad_p.z != 0.0)
@@ -957,7 +936,6 @@ __global__ void findDarcyPressureForceLinear(
if (i < devC_np) {
// read particle information
- __syncthreads();
const Float4 x = dev_x[i];
const Float3 x3 = MAKE_FLOAT3(x.x, x.y, x.z);
@@ -979,7 +957,6 @@ __global__ void findDarcyPressureForceLinear(
const Float dz = devC_grid.L[2]/nz;
// read fluid information
- __syncthreads();
//const Float phi = dev_darcy_phi[cellidx];
// Cell center position
@@ -1002,7 +979,6 @@ __global__ void findDarcyPressureForceLinear(
iy_n = i_y + d_iy;
iz_n = i_z + d_iz;
- __syncthreads();
grad_p_iter = dev_darcy_grad_p[d_idx(ix_n, iy_n, iz_n)];
// make sure edge and corner ghost nodes aren't read
@@ -1092,7 +1068,6 @@ __global__ void findDarcyPressureForceLinear(
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
#endif
// save force
- __syncthreads();
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
}
@@ -1115,7 +1090,6 @@ __global__ void findDarcyPressureForce(
if (i < devC_np) {
// read particle information
- __syncthreads();
const Float4 x = dev_x[i];
// determine fluid cell containing the particle
@@ -1133,7 +1107,6 @@ __global__ void findDarcyPressureForce(
const Float dz = devC_grid.L[2]/devC_grid.num[2];
// read fluid information
- __syncthreads();
//const Float phi = dev_darcy_phi[cellidx];
const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
const Float p = dev_darcy_p[cellidx];
@@ -1182,7 +1155,6 @@ __global__ void findDarcyPressureForce(
checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
#endif
// save force
- __syncthreads();
dev_force[i] += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
dev_darcy_f_p[i] = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
}
@@ -1207,7 +1179,6 @@ __global__ void setDarcyTopPressure(
const unsigned int cellidx = idx(x,y,z);
// Write the new pressure the top boundary cells
- __syncthreads();
dev_darcy_p[cellidx] = new_pressure;
}
}
@@ -1231,7 +1202,6 @@ __global__ void setDarcyTopWallPressure(
const unsigned int cellidx = idx(x,y,z);
// Write the new pressure the top boundary cells
- __syncthreads();
dev_darcy_p[cellidx] = new_pressure;
}
}
@@ -1252,7 +1222,6 @@ __global__ void setDarcyTopWallFixedFlow(
z == wall0_iz+1) {
// Write the new pressure the top boundary cells
- __syncthreads();
const Float new_pressure = dev_darcy_p[idx(x,y,z-2)];
dev_darcy_p[idx(x,y,z)] = new_pressure;
}
@@ -1280,7 +1249,6 @@ __global__ void findDarcyPermeabilities(
// 1D thread index
const unsigned int cellidx = d_idx(x,y,z);
- __syncthreads();
Float phi = dev_darcy_phi[cellidx];
// avoid division by zero
@@ -1300,7 +1268,6 @@ __global__ void findDarcyPermeabilities(
//k = fmin(2.7e-9, k);
k = fmin(2.7e-10, k);
- __syncthreads();
dev_darcy_k[cellidx] = k;
#ifdef CHECK_FLUID_FINITE
@@ -1335,7 +1302,6 @@ __global__ void findDarcyPermeabilityGradients(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
@@ -1350,7 +1316,6 @@ __global__ void findDarcyPermeabilityGradients(
(k_zp - k_zn)/(dz+dz));
// write result
- __syncthreads();
dev_darcy_grad_k[cellidx] = grad_k;
/*printf("%d,%d,%d findK:\n"
@@ -1389,7 +1354,6 @@ __global__ void findDarcyPressureChange(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float p_old = dev_darcy_p_old[cellidx];
const Float p = dev_darcy_p[cellidx];
@@ -1401,7 +1365,6 @@ __global__ void findDarcyPressureChange(
dpdt = 0.0;
// write result
- __syncthreads();
dev_darcy_dpdt[cellidx] = dpdt;
/*printf("%d,%d,%d\n"
@@ -1461,7 +1424,6 @@ __global__ void firstDarcySolution(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
const Float phi = dev_darcy_phi[cellidx];
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
@@ -1585,7 +1547,6 @@ __global__ void firstDarcySolution(
#endif
// save explicit integrated pressure change
- __syncthreads();
dev_darcy_dp_expl[cellidx] = dp_expl;
#ifdef CHECK_FLUID_FINITE
@@ -1641,7 +1602,6 @@ __global__ void updateDarcySolution(
const unsigned int cellidx = d_idx(x,y,z);
// read values
- __syncthreads();
const Float phi_xn = dev_darcy_phi[d_idx(x-1,y,z)];
const Float phi = dev_darcy_phi[cellidx];
const Float phi_xp = dev_darcy_phi[d_idx(x+1,y,z)];
@@ -1778,7 +1738,6 @@ __global__ void updateDarcySolution(
#endif
// save new pressure and the residual
- __syncthreads();
dev_darcy_p_new[cellidx] = p_new;
dev_darcy_norm[cellidx] = res_norm;
@@ -1817,7 +1776,6 @@ __global__ void findNewPressure(
const Float dp = dev_darcy_dp[cellidx];
// save new pressure
- __syncthreads();
dev_darcy_p[cellidx] += dp;
/*printf("%d,%d,%d\tp = % f\tp_new = % f\tres_norm = % f\n",
@@ -1860,7 +1818,6 @@ __global__ void findDarcyVelocities(
const unsigned int cellidx = d_idx(x,y,z);
- __syncthreads();
const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
@@ -1888,7 +1845,6 @@ __global__ void findDarcyVelocities(
const Float3 v = (-k/mu * grad_p)/phi;
// Save velocity
- __syncthreads();
dev_darcy_v[cellidx] = v;
}
}
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.