implemented staggered grid in several functions - sphere - GPU-based 3D discre…
git clone git://src.adamsgaard.dk/sphere
---
commit bd373edea562b18de93fe231cc1583108a4d1b05
parent 0548db26d1afead334b2d7be927411f8e6dfd0dc
Author: Anders Damsgaard <[email protected]>
Date: Tue, 3 Jun 2014 10:51:51 +0200
implemented staggered grid in several functions
Diffstat:
M python/fluidshear.py | 4 ++--
M src/device.cu | 180 +++++++++++++++++------------…
M src/navierstokes.cuh | 431 ++++++++++++++++++++++++++---…
M src/sphere.h | 11 +++++++----
M tests/stokes_law.py | 2 +-
5 files changed, 469 insertions(+), 159 deletions(-)
---
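The commit moves the fluid velocity and the stress divergence from cell centers to cell faces, i.e. a staggered (MAC-style) arrangement: scalars such as p, phi and epsilon stay at cell centers (indexed with idx()), while the components v_x, v_y, v_z and div_tau_x/y/z live on the faces normal to their direction (indexed with vidx()). As a reading aid, a minimal, hypothetical sketch of such index helpers follows; the real idx()/vidx() in src/navierstokes.cuh may use different ghost-node offsets.

    // Illustration only -- hypothetical index helpers, not repository code.
    struct GridDims { unsigned int num[3]; };   // stand-in for devC_grid

    // Cell centers with one ghost cell on each side (x, y, z in -1..num[i])
    unsigned int idx_sketch(const GridDims& g, int x, int y, int z)
    {
        return (x+1)
            + (g.num[0]+2)*(y+1)
            + (g.num[0]+2)*(g.num[1]+2)*(z+1);
    }

    // Cell faces: one extra node per dimension plus ghost nodes at -1 and num[i]+1
    unsigned int vidx_sketch(const GridDims& g, int x, int y, int z)
    {
        return (x+1)
            + (g.num[0]+3)*(y+1)
            + (g.num[0]+3)*(g.num[1]+3)*(z+1);
    }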
diff --git a/python/fluidshear.py b/python/fluidshear.py
@@ -9,8 +9,8 @@ sid = 'fluidshear'
sim = sphere.sim(sid + '-init', np = 24000, fluid = False)
#sim.cleanup()
sim.radius[:] = 0.05
-sim.initRandomGridPos(gridnum = [16, 16, 9000])
-sim.initTemporal(total = 5.0, file_dt = 0.05)
+sim.initRandomGridPos(gridnum = [20, 20, 9000])
+sim.initTemporal(total = 10.0, file_dt = 0.05)
sim.g[2] = -9.81
sim.run(dry=True)
sim.run()
diff --git a/src/device.cu b/src/device.cu
@@ -557,7 +557,7 @@ __host__ void DEM::startTime()
unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid
- // Use 3D block and grid layout for fluid calculations
+ // Use 3D block and grid layout for cell-centered fluid calculations
dim3 dimBlockFluid(8, 8, 8); // 512 threads per block
dim3 dimGridFluid(
iDivUp(grid.num[0], dimBlockFluid.x),
@@ -568,6 +568,17 @@ __host__ void DEM::startTime()
exit(1);
}
+ // Use 3D block and grid layout for cell-face fluid calculations
+ dim3 dimBlockFluidFace(8, 8, 8); // 512 threads per block
+ dim3 dimGridFluidFace(
+ iDivUp(grid.num[0]+1, dimBlockFluidFace.x),
+ iDivUp(grid.num[1]+1, dimBlockFluidFace.y),
+ iDivUp(grid.num[2]+1, dimBlockFluidFace.z));
+ if (dimGridFluidFace.z > 64 && navierstokes == 1) {
+ cerr << "Error: dimGridFluidFace.z > 64" << endl;
+ exit(1);
+ }
+
// Shared memory per block
unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1);
@@ -867,29 +878,42 @@ __host__ void DEM::startTime()
}*/
if (np > 0) {
- // Determine the interaction force
- /*findInteractionForce<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_d_avg,
- dev_ns_vp_avg,
- dev_ns_v,
- dev_ns_fi);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post findInteractionForce", iter);
- // Apply interaction force to the particles
- applyParticleInteractionForce<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_fi,
- dev_ns_phi,
- dev_ns_p,
- dev_gridParticleIndex,
- dev_cellStart,
- dev_cellEnd,
- dev_x_sorted,
- dev_force);
+ if (iter == 0) {
+ // set cell center ghost nodes
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v, ns.bc_bot, ns.bc_top);
+
+ // find cell face velocities
+ interpolateCenterToFace
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post interpolateCenterToFace");
+ }
+
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ ns.bc_bot,
+ ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post applyParticleInteractionForce",
- iter);*/
+ checkForCudaErrorsIter("Post setNSghostNodesFace", iter);
+
+ findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post findFaceDivTau", iter);
// Per particle, find the fluid-particle interaction force f_pf
// and apply it to the particle
@@ -898,9 +922,12 @@ __host__ void DEM::startTime()
dev_vel,
dev_ns_phi,
dev_ns_p,
- dev_ns_v,
- dev_ns_tau,
- iter,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z,
dev_ns_f_pf,
dev_force);
cudaThreadSynchronize();
@@ -961,21 +988,19 @@ __host__ void DEM::startTime()
// Set the values of the ghost nodes in the grid
if (PROFILING == 1)
startTimer(&kernel_tic);
- /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_epsilon);*/
+
setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
dev_ns_p, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v, ns.bc_bot, ns.bc_top);
+ //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ //dev_ns_v, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p, ns.bc_bot, ns.bc_top);
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z,
+ ns.bc_bot, ns.bc_top);
setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
dev_ns_phi, ns.bc_bot, ns.bc_top);
@@ -993,26 +1018,15 @@ __host__ void DEM::startTime()
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");*/
- // Find the fluid stress tensor, needed for predicting the flu…
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v,
- dev_ns_tau);
+ // interpolate velocities to cell centers which makes velocity
+ // prediction easier
+ interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_v);
cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSstressTensor);
- checkForCudaErrorsIter("Post findNSstressTensor", iter);
-
- // Set stress tensor values in the ghost nodes
- setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
+ checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
// Find the divergence of phi*vi*v, needed for predicting the
// fluid velocities
@@ -1028,53 +1042,44 @@ __host__ void DEM::startTime()
&t_findNSdivphiviv);
checkForCudaErrorsIter("Post findNSdivphiviv", iter);
- // Find the divergence of phi*tau, needed for predicting the
- // fluid velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- /*findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_tau,
- dev_ns_div_phi_tau);*/
- findNSdivtau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- dev_ns_div_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivtau);
- //checkForCudaErrorsIter("Post findNSdivtau", iter);
- checkForCudaErrorsIter("Post findNSdivtau", iter);
-
// Predict the fluid velocities on the base of the old pressure
// field and ignoring the incompressibility constraint
if (PROFILING == 1)
startTimer(&kernel_tic);
- findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
+ findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>(
dev_ns_p,
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
dev_ns_v,
dev_ns_phi,
dev_ns_dphi,
- dev_ns_div_phi_vi_v,
- //dev_ns_div_phi_tau,
- dev_ns_div_tau,
+ dev_ns_div_tau_x,
+ dev_ns_div_tau_y,
+ dev_ns_div_tau_z,
ns.bc_bot,
ns.bc_top,
ns.beta,
dev_ns_fi,
ns.ndem,
- dev_ns_v_p);
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
&t_findPredNSvelocities);
checkForCudaErrorsIter("Post findPredNSvelocities", iter);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p);
+ setNSghostNodesFace<Float>
+ <<<dimGridFluidFace, dimBlockFluidFace>>>(
+ dev_ns_v_p_x,
+ dev_ns_v_p_y,
+ dev_ns_v_p_z,
+ ns.bc_bot, ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)…
- iter);
+ checkForCudaErrorsIter(
+ "Post setNSghostNodesFaceFloat3(dev_ns_v_p)", iter);
// In the first iteration of the sphere program, we'll need to
// manually estimate the values of epsilon. In the subsequent
@@ -1507,6 +1512,17 @@ __host__ void DEM::startTime()
cudaThreadSynchronize();
checkForCudaErrorsIter("Beginning of file output section", iter);
+ // v_x, v_y, v_z -> v
+ if (navierstokes == 1) {
+ interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_x,
+ dev_ns_v_y,
+ dev_ns_v_z,
+ dev_ns_v);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post interpolateFaceToCenter", iter);
+ }
+
//// Copy device data to host memory
transferFromGlobalDeviceMemory();
checkForCudaErrorsIter("After transferFromGlobalDeviceMemory()",
diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
@@ -69,7 +69,7 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_v_x, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_y, memSizeFvel);// velocity in stag. grid
cudaMalloc((void**)&dev_ns_v_z, memSizeFvel);// velocity in stag. grid
- cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
+ //cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity
cudaMalloc((void**)&dev_ns_v_p_x, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_y, memSizeFvel); // pred. vel. in stag. grid
cudaMalloc((void**)&dev_ns_v_p_z, memSizeFvel); // pred. vel. in stag. grid
@@ -78,7 +78,7 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_fi, memSizeF*3); // interaction force
cudaMalloc((void**)&dev_ns_phi, memSizeF); // cell porosity
cudaMalloc((void**)&dev_ns_dphi, memSizeF); // cell porosity change
- cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v)
+ //cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v)
cudaMalloc((void**)&dev_ns_epsilon, memSizeF); // pressure difference
cudaMalloc((void**)&dev_ns_epsilon_new, memSizeF); // new pressure diff.
cudaMalloc((void**)&dev_ns_epsilon_old, memSizeF); // old pressure diff.
@@ -87,7 +87,10 @@ void DEM::initNSmemDev(void)
cudaMalloc((void**)&dev_ns_f1, memSizeF); // constant addition in forci…
cudaMalloc((void**)&dev_ns_f2, memSizeF*3); // constant slope in forcing
cudaMalloc((void**)&dev_ns_tau, memSizeF*6); // stress tensor (symmetrical)
- cudaMalloc((void**)&dev_ns_div_tau, memSizeF*6); // div(tau)
+ cudaMalloc((void**)&dev_ns_div_tau, memSizeF*3); // div(tau), cell center
+ cudaMalloc((void**)&dev_ns_div_tau_x, memSizeFvel); // div(tau), cell face
+ cudaMalloc((void**)&dev_ns_div_tau_y, memSizeFvel); // div(tau), cell face
+ cudaMalloc((void**)&dev_ns_div_tau_z, memSizeFvel); // div(tau), cell face
cudaMalloc((void**)&dev_ns_div_phi_vi_v, memSizeF*3); // div(phi*vi*v)
//cudaMalloc((void**)&dev_ns_div_phi_tau, memSizeF*3); // div(phi*tau)
cudaMalloc((void**)&dev_ns_f_pf, sizeof(Float3)*np); // particle fluid for…
@@ -103,7 +106,7 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_v_x);
cudaFree(dev_ns_v_y);
cudaFree(dev_ns_v_z);
- cudaFree(dev_ns_v_p);
+ //cudaFree(dev_ns_v_p);
cudaFree(dev_ns_v_p_x);
cudaFree(dev_ns_v_p_y);
cudaFree(dev_ns_v_p_z);
@@ -112,7 +115,7 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_fi);
cudaFree(dev_ns_phi);
cudaFree(dev_ns_dphi);
- cudaFree(dev_ns_div_phi_v_v);
+ //cudaFree(dev_ns_div_phi_v_v);
cudaFree(dev_ns_epsilon);
cudaFree(dev_ns_epsilon_new);
cudaFree(dev_ns_epsilon_old);
@@ -123,7 +126,10 @@ void DEM::freeNSmemDev()
cudaFree(dev_ns_tau);
cudaFree(dev_ns_div_phi_vi_v);
//cudaFree(dev_ns_div_phi_tau);
- cudaFree(dev_ns_div_tau);
+ //cudaFree(dev_ns_div_tau);
+ cudaFree(dev_ns_div_tau_x);
+ cudaFree(dev_ns_div_tau_y);
+ cudaFree(dev_ns_div_tau_z);
cudaFree(dev_ns_f_pf);
}
@@ -144,7 +150,7 @@ void DEM::transferNStoGlobalDeviceMemory(int statusmsg)
cudaMemcpy(dev_ns_p, ns.p, memSizeF, cudaMemcpyHostToDevice);
checkForCudaErrors("transferNStoGlobalDeviceMemory after first cudaMemcpy"…
cudaMemcpy(dev_ns_v, ns.v, memSizeF*3, cudaMemcpyHostToDevice);
- cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice);
+ //cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice);
cudaMemcpy(dev_ns_phi, ns.phi, memSizeF, cudaMemcpyHostToDevice);
cudaMemcpy(dev_ns_dphi, ns.dphi, memSizeF, cudaMemcpyHostToDevice);
@@ -165,7 +171,7 @@ void DEM::transferNSfromGlobalDeviceMemory(int statusmsg)
cudaMemcpy(ns.p, dev_ns_p, memSizeF, cudaMemcpyDeviceToHost);
checkForCudaErrors("In transferNSfromGlobalDeviceMemory, dev_ns_p", 0);
cudaMemcpy(ns.v, dev_ns_v, memSizeF*3, cudaMemcpyDeviceToHost);
- cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost);
+ //cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.phi, dev_ns_phi, memSizeF, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.dphi, dev_ns_dphi, memSizeF, cudaMemcpyDeviceToHost);
cudaMemcpy(ns.norm, dev_ns_norm, memSizeF, cudaMemcpyDeviceToHost);
@@ -624,6 +630,63 @@ __global__ void setNSghostNodes(
}
}
+// Update a field in the ghost nodes from their parent cell values. The edge
+// (diagonal) cells are not written since they are not read.
+// Launch per face.
+template<typename T>
+__global__ void setNSghostNodesFace(
+ T* dev_scalarfield_x,
+ T* dev_scalarfield_y,
+ T* dev_scalarfield_z,
+ int bc_bot,
+ int bc_top)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // check that we are not outside the fluid grid
+ if (x <= nx && y <= ny && z <= nz) {
+
+ const T val_x = dev_scalarfield_x[vidx(x,y,z)];
+ const T val_y = dev_scalarfield_y[vidx(x,y,z)];
+ const T val_z = dev_scalarfield_z[vidx(x,y,z)];
+
+ // x
+ if (x == 0)
+ dev_scalarfield_x[vidx(nx+1,y,z)] = val_x;
+ if (x == nx)
+ dev_scalarfield_x[vidx(-1,y,z)] = val_x;
+
+ // y
+ if (y == 0)
+ dev_scalarfield_y[vidx(x,ny+1,z)] = val_y;
+ if (y == ny)
+ dev_scalarfield_y[vidx(x,-1,z)] = val_y;
+
+ // z
+ if (z == 0 && bc_bot == 0)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Dirichlet
+ if (z == 1 && bc_bot == 1)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Neumann
+ if (z == 0 && bc_bot == 2)
+ dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Periodic -z
+
+ if (z == nz && bc_top == 0)
+ dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Dirichlet
+ if (z == nz-1 && bc_top == 1)
+ dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Neumann
+ if (z == nz && bc_top == 2)
+ dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Periodic +z
+ }
+}
+
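Judging from the comments in setNSghostNodesFace, the integers ns.bc_bot and ns.bc_top select the z-boundary type: 0 for Dirichlet, 1 for Neumann, 2 for periodic. A small enum sketch of that mapping (hypothetical names, purely a readability aid; sphere passes plain ints):

    enum NSBoundaryCondition {
        BC_DIRICHLET = 0,  // fixed value: ghost node copies the boundary face value
        BC_NEUMANN   = 1,  // zero gradient: ghost node copies the adjacent interior face
        BC_PERIODIC  = 2   // wrap-around: ghost node copies the opposite boundary face
    };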
// Update the tensor field for the ghost nodes from their parent cell values.
// The edge (diagonal) cells are not written since they are not read. Launch
// this kernel for all cells in the grid.
@@ -1688,7 +1751,7 @@ __global__ void findNSdivphiviv(
// Read porosity and velocity in the 6 neighbor cells
__syncthreads();
const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
- //const Float phi = dev_ns_phi[idx(x,y,z)];
+ const Float phi = dev_ns_phi[idx(x,y,z)];
const Float phi_xp = dev_ns_phi[idx(x+1,y,z)];
const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
const Float phi_yp = dev_ns_phi[idx(x,y+1,z)];
@@ -1696,7 +1759,7 @@ __global__ void findNSdivphiviv(
const Float phi_zp = dev_ns_phi[idx(x,y,z+1)];
const Float3 v_xn = dev_ns_v[idx(x-1,y,z)];
- //const Float3 v = dev_ns_v[idx(x,y,z)];
+ const Float3 v = dev_ns_v[idx(x,y,z)];
const Float3 v_xp = dev_ns_v[idx(x+1,y,z)];
const Float3 v_yn = dev_ns_v[idx(x,y-1,z)];
const Float3 v_yp = dev_ns_v[idx(x,y+1,z)];
@@ -1704,7 +1767,8 @@ __global__ void findNSdivphiviv(
const Float3 v_zp = dev_ns_v[idx(x,y,z+1)];
// Calculate upwind coefficients
- /*const Float3 a = MAKE_FLOAT3(
+ //*
+ const Float3 a = MAKE_FLOAT3(
copysign(1.0, v.x),
copysign(1.0, v.y),
copysign(1.0, v.z));
@@ -1763,8 +1827,9 @@ __global__ void findNSdivphiviv(
// Determine the weighted average of both discretizations
const Float3 div_phi_vi_v = tau*div_uw + (1.0 - tau)*div_cd;
- */
+ //*/
+ /*
// Calculate the divergence: div(phi*v_i*v)
const Float3 div_phi_vi_v = MAKE_FLOAT3(
// x
@@ -1779,6 +1844,7 @@ __global__ void findNSdivphiviv(
(phi_xp*v_xp.z*v_xp.x - phi_xn*v_xn.z*v_xn.x)/(2.0*dx) +
(phi_yp*v_yp.z*v_yp.y - phi_yn*v_yn.z*v_yn.y)/(2.0*dy) +
(phi_zp*v_zp.z*v_zp.z - phi_zn*v_zn.z*v_zn.z)/(2.0*dz));
+ // */
// Write divergence
__syncthreads();
@@ -1929,6 +1995,7 @@ __global__ void findNSdivphitau(
}
// Find the divergence of phi v v
+// Unused
__global__ void findNSdivphivv(
Float* dev_ns_v_prod, // in
Float* dev_ns_phi, // in
@@ -2017,20 +2084,27 @@ __global__ void findNSdivphivv(
// Find predicted fluid velocity
+// Launch per face.
__global__ void findPredNSvelocities(
Float* dev_ns_p, // in
+ Float* dev_ns_v_x, // in
+ Float* dev_ns_v_y, // in
+ Float* dev_ns_v_z, // in
Float3* dev_ns_v, // in
Float* dev_ns_phi, // in
Float* dev_ns_dphi, // in
+ Float* dev_ns_div_tau_x, // in
+ Float* dev_ns_div_tau_y, // in
+ Float* dev_ns_div_tau_z, // in
Float3* dev_ns_div_phi_vi_v, // in
- //Float3* dev_ns_div_phi_tau, // in
- Float3* dev_ns_div_tau, // in
int bc_bot, // in
int bc_top, // in
Float beta, // in
Float3* dev_ns_fi, // in
unsigned int ndem, // in
- Float3* dev_ns_v_p) // out
+ Float* dev_ns_v_p_x, // out
+ Float* dev_ns_v_p_y, // out
+ Float* dev_ns_v_p_z) // out
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2048,76 +2122,123 @@ __global__ void findPredNSvelocities(
const Float dz = devC_grid.L[2]/nz;
// 1D thread index
+ const unsigned int faceidx = vidx(x,y,z);
const unsigned int cellidx = idx(x,y,z);
// Check that we are not outside the fluid grid
- if (x < nx && y < ny && z < nz) {
+ if (x <= nx && y <= ny && z <= nz) {
// Values that are needed for calculating the predicted velocity
__syncthreads();
- const Float3 v = dev_ns_v[cellidx];
- const Float phi = dev_ns_phi[cellidx];
- const Float dphi = dev_ns_dphi[cellidx];
- const Float3 div_phi_vi_v = dev_ns_div_phi_vi_v[cellidx];
- //const Float3 div_phi_tau = dev_ns_div_phi_tau[cellidx];
- const Float3 div_tau = dev_ns_div_tau[cellidx];
+ const Float3 v_face = MAKE_FLOAT3(
+ dev_ns_v_x[faceidx],
+ dev_ns_v_y[faceidx],
+ dev_ns_v_z[faceidx]);
+ const Float3 div_tau_face = MAKE_FLOAT3(
+ dev_ns_div_tau_x[faceidx],
+ dev_ns_div_tau_y[faceidx],
+ dev_ns_div_tau_z[faceidx]);
+
+ // cell center values
+ const Float phi_xn = dev_ns_phi[idx(x-1,y,z)];
+ const Float phi_c = dev_ns_phi[cellidx];
+ const Float phi_yn = dev_ns_phi[idx(x,y-1,z)];
+ const Float phi_zn = dev_ns_phi[idx(x,y,z-1)];
+
+ const Float dphi_xn = dev_ns_dphi[idx(x-1,y,z)];
+ const Float dphi_c = dev_ns_dphi[cellidx];
+ const Float dphi_yn = dev_ns_dphi[idx(x,y-1,z)];
+ const Float dphi_zn = dev_ns_dphi[idx(x,y,z-1)];
+
+ const Float3 v_xn = dev_ns_v[idx(x-1,y,z)];
+ const Float3 v_c = dev_ns_v[cellidx];
+ const Float3 v_yn = dev_ns_v[idx(x,y-1,z)];
+ const Float3 v_zn = dev_ns_v[idx(x,y,z-1)];
+
+ const Float3 div_phi_vi_v_xn = dev_ns_div_phi_vi_v[idx(x-1,y,z)];
+ const Float3 div_phi_vi_v_c = dev_ns_div_phi_vi_v[cellidx];
+ const Float3 div_phi_vi_v_yn = dev_ns_div_phi_vi_v[idx(x,y-1,z)];
+ const Float3 div_phi_vi_v_zn = dev_ns_div_phi_vi_v[idx(x,y,z-1)];
+
+ // component-wise average values
+ const Float3 phi = MAKE_FLOAT3(
+ (phi_c + phi_xn)/2.0,
+ (phi_c + phi_yn)/2.0,
+ (phi_c + phi_zn)/2.0);
+ const Float3 dphi = MAKE_FLOAT3(
+ (dphi_c + dphi_xn)/2.0,
+ (dphi_c + dphi_yn)/2.0,
+ (dphi_c + dphi_zn)/2.0);
// The particle-fluid interaction force should only be incorporated if
// there is a fluid viscosity
- Float3 f_i;
- if (devC_params.mu > 0.0)
- f_i = dev_ns_fi[cellidx];
- else
- f_i = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn;
+ if (devC_params.mu > 0.0) {
+ f_i_c = dev_ns_fi[cellidx];
+ f_i_xn = dev_ns_fi[idx(x-1,y,z)];
+ f_i_yn = dev_ns_fi[idx(x,y-1,z)];
+ f_i_zn = dev_ns_fi[idx(x,y,z-1)];
+ } else {
+ f_i_c = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_xn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_yn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ f_i_zn = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ }
+ const Float3 f_i = MAKE_FLOAT3(
+ (f_i_c.x + f_i_xn.x)/2.0,
+ (f_i_c.y + f_i_yn.y)/2.0,
+ (f_i_c.z + f_i_zn.z)/2.0);
const Float dt = ndem*devC_dt;
const Float rho = devC_params.rho_f;
- // Find pressure gradient
- Float3 grad_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
-
// The pressure gradient is not needed in Chorin's projection method
// (ns.beta=0), so it only has to be looked up in pressure-dependent
// projection methods
Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0);
if (beta > 0.0) {
- grad_p = gradient(dev_ns_p, x, y, z, dx, dy, dz);
+ __syncthreads();
+ const Float p = dev_ns_p[cellidx];
+ const Float p_xn = dev_ns_p[idx(x-1,y,z)];
+ const Float p_yn = dev_ns_p[idx(x,y-1,z)];
+ const Float p_zn = dev_ns_p[idx(x,y,z-1)];
+ const Float3 grad_p = MAKE_FLOAT3(
+ (p - p_xn)/dx,
+ (p - p_yn)/dy,
+ (p - p_zn)/dz);
#ifdef SET_1
- pressure_term = -beta*dt/(rho*phi)*grad_p;
+ pressure_term = -beta*dt/(rho*phi)*grad_p;
#endif
#ifdef SET_2
pressure_term = -beta*dt/rho*grad_p;
#endif
}
- // Calculate the predicted velocity
- /*
- Float3 v_p = v
- + pressure_term
- + 1.0/devC_params.rho_f*div_phi_tau*ndem*devC_dt/phi // diffusion
- + MAKE_FLOAT3(devC_params.g[0], devC_params.g[1], devC_params.g[2])
- *ndem*devC_dt // gravity term
- - ndem*devC_dt/(devC_params.rho_f*phi)*f_i
- //- ndem*devC_dt/(devC_params.rho_f*phi*dx*dy*dz)*f_i
- - v*dphi/phi
- - div_phi_vi_v*ndem*devC_dt/phi // advection term
- ;*/
+ // Average the cell-centered advection term div(phi*v_i*v) to the
+ // cell faces
+ const Float3 div_phi_vi_v = MAKE_FLOAT3(
+ (div_phi_vi_v_c.x + div_phi_vi_v_xn.x)/2.0,
+ (div_phi_vi_v_c.y + div_phi_vi_v_yn.y)/2.0,
+ (div_phi_vi_v_c.z + div_phi_vi_v_zn.z)/2.0);
+
+ // Face-centered velocity and stress divergence used by the terms below
+ const Float3 v = v_face;
+ const Float3 div_tau = div_tau_face;
+
+ // Determine the predicted velocity
#ifdef SET_1
- const Float3 interaction_term = -dt/(rho*phi)*f_i;
- const Float3 diffusion_term = dt/(rho*phi)*div_tau;
- const Float3 gravity_term = MAKE_FLOAT3(
+ const Float3 interaction_term = -dt/(rho*phi)*f_i;
+ const Float3 diffusion_term = dt/(rho*phi)*div_tau;
+ const Float3 gravity_term = MAKE_FLOAT3(
devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
- const Float3 porosity_term = -1.0*v*dphi/phi;
- const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
+ const Float3 porosity_term = -1.0*v*dphi/phi;
+ const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
#endif
#ifdef SET_2
- const Float3 interaction_term = -dt/(rho*phi)*f_i;
- const Float3 diffusion_term = dt/rho*div_tau;
- const Float3 gravity_term = MAKE_FLOAT3(
- devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
- const Float3 porosity_term = -1.0*v*dphi/phi;
- const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
+ const Float3 interaction_term = -dt/(rho*phi)*f_i;
+ const Float3 diffusion_term = dt/rho*div_tau;
+ const Float3 gravity_term = MAKE_FLOAT3(
+ devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt;
+ const Float3 porosity_term = -1.0*v*dphi/phi;
+ const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi;
#endif
Float3 v_p = v
@@ -2155,8 +2276,9 @@ __global__ void findPredNSvelocities(
// Save the predicted velocity
__syncthreads();
- //v_p = MAKE_FLOAT3(0.0, 0.0, 0.0);
- dev_ns_v_p[cellidx] = v_p;
+ dev_ns_v_p_x[faceidx] = v_p.x;
+ dev_ns_v_p_y[faceidx] = v_p.y;
+ dev_ns_v_p_z[faceidx] = v_p.z;
#ifdef CHECK_NS_FINITE
(void)checkFiniteFloat3("v_p", x, y, z, v_p);
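Collecting the terms visible in findPredNSvelocities, and assuming all six enter the sum that begins at "Float3 v_p = v", the predicted face velocity under SET_1 reads (a reconstruction, with dt = ndem*devC_dt, rho = devC_params.rho_f, and phi, dphi, f_i, div(tau) the face-averaged values computed above):

    \[
    \boldsymbol{v}^{p} = \boldsymbol{v}
      - \beta\,\frac{\Delta t}{\rho\,\phi}\,\nabla p
      + \frac{\Delta t}{\rho\,\phi}\,\nabla\cdot\boldsymbol{\tau}
      + \boldsymbol{g}\,\Delta t
      - \boldsymbol{v}\,\frac{\Delta\phi}{\phi}
      - \frac{\Delta t}{\phi}\,\nabla\cdot(\phi\,\boldsymbol{v}\,\boldsymbol{v})
      - \frac{\Delta t}{\rho\,\phi}\,\boldsymbol{f}_i
    \]

Under SET_2 the pressure and diffusion terms use 1/rho in place of 1/(rho*phi); the remaining terms are identical.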
@@ -2176,7 +2298,7 @@ __global__ void findNSforcing(
Float* dev_ns_f,
Float* dev_ns_phi,
Float* dev_ns_dphi,
- Float3* dev_ns_v_p,
+ //Float3* dev_ns_v_p,
unsigned int nijac,
unsigned int ndem)
{
@@ -2786,9 +2908,12 @@ __global__ void findInteractionForce(
Float4* dev_vel, // in
Float* dev_ns_phi, // in
Float* dev_ns_p, // in
- Float3* dev_ns_v, // in
- Float* dev_ns_tau, // in
- const unsigned int iter, // in
+ Float* dev_ns_v_x, // in
+ Float* dev_ns_v_y, // in
+ Float* dev_ns_v_z, // in
+ Float* dev_ns_div_tau_x,// in
+ Float* dev_ns_div_tau_y,// in
+ Float* dev_ns_div_tau_z,// in
Float3* dev_ns_f_pf, // out
Float4* dev_force) // out
{
@@ -2819,8 +2944,30 @@ __global__ void findInteractionForce(
// Fluid information
__syncthreads();
- const Float phi = dev_ns_phi[cellidx];
- const Float3 v_f = dev_ns_v[cellidx];
+ const Float phi = dev_ns_phi[cellidx];
+ const Float v_x = dev_ns_v_x[vidx(x,y,z)];
+ const Float v_x_p = dev_ns_v_x[vidx(x+1,y,z)];
+ const Float v_y = dev_ns_v_y[vidx(x,y,z)];
+ const Float v_y_p = dev_ns_v_y[vidx(x,y+1,z)];
+ const Float v_z = dev_ns_v_z[vidx(x,y,z)];
+ const Float v_z_p = dev_ns_v_z[vidx(x,y,z+1)];
+
+ const Float div_tau_x = dev_ns_div_tau_x[vidx(x,y,z)];
+ const Float div_tau_x_p = dev_ns_div_tau_x[vidx(x+1,y,z)];
+ const Float div_tau_y = dev_ns_div_tau_y[vidx(x,y,z)];
+ const Float div_tau_y_p = dev_ns_div_tau_y[vidx(x,y+1,z)];
+ const Float div_tau_z = dev_ns_div_tau_z[vidx(x,y,z)];
+ const Float div_tau_z_p = dev_ns_div_tau_z[vidx(x,y,z+1)];
+
+ const Float3 v_f = MAKE_FLOAT3(
+ (v_x + v_x_p)/2.0,
+ (v_y + v_y_p)/2.0,
+ (v_z + v_z_p)/2.0);
+
+ const Float3 div_tau = MAKE_FLOAT3(
+ (div_tau_x + div_tau_x_p)/2.0,
+ (div_tau_y + div_tau_y_p)/2.0,
+ (div_tau_z + div_tau_z_p)/2.0);
const Float3 v_rel = v_f - v_p;
const Float v_rel_length = length(v_rel);
@@ -2844,13 +2991,7 @@ __global__ void findInteractionForce(
-1.0*gradient(dev_ns_p, i_x, i_y, i_z, dx, dy, dz)*V_p;
// Viscous force
- Float3 f_v;
- if (iter == 0)
- f_v = MAKE_FLOAT3(0.0, 0.0, 0.0);
- else
- f_v =
- -1.0*divergence_tensor(dev_ns_tau, i_x, i_y, i_z, dx, dy, dz)
- *V_p;
+ const Float3 f_v = -1.0*div_tau*V_p;
// Interaction force on particle (force) and fluid (f_pf)
__syncthreads();
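From this hunk, the per-particle pressure and viscous contributions take the form (a reconstruction; V_p is the particle volume, and the relative velocity v_rel computed above presumably enters a drag term in the elided lines):

    \[
    \boldsymbol{f}_p = -V_p\,\nabla p, \qquad
    \boldsymbol{f}_v = -V_p\,\nabla\cdot\boldsymbol{\tau},
    \]

where div(tau) is now interpolated from the new face-centered arrays rather than recomputed each iteration from the cell-centered stress tensor.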
@@ -3020,6 +3161,156 @@ __global__ void applyInteractionForceToFluid(
}
}*/
+// Launch per cell face node.
+// Cell center ghost nodes must be set prior to call.
+__global__ void interpolateCenterToFace(
+ Float3* dev_in,
+ Float* dev_out_x,
+ Float* dev_out_y,
+ Float* dev_out_z)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Check that we are not outside the fluid grid
+ if (x <= devC_grid.num[0]
+ && y <= devC_grid.num[1]
+ && z <= devC_grid.num[2]) {
+
+ const unsigned int faceidx = vidx(x,y,z);
+
+ __syncthreads();
+ const Float3 xn = dev_in[idx(x-1,y,z)];
+ const Float3 yn = dev_in[idx(x,y-1,z)];
+ const Float3 zn = dev_in[idx(x,y,z-1)];
+ const Float3 center = dev_in[idx(x,y,z)];
+
+ const Float x_val = (center.x + xn.x)/2.0;
+ const Float y_val = (center.y + yn.y)/2.0;
+ const Float z_val = (center.z + zn.z)/2.0;
+
+ __syncthreads();
+ dev_out_x[faceidx] = x_val;
+ dev_out_y[faceidx] = y_val;
+ dev_out_z[faceidx] = z_val;
+ }
+}
+
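The convention in interpolateCenterToFace is that face node (x,y,z) lies between cell centers (x-1,y,z) and (x,y,z), so each face value is the arithmetic mean of the two adjacent centers, e.g. for the x component:

    \[
    v_x\big|_{i-1/2,\,j,\,k} = \tfrac{1}{2}\left(v_x\big|_{i-1,\,j,\,k} + v_x\big|_{i,\,j,\,k}\right)
    \]

interpolateFaceToCenter below applies the same average in the opposite direction, from the two faces bounding a cell back to its center.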
+// Launch per cell center node
+__global__ void interpolateFaceToCenter(
+ Float* dev_in_x,
+ Float* dev_in_y,
+ Float* dev_in_z,
+ Float3* dev_out)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Check that we are not outside the fluid grid
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
+
+ const unsigned int cellidx = idx(x,y,z);
+
+ __syncthreads();
+ const Float x_n = dev_in_x[vidx(x,y,z)];
+ const Float x_p = dev_in_x[vidx(x+1,y,z)];
+ const Float y_n = dev_in_y[vidx(x,y,z)];
+ const Float y_p = dev_in_y[vidx(x,y+1,z)];
+ const Float z_n = dev_in_z[vidx(x,y,z)];
+ const Float z_p = dev_in_z[vidx(x,y,z+1)];
+
+ const Float3 val = MAKE_FLOAT3(
+ (x_n + x_p)/2.0,
+ (y_n + y_p)/2.0,
+ (z_n + z_p)/2.0);
+
+ __syncthreads();
+ dev_out[cellidx] = val;
+ }
+}
+
+// Launch per cell face node. Set velocity ghost nodes beforehand.
+// Find div(tau) at all cell faces.
+__global__ void findFaceDivTau(
+ Float* dev_ns_v_x,
+ Float* dev_ns_v_y,
+ Float* dev_ns_v_z,
+ Float* dev_ns_div_tau_x,
+ Float* dev_ns_div_tau_y,
+ Float* dev_ns_div_tau_z)
+{
+ // 3D thread index
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
+
+ // Grid dimensions
+ const unsigned int nx = devC_grid.num[0];
+ const unsigned int ny = devC_grid.num[1];
+ const unsigned int nz = devC_grid.num[2];
+
+ // Cell sizes
+ const Float dx = devC_grid.L[0]/nx;
+ const Float dy = devC_grid.L[1]/ny;
+ const Float dz = devC_grid.L[2]/nz;
+
+ // Check that we are not outside the fluid grid
+ if (x <= nx && y <= ny && z <= nz) {
+
+ const unsigned int faceidx = vidx(x,y,z);
+
+ __syncthreads();
+ const Float v_x_xn = dev_ns_v_x[vidx(x-1,y,z)];
+ const Float v_x = dev_ns_v_x[faceidx];
+ const Float v_x_xp = dev_ns_v_x[vidx(x+1,y,z)];
+ const Float v_x_yn = dev_ns_v_x[vidx(x,y-1,z)];
+ const Float v_x_yp = dev_ns_v_x[vidx(x,y+1,z)];
+ const Float v_x_zn = dev_ns_v_x[vidx(x,y,z-1)];
+ const Float v_x_zp = dev_ns_v_x[vidx(x,y,z+1)];
+
+ const Float v_y_xn = dev_ns_v_y[vidx(x-1,y,z)];
+ const Float v_y_xp = dev_ns_v_y[vidx(x+1,y,z)];
+ const Float v_y_yn = dev_ns_v_y[vidx(x,y-1,z)];
+ const Float v_y = dev_ns_v_y[faceidx];
+ const Float v_y_yp = dev_ns_v_y[vidx(x,y+1,z)];
+ const Float v_y_zn = dev_ns_v_y[vidx(x,y,z-1)];
+ const Float v_y_zp = dev_ns_v_y[vidx(x,y,z+1)];
+
+ const Float v_z_xn = dev_ns_v_z[vidx(x-1,y,z)];
+ const Float v_z_xp = dev_ns_v_z[vidx(x+1,y,z)];
+ const Float v_z_yn = dev_ns_v_z[vidx(x,y-1,z)];
+ const Float v_z_yp = dev_ns_v_z[vidx(x,y+1,z)];
+ const Float v_z_zn = dev_ns_v_z[vidx(x,y,z-1)];
+ const Float v_z = dev_ns_v_z[faceidx];
+ const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)];
+
+ const Float div_tau_x =
+ devC_params.mu*(
+ (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) +
+ (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) +
+ (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz));
+ const Float div_tau_y =
+ devC_params.mu*(
+ (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) +
+ (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) +
+ (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz));
+ const Float div_tau_z =
+ devC_params.mu*(
+ (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) +
+ (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) +
+ (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz));
+
+ __syncthreads();
+ dev_ns_div_tau_x[faceidx] = div_tau_x;
+ dev_ns_div_tau_y[faceidx] = div_tau_y;
+ dev_ns_div_tau_z[faceidx] = div_tau_z;
+ }
+}
+
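The stencils above are the constant-viscosity, incompressible form of the stress divergence, in which the deviatoric stress term collapses to a vector Laplacian (a standard identity, stated for reference):

    \[
    \nabla\cdot\boldsymbol{\tau}
      = \nabla\cdot\left[\mu\left(\nabla\boldsymbol{v} + (\nabla\boldsymbol{v})^{\mathsf{T}}\right)\right]
      = \mu\,\nabla^{2}\boldsymbol{v}
      \quad\text{for constant } \mu \text{ and } \nabla\cdot\boldsymbol{v} = 0,
    \]

with each second derivative approximated by the usual three-point central difference, e.g. (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx).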
// Print final heads and free memory
void DEM::endNSdev()
{
diff --git a/src/sphere.h b/src/sphere.h
@@ -175,7 +175,7 @@ class DEM {
Float* dev_ns_v_x; // Cell fluid velocity in staggered grid
Float* dev_ns_v_y; // Cell fluid velocity in staggered grid
Float* dev_ns_v_z; // Cell fluid velocity in staggered grid
- Float3* dev_ns_v_p; // Predicted cell fluid velocity
+ //Float3* dev_ns_v_p; // Predicted cell fluid velocity
Float* dev_ns_v_p_x; // Predicted cell fluid velocity in st. g…
Float* dev_ns_v_p_y; // Predicted cell fluid velocity in st. g…
Float* dev_ns_v_p_z; // Predicted cell fluid velocity in st. g…
@@ -184,7 +184,7 @@ class DEM {
Float3* dev_ns_fi; // Particle-fluid interaction force
Float* dev_ns_phi; // Cell porosity
Float* dev_ns_dphi; // Cell porosity change
- Float3* dev_ns_div_phi_v_v; // Divergence used in velocity prediction
+ //Float3* dev_ns_div_phi_v_v; // Divergence used in velocity prediction
Float* dev_ns_epsilon; // Pressure difference
Float* dev_ns_epsilon_new; // Pressure diff. after Jacobi iteration
Float* dev_ns_epsilon_old; // Pressure diff. before Jacobi iteration
@@ -193,10 +193,13 @@ class DEM {
Float* dev_ns_f1; // Constant terms in forcing function
Float3* dev_ns_f2; // Constant slopes in forcing function
Float* dev_ns_v_prod; // Outer product of fluid velocities
- Float* dev_ns_tau; // Fluid stress tensor
- Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v)
+ //Float* dev_ns_tau; // Fluid stress tensor
+ //Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v)
//Float3* dev_ns_div_phi_tau; // div(phi*tau)
Float3* dev_ns_div_tau; // div(tau)
+ Float* dev_ns_div_tau_x; // div(tau) on x-face
+ Float* dev_ns_div_tau_y; // div(tau) on y-face
+ Float* dev_ns_div_tau_z; // div(tau) on z-face
Float3* dev_ns_f_pf; // Particle-fluid interaction force
//// Navier Stokes functions
diff --git a/tests/stokes_law.py b/tests/stokes_law.py
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
print("### Stokes test - single sphere sedimentation ###")
## Stokes drag
-orig = sphere.sim(sid = "stokes_law_gradP", fluid = True)
+orig = sphere.sim(sid = "stokes_law_set2", fluid = True)
cleanup(orig)
orig.defaultParams()
orig.addParticle([0.5,0.5,1.46], 0.05)
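For reference, the analytical expressions a single-sphere sedimentation test of this kind is normally checked against (standard Stokes-drag results, not taken from the truncated test file): the drag force on a slowly settling sphere of diameter d in a fluid of viscosity mu, and the corresponding terminal velocity, are

    \[
    F_d = 3\pi\,\mu\,d\,v_{\mathrm{rel}}, \qquad
    v_t = \frac{(\rho_p - \rho_f)\,g\,d^{2}}{18\,\mu}.
    \]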