Introduction
Introduction Statistics Contact Development Disclaimer Help
tWorking on implementing darcy flow - sphere - GPU-based 3D discrete element me…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit f043266094c4d99fe6ccc12c2650d0c03b082ec4
parent fba2d7f021655b31ce60447fa54e3a4afc9ab5d5
Author: Anders Damsgaard <[email protected]>
Date: Mon, 3 Jun 2013 10:57:27 +0200
Working on implementing darcy flow
Diffstat:
M src/CMakeLists.txt | 15 ++++++++++-----
M src/darcy.cpp | 294 +++++++++++++++++++++++++++++…
M src/latticeboltzmann.cuh | 627 +++++++++++++++++++++++------…
M src/porousflow.cpp | 2 +-
M src/sphere.cpp | 1 +
M src/sphere.h | 44 +++++++++++++++++++++++++++++…
6 files changed, 801 insertions(+), 182 deletions(-)
---
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
t@@ -12,13 +12,18 @@ INCLUDE(FindCUDA)
# Additional NVCC command line arguments
# NOTE: Multiple arguments must be semi-colon selimited
-SET(CUDA_NVCC_FLAGS "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20…
+SET(CUDA_NVCC_FLAGS
+ "--use_fast_math;-O3;-gencode=arch=compute_20,code=\"sm_20,compute_20\" -c…
# Rule to build executable program
-CUDA_ADD_EXECUTABLE(../sphere main.cpp file_io.cpp sphere.cpp device.cu utilit…
-CUDA_ADD_EXECUTABLE(../porosity porosity.cpp file_io.cpp sphere.cpp device.cu …
-CUDA_ADD_EXECUTABLE(../forcechains forcechains.cpp file_io.cpp sphere.cpp devi…
-CUDA_ADD_EXECUTABLE(../porousflow porousflow.cpp darcy.cpp file_io.cpp sphere.…
+CUDA_ADD_EXECUTABLE(../sphere
+ main.cpp file_io.cpp sphere.cpp device.cu utility.cu)
+CUDA_ADD_EXECUTABLE(../porosity
+ porosity.cpp file_io.cpp sphere.cpp device.cu utility.cu)
+CUDA_ADD_EXECUTABLE(../forcechains
+ forcechains.cpp file_io.cpp sphere.cpp device.cu utility.cu)
+CUDA_ADD_EXECUTABLE(../porousflow
+ porousflow.cpp darcy.cpp file_io.cpp sphere.cpp device.cu utility.cu)
#ADD_EXECUTABLE(unittests boost-unit-tests.cpp sphere.cpp)
#TARGET_LINK_LIBRARIES(unittests
diff --git a/src/darcy.cpp b/src/darcy.cpp
t@@ -1,39 +1,299 @@
#include <iostream>
-#include <string>
#include <cstdio>
#include <cstdlib>
-#include <cmath>
-#include <vector>
-#include <algorithm>
+#include <string>
#include "typedefs.h"
#include "datatypes.h"
#include "constants.h"
#include "sphere.h"
-// Find hydraulic conductivities for each cell
+//#include "eigen-nvcc/Eigen/Core"
+
+// Initialize memory
+void DEM::initDarcyMem()
+{
+ unsigned int ncells = d_nx*d_ny*d_nz;
+ d_P = new Float[ncells]; // hydraulic pressure matrix
+ d_dP = new Float3[ncells]; // Cell spatial gradient in hydraulic pressures
+ d_K = new Float[ncells]; // hydraulic conductivity matrix
+ d_S = new Float[ncells]; // hydraulic storativity matrix
+ d_W = new Float[ncells]; // hydraulic recharge
+}
+
+// Free memory
+void DEM::freeDarcyMem()
+{
+ free(d_P);
+ free(d_dP);
+ free(d_K);
+ free(d_S);
+ free(d_W);
+}
+
+// 3D index to 1D index
+unsigned int DEM::idx(
+ const unsigned int x,
+ const unsigned int y,
+ const unsigned int z)
+{
+ return x + d_nx*y + d_nx*d_ny*z;
+}
+
+// Set initial values
+void DEM::initDarcyVals()
+{
+ unsigned int ix, iy, iz;
+ for (ix=0; ix<d_nx; ++ix) {
+ for (iy=0; iy<d_ny; ++iy) {
+ for (iz=0; iz<d_nz; ++iz) {
+ d_P[idx(ix,iy,iz)] = 1.0;
+ d_K[idx(ix,iy,iz)] = 1.5;
+ d_S[idx(ix,iy,iz)] = 7.5e-3;
+ d_W[idx(ix,iy,iz)] = 0.0;
+ }
+ }
+ }
+}
+
+Float DEM::minVal3dArr(Float* arr)
+{
+ Float minval = 1e16; // a large val
+ Float val;
+ unsigned int ix, iy, iz;
+ for (ix=0; ix<d_nx; ++ix) {
+ for (iy=0; iy<d_ny; ++iy) {
+ for (iz=0; iz<d_nz; ++iz) {
+ val = arr[idx(ix,iy,iz)];
+ if (minval > val)
+ minval = val;
+ }
+ }
+ }
+}
-// Solve Darcy flow through particles
+// Find the spatial gradient in pressures per cell
+void DEM::findDarcyGradients()
+{
+
+ std::cout << "dx,dy,dz: "
+ << d_dx << ","
+ << d_dy << ","
+ << d_dz << std::endl;
+ const Float dx2 = d_dx*d_dx;
+ const Float dy2 = d_dy*d_dy;
+ const Float dz2 = d_dz*d_dz;
+ std::cout << "dx2,dy2,dz2: "
+ << dx2 << ","
+ << dy2 << ","
+ << dz2 << std::endl;
+ Float localP2;
+
+ unsigned int ix, iy, iz;
+ for (ix=1; ix<d_nx-1; ++ix) {
+ for (iy=1; iy<d_ny-1; ++iy) {
+ for (iz=1; iz<d_nz-1; ++iz) {
+
+ localP2 = 2.0*d_P[idx(ix,iy,iz)];
+
+ d_dP[idx(ix,iy,iz)].x
+ = (d_P[idx(ix+1,iy,iz)] - localP2
+ + d_P[idx(ix-1,iy,iz)])/dx2;
+
+ d_dP[idx(ix,iy,iz)].y
+ = (d_P[idx(ix,iy+1,iz)] - localP2
+ + d_P[idx(ix,iy-1,iz)])/dx2;
+
+ d_dP[idx(ix,iy,iz)].z
+ = (d_P[idx(ix,iy,iz+1)] - localP2
+ + d_P[idx(ix,iy,iz-1)])/dz2;
+ }
+ }
+ }
+}
+
+// Set the gradient to 0.0 in all dimensions at the boundaries
+void DEM::setDarcyBCNeumannZero()
+{
+ Float3 z3 = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ unsigned int ix, iy, iz;
+ unsigned int nx = d_nx-1;
+ unsigned int ny = d_ny-1;
+ unsigned int nz = d_nz-1;
+
+ // I don't care that the values at four edges are written twice
+
+ // x-y plane at z=0 and z=d_dz-1
+ for (ix=0; ix<d_nx; ++ix) {
+ for (iy=0; iy<d_ny; ++iy) {
+ d_dP[idx(ix,iy, 0)] = z3;
+ d_dP[idx(ix,iy,nz)] = z3;
+ }
+ }
+
+ // x-z plane at y=0 and y=d_dy-1
+ for (ix=0; ix<d_nx; ++ix) {
+ for (iz=0; iz<d_nz; ++iz) {
+ d_dP[idx(ix, 0,iz)] = z3;
+ d_dP[idx(ix,ny,iz)] = z3;
+ }
+ }
+
+ // y-z plane at x=0 and x=d_dx-1
+ for (iy=0; iy<d_ny; ++iy) {
+ for (iz=0; iz<d_nz; ++iz) {
+ d_dP[idx( 0,iy,iz)] = z3;
+ d_dP[idx(nx,iy,iz)] = z3;
+ }
+ }
+}
+
+
+void DEM::explDarcyStep(const Float dt)
+{
+ // Find spatial gradients in all cells
+ //findDarcyGradients();
+ //printDarcyArray3(stdout, d_dP, "d_dP, after findDarcyGradients");
+
+ // Set boundary conditions
+ //setDarcyBCNeumannZero();
+ //printDarcyArray3(stdout, d_dP, "d_dP, after setDarcyBCNeumannZero");
+
+ // Cell dims squared
+ const Float dx2 = d_dx*d_dx;
+ const Float dy2 = d_dy*d_dy;
+ const Float dz2 = d_dz*d_dz;
+ std::cout << "dx2,dy2,dz2: "
+ << dx2 << ","
+ << dy2 << ","
+ << dz2 << std::endl;
+
+ // Explicit 3D finite difference scheme
+ // new = old + gradient*timestep
+ unsigned int ix, iy, iz, cellidx;
+ Float K, P;
+ for (ix=1; ix<d_nx-1; ++ix) {
+ for (iy=1; iy<d_ny-1; ++iy) {
+ for (iz=1; iz<d_nz-1; ++iz) {
+
+ cellidx = idx(ix,iy,iz);
+
+ /*d_P[cellidx]
+ -= (d_W[cellidx]*dt
+ + d_K[cellidx]*d_dP[cellidx].x/d_dx
+ + d_K[cellidx]*d_dP[cellidx].y/d_dy
+ + d_K[cellidx]*d_dP[cellidx].z/d_dz) / d_S[cellidx];*/
+
+ K = d_K[cellidx]; // cell hydraulic conductivity
+ P = d_P[cellidx]; // cell hydraulic pressure
+
+ d_P[cellidx]
+ += d_W[cellidx]*dt // cell recharge
+ + K*dt * // diffusivity term
+ (
+ (d_P[idx(ix+1,iy,iz)] - 2.0*P + d_P[idx(ix-1,iy,iz)])/dx2…
+ (d_P[idx(ix,iy+1,iz)] - 2.0*P + d_P[idx(ix,iy-1,iz)])/dy2…
+ (d_P[idx(ix,iy,iz+1)] - 2.0*P + d_P[idx(ix,iy,iz-1)])/dz2
+ );
+
+
+ }
+ }
+ }
+}
+
+// Print array values to file stream (stdout, stderr, other file)
+void DEM::printDarcyArray(FILE* stream, Float* arr)
+{
+ unsigned int x, y, z;
+ for (z=0; z<d_nz; z++) {
+ for (y=0; y<d_ny; y++) {
+ for (x=0; x<d_nx; x++) {
+ fprintf(stream, "%f\t", arr[idx(x,y,z)]);
+ }
+ fprintf(stream, "\n");
+ }
+ fprintf(stream, "\n");
+ }
+}
+
+// Overload printDarcyArray to add optional description
+void DEM::printDarcyArray(FILE* stream, Float* arr, std::string desc)
+{
+ std::cout << "\n" << desc << ":\n";
+ printDarcyArray(stream, arr);
+}
+
+// Print array values to file stream (stdout, stderr, other file)
+void DEM::printDarcyArray3(FILE* stream, Float3* arr)
+{
+ unsigned int x, y, z;
+ for (z=0; z<d_nz; z++) {
+ for (y=0; y<d_ny; y++) {
+ for (x=0; x<d_nx; x++) {
+ fprintf(stream, "%f,%f,%f\t",
+ arr[idx(x,y,z)].x,
+ arr[idx(x,y,z)].y,
+ arr[idx(x,y,z)].z);
+ }
+ fprintf(stream, "\n");
+ }
+ fprintf(stream, "\n");
+ }
+}
+
+// Overload printDarcyArray to add optional description
+void DEM::printDarcyArray3(FILE* stream, Float3* arr, std::string desc)
+{
+ std::cout << "\n" << desc << ":\n";
+ printDarcyArray3(stream, arr);
+}
+
+
+// Find hydraulic conductivities for each cell by finding the particle contents
+//
+
+// Solve Darcy flow on a regular, cubic grid
void DEM::startDarcy(
const Float cellsizemultiplier)
{
// Number of cells
- int nx = grid.L[0]/grid.num[0];
- int ny = grid.L[1]/grid.num[1];
- int nz = grid.L[2]/grid.num[2];
+ d_nx = floor(grid.num[0]*cellsizemultiplier);
+ d_ny = floor(grid.num[1]*cellsizemultiplier);
+ d_nz = floor(grid.num[2]*cellsizemultiplier);
// Cell size
- Float dx = grid.L[0]/nx;
- Float dy = grid.L[1]/nx;
- Float dz = grid.L[2]/nx;
+ Float d_dx = grid.L[0]/d_nx;
+ Float d_dy = grid.L[1]/d_ny;
+ Float d_dz = grid.L[2]/d_nz;
if (verbose == 1) {
- std::cout << "Fluid grid dimensions: "
- << nx << " * "
- << ny << " * "
- << nz << std::endl;
+ std::cout << " - Fluid grid dimensions: "
+ << d_nx << " * "
+ << d_ny << " * "
+ << d_nz << std::endl;
+ std::cout << " - Fluid grid cell size: "
+ << d_dx << " * "
+ << d_dy << " * "
+ << d_dz << std::endl;
}
+ initDarcyMem();
+ initDarcyVals();
-}
+ // Temporal loop
+ //while(time.current <= time.total) {
+ explDarcyStep(time.dt);
+ time.current += time.dt;
+ //}
+
+
+ printDarcyArray(stdout, d_P, "d_P");
+ //printDarcyArray3(stdout, d_dP, "d_dP");
+ //printDarcyArray(stdout, d_K, "d_K");
+ //printDarcyArray(stdout, d_S, "d_S");
+ //printDarcyArray(stdout, d_W, "d_W");
+
+ freeDarcyMem();
+}
diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh
t@@ -1,6 +1,10 @@
#ifndef LATTICEBOLTZMANN_CUH_
#define LATTICEBOLTZMANN_CUH_
+// Enable line below to perform lattice Boltzmann computations on the
+// GPU, disable for CPU computation
+//#define LBM_GPU
+
// latticeboltzmann.cuh
// Functions for solving the Navier-Stokes equations using the Lattice-Boltzma…
// method with D3Q19 stencils
t@@ -8,84 +12,166 @@
// Calculate linear cell index from position (x,y,z)
// and fluid position vector (i).
// From A. Monitzer 2013
-__device__ unsigned int grid2index(
+#ifdef LBM_GPU
+__device__
+#endif
+unsigned int grid2index(
unsigned int x, unsigned int y, unsigned int z,
- unsigned int i)
+ unsigned int i,
+ unsigned int nx, unsigned int ny, unsigned int nz)
{
- return x + ((y + z*devC_grid.num[1])*devC_grid.num[0])
- + (devC_grid.num[0]*devC_grid.num[1]*devC_grid.num[2]*i);
+ return x + ((y + z*ny)*nx) + (nx*ny*nz*i);
}
+
// Equilibrium distribution
-__device__ Float feq(Float3 v, Float rho, Float3 e, Float omega)
+#ifdef LBM_GPU
+__device__
+#endif
+Float feq(Float3 v, Float rho, Float3 e, Float w, Float dt, Float dx)
{
- return omega*rho * (1.0 - 3.0/2.0 * dot(v,v) + 3.0*dot(e,v) +
- 9.0/2.0*dot(e,v)*dot(e,v));
+ // Monitzer 2010
+ //return w*rho * (1.0 - 3.0/2.0 * dot(v,v) + 3.0*dot(e,v) +
+ //9.0/2.0*dot(e,v)*dot(e,v));
+
+ // Rinaldi 2012
+ //return w*rho * (1.0 + 3.0*dot(e,v) + 9.0/2.0*dot(e,v)*dot(e,v)
+ //- 3.0/2.0*dot(v,v));
+
+ // Hecht 2010
+ //Float c2_s = 1.0/sqrt(3); // D3Q19 lattice speed of sound
+ //c2_s *= c2_s;
+ //return w*rho * (1.0 + dot(e,v)/c2_s
+ //+ (dot(e,v)*dot(e,v))/(2.0*c2_s*c2_s)
+ //- dot(v,v)*dot(v,v)/(2.0*c2_s));
+
+ // Chirila 2010
+ //Float c2 = 1.0*grid.num[0]/devC_dt;
+ //Float c2 = 1.0/sqrt(3.0);
+ //c2 *= c2; // Propagation speed on the lattice
+ //return w*rho * (1.0 + 3.0*dot(e,v)/c2
+ //+ 9.0/2.0 * dot(e,v)*dot(e,v)/(c2*c2)
+ //- 3.0/2.0 * dot(v,v)/c2);
+
+ // Habich 2011
+ Float c2 = dx/dt * dx/dt;
+ return rho*w
+ * (1.0 + 3.0/c2*dot(e,v)
+ + 9.0/(2.0*c2*c2) * dot(e,v)*dot(e,v)
+ - 3.0/(2.0*c2) * dot(v,v));
}
// Collision operator
// Bhatnagar-Gross-Krook approximation (BGK), Thurey (2003).
-__device__ Float bgk(
+#ifdef LBM_GPU
+__device__
+#endif
+Float bgk(
+ Float dt,
+ Float dx,
Float f,
Float tau,
Float3 v,
Float rho,
Float3 e,
- Float omega,
+ Float w,
Float3 extF)
{
- return devC_dt / tau * (f - feq(v, rho, e, omega))
- - (1.0 - 1.0/(2.0*tau)) * 3.0/omega * dot(extF, e);
+ //Float feqval = feq(v, rho, e, w);
+ //printf("feq(v, rho=%f, e, w=%f) = %f\n", rho, w, feqval);
+
+ // Monitzer 2008
+ //return dt / tau * (f - feq(v, rho, e, w))
+ //- (1.0 - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
+ //return dt / tau * (f - feq(v, rho, e, w))
+ // + (2.0*tau - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
+ //return dt/tau * (f - feq(v, rho, e, w))
+ //+ (2.0*tau - 1.0/(2.0*tau)) * 3.0/w * dot(extF, e);
+
+ // Monitzer 2010
+ //return dt/tau*(f - feq(v, rho, e, w))
+ //+ (2.0*tau - 1.0)/(2.0*tau) * 3.0/w * dot(extF, e);
+
+ // Rinaldi 2012
+ //return 1.0/tau * (feq(v, rho, e, w) - f);
+
+ // Habich 2011
+ return (f - feq(v, rho, e, w, dt, dx))/tau
+ + (2.0*tau - 1.0)/(2.0*tau) * 3.0/w * dot(extF, e);
}
// Initialize the fluid distributions on the base of the densities provided
-__global__ void initfluid(
+#ifdef LBM_GPU
+__global__ void initFluid(
Float4* dev_v_rho,
Float* dev_f)
+#else
+void initFluid(
+ Float4* dev_v_rho,
+ Float* dev_f,
+ unsigned int nx,
+ unsigned int ny,
+ unsigned int nz)
+#endif
+
{
+#ifdef LBM_GPU
// 3D thread index
- const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
- const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// Grid dimensions
const unsigned int nx = devC_grid.num[0];
const unsigned int ny = devC_grid.num[1];
const unsigned int nz = devC_grid.num[2];
+#else
+ for (unsigned int z = 0; z<nz; z++) {
+ for (unsigned int y = 0; y<ny; y++) {
+ for (unsigned int x = 0; x<nx; x++) {
+#endif
// Check that we are not outside the fluid grid
if (x < nx && y < ny && z < nz) {
// 1D thread index
- const unsigned long int tidx = x + nx*y + nx*ny*z;
+ const unsigned int tidx = x + nx*y + nx*ny*z;
// Read velocity and density, zero velocity
+#ifdef LBM_GPU
+ __syncthreads();
+#endif
Float4 v_rho = dev_v_rho[tidx];
v_rho = MAKE_FLOAT4(0.0, 0.0, 0.0, v_rho.w);
- // Set values to equilibrium distribution (f_i = omega_i * rho_0)
+ // Set values to equilibrium distribution (f_i = w_i * rho_0)
+#ifdef LBM_GPU
__syncthreads();
+#endif
dev_v_rho[tidx] = v_rho;
- dev_f[grid2index(x,y,z,0)] = 1.0/3.0 * v_rho.w;
- dev_f[grid2index(x,y,z,1)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,2)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,3)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,4)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,5)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,6)] = 1.0/18.0 * v_rho.w;
- dev_f[grid2index(x,y,z,7)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,8)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,9)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,10)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,11)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,12)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,13)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,14)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,15)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,16)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,17)] = 1.0/36.0 * v_rho.w;
- dev_f[grid2index(x,y,z,18)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,0,nx,ny,nz)] = 1.0/3.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,1,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,2,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,3,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,4,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,5,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,6,nx,ny,nz)] = 1.0/18.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,7,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,8,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,9,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,10,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,11,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,12,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,13,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,14,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,15,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,16,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,17,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
+ dev_f[grid2index(x,y,z,18,nx,ny,nz)] = 1.0/36.0 * v_rho.w;
}
+#ifndef LBM_GPU
+ }}}
+#endif
}
// Swap two arrays pointers
t@@ -98,6 +184,7 @@ void swapFloatArrays(Float* arr1, Float* arr2)
// Combined streaming and collision step with particle coupling and optional
// periodic boundaries. Derived from A. Monitzer 2013
+#ifdef LBM_GPU
__global__ void latticeBoltzmannD3Q19(
Float* dev_f,
Float* dev_f_new,
t@@ -108,46 +195,68 @@ __global__ void latticeBoltzmannD3Q19(
Float4* dev_vel_sorted, // particle velocities + fixvel
Float4* dev_force,
unsigned int* dev_gridParticleIndex)
+#else
+void latticeBoltzmannD3Q19(
+ Float* dev_f,
+ Float* dev_f_new,
+ Float4* dev_v_rho, // fluid velocities and densities
+ Float devC_dt,
+ Grid& grid,
+ Params& params)
+
+#endif
{
+#ifdef LBM_GPU
// 3D thread index
- const unsigned int z = blockDim.x * blockIdx.x + threadIdx.x;
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
- const unsigned int x = blockDim.z * blockIdx.z + threadIdx.z;
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
// Grid dimensions
const unsigned int nx = devC_grid.num[0];
const unsigned int ny = devC_grid.num[1];
const unsigned int nz = devC_grid.num[2];
+#else
+ // Grid dimensions
+ const unsigned int nx = grid.num[0];
+ const unsigned int ny = grid.num[1];
+ const unsigned int nz = grid.num[2];
+
+ for (unsigned int z = 0; z<nz; z++) {
+ for (unsigned int y = 0; y<ny; y++) {
+ for (unsigned int x = 0; x<nx; x++) {
+#endif
// Check that we are not outside the fluid grid
if (x < nx && y < ny && z < nz) {
+ // 1D thread index
+ const unsigned int tidx = x + nx*y + nx*ny*z;
//printf("(x,y,x) = (%d,%d,%d), tidx = %d\n", x, y, z, tidx);
// Load the fluid distribution into local registers
+#ifdef LBM_GPU
__syncthreads();
- Float f_0 = dev_f[grid2index(x,y,z,0)];
- Float f_1 = dev_f[grid2index(x,y,z,1)];
- Float f_2 = dev_f[grid2index(x,y,z,2)];
- Float f_3 = dev_f[grid2index(x,y,z,3)];
- Float f_4 = dev_f[grid2index(x,y,z,4)];
- Float f_5 = dev_f[grid2index(x,y,z,5)];
- Float f_6 = dev_f[grid2index(x,y,z,6)];
- Float f_7 = dev_f[grid2index(x,y,z,7)];
- Float f_8 = dev_f[grid2index(x,y,z,8)];
- Float f_9 = dev_f[grid2index(x,y,z,9)];
- Float f_10 = dev_f[grid2index(x,y,z,10)];
- Float f_11 = dev_f[grid2index(x,y,z,11)];
- Float f_12 = dev_f[grid2index(x,y,z,12)];
- Float f_13 = dev_f[grid2index(x,y,z,13)];
- Float f_14 = dev_f[grid2index(x,y,z,14)];
- Float f_15 = dev_f[grid2index(x,y,z,15)];
- Float f_16 = dev_f[grid2index(x,y,z,16)];
- Float f_17 = dev_f[grid2index(x,y,z,17)];
- Float f_18 = dev_f[grid2index(x,y,z,18)];
-
- // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
- const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
+#endif
+ Float f_0 = dev_f[grid2index(x,y,z,0,nx,ny,nz)];
+ Float f_1 = dev_f[grid2index(x,y,z,1,nx,ny,nz)];
+ Float f_2 = dev_f[grid2index(x,y,z,2,nx,ny,nz)];
+ Float f_3 = dev_f[grid2index(x,y,z,3,nx,ny,nz)];
+ Float f_4 = dev_f[grid2index(x,y,z,4,nx,ny,nz)];
+ Float f_5 = dev_f[grid2index(x,y,z,5,nx,ny,nz)];
+ Float f_6 = dev_f[grid2index(x,y,z,6,nx,ny,nz)];
+ Float f_7 = dev_f[grid2index(x,y,z,7,nx,ny,nz)];
+ Float f_8 = dev_f[grid2index(x,y,z,8,nx,ny,nz)];
+ Float f_9 = dev_f[grid2index(x,y,z,9,nx,ny,nz)];
+ Float f_10 = dev_f[grid2index(x,y,z,10,nx,ny,nz)];
+ Float f_11 = dev_f[grid2index(x,y,z,11,nx,ny,nz)];
+ Float f_12 = dev_f[grid2index(x,y,z,12,nx,ny,nz)];
+ Float f_13 = dev_f[grid2index(x,y,z,13,nx,ny,nz)];
+ Float f_14 = dev_f[grid2index(x,y,z,14,nx,ny,nz)];
+ Float f_15 = dev_f[grid2index(x,y,z,15,nx,ny,nz)];
+ Float f_16 = dev_f[grid2index(x,y,z,16,nx,ny,nz)];
+ Float f_17 = dev_f[grid2index(x,y,z,17,nx,ny,nz)];
+ Float f_18 = dev_f[grid2index(x,y,z,18,nx,ny,nz)];
// Directional vectors to each lattice-velocity in D3Q19
// Zero velocity: i = 0
t@@ -162,7 +271,7 @@ __global__ void latticeBoltzmannD3Q19(
const Float3 e_6 = MAKE_FLOAT3( 0.0, 0.0,-1.0); // face: -z
const Float3 e_7 = MAKE_FLOAT3( 1.0, 1.0, 0.0); // edge: +x,+y
const Float3 e_8 = MAKE_FLOAT3(-1.0,-1.0, 0.0); // edge: -x,-y
- const Float3 e_9 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: -x,+y
+ const Float3 e_9 = MAKE_FLOAT3(-1.0, 1.0, 0.0); // edge: -x,+y
const Float3 e_10 = MAKE_FLOAT3( 1.0,-1.0, 0.0); // edge: +x,-y
const Float3 e_11 = MAKE_FLOAT3( 1.0, 0.0, 1.0); // edge: +x,+z
const Float3 e_12 = MAKE_FLOAT3(-1.0, 0.0,-1.0); // edge: -x,-z
t@@ -180,22 +289,20 @@ __global__ void latticeBoltzmannD3Q19(
const Float rho = f_0 + f_1 + f_2 + f_3 + f_4 + f_5 + f_6 + f_7 + f_8 +
f_9 + f_10 + f_11 + f_12 + f_13 + f_14 + f_15 + f_16 + f_17 + f_18;
- // Fluid velocity (v = sum(f_i/e_i)/rho)
- const Float3 v = (f_0/e_0 + f_1/e_1 + f_2/e_2 + f_3/e_3 + f_4/e_4 +
- f_5/e_5 + f_6/e_6 + f_7/e_7 + f_8/e_8 + f_9/e_9 + f_10/e_10 +
- f_11/e_11 + f_12/e_12 + f_13/e_13 + f_14/e_14 + f_15/e_15 +
- f_16/e_16 + f_17/e_17 + f_18/e_18) / rho;
+ // Fluid velocity (v = sum(f_i*e_i)/rho)
+ const Float3 v = (f_0*e_0 + f_1*e_1 + f_2*e_2 + f_3*e_3 + f_4*e_4 +
+ f_5*e_5 + f_6*e_6 + f_7*e_7 + f_8*e_8 + f_9*e_9 + f_10*e_10 +
+ f_11*e_11 + f_12*e_12 + f_13*e_13 + f_14*e_14 + f_15*e_15 +
+ f_16*e_16 + f_17*e_17 + f_18*e_18) / rho;
//// Calculate the force transferred from the particles to the fluid
- Float3 f_particle;
+ /*Float3 f_particle;
Float3 f_particles = MAKE_FLOAT3(0.0, 0.0, 0.0);
Float4 x_particle4; // particle position + radius
Float r_particle; // radius
Float4 v_particle4; // particle velocity + fixvel
Float3 v_particle; // particle velocity
- // 1D thread index
- const unsigned int tidx = x + nx*y + nx*ny*z;
// Lowest particle index in cell
unsigned int startIdx = dev_cellStart[tidx];
t@@ -206,7 +313,6 @@ __global__ void latticeBoltzmannD3Q19(
// Highest particle index in cell + 1
unsigned int endIdx = dev_cellEnd[tidx];
-
// Iterate over cell particles
for (unsigned int idx = startIdx; idx<endIdx; ++idx) {
t@@ -243,40 +349,77 @@ __global__ void latticeBoltzmannD3Q19(
// 100: experimental value, depends on the grid size compared to the
// particle size and the time step size
f_particles *= 100.0 * rho * 6.0;
+ */
+
+#ifdef LBM_GPU
+ Float dx = devC_grid.L[0]/devC_grid.num[0];
+
+ // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
+ // nu = 1/6*(2*tau - 1) * dx * c
+ //const Float tau = 0.5*(1.0 + 6.0*devC_params.nu);
+ //const Float tau = 1.0/6.0*(2.0*devC_params.nu - 1.0) * dx*dx/devC_dt;
+ const Float tau = (6.0*devC_params.nu * devC_dt/(dx*dx) + 1)/2.0;
+
+ // Gravitational force (F = g * m)
+ const Float3 f_gravity = MAKE_FLOAT3(
+ devC_params.g[0]*dx*rho,
+ devC_params.g[1]
+ * ((Float)devC_grid.L[1]/devC_grid.num[1]) * rho,
+ devC_params.g[2]
+ * ((Float)devC_grid.L[2]/devC_grid.num[2]) * rho);
+#else
+ Float dx = grid.L[0]/grid.num[0];
+
+ // Fluid constant (Wei et al. 2004), nu: kinematic viscosity [Pa*s]
+ //const Float tau = 0.5*(1.0 + 6.0*params.nu);
+ //const Float tau = 1.0/6.0*(2.0*params.nu - 1.0) * dx*dx/devC_dt;
+ const Float tau = (6.0*params.nu * devC_dt/(dx*dx) + 1)/2.0;
+
+ //if (tau <= 0.5) {
+ //fprintf(stderr, "Error, tau <= 0.5\n");
+ //exit(1);
+ //}
// Gravitational force (F = g * m)
const Float3 f_gravity = MAKE_FLOAT3(
- devC_params.g[0],
- devC_params.g[1],
- devC_params.g[2])
- * (devC_grid.L[0]/devC_grid.num[0])
- * (devC_grid.L[1]/devC_grid.num[1])
- * (devC_grid.L[2]/devC_grid.num[2]) * rho;
+ params.g[0]*dx*rho,
+ params.g[1] * ((Float)grid.L[1]/grid.num[1]) * rho,
+ params.g[2] * ((Float)grid.L[2]/grid.num[2]) * rho);
+#endif
// The final external force
- const Float3 f_ext = f_particles + f_gravity;
+ //const Float3 f_ext = f_particles + f_gravity;
+ const Float3 f_ext = f_gravity;
+ //const Float3 f_ext = MAKE_FLOAT3(0.0, 0.0, 0.0);
+ //printf("%d,%d,%d: f_ext = %f, %f, %f\n", x, y, z,
+ //f_ext.x, f_ext.y, f_ext.z);
//// Collide fluid
// Weights corresponding to each e_i lattice-velocity in D3Q19, sum to…
- f_0 -= bgk(f_0, tau, v, rho, e_0, 1.0/3.0, f_ext);
- f_1 -= bgk(f_1, tau, v, rho, e_1, 1.0/18.0, f_ext);
- f_2 -= bgk(f_2, tau, v, rho, e_2, 1.0/18.0, f_ext);
- f_3 -= bgk(f_3, tau, v, rho, e_3, 1.0/18.0, f_ext);
- f_4 -= bgk(f_4, tau, v, rho, e_4, 1.0/18.0, f_ext);
- f_5 -= bgk(f_5, tau, v, rho, e_5, 1.0/18.0, f_ext);
- f_6 -= bgk(f_6, tau, v, rho, e_6, 1.0/18.0, f_ext);
- f_7 -= bgk(f_7, tau, v, rho, e_7, 1.0/36.0, f_ext);
- f_8 -= bgk(f_8, tau, v, rho, e_8, 1.0/36.0, f_ext);
- f_9 -= bgk(f_9, tau, v, rho, e_9, 1.0/36.0, f_ext);
- f_10 -= bgk(f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
- f_11 -= bgk(f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
- f_12 -= bgk(f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
- f_13 -= bgk(f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
- f_14 -= bgk(f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
- f_15 -= bgk(f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
- f_16 -= bgk(f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
- f_17 -= bgk(f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
- f_18 -= bgk(f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
+ f_0 -= bgk(devC_dt, dx, f_0, tau, v, rho, e_0, 1.0/3.0, f_ext);
+ f_1 -= bgk(devC_dt, dx, f_1, tau, v, rho, e_1, 1.0/18.0, f_ext);
+ f_2 -= bgk(devC_dt, dx, f_2, tau, v, rho, e_2, 1.0/18.0, f_ext);
+ f_3 -= bgk(devC_dt, dx, f_3, tau, v, rho, e_3, 1.0/18.0, f_ext);
+ f_4 -= bgk(devC_dt, dx, f_4, tau, v, rho, e_4, 1.0/18.0, f_ext);
+ f_5 -= bgk(devC_dt, dx, f_5, tau, v, rho, e_5, 1.0/18.0, f_ext);
+ f_6 -= bgk(devC_dt, dx, f_6, tau, v, rho, e_6, 1.0/18.0, f_ext);
+ f_7 -= bgk(devC_dt, dx, f_7, tau, v, rho, e_7, 1.0/36.0, f_ext);
+ f_8 -= bgk(devC_dt, dx, f_8, tau, v, rho, e_8, 1.0/36.0, f_ext);
+ f_9 -= bgk(devC_dt, dx, f_9, tau, v, rho, e_9, 1.0/36.0, f_ext);
+ f_10 -= bgk(devC_dt, dx, f_10, tau, v, rho, e_10, 1.0/36.0, f_ext);
+ f_11 -= bgk(devC_dt, dx, f_11, tau, v, rho, e_11, 1.0/36.0, f_ext);
+ f_12 -= bgk(devC_dt, dx, f_12, tau, v, rho, e_12, 1.0/36.0, f_ext);
+ f_13 -= bgk(devC_dt, dx, f_13, tau, v, rho, e_13, 1.0/36.0, f_ext);
+ f_14 -= bgk(devC_dt, dx, f_14, tau, v, rho, e_14, 1.0/36.0, f_ext);
+ f_15 -= bgk(devC_dt, dx, f_15, tau, v, rho, e_15, 1.0/36.0, f_ext);
+ f_16 -= bgk(devC_dt, dx, f_16, tau, v, rho, e_16, 1.0/36.0, f_ext);
+ f_17 -= bgk(devC_dt, dx, f_17, tau, v, rho, e_17, 1.0/36.0, f_ext);
+ f_18 -= bgk(devC_dt, dx, f_18, tau, v, rho, e_18, 1.0/36.0, f_ext);
+ //Float bgkval = bgk(devC_dt, f_1, tau, v, rho, e_1, 1.0/18.0, f_ext…
+ //Float feqval = feq(v, rho, e_1, 1.0/18.0);
+ //printf("%d,%d,%d: dt %f, f %f, feq %f, tau %f, v %fx%fx%f, rho %f, e…
+ //x, y, z, devC_dt, f_1, feqval, tau, v.x, v.y, v.z, rho,
+ //e_1.x, e_1.y, e_1.z, f_ext.x, f_ext.y, f_ext.z, bgkval);
//// Stream fluid
t@@ -284,171 +427,341 @@ __global__ void latticeBoltzmannD3Q19(
// There may be a write conflict due to bounce backs
+#ifdef LBM_GPU
__syncthreads();
+#endif
// Write fluid velocity and density to global memory
dev_v_rho[tidx] = MAKE_FLOAT4(v.x, v.y, v.z, rho);
+ //dev_v_rho[tidx] = MAKE_FLOAT4(x, y, z, rho);
// Face 0
- dev_f_new[grid2index(x,y,z,0)] = fmax(0.0, f_0);
+ dev_f_new[grid2index(x,y,z,0,nx,ny,nz)] = fmax(0.0, f_0);
+
+ //*
+
+ // Face 1 (+x): Bounce back
+ if (x < nx-1)
+ dev_f_new[grid2index(x+1, y, z, 1,nx,ny,nz)] = fmax(0.0, f_1);
+ else
+ dev_f_new[grid2index( x, y, z, 2,nx,ny,nz)] = fmax(0.0, f_1);
+
+ // Face 2 (-x): Bounce back
+ if (x > 0)
+ dev_f_new[grid2index(x-1, y, z, 2,nx,ny,nz)] = fmax(0.0, f_2);
+ else
+ dev_f_new[grid2index( x, y, z, 1,nx,ny,nz)] = fmax(0.0, f_2);
+
+ // Face 3 (+y): Bounce back
+ if (y < ny-1)
+ dev_f_new[grid2index( x,y+1, z, 3,nx,ny,nz)] = fmax(0.0, f_3);
+ else
+ dev_f_new[grid2index( x, y, z, 4,nx,ny,nz)] = fmax(0.0, f_3);
+
+ // Face 4 (-y): Bounce back
+ if (y > 0)
+ dev_f_new[grid2index( x,y-1, z, 4,nx,ny,nz)] = fmax(0.0, f_4);
+ else
+ dev_f_new[grid2index( x, y, z, 3,nx,ny,nz)] = fmax(0.0, f_4);
+
+ // Face 5 (+z): Bounce back
+ if (z < nz-1)
+ dev_f_new[grid2index( x, y,z+1, 5,nx,ny,nz)] = fmax(0.0, f_5);
+ else
+ dev_f_new[grid2index( x, y, z, 6,nx,ny,nz)] = fmax(0.0, f_5);
+
+ // Face 6 (-z): Bounce back
+ if (z > 0)
+ dev_f_new[grid2index( x, y,z-1, 6,nx,ny,nz)] = fmax(0.0, f_6);
+ else
+ dev_f_new[grid2index( x, y, z, 5,nx,ny,nz)] = fmax(0.0, f_6);
+
+ // Edge 7 (+x,+y): Bounce back
+ if (x < nx-1 && y < ny-1)
+ dev_f_new[grid2index(x+1,y+1, z, 7,nx,ny,nz)] = fmax(0.0, f_7);
+ else if (x < nx-1)
+ dev_f_new[grid2index(x+1, y, z, 9,nx,ny,nz)] = fmax(0.0, f_7);
+ else if (y < ny-1)
+ dev_f_new[grid2index( x,y+1, z, 10,nx,ny,nz)] = fmax(0.0, f_7);
+ else
+ dev_f_new[grid2index( x, y, z, 8,nx,ny,nz)] = fmax(0.0, f_7);
+
+ // Edge 8 (-x,-y): Bounce back
+ if (x > 0 && y > 0)
+ dev_f_new[grid2index(x-1,y-1, z, 8,nx,ny,nz)] = fmax(0.0, f_8);
+ else if (x > 0)
+ dev_f_new[grid2index(x-1, y, z, 9,nx,ny,nz)] = fmax(0.0, f_8);
+ else if (y > 0)
+ dev_f_new[grid2index( x,y-1, z, 10,nx,ny,nz)] = fmax(0.0, f_8);
+ else
+ dev_f_new[grid2index( x, y, z, 7,nx,ny,nz)] = fmax(0.0, f_8);
+
+ // Edge 9 (-x,+y): Bounce back
+ if (x > 0 && y < ny-1)
+ dev_f_new[grid2index(x-1,y+1, z, 9,nx,ny,nz)] = fmax(0.0, f_9);
+ else if (x > 0)
+ dev_f_new[grid2index(x-1, y, z, 8,nx,ny,nz)] = fmax(0.0, f_9);
+ else if (y < ny-1)
+ dev_f_new[grid2index( x,y+1, z, 7,nx,ny,nz)] = fmax(0.0, f_9);
+ else
+ dev_f_new[grid2index( x, y, z, 10,nx,ny,nz)] = fmax(0.0, f_9);
+
+ // Edge 10 (+x,-y): Bounce back
+ if (x < nx-1 && y > 0)
+ dev_f_new[grid2index(x+1,y-1, z, 10,nx,ny,nz)] = fmax(0.0, f_10);
+ else if (x < nx-1)
+ dev_f_new[grid2index(x+1, y, z, 8,nx,ny,nz)] = fmax(0.0, f_10);
+ else if (y > 0)
+ dev_f_new[grid2index( x,y-1, z, 7,nx,ny,nz)] = fmax(0.0, f_10);
+ else
+ dev_f_new[grid2index( x, y, z, 9,nx,ny,nz)] = fmax(0.0, f_10);
+
+ // Edge 11 (+x,+z): Bounce back
+ if (x < nx-1 && z < nz-1)
+ dev_f_new[grid2index(x+1, y,z+1, 11,nx,ny,nz)] = fmax(0.0, f_11);
+ else if (x < nx-1)
+ dev_f_new[grid2index(x+1, y, z, 16,nx,ny,nz)] = fmax(0.0, f_11);
+ else if (z < nz-1)
+ dev_f_new[grid2index( x, y,z+1, 15,nx,ny,nz)] = fmax(0.0, f_11);
+ else
+ dev_f_new[grid2index( x, y, z, 12,nx,ny,nz)] = fmax(0.0, f_11);
+
+ // Edge 12 (-x,-z): Bounce back
+ if (x > 0 && z > 0)
+ dev_f_new[grid2index(x-1, y,z-1, 12,nx,ny,nz)] = fmax(0.0, f_12);
+ else if (x > 0)
+ dev_f_new[grid2index(x-1, y, z, 15,nx,ny,nz)] = fmax(0.0, f_12);
+ else if (z > 0)
+ dev_f_new[grid2index( x, y,z-1, 16,nx,ny,nz)] = fmax(0.0, f_12);
+ else
+ dev_f_new[grid2index( x, y, z, 11,nx,ny,nz)] = fmax(0.0, f_12);
+
+ // Edge 13 (+y,+z): Bounce back
+ if (y < ny-1 && z < nz-1)
+ dev_f_new[grid2index( x,y+1,z+1, 13,nx,ny,nz)] = fmax(0.0, f_13);
+ else if (y < ny-1)
+ dev_f_new[grid2index( x,y+1, z, 18,nx,ny,nz)] = fmax(0.0, f_13);
+ else if (z < nz-1)
+ dev_f_new[grid2index( x, y,z+1, 17,nx,ny,nz)] = fmax(0.0, f_13);
+ else
+ dev_f_new[grid2index( x, y, z, 14,nx,ny,nz)] = fmax(0.0, f_13);
+
+ // Edge 14 (-y,-z): Bounce back
+ if (y > 0 && z > 0)
+ dev_f_new[grid2index( x,y-1,z-1, 14,nx,ny,nz)] = fmax(0.0, f_14);
+ else if (y > 0)
+ dev_f_new[grid2index( x,y-1, z, 17,nx,ny,nz)] = fmax(0.0, f_14);
+ else if (z > 0)
+ dev_f_new[grid2index( x, y,z-1, 18,nx,ny,nz)] = fmax(0.0, f_14);
+ else
+ dev_f_new[grid2index( x, y, z, 13,nx,ny,nz)] = fmax(0.0, f_14);
+
+ // Edge 15 (-x,+z): Bounce back
+ if (x > 0 && z < nz-1)
+ dev_f_new[grid2index(x-1, y,z+1, 15,nx,ny,nz)] = fmax(0.0, f_15);
+ else if (x > 0)
+ dev_f_new[grid2index(x-1, y, z, 12,nx,ny,nz)] = fmax(0.0, f_15);
+ else if (z < nz-1)
+ dev_f_new[grid2index( x, y,z+1, 11,nx,ny,nz)] = fmax(0.0, f_15);
+ else
+ dev_f_new[grid2index( x, y, z, 16,nx,ny,nz)] = fmax(0.0, f_15);
+
+ // Edge 16 (+x,-z)
+ if (x < nx-1 && z > 0)
+ dev_f_new[grid2index(x+1, y,z-1, 16,nx,ny,nz)] = fmax(0.0, f_16);
+ else if (x < nx-1)
+ dev_f_new[grid2index(x+1, y, z, 11,nx,ny,nz)] = fmax(0.0, f_16);
+ else if (z > 0)
+ dev_f_new[grid2index( x, y,z-1, 12,nx,ny,nz)] = fmax(0.0, f_16);
+ else
+ dev_f_new[grid2index( x, y, z, 15,nx,ny,nz)] = fmax(0.0, f_16);
+
+ // Edge 17 (-y,+z)
+ if (y > 0 && z < nz-1)
+ dev_f_new[grid2index( x,y-1,z+1, 17,nx,ny,nz)] = fmax(0.0, f_17);
+ else if (y > 0)
+ dev_f_new[grid2index( x,y-1, z, 14,nx,ny,nz)] = fmax(0.0, f_17);
+ else if (z < nz-1)
+ dev_f_new[grid2index( x, y,z+1, 13,nx,ny,nz)] = fmax(0.0, f_17);
+ else
+ dev_f_new[grid2index( x, y, z, 18,nx,ny,nz)] = fmax(0.0, f_17);
+
+ // Edge 18 (+y,-z)
+ if (y < ny-1 && z > 0)
+ dev_f_new[grid2index( x,y+1,z-1, 18,nx,ny,nz)] = fmax(0.0, f_18);
+ else if (y < ny-1)
+ dev_f_new[grid2index( x,y+1, z, 13,nx,ny,nz)] = fmax(0.0, f_18);
+ else if (z > 0)
+ dev_f_new[grid2index( x, y,z-1, 14,nx,ny,nz)] = fmax(0.0, f_18);
+ else
+ dev_f_new[grid2index( x, y, z, 17,nx,ny,nz)] = fmax(0.0, f_18);
+
+ // */
+
+ /*
// Face 1 (+x): Periodic
if (x < nx-1) // not at boundary
- dev_f_new[grid2index( x+1, y, z, 1)] = fmax(0.0, f_1);
+ dev_f_new[grid2index( x+1, y, z, 1,nx,ny,nz)] = fmax(0.0, f_1…
else // at boundary
- dev_f_new[grid2index( 0, y, z, 1)] = fmax(0.0, f_1);
+ dev_f_new[grid2index( 0, y, z, 1,nx,ny,nz)] = fmax(0.0, f_1…
// Face 2 (-x): Periodic
if (x > 0) // not at boundary
- dev_f_new[grid2index( x-1, y, z, 2)] = fmax(0.0, f_2);
+ dev_f_new[grid2index( x-1, y, z, 2,nx,ny,nz)] = fmax(0.0, f_2…
else // at boundary
- dev_f_new[grid2index(nx-1, y, z, 2)] = fmax(0.0, f_2);
+ dev_f_new[grid2index(nx-1, y, z, 2,nx,ny,nz)] = fmax(0.0, f_2…
// Face 3 (+y): Periodic
if (y < ny-1) // not at boundary
- dev_f_new[grid2index( x, y+1, z, 3)] = fmax(0.0, f_3);
+ dev_f_new[grid2index( x, y+1, z, 3,nx,ny,nz)] = fmax(0.0, f_3…
else // at boundary
- dev_f_new[grid2index( x, 0, z, 3)] = fmax(0.0, f_3);
+ dev_f_new[grid2index( x, 0, z, 3,nx,ny,nz)] = fmax(0.0, f_3…
// Face 4 (-y): Periodic
if (y > 0) // not at boundary
- dev_f_new[grid2index( x, y-1, z, 4)] = fmax(0.0, f_4);
+ dev_f_new[grid2index( x, y-1, z, 4,nx,ny,nz)] = fmax(0.0, f_4…
else // at boundary
- dev_f_new[grid2index( x,ny-1, z, 4)] = fmax(0.0, f_4);
+ dev_f_new[grid2index( x,ny-1, z, 4,nx,ny,nz)] = fmax(0.0, f_4…
// Face 5 (+z): Bounce back, free slip
if (z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y, z+1, 5)] = fmax(0.0, f_5);
+ dev_f_new[grid2index( x, y, z+1, 5,nx,ny,nz)] = fmax(0.0, f_5…
else // at boundary
- dev_f_new[grid2index( x, y, z, 6)] = fmax(0.0, f_5);
+ dev_f_new[grid2index( x, y, z, 6,nx,ny,nz)] = fmax(0.0, f_5…
// Face 6 (-z): Bounce back, free slip
if (z > 0) // not at boundary
- dev_f_new[grid2index( x, y, z-1, 6)] = fmax(0.0, f_6);
+ dev_f_new[grid2index( x, y, z-1, 6,nx,ny,nz)] = fmax(0.0, f_6…
else // at boundary
- dev_f_new[grid2index( x, y, z, 5)] = fmax(0.0, f_6);
-
+ dev_f_new[grid2index( x, y, z, 5,nx,ny,nz)] = fmax(0.0, f_6…
+
// Edge 7 (+x,+y): Periodic
if (x < nx-1 && y < ny-1) // not at boundary
- dev_f_new[grid2index( x+1, y+1, z, 7)] = fmax(0.0, f_7);
+ dev_f_new[grid2index( x+1, y+1, z, 7,nx,ny,nz)] = fmax(0.0, f_7…
else if (x < nx-1) // at +y boundary
- dev_f_new[grid2index( x+1, 0, z, 7)] = fmax(0.0, f_7);
+ dev_f_new[grid2index( x+1, 0, z, 7,nx,ny,nz)] = fmax(0.0, f_7…
else if (y < ny-1) // at +x boundary
- dev_f_new[grid2index( 0, y+1, z, 7)] = fmax(0.0, f_7);
+ dev_f_new[grid2index( 0, y+1, z, 7,nx,ny,nz)] = fmax(0.0, f_7…
else // at +x+y boundary
- dev_f_new[grid2index( 0, 0, z, 7)] = fmax(0.0, f_7);
+ dev_f_new[grid2index( 0, 0, z, 7,nx,ny,nz)] = fmax(0.0, f_7…
// Edge 8 (-x,-y): Periodic
if (x > 0 && y > 0) // not at boundary
- dev_f_new[grid2index( x-1, y-1, z, 8)] = fmax(0.0, f_8);
+ dev_f_new[grid2index( x-1, y-1, z, 8,nx,ny,nz)] = fmax(0.0, f_8…
else if (x > 0) // at -y boundary
- dev_f_new[grid2index( x-1,ny-1, z, 8)] = fmax(0.0, f_8);
+ dev_f_new[grid2index( x-1,ny-1, z, 8,nx,ny,nz)] = fmax(0.0, f_8…
else if (y > 0) // at -x boundary
- dev_f_new[grid2index(nx-1, y-1, z, 8)] = fmax(0.0, f_8);
+ dev_f_new[grid2index(nx-1, y-1, z, 8,nx,ny,nz)] = fmax(0.0, f_8…
else // at -x-y boundary
- dev_f_new[grid2index(nx-1,ny-1, z, 8)] = fmax(0.0, f_8);
+ dev_f_new[grid2index(nx-1,ny-1, z, 8,nx,ny,nz)] = fmax(0.0, f_8…
// Edge 9 (-x,+y): Periodic
if (x > 0 && y < ny-1) // not at boundary
- dev_f_new[grid2index( x-1, y+1, z, 9)] = fmax(0.0, f_9);
+ dev_f_new[grid2index( x-1, y+1, z, 9,nx,ny,nz)] = fmax(0.0, f_9…
else if (x > 0) // at +y boundary
- dev_f_new[grid2index( x-1, 0, z, 9)] = fmax(0.0, f_9);
+ dev_f_new[grid2index( x-1, 0, z, 9,nx,ny,nz)] = fmax(0.0, f_9…
else if (y < ny-1) // at -x boundary
- dev_f_new[grid2index(nx-1, y+1, z, 9)] = fmax(0.0, f_9);
+ dev_f_new[grid2index(nx-1, y+1, z, 9,nx,ny,nz)] = fmax(0.0, f_9…
else // at -x+y boundary
- dev_f_new[grid2index(nx-1, 0, z, 9)] = fmax(0.0, f_9);
+ dev_f_new[grid2index(nx-1, 0, z, 9,nx,ny,nz)] = fmax(0.0, f_9…
// Edge 10 (+x,-y): Periodic
if (x < nx-1 && y > 0) // not at boundary
- dev_f_new[grid2index( x+1, y-1, z, 10)] = fmax(0.0, f_10);
+ dev_f_new[grid2index( x+1, y-1, z, 10,nx,ny,nz)] = fmax(0.0, f_1…
else if (x < nx-1) // at -y boundary
- dev_f_new[grid2index( x+1,ny-1, z, 10)] = fmax(0.0, f_10);
+ dev_f_new[grid2index( x+1,ny-1, z, 10,nx,ny,nz)] = fmax(0.0, f_1…
else if (y > 0) // at +x boundary
- dev_f_new[grid2index( 0, y-1, z, 10)] = fmax(0.0, f_10);
+ dev_f_new[grid2index( 0, y-1, z, 10,nx,ny,nz)] = fmax(0.0, f_1…
else // at +x-y boundary
- dev_f_new[grid2index( 0,ny-1, z, 10)] = fmax(0.0, f_10);
+ dev_f_new[grid2index( 0,ny-1, z, 10,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 11 (+x,+z): Periodic & bounce-back (free slip)
if (x < nx-1 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x+1, y, z+1, 11)] = fmax(0.0, f_11);
+ dev_f_new[grid2index( x+1, y, z+1, 11,nx,ny,nz)] = fmax(0.0, f_1…
else if (x < nx-1) // at +z boundary
- dev_f_new[grid2index( x+1, y, 0, 12)] = fmax(0.0, f_11);
+ dev_f_new[grid2index( x+1, y, 0, 12,nx,ny,nz)] = fmax(0.0, f_1…
else if (z < nz-1) // at +x boundary
- dev_f_new[grid2index( 0, y, z+1, 11)] = fmax(0.0, f_11);
+ dev_f_new[grid2index( 0, y, z+1, 11,nx,ny,nz)] = fmax(0.0, f_1…
else // at +x+z boundary
- dev_f_new[grid2index( 0, y, 0, 12)] = fmax(0.0, f_11);
+ dev_f_new[grid2index( 0, y, 0, 12,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 12 (-x,-z): Periodic & bounce back (free slip)
if (x > 0 && z > 0) // not at boundary
- dev_f_new[grid2index( x-1, y, z-1, 12)] = fmax(0.0, f_12);
+ dev_f_new[grid2index( x-1, y, z-1, 12,nx,ny,nz)] = fmax(0.0, f_1…
else if (x > 0) // at -z boundary
- dev_f_new[grid2index( x-1, y,nz-1, 11)] = fmax(0.0, f_12);
+ dev_f_new[grid2index( x-1, y,nz-1, 11,nx,ny,nz)] = fmax(0.0, f_1…
else if (z > 0) // at -x boundary
- dev_f_new[grid2index(nx-1, y, z-1, 12)] = fmax(0.0, f_12);
+ dev_f_new[grid2index(nx-1, y, z-1, 12,nx,ny,nz)] = fmax(0.0, f_1…
else // at -x-z boundary
- dev_f_new[grid2index(nx-1, y,nz-1, 11)] = fmax(0.0, f_12);
+ dev_f_new[grid2index(nx-1, y,nz-1, 11,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 13 (+y,+z): Periodic & bounce-back (free slip)
if (y < ny-1 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y+1, z+1, 13)] = fmax(0.0, f_13);
+ dev_f_new[grid2index( x, y+1, z+1, 13,nx,ny,nz)] = fmax(0.0, f_1…
else if (y < ny-1) // at +z boundary
- dev_f_new[grid2index( x, y+1, 0, 14)] = fmax(0.0, f_13);
+ dev_f_new[grid2index( x, y+1, 0, 14,nx,ny,nz)] = fmax(0.0, f_1…
else if (z < nz-1) // at +y boundary
- dev_f_new[grid2index( x, 0, z+1, 13)] = fmax(0.0, f_13);
+ dev_f_new[grid2index( x, 0, z+1, 13,nx,ny,nz)] = fmax(0.0, f_1…
else // at +y+z boundary
- dev_f_new[grid2index( x, 0, 0, 14)] = fmax(0.0, f_13);
+ dev_f_new[grid2index( x, 0, 0, 14,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 14 (-y,-z): Periodic & bounce-back (free slip)
if (y > 0 && z > 0) // not at boundary
- dev_f_new[grid2index( x, y-1, z-1, 14)] = fmax(0.0, f_14);
+ dev_f_new[grid2index( x, y-1, z-1, 14,nx,ny,nz)] = fmax(0.0, f_1…
else if (y > 0) // at -z boundary
- dev_f_new[grid2index( x, y-1,nz-1, 13)] = fmax(0.0, f_14);
+ dev_f_new[grid2index( x, y-1,nz-1, 13,nx,ny,nz)] = fmax(0.0, f_1…
else if (z > 0) // at -y boundary
- dev_f_new[grid2index( x,ny-1, z-1, 14)] = fmax(0.0, f_14);
+ dev_f_new[grid2index( x,ny-1, z-1, 14,nx,ny,nz)] = fmax(0.0, f_1…
else // at -y-z boundary
- dev_f_new[grid2index( x,ny-1,nz-1, 13)] = fmax(0.0, f_14);
+ dev_f_new[grid2index( x,ny-1,nz-1, 13,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 15 (-x,+z): Periodic & bounce-back (free slip)
if (x > 0 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x-1, y, z+1, 15)] = fmax(0.0, f_15);
+ dev_f_new[grid2index( x-1, y, z+1, 15,nx,ny,nz)] = fmax(0.0, f_1…
else if (x > 0) // at +z boundary
- dev_f_new[grid2index( x-1, y, 0, 16)] = fmax(0.0, f_15);
+ dev_f_new[grid2index( x-1, y, 0, 16,nx,ny,nz)] = fmax(0.0, f_1…
else if (z < nz-1) // at -x boundary
- dev_f_new[grid2index(nx-1, y, z+1, 15)] = fmax(0.0, f_15);
+ dev_f_new[grid2index(nx-1, y, z+1, 15,nx,ny,nz)] = fmax(0.0, f_1…
else // at -x+z boundary
- dev_f_new[grid2index(nx-1, y, 0, 16)] = fmax(0.0, f_15);
+ dev_f_new[grid2index(nx-1, y, 0, 16,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 16 (+x,-z): Periodic & bounce-back (free slip)
if (x < nx-1 && z > 0) // not at boundary
- dev_f_new[grid2index( x+1, y, z-1, 16)] = fmax(0.0, f_16);
+ dev_f_new[grid2index( x+1, y, z-1, 16,nx,ny,nz)] = fmax(0.0, f_1…
else if (x < nx-1) // at -z boundary
- dev_f_new[grid2index( x+1, y,nz-1, 15)] = fmax(0.0, f_16);
+ dev_f_new[grid2index( x+1, y,nz-1, 15,nx,ny,nz)] = fmax(0.0, f_1…
else if (z > 0) // at +x boundary
- dev_f_new[grid2index( 0, y, z-1, 16)] = fmax(0.0, f_16);
+ dev_f_new[grid2index( 0, y, z-1, 16,nx,ny,nz)] = fmax(0.0, f_1…
else // at +x-z boundary
- dev_f_new[grid2index( 0, y,nz-1, 15)] = fmax(0.0, f_16);
+ dev_f_new[grid2index( 0, y,nz-1, 15,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 17 (-y,+z): Periodic & bounce-back (free slip)
if (y > 0 && z < nz-1) // not at boundary
- dev_f_new[grid2index( x, y-1, z+1, 17)] = fmax(0.0, f_17);
+ dev_f_new[grid2index( x, y-1, z+1, 17,nx,ny,nz)] = fmax(0.0, f_1…
else if (y > 0) // at +z boundary
- dev_f_new[grid2index( x, y-1, 0, 18)] = fmax(0.0, f_17);
+ dev_f_new[grid2index( x, y-1, 0, 18,nx,ny,nz)] = fmax(0.0, f_1…
else if (z < nz-1) // at -y boundary
- dev_f_new[grid2index( x,ny-1, z+1, 17)] = fmax(0.0, f_17);
+ dev_f_new[grid2index( x,ny-1, z+1, 17,nx,ny,nz)] = fmax(0.0, f_1…
else // at -y+z boundary
- dev_f_new[grid2index( x,ny-1, 0, 18)] = fmax(0.0, f_17);
+ dev_f_new[grid2index( x,ny-1, 0, 18,nx,ny,nz)] = fmax(0.0, f_1…
// Edge 18 (+y,-z): Periodic & bounce-back (free slip)
if (y < ny-1 && z > 0) // not at boundary
- dev_f_new[grid2index( x, y+1, z-1, 18)] = fmax(0.0, f_18);
+ dev_f_new[grid2index( x, y+1, z-1, 18,nx,ny,nz)] = fmax(0.0, f_1…
else if (y < ny-1) // at -z boundary
- dev_f_new[grid2index( x, y+1, 0, 17)] = fmax(0.0, f_18);
+ dev_f_new[grid2index( x, y+1, 0, 17,nx,ny,nz)] = fmax(0.0, f_1…
else if (z > 0) // at +y boundary
- dev_f_new[grid2index( x, 0, z-1, 18)] = fmax(0.0, f_18);
+ dev_f_new[grid2index( x, 0, z-1, 18,nx,ny,nz)] = fmax(0.0, f_1…
else // at +y-z boundary
- dev_f_new[grid2index( x, 0, 0, 17)] = fmax(0.0, f_18);
+ dev_f_new[grid2index( x, 0, 0, 17,nx,ny,nz)] = fmax(0.0, f_1…
+ // */
+
}
+#ifndef LBM_GPU
+ }}}
+#endif
}
#endif
diff --git a/src/porousflow.cpp b/src/porousflow.cpp
t@@ -64,7 +64,7 @@ int main(const int argc, const char *argv[])
// mem
DEM dem(argvi, verbose, 0, dry, 0, 0);
- // Otherwise, start iterating through time
+ // Start iterating through time
dem.startDarcy();
diff --git a/src/sphere.cpp b/src/sphere.cpp
t@@ -66,6 +66,7 @@ DEM::DEM(const std::string inputbin,
transferToGlobalDeviceMemory();
}
+
}
// Destructor: Liberates dynamically allocated host memory
diff --git a/src/sphere.h b/src/sphere.h
t@@ -4,6 +4,7 @@
#include <vector>
+//#include "eigen-nvcc/Eigen/Core"
#include "datatypes.h"
t@@ -146,6 +147,13 @@ class DEM {
// Darcy-flow values
int d_nx, d_ny, d_nz; // Number of cells in each dim
Float d_dx, d_dy, d_dz; // Cell length in each dim
+ Float* d_P; // Cell hydraulic pressures
+ Float3* d_dP; // Cell spatial gradient in pressures
+ Float* d_K; // Cell hydraulic conductivities (anisotropic)
+ Float* d_S; // Cell hydraulic storativity
+ Float* d_W; // Cell hydraulic recharge
+ Float mu; // Fluid viscosity
+
public:
t@@ -212,12 +220,44 @@ class DEM {
const double lower_cutoff = 0.0,
const double upper_cutoff = 1.0e9);
+
+ ///// Darcy flow functions
+
+ // Memory allocation and destruction
+ void initDarcyMem();
+ void freeDarcyMem();
+
+ // Set some values for the Darcy parameters
+ void initDarcyVals();
+
+ // Get linear (1D) index from 3D coordinate
+ unsigned int idx(
+ const unsigned int x,
+ const unsigned int y,
+ const unsigned int z);
+
+ // Get minimum value in 1D array
+ Float minVal3dArr(Float* arr);
+
+ // Finds central difference gradients
+ void findDarcyGradients();
+
+ // Set gradient to zero at grid edges
+ void setDarcyBCNeumannZero();
+
+ // Perform a single time step, explicit integration
+ void explDarcyStep(const Float dt);
+
// Calculate Darcy fluid flow through material
void startDarcy(
const Float cellsizemultiplier = 1.0);
-};
-
+ // Print Darcy arrays to file stream
+ void printDarcyArray(FILE* stream, Float* arr);
+ void printDarcyArray(FILE* stream, Float* arr, std::string desc);
+ void printDarcyArray3(FILE* stream, Float3* arr);
+ void printDarcyArray3(FILE* stream, Float3* arr, std::string desc);
+};
#endif
// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.