tported Darcy routines to CUDA, compiles, needs testing - sphere - GPU-based 3D… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 1658688f8ba0d701c10244801f13fd46821c799c | |
parent 27f9122518041ed1ed936e25643d2f9f61cc8100 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Wed, 9 Oct 2013 11:11:55 +0200 | |
ported Darcy routines to CUDA, compiles, needs testing | |
Diffstat: | |
M src/darcy.cpp | 221 ++++++++++++++++++-----------… | |
A src/darcy.cuh | 616 +++++++++++++++++++++++++++++… | |
M src/device.cu | 172 ++++++++++++++++++++++++++---… | |
M src/sphere.h | 5 +++++ | |
4 files changed, 898 insertions(+), 116 deletions(-) | |
--- | |
diff --git a/src/darcy.cpp b/src/darcy.cpp | |
t@@ -10,6 +10,9 @@ | |
#include "sphere.h" | |
#include "utility.h" | |
+// Enable line below to make x and y boundaries periodic | |
+#define PERIODIC_XY | |
+ | |
// Initialize memory | |
void DEM::initDarcyMem() | |
{ | |
t@@ -50,7 +53,7 @@ unsigned int DEM::idx( | |
//return x + d_nx*y + d_nx*d_ny*z; | |
// with ghost nodes | |
- // the ghost nodes are placed at -1 and WIDTH | |
+ // the ghost nodes are placed at x,y,z = -1 and WIDTH | |
return (x+1) + (d_nx+2)*(y+1) + (d_nx+2)*(d_ny+2)*(z+1); | |
} | |
t@@ -75,17 +78,26 @@ void DEM::initDarcyVals() | |
// read from input binary | |
// Hydraulic permeability [m^2] | |
- d_K[cellidx] = k*rho*-params.g[2]/params.nu; | |
+ //d_K[cellidx] = k*rho*-params.g[2]/params.nu; | |
+ d_K[cellidx] = 0.5; | |
// Hydraulic storativity [-] | |
d_Ss[cellidx] = 8.0e-3; | |
//d_Ss[cellidx] = 1.0; | |
- // Hydraulic recharge [Pa/s] | |
+ // Hydraulic recharge [s^-1] | |
d_W[cellidx] = 0.0; | |
} | |
} | |
} | |
+ | |
+ // Extract water from all cells in center | |
+ ix = d_nx/2-1; iy = d_ny/2-1; | |
+ Float cellvolume = d_dx*d_dy*d_dz; | |
+ for (iz=0; iz<d_nz; ++iz) { | |
+ //d_W[idx(ix,iy,iz)] = -0.1*cellvolume; | |
+ d_W[idx(ix,iy,iz)] = -1.0; | |
+ } | |
} | |
t@@ -169,7 +181,12 @@ void DEM::findDarcyTransmissivities() | |
// Kozeny-Carman parameter | |
//Float a = 1.0e-8; | |
- Float a = 1.0; | |
+ //Float a = 1.0; | |
+ | |
+ // Representative grain radius | |
+ Float r_bar2 = meanRadius()*2.0; | |
+ // Grain size factor for Kozeny-Carman relationship | |
+ Float d_factor = r_bar2*r_bar2/180.0; | |
unsigned int ix, iy, iz, cellidx; | |
Float K, k; | |
t@@ -185,7 +202,9 @@ void DEM::findDarcyTransmissivities() | |
// Calculate permeability from the Kozeny-Carman relationship | |
// Nelson 1994 eq. 1c | |
// Boek 2012 eq. 16 | |
- k = a*phi*phi*phi/(1.0 - phi*phi); | |
+ //k = a*phi*phi*phi/(1.0 - phi*phi); | |
+ // Schwartz and Zhang 2003 | |
+ k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor; | |
// Save hydraulic conductivity [m/s] | |
//K = d_K[cellidx]; | |
t@@ -261,7 +280,8 @@ void DEM::findDarcyGradients() | |
// With ghost-nodes | |
for (ix=0; ix<d_nx; ++ix) { | |
for (iy=0; iy<d_ny; ++iy) { | |
- for (iz=1; iz<d_nz-1; ++iz) { | |
+ //for (iz=1; iz<d_nz-1; ++iz) { | |
+ for (iz=0; iz<d_nz; ++iz) { | |
cellidx = idx(ix,iy,iz); | |
t@@ -335,9 +355,10 @@ void DEM::explDarcyStep() | |
checkDarcyTimestep(); | |
// Cell dims squared | |
- const Float dx2 = d_dx*d_dx; | |
- const Float dy2 = d_dy*d_dy; | |
- const Float dz2 = d_dz*d_dz; | |
+ const Float dxdx = d_dx*d_dx; | |
+ const Float dydy = d_dy*d_dy; | |
+ const Float dzdz = d_dz*d_dz; | |
+ const Float dxdydz = d_dx*d_dy*d_dz; | |
//setDarcyBCNeumannZero(); | |
t@@ -358,95 +379,104 @@ void DEM::explDarcyStep() | |
// Cell linear index | |
cellidx = idx(ix,iy,iz); | |
- // If x,y,z boundaries are fixed values: | |
- // Enforce Dirichlet BC | |
- if (ix == 0 || iy == 0 || iz == 0 || | |
+ // If x,y,z boundaries are fixed values: Enforce Dirichlet BC | |
+ /*if (ix == 0 || iy == 0 || iz == 0 || | |
ix == d_nx-1 || iy == d_ny-1 || iz == d_nz-1) { | |
+ d_H_new[cellidx] = d_H[cellidx];*/ | |
+ | |
+ // If z boundaries are fixed val, x and y are periodic: | |
+ /*if (iz == 0 || iz == d_nz-1) { | |
d_H_new[cellidx] = d_H[cellidx]; | |
- // If z boundaries are periodic: | |
- //if (iz == 0 || iz == d_nz-1) { | |
- //d_H_new[cellidx] = d_H[cellidx]; | |
- } else { | |
- // Cell hydraulic conductivity | |
- K = d_K[cellidx]; | |
- | |
- // Cell hydraulic transmissivities | |
- Tx = K*d_dx; | |
- Ty = K*d_dy; | |
- Tz = K*d_dz; | |
- | |
- // Cell hydraulic head | |
- H = d_H[cellidx]; | |
- | |
- // Harmonic mean of transmissivity | |
- // (in neg. and pos. direction along axis from cell) | |
- // with periodic x and y boundaries | |
- // without ghost nodes | |
- /* | |
- if (ix == 0) | |
- gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x) | |
- * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2; | |
- else | |
- gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x) | |
- * (d_H[idx(ix-1,iy,iz)] - H)/dx2; | |
- | |
- if (ix == d_nx-1) | |
- gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x) | |
- * (d_H[idx(0,iy,iz)] - H)/dx2; | |
- else | |
- gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x) | |
- * (d_H[idx(ix+1,iy,iz)] - H)/dx2; | |
- | |
- if (iy == 0) | |
- grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y) | |
- * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2; | |
- else | |
- grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y) | |
- * (d_H[idx(ix,iy-1,iz)] - H)/dy2; | |
- | |
- if (iy == d_ny-1) | |
- grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y) | |
- * (d_H[idx(ix,0,iz)] - H)/dy2; | |
- else | |
- grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y) | |
- * (d_H[idx(ix,iy+1,iz)] - H)/dy2; | |
- */ | |
+ } else {*/ | |
+ | |
+ // Cell hydraulic conductivity | |
+ K = d_K[cellidx]; | |
+ | |
+ // Cell hydraulic transmissivities | |
+ Tx = K*d_dx; | |
+ Ty = K*d_dy; | |
+ Tz = K*d_dz; | |
+ // Cell hydraulic head | |
+ H = d_H[cellidx]; | |
+ | |
+ // Harmonic mean of transmissivity | |
+ // (in neg. and pos. direction along axis from cell) | |
+ // with periodic x and y boundaries | |
+ // without ghost nodes | |
+ /* | |
+ if (ix == 0) | |
+ gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x) | |
+ * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2; | |
+ else | |
gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x) | |
* (d_H[idx(ix-1,iy,iz)] - H)/dx2; | |
+ | |
+ if (ix == d_nx-1) | |
+ gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x) | |
+ * (d_H[idx(0,iy,iz)] - H)/dx2; | |
+ else | |
gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x) | |
* (d_H[idx(ix+1,iy,iz)] - H)/dx2; | |
+ if (iy == 0) | |
+ grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y) | |
+ * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2; | |
+ else | |
grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y) | |
* (d_H[idx(ix,iy-1,iz)] - H)/dy2; | |
+ | |
+ if (iy == d_ny-1) | |
+ grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y) | |
+ * (d_H[idx(ix,0,iz)] - H)/dy2; | |
+ else | |
grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y) | |
* (d_H[idx(ix,iy+1,iz)] - H)/dy2; | |
- | |
+ */ | |
+ | |
+ gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x) | |
+ * (d_H[idx(ix-1,iy,iz)] - H)/dxdx; | |
+ gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x) | |
+ * (d_H[idx(ix+1,iy,iz)] - H)/dxdx; | |
+ | |
+ grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y) | |
+ * (d_H[idx(ix,iy-1,iz)] - H)/dydy; | |
+ grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y) | |
+ * (d_H[idx(ix,iy+1,iz)] - H)/dydy; | |
+ | |
+ // Neumann (no-flow) boundary condition at +z and -z boundaries | |
+ // enforced by a gradient value of 0.0 | |
+ if (iz == 0) | |
+ gradz_n = 0.0; | |
+ else | |
gradz_n = hmean(Tz, d_T[idx(ix,iy,iz-1)].z) | |
- * (d_H[idx(ix,iy,iz-1)] - H)/dz2; | |
+ * (d_H[idx(ix,iy,iz-1)] - H)/dzdz; | |
+ if (iz == d_nz-1) | |
+ gradz_p = 0.0; | |
+ else | |
gradz_p = hmean(Tz, d_T[idx(ix,iy,iz+1)].z) | |
- * (d_H[idx(ix,iy,iz+1)] - H)/dz2; | |
- | |
- /*std::cerr << ix << ',' << iy << ',' << iz << '\t' | |
- << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t' | |
- << gradx_n << ',' << gradx_p << '\t' | |
- << grady_n << ',' << grady_p << '\t' | |
- << gradz_n << ',' << gradz_p << std::endl;*/ | |
- | |
- // Cell hydraulic storativity | |
- S = d_Ss[cellidx]*d_dx*d_dy*d_dz; | |
- | |
- // Laplacian operator | |
- deltaH = time.dt/S * | |
- ( gradx_n + gradx_p | |
- + grady_n + grady_p | |
- + gradz_n + gradz_p | |
- + d_W[cellidx] ); | |
- | |
- // Calculate new hydraulic pressure in cell | |
- d_H_new[cellidx] = H + deltaH; | |
- } | |
+ * (d_H[idx(ix,iy,iz+1)] - H)/dzdz; | |
+ | |
+ /*std::cerr << ix << ',' << iy << ',' << iz << '\t' | |
+ << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t' | |
+ << gradx_n << ',' << gradx_p << '\t' | |
+ << grady_n << ',' << grady_p << '\t' | |
+ << gradz_n << ',' << gradz_p << std::endl;*/ | |
+ | |
+ // Cell hydraulic storativity | |
+ S = d_Ss[cellidx]*dxdydz; | |
+ | |
+ // Laplacian operator | |
+ deltaH = time.dt/S * | |
+ ( gradx_n + gradx_p | |
+ + grady_n + grady_p | |
+ + gradz_n + gradz_p | |
+ + d_W[cellidx] ); | |
+ | |
+ // Calculate new hydraulic pressure in cell | |
+ d_H_new[cellidx] = H + deltaH; | |
+ //} | |
} | |
} | |
} | |
t@@ -607,6 +637,7 @@ Float DEM::cellPorosity( | |
return phi; | |
} | |
+// Calculate the porosity for each cell | |
void DEM::findPorosities() | |
{ | |
unsigned int ix, iy, iz, cellidx; | |
t@@ -619,6 +650,16 @@ void DEM::findPorosities() | |
} | |
} | |
+// Returns the mean particle radius | |
+Float DEM::meanRadius() | |
+{ | |
+ unsigned int i; | |
+ Float r_sum; | |
+ for (i=0; i<np; ++i) | |
+ r_sum += k.x[i].w; | |
+ return r_sum/((Float)np); | |
+} | |
+ | |
// Find particles with centres inside a spatial interval | |
// NOTE: This function is untested and unused | |
std::vector<unsigned int> DEM::particlesInCell( | |
t@@ -709,8 +750,8 @@ void DEM::checkDarcyTimestep() | |
if (value > 0.5) { | |
std::cerr << "Error! The explicit darcy solution will be unstable.\n" | |
<< "This happens due to a combination of the following:\n" | |
- << " - The transmissivity T (i.e. hydraulic conductivity, K) is to… | |
- << " (" << T_max << ")\n" | |
+ << " - The transmissivity T (i.e. hydraulic conductivity, K)" | |
+ << " is too large (" << T_max << ")\n" | |
<< " - The storativity S is too small" | |
<< " (" << S_min << ")\n" | |
<< " - The time step is too large" | |
t@@ -722,7 +763,7 @@ void DEM::checkDarcyTimestep() | |
} | |
} | |
-// Solve Darcy flow on a regular, cubic grid | |
+// Initialize darcy arrays, their values, and check the time step length | |
void DEM::initDarcy(const Float cellsizemultiplier) | |
{ | |
if (params.nu <= 0.0) { | |
t@@ -762,15 +803,15 @@ void DEM::initDarcy(const Float cellsizemultiplier) | |
// Print final heads and free memory | |
void DEM::endDarcy() | |
{ | |
- FILE* Kfile; | |
+ /*FILE* Kfile; | |
if ((Kfile = fopen("d_K.txt","w"))) { | |
printDarcyArray(Kfile, d_K); | |
fclose(Kfile); | |
} else { | |
fprintf(stderr, "Error, could not open d_K.txt\n"); | |
- } | |
- printDarcyArray(stdout, d_phi, "d_phi"); | |
- printDarcyArray(stdout, d_K, "d_K"); | |
+ }*/ | |
+ //printDarcyArray(stdout, d_phi, "d_phi"); | |
+ //printDarcyArray(stdout, d_K, "d_K"); | |
//printDarcyArray(stdout, d_H, "d_H"); | |
//printDarcyArray(stdout, d_V, "d_V"); | |
freeDarcyMem(); | |
diff --git a/src/darcy.cuh b/src/darcy.cuh | |
t@@ -0,0 +1,616 @@ | |
+// darcy.cu | |
+// CUDA implementation of Darcy flow | |
+ | |
+// Enable line below to perform Darcy flow computations on the GPU, disable for | |
+// CPU computation | |
+#define DARCY_GPU | |
+ | |
+#include <iostream> | |
+#include <cuda.h> | |
+//#include <cutil_math.h> | |
+#include <helper_math.h> | |
+ | |
+#include "vector_arithmetic.h" // for arbitrary prec. vectors | |
+#include "sphere.h" | |
+#include "datatypes.h" | |
+#include "utility.cuh" | |
+#include "utility.h" | |
+#include "constants.cuh" | |
+#include "debug.h" | |
+ | |
+// Initialize memory | |
+void DEM::initDarcyMemDev(void) | |
+{ | |
+ // number of cells | |
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes | |
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes | |
+ unsigned int memSizeF = sizeof(Float) * ncells; | |
+ | |
+ cudaMalloc((void**)&dev_d_H, memSizeF); // hydraulic pressure | |
+ cudaMalloc((void**)&dev_d_H_new, memSizeF); // new pressure matrix | |
+ cudaMalloc((void**)&dev_d_V, memSizeF*3); // cell hydraulic velocity | |
+ cudaMalloc((void**)&dev_d_dH, memSizeF*3); // hydraulic pressure gradient | |
+ cudaMalloc((void**)&dev_d_K, memSizeF); // hydraulic conductivity | |
+ cudaMalloc((void**)&dev_d_T, memSizeF*3); // hydraulic transmissivity | |
+ cudaMalloc((void**)&dev_d_Ss, memSizeF); // hydraulic storativi | |
+ cudaMalloc((void**)&dev_d_W, memSizeF); // hydraulic recharge | |
+ cudaMalloc((void**)&dev_d_phi, memSizeF); // cell porosity | |
+ | |
+ checkForCudaErrors("End of initDarcyMemDev"); | |
+} | |
+ | |
+// Free memory | |
+void DEM::freeDarcyMemDev() | |
+{ | |
+ cudaFree(dev_d_H); | |
+ cudaFree(dev_d_H_new); | |
+ cudaFree(dev_d_V); | |
+ cudaFree(dev_d_dH); | |
+ cudaFree(dev_d_K); | |
+ cudaFree(dev_d_T); | |
+ cudaFree(dev_d_Ss); | |
+ cudaFree(dev_d_W); | |
+ cudaFree(dev_d_phi); | |
+} | |
+ | |
+// Transfer to device | |
+void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg) | |
+{ | |
+ checkForCudaErrors("Before attempting cudaMemcpy in " | |
+ "transferDarcyToGlobalDeviceMemory"); | |
+ | |
+ if (verbose == 1 && statusmsg == 1) | |
+ std::cout << " Transfering darcy data to the device: "; | |
+ | |
+ // number of cells | |
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes | |
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes | |
+ unsigned int memSizeF = sizeof(Float) * ncells; | |
+ | |
+ // Kinematic particle values | |
+ cudaMemcpy(dev_d_H, d_H, memSizeF, cudaMemcpyHostToDevice); | |
+ checkForCudaErrors("transferDarcyToGlobalDeviceMemory after first cudaMemc… | |
+ cudaMemcpy(dev_d_H_new, d_H_new, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_V, d_V, memSizeF*3, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_dH, d_dH, memSizeF*3, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_K, d_K, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_T, d_T, memSizeF*3, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_Ss, d_Ss, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_W, d_W, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_d_phi, d_phi, memSizeF, cudaMemcpyHostToDevice); | |
+ | |
+ checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory"); | |
+ if (verbose == 1 && statusmsg == 1) | |
+ std::cout << "Done" << std::endl; | |
+} | |
+ | |
+// Transfer from device | |
+void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg) | |
+{ | |
+ if (verbose == 1 && statusmsg == 1) | |
+ std::cout << " Transfering darcy data from the device: "; | |
+ | |
+ // number of cells | |
+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes | |
+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes | |
+ unsigned int memSizeF = sizeof(Float) * ncells; | |
+ | |
+ // Kinematic particle values | |
+ cudaMemcpy(d_H, dev_d_H, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_H_new, dev_d_H_new, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_V, dev_d_V, memSizeF*3, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_dH, dev_d_dH, memSizeF*3, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_K, dev_d_K, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_T, dev_d_T, memSizeF*3, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_Ss, dev_d_Ss, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_W, dev_d_W, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(d_phi, dev_d_phi, memSizeF, cudaMemcpyDeviceToHost); | |
+ | |
+ checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory"); | |
+ if (verbose == 1 && statusmsg == 1) | |
+ std::cout << "Done" << std::endl; | |
+} | |
+ | |
+// Get linear index from 3D grid position | |
+__device__ unsigned int idx( | |
+ const int x, const int y, const int z) | |
+{ | |
+ // without ghost nodes | |
+ //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z; | |
+ | |
+ // with ghost nodes | |
+ // the ghost nodes are placed at x,y,z = -1 and WIDTH | |
+ return (x+1) + (devC_grid.num[0]+2)*(y+1) + | |
+ (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1); | |
+} | |
+ | |
+__device__ void copyDarcyValsDev( | |
+ unsigned int read, unsigned int write, | |
+ Float* dev_d_H, Float* dev_d_H_new, | |
+ Float3* dev_d_V, Float3* dev_d_dH, | |
+ Float* dev_d_K, Float3* dev_d_T, | |
+ Float* dev_d_Ss, Float* dev_d_W, | |
+ Float* dev_d_phi) | |
+{ | |
+ // Coalesced read | |
+ const Float H = dev_d_H[read]; | |
+ const Float H_new = dev_d_H_new[read]; | |
+ const Float3 V = dev_d_V[read]; | |
+ const Float3 dH = dev_d_dH[read]; | |
+ const Float K = dev_d_K[read]; | |
+ const Float3 T = dev_d_T[read]; | |
+ const Float Ss = dev_d_Ss[read]; | |
+ const Float W = dev_d_W[read]; | |
+ const Float phi = dev_d_phi[read]; | |
+ | |
+ // Coalesced write | |
+ __syncthreads(); | |
+ dev_d_H[write] = H; | |
+ dev_d_H_new[write] = H_new; | |
+ dev_d_V[write] = V; | |
+ dev_d_dH[write] = dH; | |
+ dev_d_K[write] = K; | |
+ dev_d_T[write] = T; | |
+ dev_d_Ss[write] = Ss; | |
+ dev_d_W[write] = W; | |
+ dev_d_phi[write] = phi; | |
+} | |
+ | |
+// Update ghost nodes from their parent cell values | |
+// The edge (diagonal) cells are not written since they are note read | |
+// Launch this kernel for all cells in the grid | |
+__global__ void setDarcyGhostNodesDev( | |
+ Float* dev_d_H, Float* dev_d_H_new, | |
+ Float3* dev_d_V, Float3* dev_d_dH, | |
+ Float* dev_d_K, Float3* dev_d_T, | |
+ Float* dev_d_Ss, Float* dev_d_W, | |
+ Float* dev_d_phi) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // Grid dimensions | |
+ const unsigned int nx = devC_grid.num[0]; | |
+ const unsigned int ny = devC_grid.num[1]; | |
+ const unsigned int nz = devC_grid.num[2]; | |
+ | |
+ // 1D thread index | |
+ const unsigned int cellidx = idx(x,y,z); | |
+ | |
+ // 1D position of ghost node | |
+ unsigned int writeidx; | |
+ | |
+ // check that we are not outside the fluid grid | |
+ if (x < nx && y < ny && z < nz) { | |
+ | |
+ if (x == 0) { | |
+ writeidx = idx(nx,y,z); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ if (x == nx-1) { | |
+ writeidx = idx(-1,y,z); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ | |
+ if (y == 0) { | |
+ writeidx = idx(x,ny,z); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ if (y == ny-1) { | |
+ writeidx = idx(x,-1,z); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ | |
+ if (z == 0) { | |
+ writeidx = idx(x,y,nz); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ if (z == nz-1) { | |
+ writeidx = idx(x,y,-1); | |
+ copyDarcyValsDev(cellidx, writeidx, | |
+ dev_d_H, dev_d_H_new, | |
+ dev_d_V, dev_d_dH, | |
+ dev_d_K, dev_d_T, | |
+ dev_d_Ss, dev_d_W, | |
+ dev_d_phi); | |
+ } | |
+ } | |
+} | |
+ | |
+// Find the porosity in each cell | |
+__global__ void findPorositiesDev( | |
+ unsigned int* dev_cellStart, | |
+ unsigned int* dev_cellEnd, | |
+ Float4* dev_x_sorted, | |
+ Float* dev_d_phi) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // Grid dimensions | |
+ const unsigned int nx = devC_grid.num[0]; | |
+ const unsigned int ny = devC_grid.num[1]; | |
+ const unsigned int nz = devC_grid.num[2]; | |
+ | |
+ // Cell dimensions | |
+ const Float dx = devC_grid.L[0]/nx; | |
+ const Float dy = devC_grid.L[1]/ny; | |
+ const Float dz = devC_grid.L[2]/nz; | |
+ const Float cell_volume = dx*dy*dz; | |
+ | |
+ Float void_volume = cell_volume; | |
+ Float4 xr; // particle pos. and radius | |
+ | |
+ // check that we are not outside the fluid grid | |
+ if (x < nx && y < ny && z < nz) { | |
+ | |
+ // Calculate linear cell ID | |
+ const unsigned int cellID = x + y*devC_grid.num[0] | |
+ + (devC_grid.num[0] * devC_grid.num[1])*z; | |
+ | |
+ // Lowest particle index in cell | |
+ const unsigned int startIdx = dev_cellStart[cellID]; | |
+ | |
+ // Highest particle index in cell | |
+ const unsigned int endIdx = dev_cellEnd[cellID]; | |
+ | |
+ // Iterate over cell particles | |
+ for (unsigned int i = startIdx; i<endIdx; ++i) { | |
+ | |
+ // Read particle position and radius | |
+ __syncthreads(); | |
+ xr = dev_x_sorted[i]; | |
+ | |
+ // Subtract particle volume from void volume | |
+ void_volume -= 4.0/3.0*M_PI*xr.w*xr.w*xr.w; | |
+ } | |
+ | |
+ // Make sure that the porosity is in the interval ]0.0;1.0[ | |
+ const Float phi = fmin(0.99, fmax(0.01, void_volume/cell_volume)); | |
+ | |
+ // Save porosity | |
+ __syncthreads(); | |
+ | |
+ dev_d_phi[idx(x,y,z)] = phi; | |
+ } | |
+} | |
+ | |
+ | |
+// Find cell transmissivities from hydraulic conductivities and cell dimensions | |
+// Make sure to compute the porosities (d_phi) beforehand | |
+// d_factor: Grain size factor for Kozeny-Carman relationship | |
+__global__ void findDarcyTransmissivitiesDev( | |
+ Float* dev_d_K, | |
+ Float3* dev_d_T, | |
+ Float* dev_d_phi, | |
+ Float d_factor) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // Grid dimensions | |
+ const unsigned int nx = devC_grid.num[0]; | |
+ const unsigned int ny = devC_grid.num[1]; | |
+ const unsigned int nz = devC_grid.num[2]; | |
+ | |
+ // Grid sizes | |
+ const Float d_dx = devC_grid.L[0]/nx; | |
+ const Float d_dy = devC_grid.L[1]/ny; | |
+ const Float d_dz = devC_grid.L[2]/nz; | |
+ | |
+ // Density of the fluid [kg/m^3] | |
+ const Float rho = 1000.0; | |
+ | |
+ // Check that we are not outside the fluid grid | |
+ if (x < nx && y < ny && z < nz) { | |
+ | |
+ // 1D thread index | |
+ const unsigned int cellidx = idx(x,y,z); | |
+ | |
+ __syncthreads(); | |
+ | |
+ // Cell porosity [-] | |
+ const Float phi = dev_d_phi[cellidx]; | |
+ | |
+ // Calculate permeability from the Kozeny-Carman relationship | |
+ // Nelson 1994 eq. 1c | |
+ // Boek 2012 eq. 16 | |
+ //k = a*phi*phi*phi/(1.0 - phi*phi); | |
+ // Schwartz and Zhang 2003 | |
+ Float k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor; | |
+ | |
+ | |
+ __syncthreads(); | |
+ | |
+ // Save hydraulic conductivity [m/s] | |
+ const Float K = k*rho*-devC_params.g[2]/devC_params.nu; | |
+ //K = 0.5; | |
+ dev_d_K[cellidx] = K; | |
+ | |
+ // Hydraulic transmissivity [m2/s] | |
+ Float3 T = {K*d_dx, K*d_dy, K*d_dz}; | |
+ dev_d_T[cellidx] = T; | |
+ | |
+ } | |
+} | |
+ | |
+// Find the spatial gradient in e.g.pressures per cell | |
+// using first order central differences | |
+__global__ void findDarcyGradientsDev( | |
+ Float* dev_scalarfield, // in | |
+ Float3* dev_vectorfield) // out | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // Grid dimensions | |
+ const unsigned int nx = devC_grid.num[0]; | |
+ const unsigned int ny = devC_grid.num[1]; | |
+ const unsigned int nz = devC_grid.num[2]; | |
+ | |
+ // Grid sizes | |
+ const Float dx = devC_grid.L[0]/nx; | |
+ const Float dy = devC_grid.L[1]/ny; | |
+ const Float dz = devC_grid.L[2]/nz; | |
+ | |
+ // 1D thread index | |
+ const unsigned int cellidx = idx(x,y,z); | |
+ | |
+ // Check that we are not outside the fluid grid | |
+ Float3 gradient; | |
+ if (x < nx && y < ny && z < nz) { | |
+ | |
+ __syncthreads(); | |
+ | |
+ // x | |
+ gradient.x = | |
+ (dev_scalarfield[idx(x+1,y,z)] - dev_scalarfield[idx(x-1,y,z)]) | |
+ /(2.0*dx); | |
+ | |
+ // y | |
+ gradient.y = | |
+ (dev_scalarfield[idx(x,y+1,z)] - dev_scalarfield[idx(x,y-1,z)]) | |
+ /(2.0*dy); | |
+ | |
+ // z | |
+ gradient.z = | |
+ (dev_scalarfield[idx(x,y,z+1)] - dev_scalarfield[idx(x,y,z-1)]) | |
+ /(2.0*dz); | |
+ | |
+ __syncthreads(); | |
+ dev_vectorfield[cellidx] = gradient; | |
+ } | |
+} | |
+ | |
+// Arithmetic mean of two numbers | |
+__device__ Float ameanDev(Float a, Float b) { | |
+ return (a+b)*0.5; | |
+} | |
+ | |
+// Harmonic mean of two numbers | |
+__device__ Float hmeanDev(Float a, Float b) { | |
+ return (2.0*a*b)/(a+b); | |
+} | |
+ | |
+// Perform an explicit step. | |
+__global__ void explDarcyStepDev( | |
+ Float* dev_d_H, | |
+ Float* dev_d_H_new, | |
+ Float3* dev_d_T, | |
+ Float* dev_d_Ss, | |
+ Float* dev_d_W) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // Grid dimensions | |
+ const unsigned int nx = devC_grid.num[0]; | |
+ const unsigned int ny = devC_grid.num[1]; | |
+ const unsigned int nz = devC_grid.num[2]; | |
+ | |
+ // Grid sizes | |
+ const Float dx = devC_grid.L[0]/nx; | |
+ const Float dy = devC_grid.L[1]/ny; | |
+ const Float dz = devC_grid.L[2]/nz; | |
+ | |
+ // 1D thread index | |
+ const unsigned int cellidx = idx(x,y,z); | |
+ | |
+ // Check that we are not outside the fluid grid | |
+ if (x < nx && y < ny && z < nz) { | |
+ | |
+ // Explicit 3D finite difference scheme | |
+ // new = old + production*timestep + gradient*timestep | |
+ | |
+ // Enforce Dirichlet BC | |
+ if (x == 0 || y == 0 || z == 0 || | |
+ x == nx-1 || y == ny-1 || z == nz-1) { | |
+ __syncthreads(); | |
+ dev_d_H_new[cellidx] = dev_d_H[cellidx]; | |
+ } else { | |
+ | |
+ // Cell hydraulic conductivity | |
+ __syncthreads(); | |
+ //const Float K = dev_d_K[cellidx]; | |
+ | |
+ // Cell hydraulic transmissivities | |
+ const Float3 T = dev_d_T[cellidx]; | |
+ //const Float Tx = K*dx; | |
+ //const Float Ty = K*dy; | |
+ //const Float Tz = K*dz; | |
+ | |
+ // Harmonic mean of transmissivity | |
+ // (in neg. and pos. direction along axis from cell) | |
+ __syncthreads(); | |
+ const Float Tx_n = hmeanDev(T.x, dev_d_T[idx(x-1,y,z)].x); | |
+ const Float Tx_p = hmeanDev(T.x, dev_d_T[idx(x+1,y,z)].x); | |
+ const Float Ty_n = hmeanDev(T.y, dev_d_T[idx(x,y-1,z)].y); | |
+ const Float Ty_p = hmeanDev(T.y, dev_d_T[idx(x,y+1,z)].y); | |
+ const Float Tz_n = hmeanDev(T.z, dev_d_T[idx(x,y,z-1)].z); | |
+ const Float Tz_p = hmeanDev(T.z, dev_d_T[idx(x,y,z+1)].z); | |
+ | |
+ // Cell hydraulic storativity | |
+ const Float S = dev_d_Ss[cellidx]*dx*dy*dz; | |
+ | |
+ // Cell hydraulic head | |
+ const Float H = dev_d_H[cellidx]; | |
+ | |
+ // Laplacian operator | |
+ const Float deltaH = devC_dt/S * | |
+ ( Tx_n * (dev_d_H[idx(x-1,y,z)] - H)/(dx*dx) | |
+ + Tx_p * (dev_d_H[idx(x+1,y,z)] - H)/(dx*dx) | |
+ + Ty_n * (dev_d_H[idx(x,y-1,z)] - H)/(dy*dy) | |
+ + Ty_p * (dev_d_H[idx(x,y+1,z)] - H)/(dy*dy) | |
+ + Tz_n * (dev_d_H[idx(x,y,z-1)] - H)/(dy*dz) | |
+ + Tz_p * (dev_d_H[idx(x,y,z+1)] - H)/(dy*dz) | |
+ + dev_d_W[cellidx] ); | |
+ | |
+ // Calculate new hydraulic pressure in cell | |
+ __syncthreads(); | |
+ dev_d_H_new[cellidx] = H + deltaH; | |
+ } | |
+ } | |
+} | |
+ | |
+// Find cell velocity | |
+__global__ void findDarcyVelocitiesDev( | |
+ Float* dev_d_H, | |
+ Float3* dev_d_dH, | |
+ Float3* dev_d_V, | |
+ Float* dev_d_phi, | |
+ Float* dev_d_K) | |
+{ | |
+ // 3D thread index | |
+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
+ | |
+ // 1D thread index | |
+ const unsigned int cellidx = idx(x,y,z); | |
+ | |
+ // Check that we are not outside the fluid grid | |
+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
+ | |
+ // Flux [m/s]: q = -k/nu * dH | |
+ // Pore velocity [m/s]: v = q/n | |
+ | |
+ // Dynamic viscosity | |
+ Float nu = devC_params.nu; | |
+ | |
+ __syncthreads(); | |
+ const Float3 dH = dev_d_dH[cellidx]; | |
+ const Float K = dev_d_K[cellidx]; | |
+ const Float phi = dev_d_phi[cellidx]; | |
+ | |
+ // Calculate flux | |
+ // The sign might need to be reversed, depending on the | |
+ // grid orientation | |
+ Float3 q = MAKE_FLOAT3( | |
+ -K/nu * dH.x, | |
+ -K/nu * dH.y, | |
+ -K/nu * dH.z); | |
+ | |
+ // Calculate velocity | |
+ Float3 v = MAKE_FLOAT3( | |
+ v.x = q.x/phi, | |
+ v.y = q.y/phi, | |
+ v.z = q.z/phi); | |
+ | |
+ // Save velocity | |
+ __syncthreads(); | |
+ dev_d_V[cellidx] = v; | |
+ } | |
+} | |
+ | |
+// Solve Darcy flow on a regular, cubic grid | |
+/*void DEM::initDarcyDev(const Float cellsizemultiplier) | |
+{ | |
+ if (params.nu <= 0.0) { | |
+ std::cerr << "Error in initDarcy. The dymamic viscosity (params.nu), " | |
+ << "should be larger than 0.0, but is " << params.nu << std::endl; | |
+ exit(1); | |
+ } | |
+ | |
+ // Number of cells | |
+ d_nx = floor(grid.num[0]*cellsizemultiplier); | |
+ d_ny = floor(grid.num[1]*cellsizemultiplier); | |
+ d_nz = floor(grid.num[2]*cellsizemultiplier); | |
+ | |
+ // Cell size | |
+ d_dx = grid.L[0]/d_nx; | |
+ d_dy = grid.L[1]/d_ny; | |
+ d_dz = grid.L[2]/d_nz; | |
+ | |
+ if (verbose == 1) { | |
+ std::cout << " - Fluid grid dimensions: " | |
+ << d_nx << "*" | |
+ << d_ny << "*" | |
+ << d_nz << std::endl; | |
+ std::cout << " - Fluid grid cell size: " | |
+ << d_dx << "*" | |
+ << d_dy << "*" | |
+ << d_dz << std::endl; | |
+ } | |
+ | |
+ initDarcyMemDev(); | |
+ initDarcyVals(); | |
+ findDarcyTransmissivities(); | |
+ | |
+ checkDarcyTimestep(); | |
+ | |
+ transferDarcyToGlobalDeviceMemory(1); | |
+}*/ | |
+ | |
+// Print final heads and free memory | |
+void DEM::endDarcyDev() | |
+{ | |
+ /*FILE* Kfile; | |
+ if ((Kfile = fopen("d_K.txt","w"))) { | |
+ printDarcyArray(Kfile, d_K); | |
+ fclose(Kfile); | |
+ } else { | |
+ fprintf(stderr, "Error, could not open d_K.txt\n"); | |
+ }*/ | |
+ //printDarcyArray(stdout, d_phi, "d_phi"); | |
+ //printDarcyArray(stdout, d_K, "d_K"); | |
+ //printDarcyArray(stdout, d_H, "d_H"); | |
+ //printDarcyArray(stdout, d_V, "d_V"); | |
+ freeDarcyMemDev(); | |
+} | |
+ | |
+// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -27,7 +27,7 @@ | |
#include "integration.cuh" | |
#include "raytracer.cuh" | |
#include "latticeboltzmann.cuh" | |
-//#include "darcy.cuh" | |
+#include "darcy.cuh" | |
// Wrapper function for initializing the CUDA components. | |
t@@ -450,15 +450,20 @@ __host__ void DEM::transferToGlobalDeviceMemory(int stat… | |
} | |
// Fluid arrays | |
- if (params.nu > 0.0 && darcy == 0) { | |
+ if (params.nu > 0.0) { | |
+ if (darcy == 0) { | |
#ifdef LBM_GPU | |
- cudaMemcpy( dev_f, f, | |
- sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19, | |
- cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_v_rho, v_rho, | |
- sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2], | |
- cudaMemcpyHostToDevice); | |
+ cudaMemcpy( dev_f, f, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19, | |
+ cudaMemcpyHostToDevice); | |
+ cudaMemcpy( dev_v_rho, v_rho, | |
+ sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2], | |
+ cudaMemcpyHostToDevice); | |
#endif | |
+ } //else { | |
+ //transferDarcyToGlobalDeviceMemory(1); | |
+ //} | |
+ // Darcy arrays aren't ready yet | |
} | |
checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
t@@ -530,16 +535,20 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost); | |
// Fluid arrays | |
+ if (params.nu > 0.0) { | |
+ if (darcy == 0) { | |
#ifdef LBM_GPU | |
- if (params.nu > 0.0 && darcy == 0) { | |
cudaMemcpy( f, dev_f, | |
sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19, | |
cudaMemcpyDeviceToHost); | |
cudaMemcpy(v_rho, dev_v_rho, | |
sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2], | |
cudaMemcpyDeviceToHost); | |
- } | |
#endif | |
+ } else { | |
+ transferDarcyFromGlobalDeviceMemory(0); | |
+ } | |
+ } | |
checkForCudaErrors("End of transferFromGlobalDeviceMemory"); | |
} | |
t@@ -580,13 +589,13 @@ __host__ void DEM::startTime() | |
unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock); | |
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid | |
- // Use 3D block and grid layout for Lattice-Boltzmann fluid calculations | |
+ // Use 3D block and grid layout for fluid calculations | |
dim3 dimBlockFluid(8, 8, 8); // 512 threads per block | |
dim3 dimGridFluid( | |
iDivUp(grid.num[0], dimBlockFluid.x), | |
iDivUp(grid.num[1], dimBlockFluid.y), | |
iDivUp(grid.num[2], dimBlockFluid.z)); | |
- if (dimGridFluid.z > 64) { | |
+ if (dimGridFluid.z > 64 && params.nu > 0.0) { | |
cerr << "Error: dimGridFluid.z > 64" << endl; | |
exit(1); | |
} | |
t@@ -634,18 +643,32 @@ __host__ void DEM::startTime() | |
fclose(fp); | |
// Initialize fluid distribution array | |
- if (params.nu > 0.0 && darcy == 0) { | |
+ Float d_factor; | |
+ if (params.nu > 0.0) { | |
+ if (darcy == 0) { | |
#ifdef LBM_GPU | |
- initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f); | |
- cudaThreadSynchronize(); | |
-#else | |
+ initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f); | |
+ cudaThreadSynchronize(); | |
+#endif | |
+ } else if (darcy == 1) { | |
#ifdef DARCY_GPU | |
- initFluid(v_rho, f, grid.num[0], grid.num[1], grid.num[2]); | |
+ const Float cellsizemultiplier = 1.0; | |
+ initDarcy(cellsizemultiplier); | |
+ initDarcyMemDev(); | |
+ transferDarcyToGlobalDeviceMemory(1); | |
+ | |
+ // Representative grain radius | |
+ const Float r_bar2 = meanRadius()*2.0; | |
+ // Grain size factor for Kozeny-Carman relationship | |
+ d_factor = r_bar2*r_bar2/180.0; | |
#else | |
- const Float cellsizemultiplier = 1.0; | |
- initDarcy(cellsizemultiplier); | |
-#endif | |
+ const Float cellsizemultiplier = 1.0; | |
+ initDarcy(cellsizemultiplier); | |
#endif | |
+ } else { | |
+ std::cerr << "Error, darcy value (" << darcy | |
+ << ") not understood." << std::endl; | |
+ } | |
} | |
if (verbose == 1) { | |
t@@ -677,6 +700,12 @@ __host__ void DEM::startTime() | |
double t_summation = 0.0; | |
double t_integrateWalls = 0.0; | |
+ double t_findPorositiesDev = 0.0; | |
+ double t_findDarcyTransmissivitiesDev = 0.0; | |
+ double t_explDarcyStepDev = 0.0; | |
+ double t_findDarcyGradientsDev = 0.0; | |
+ double t_findDarcyVelocitiesDev = 0.0; | |
+ | |
if (PROFILING == 1) { | |
cudaEventCreate(&kernel_tic); | |
cudaEventCreate(&kernel_toc); | |
t@@ -866,17 +895,90 @@ __host__ void DEM::startTime() | |
} | |
// Solve darcy flow through grid | |
- if (darcy == 1) { | |
+ if (params.nu > 0.0 && darcy == 1) { | |
#ifdef DARCY_GPU | |
- std::cout << "GPU darcy" << std::endl; | |
+ checkForCudaErrors("Before findPorositiesDev", iter); | |
+ // Find cell porosities | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findPorositiesDev<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_cellStart, | |
+ dev_cellEnd, | |
+ dev_x_sorted, | |
+ dev_d_phi); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findPorositiesDev); | |
+ checkForCudaErrors("Post findPorositiesDev", iter); | |
+ | |
+ // Find resulting cell transmissivities | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findDarcyTransmissivitiesDev<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_d_K, | |
+ dev_d_T, | |
+ dev_d_phi, | |
+ d_factor); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findDarcyTransmissivitiesDev); | |
+ checkForCudaErrors("Post findDarcyTransmissivitiesDev", iter); | |
+ | |
+ // Perform explicit Darcy time step | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ explDarcyStepDev<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_d_H, | |
+ dev_d_H_new, | |
+ dev_d_T, | |
+ dev_d_Ss, | |
+ dev_d_W); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_explDarcyStepDev); | |
+ checkForCudaErrors("Post explDarcyStepDev", iter); | |
+ | |
+ // Flop flop | |
+ swapFloatArrays(dev_d_H, dev_d_H_new); | |
+ | |
+ // Find the pressure gradients | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findDarcyGradientsDev<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_d_H, dev_d_dH); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findDarcyGradientsDev); | |
+ checkForCudaErrors("Post findDarcyGradientsDev", iter); | |
+ | |
+ // Find the pressure gradients | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ findDarcyVelocitiesDev<<<dimGridFluid, dimBlockFluid>>>( | |
+ dev_d_H, | |
+ dev_d_dH, | |
+ dev_d_V, | |
+ dev_d_phi, | |
+ dev_d_K); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_findDarcyVelocitiesDev); | |
+ checkForCudaErrors("Post findDarcyVelocitiesDev", iter); | |
+ | |
+#else | |
// Copy device data to host memory | |
transferFromGlobalDeviceMemory(); | |
// Pause the CPU thread until all CUDA calls previously issued are… | |
cudaThreadSynchronize(); | |
- // Perform explicit Darcy time step | |
+ // Perform a Darcy time step on the CPU | |
explDarcyStep(); | |
// Transfer data from host to device memory | |
t@@ -884,9 +986,6 @@ __host__ void DEM::startTime() | |
// Pause the CPU thread until all CUDA calls previously issued are… | |
cudaThreadSynchronize(); | |
-#else | |
- // Perform a Darcy time step on the CPU | |
- explDarcyStep(); | |
#endif | |
} | |
t@@ -1086,6 +1185,22 @@ __host__ void DEM::startTime() | |
<< "\t(" << 100.0*t_summation/t_sum << " %)\n" | |
<< " - integrateWalls:\t" << t_integrateWalls/1000.0 << " s" | |
<< "\t(" << 100.0*t_integrateWalls/t_sum << " %)\n"; | |
+ if (darcy == 1) { | |
+ cout | |
+ << " - findPorositiesDev:\t" << t_findPorositiesDev/1000.0 << " s" | |
+ << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n" | |
+ << " - findDarcyTransmissivitiesDev:\t" << | |
+ t_findDarcyTransmissivitiesDev/1000.0 << " s" | |
+ << "\t(" << 100.0*t_findDarcyTransmissivitiesDev/t_sum << " %)\n" | |
+ << " - explDarcyStepDev:\t" << t_explDarcyStepDev/1000.0 << " s" | |
+ << "\t(" << 100.0*t_explDarcyStepDev/t_sum << " %)\n" | |
+ << " - findDarcyGradientsDev:\t" << t_findDarcyGradientsDev/1000.0 | |
+ << " s" | |
+ << "\t(" << 100.0*t_findDarcyGradientsDev/t_sum << " %)\n" | |
+ << " - findDarcyVelocitiesDev:\t" | |
+ << t_findDarcyVelocitiesDev/1000.0 << " s" | |
+ << "\t(" << 100.0*t_findDarcyVelocitiesDev/t_sum << " %)\n"; | |
+ } | |
} | |
t@@ -1098,6 +1213,11 @@ __host__ void DEM::startTime() | |
delete[] k.delta_t; | |
#ifndef DARCY_GPU | |
+ if (darcy == 1) { | |
+ endDarcyDev(); | |
+ endDarcy(); | |
+ } | |
+#else | |
if (darcy == 1) | |
endDarcy(); | |
#endif | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -230,6 +230,9 @@ class DEM { | |
// Find darcy flow velocities from specific flux (q) | |
void findDarcyVelocities(); | |
+ // Returns the mean particle radius | |
+ Float meanRadius(); | |
+ | |
// Get linear (1D) index from 3D coordinate | |
unsigned int idx( | |
const unsigned int x, | |
t@@ -238,9 +241,11 @@ class DEM { | |
// Initialize Darcy values and arrays | |
void initDarcy(const Float cellsizemultiplier = 1.0); | |
+ void initDarcyDev(const Float cellsizemultiplier = 1.0); | |
// Clean up Darcy arrays | |
void endDarcy(); | |
+ void endDarcyDev(); | |
// Check whether the explicit integration is going to meet the | |
// stability criteria |