GopherProxy

	ported Darcy routines to CUDA, compiles, needs testing - sphere - GPU-based 3D…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 1658688f8ba0d701c10244801f13fd46821c799c
	parent 27f9122518041ed1ed936e25643d2f9f61cc8100
	Author: Anders Damsgaard <[email protected]>
	Date: Wed, 9 Oct 2013 11:11:55 +0200

	ported Darcy routines to CUDA, compiles, needs testing

	Diffstat:
	M src/darcy.cpp | 221 ++++++++++++++++++-----------…
	A src/darcy.cuh | 616 +++++++++++++++++++++++++++++…
	M src/device.cu | 172 ++++++++++++++++++++++++++---…
	M src/sphere.h | 5 +++++

	4 files changed, 898 insertions(+), 116 deletions(-)
	---
	diff --git a/src/darcy.cpp b/src/darcy.cpp
	t@@ -10,6 +10,9 @@
	#include "sphere.h"
	#include "utility.h"

	+// Enable line below to make x and y boundaries periodic
	+#define PERIODIC_XY
	+
	// Initialize memory
	void DEM::initDarcyMem()
	{
	t@@ -50,7 +53,7 @@ unsigned int DEM::idx(
	//return x + d_nx*y + d_nx*d_ny*z;

	// with ghost nodes
	- // the ghost nodes are placed at -1 and WIDTH
	+ // the ghost nodes are placed at x,y,z = -1 and WIDTH
	return (x+1) + (d_nx+2)*(y+1) + (d_nx+2)*(d_ny+2)*(z+1);
	}

	t@@ -75,17 +78,26 @@ void DEM::initDarcyVals()
	// read from input binary

	// Hydraulic permeability [m^2]
	- d_K[cellidx] = k*rho*-params.g[2]/params.nu;
	+ //d_K[cellidx] = k*rho*-params.g[2]/params.nu;
	+ d_K[cellidx] = 0.5;

	// Hydraulic storativity [-]
	d_Ss[cellidx] = 8.0e-3;
	//d_Ss[cellidx] = 1.0;

	- // Hydraulic recharge [Pa/s]
	+ // Hydraulic recharge [s^-1]
	d_W[cellidx] = 0.0;
	}
	}
	}
	+
	+ // Extract water from all cells in center
	+ ix = d_nx/2-1; iy = d_ny/2-1;
	+ Float cellvolume = d_dx*d_dy*d_dz;
	+ for (iz=0; iz<d_nz; ++iz) {
	+ //d_W[idx(ix,iy,iz)] = -0.1*cellvolume;
	+ d_W[idx(ix,iy,iz)] = -1.0;
	+ }
	}


	t@@ -169,7 +181,12 @@ void DEM::findDarcyTransmissivities()

	// Kozeny-Carman parameter
	//Float a = 1.0e-8;
	- Float a = 1.0;
	+ //Float a = 1.0;
	+
	+ // Representative grain radius
	+ Float r_bar2 = meanRadius()*2.0;
	+ // Grain size factor for Kozeny-Carman relationship
	+ Float d_factor = r_bar2*r_bar2/180.0;

	unsigned int ix, iy, iz, cellidx;
	Float K, k;
	t@@ -185,7 +202,9 @@ void DEM::findDarcyTransmissivities()
	// Calculate permeability from the Kozeny-Carman relationship
	// Nelson 1994 eq. 1c
	// Boek 2012 eq. 16
	- k = a*phi*phi*phi/(1.0 - phi*phi);
	+ //k = a*phi*phi*phi/(1.0 - phi*phi);
	+ // Schwartz and Zhang 2003
	+ k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor;

	// Save hydraulic conductivity [m/s]
	//K = d_K[cellidx];
	t@@ -261,7 +280,8 @@ void DEM::findDarcyGradients()
	// With ghost-nodes
	for (ix=0; ix<d_nx; ++ix) {
	for (iy=0; iy<d_ny; ++iy) {
	- for (iz=1; iz<d_nz-1; ++iz) {
	+ //for (iz=1; iz<d_nz-1; ++iz) {
	+ for (iz=0; iz<d_nz; ++iz) {

	cellidx = idx(ix,iy,iz);

	t@@ -335,9 +355,10 @@ void DEM::explDarcyStep()
	checkDarcyTimestep();

	// Cell dims squared
	- const Float dx2 = d_dx*d_dx;
	- const Float dy2 = d_dy*d_dy;
	- const Float dz2 = d_dz*d_dz;
	+ const Float dxdx = d_dx*d_dx;
	+ const Float dydy = d_dy*d_dy;
	+ const Float dzdz = d_dz*d_dz;
	+ const Float dxdydz = d_dx*d_dy*d_dz;

	//setDarcyBCNeumannZero();

	t@@ -358,95 +379,104 @@ void DEM::explDarcyStep()
	// Cell linear index
	cellidx = idx(ix,iy,iz);

	- // If x,y,z boundaries are fixed values:
	- // Enforce Dirichlet BC
	- if (ix == 0 || iy == 0 || iz == 0 ||
	+ // If x,y,z boundaries are fixed values: Enforce Dirichlet BC
	+ /*if (ix == 0 || iy == 0 || iz == 0 ||
	ix == d_nx-1 || iy == d_ny-1 || iz == d_nz-1) {
	+ d_H_new[cellidx] = d_H[cellidx];*/
	+
	+ // If z boundaries are fixed val, x and y are periodic:
	+ /*if (iz == 0 || iz == d_nz-1) {
	d_H_new[cellidx] = d_H[cellidx];
	- // If z boundaries are periodic:
	- //if (iz == 0 || iz == d_nz-1) {
	- //d_H_new[cellidx] = d_H[cellidx];
	- } else {

	- // Cell hydraulic conductivity
	- K = d_K[cellidx];
	-
	- // Cell hydraulic transmissivities
	- Tx = K*d_dx;
	- Ty = K*d_dy;
	- Tz = K*d_dz;
	-
	- // Cell hydraulic head
	- H = d_H[cellidx];
	-
	- // Harmonic mean of transmissivity
	- // (in neg. and pos. direction along axis from cell)
	- // with periodic x and y boundaries
	- // without ghost nodes
	- /*
	- if (ix == 0)
	- gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x)
	- * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2;
	- else
	- gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
	- * (d_H[idx(ix-1,iy,iz)] - H)/dx2;
	-
	- if (ix == d_nx-1)
	- gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x)
	- * (d_H[idx(0,iy,iz)] - H)/dx2;
	- else
	- gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
	- * (d_H[idx(ix+1,iy,iz)] - H)/dx2;
	-
	- if (iy == 0)
	- grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y)
	- * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2;
	- else
	- grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
	- * (d_H[idx(ix,iy-1,iz)] - H)/dy2;
	-
	- if (iy == d_ny-1)
	- grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y)
	- * (d_H[idx(ix,0,iz)] - H)/dy2;
	- else
	- grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
	- * (d_H[idx(ix,iy+1,iz)] - H)/dy2;
	- */
	+ } else {*/
	+
	+ // Cell hydraulic conductivity
	+ K = d_K[cellidx];
	+
	+ // Cell hydraulic transmissivities
	+ Tx = K*d_dx;
	+ Ty = K*d_dy;
	+ Tz = K*d_dz;

	+ // Cell hydraulic head
	+ H = d_H[cellidx];
	+
	+ // Harmonic mean of transmissivity
	+ // (in neg. and pos. direction along axis from cell)
	+ // with periodic x and y boundaries
	+ // without ghost nodes
	+ /*
	+ if (ix == 0)
	+ gradx_n = hmean(Tx, d_T[idx(d_nx-1,iy,iz)].x)
	+ * (d_H[idx(d_nx-1,iy,iz)] - H)/dx2;
	+ else
	gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
	* (d_H[idx(ix-1,iy,iz)] - H)/dx2;
	+
	+ if (ix == d_nx-1)
	+ gradx_p = hmean(Tx, d_T[idx(0,iy,iz)].x)
	+ * (d_H[idx(0,iy,iz)] - H)/dx2;
	+ else
	gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
	* (d_H[idx(ix+1,iy,iz)] - H)/dx2;

	+ if (iy == 0)
	+ grady_n = hmean(Ty, d_T[idx(ix,d_ny-1,iz)].y)
	+ * (d_H[idx(ix,d_ny-1,iz)] - H)/dy2;
	+ else
	grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
	* (d_H[idx(ix,iy-1,iz)] - H)/dy2;
	+
	+ if (iy == d_ny-1)
	+ grady_p = hmean(Ty, d_T[idx(ix,0,iz)].y)
	+ * (d_H[idx(ix,0,iz)] - H)/dy2;
	+ else
	grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
	* (d_H[idx(ix,iy+1,iz)] - H)/dy2;
	-
	+ */
	+
	+ gradx_n = hmean(Tx, d_T[idx(ix-1,iy,iz)].x)
	+ * (d_H[idx(ix-1,iy,iz)] - H)/dxdx;
	+ gradx_p = hmean(Tx, d_T[idx(ix+1,iy,iz)].x)
	+ * (d_H[idx(ix+1,iy,iz)] - H)/dxdx;
	+
	+ grady_n = hmean(Ty, d_T[idx(ix,iy-1,iz)].y)
	+ * (d_H[idx(ix,iy-1,iz)] - H)/dydy;
	+ grady_p = hmean(Ty, d_T[idx(ix,iy+1,iz)].y)
	+ * (d_H[idx(ix,iy+1,iz)] - H)/dydy;
	+
	+ // Neumann (no-flow) boundary condition at +z and -z boundaries
	+ // enforced by a gradient value of 0.0
	+ if (iz == 0)
	+ gradz_n = 0.0;
	+ else
	gradz_n = hmean(Tz, d_T[idx(ix,iy,iz-1)].z)
	- * (d_H[idx(ix,iy,iz-1)] - H)/dz2;
	+ * (d_H[idx(ix,iy,iz-1)] - H)/dzdz;
	+ if (iz == d_nz-1)
	+ gradz_p = 0.0;
	+ else
	gradz_p = hmean(Tz, d_T[idx(ix,iy,iz+1)].z)
	- * (d_H[idx(ix,iy,iz+1)] - H)/dz2;
	-
	- /*std::cerr << ix << ',' << iy << ',' << iz << '\t'
	- << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t'
	- << gradx_n << ',' << gradx_p << '\t'
	- << grady_n << ',' << grady_p << '\t'
	- << gradz_n << ',' << gradz_p << std::endl;*/
	-
	- // Cell hydraulic storativity
	- S = d_Ss[cellidx]*d_dx*d_dy*d_dz;
	-
	- // Laplacian operator
	- deltaH = time.dt/S *
	- ( gradx_n + gradx_p
	- + grady_n + grady_p
	- + gradz_n + gradz_p
	- + d_W[cellidx] );
	-
	- // Calculate new hydraulic pressure in cell
	- d_H_new[cellidx] = H + deltaH;
	- }
	+ * (d_H[idx(ix,iy,iz+1)] - H)/dzdz;
	+
	+ /*std::cerr << ix << ',' << iy << ',' << iz << '\t'
	+ << H << '\t' << Tx << ',' << Ty << ',' << Tz << '\t'
	+ << gradx_n << ',' << gradx_p << '\t'
	+ << grady_n << ',' << grady_p << '\t'
	+ << gradz_n << ',' << gradz_p << std::endl;*/
	+
	+ // Cell hydraulic storativity
	+ S = d_Ss[cellidx]*dxdydz;
	+
	+ // Laplacian operator
	+ deltaH = time.dt/S *
	+ ( gradx_n + gradx_p
	+ + grady_n + grady_p
	+ + gradz_n + gradz_p
	+ + d_W[cellidx] );
	+
	+ // Calculate new hydraulic pressure in cell
	+ d_H_new[cellidx] = H + deltaH;
	+ //}
	}
	}
	}
	t@@ -607,6 +637,7 @@ Float DEM::cellPorosity(
	return phi;
	}

	+// Calculate the porosity for each cell
	void DEM::findPorosities()
	{
	unsigned int ix, iy, iz, cellidx;
	t@@ -619,6 +650,16 @@ void DEM::findPorosities()
	}
	}

	+// Returns the mean particle radius
	+Float DEM::meanRadius()
	+{
	+ unsigned int i;
	+ Float r_sum;
	+ for (i=0; i<np; ++i)
	+ r_sum += k.x[i].w;
	+ return r_sum/((Float)np);
	+}
	+
	// Find particles with centres inside a spatial interval
	// NOTE: This function is untested and unused
	std::vector<unsigned int> DEM::particlesInCell(
	t@@ -709,8 +750,8 @@ void DEM::checkDarcyTimestep()
	if (value > 0.5) {
	std::cerr << "Error! The explicit darcy solution will be unstable.\n"
	<< "This happens due to a combination of the following:\n"
	- << " - The transmissivity T (i.e. hydraulic conductivity, K) is to…
	- << " (" << T_max << ")\n"
	+ << " - The transmissivity T (i.e. hydraulic conductivity, K)"
	+ << " is too large (" << T_max << ")\n"
	<< " - The storativity S is too small"
	<< " (" << S_min << ")\n"
	<< " - The time step is too large"
	t@@ -722,7 +763,7 @@ void DEM::checkDarcyTimestep()
	}
	}

	-// Solve Darcy flow on a regular, cubic grid
	+// Initialize darcy arrays, their values, and check the time step length
	void DEM::initDarcy(const Float cellsizemultiplier)
	{
	if (params.nu <= 0.0) {
	t@@ -762,15 +803,15 @@ void DEM::initDarcy(const Float cellsizemultiplier)
	// Print final heads and free memory
	void DEM::endDarcy()
	{
	- FILE* Kfile;
	+ /*FILE* Kfile;
	if ((Kfile = fopen("d_K.txt","w"))) {
	printDarcyArray(Kfile, d_K);
	fclose(Kfile);
	} else {
	fprintf(stderr, "Error, could not open d_K.txt\n");
	- }
	- printDarcyArray(stdout, d_phi, "d_phi");
	- printDarcyArray(stdout, d_K, "d_K");
	+ }*/
	+ //printDarcyArray(stdout, d_phi, "d_phi");
	+ //printDarcyArray(stdout, d_K, "d_K");
	//printDarcyArray(stdout, d_H, "d_H");
	//printDarcyArray(stdout, d_V, "d_V");
	freeDarcyMem();
	diff --git a/src/darcy.cuh b/src/darcy.cuh
	t@@ -0,0 +1,616 @@
	+// darcy.cu
	+// CUDA implementation of Darcy flow
	+
	+// Enable line below to perform Darcy flow computations on the GPU, disable for
	+// CPU computation
	+#define DARCY_GPU
	+
	+#include <iostream>
	+#include <cuda.h>
	+//#include <cutil_math.h>
	+#include <helper_math.h>
	+
	+#include "vector_arithmetic.h" // for arbitrary prec. vectors
	+#include "sphere.h"
	+#include "datatypes.h"
	+#include "utility.cuh"
	+#include "utility.h"
	+#include "constants.cuh"
	+#include "debug.h"
	+
	+// Initialize memory
	+void DEM::initDarcyMemDev(void)
	+{
	+ // number of cells
	+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
	+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
	+ unsigned int memSizeF = sizeof(Float) * ncells;
	+
	+ cudaMalloc((void**)&dev_d_H, memSizeF); // hydraulic pressure
	+ cudaMalloc((void**)&dev_d_H_new, memSizeF); // new pressure matrix
	+ cudaMalloc((void**)&dev_d_V, memSizeF*3); // cell hydraulic velocity
	+ cudaMalloc((void**)&dev_d_dH, memSizeF*3); // hydraulic pressure gradient
	+ cudaMalloc((void**)&dev_d_K, memSizeF); // hydraulic conductivity
	+ cudaMalloc((void**)&dev_d_T, memSizeF*3); // hydraulic transmissivity
	+ cudaMalloc((void**)&dev_d_Ss, memSizeF); // hydraulic storativi
	+ cudaMalloc((void**)&dev_d_W, memSizeF); // hydraulic recharge
	+ cudaMalloc((void**)&dev_d_phi, memSizeF); // cell porosity
	+
	+ checkForCudaErrors("End of initDarcyMemDev");
	+}
	+
	+// Free memory
	+void DEM::freeDarcyMemDev()
	+{
	+ cudaFree(dev_d_H);
	+ cudaFree(dev_d_H_new);
	+ cudaFree(dev_d_V);
	+ cudaFree(dev_d_dH);
	+ cudaFree(dev_d_K);
	+ cudaFree(dev_d_T);
	+ cudaFree(dev_d_Ss);
	+ cudaFree(dev_d_W);
	+ cudaFree(dev_d_phi);
	+}
	+
	+// Transfer to device
	+void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
	+{
	+ checkForCudaErrors("Before attempting cudaMemcpy in "
	+ "transferDarcyToGlobalDeviceMemory");
	+
	+ if (verbose == 1 && statusmsg == 1)
	+ std::cout << " Transfering darcy data to the device: ";
	+
	+ // number of cells
	+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
	+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
	+ unsigned int memSizeF = sizeof(Float) * ncells;
	+
	+ // Kinematic particle values
	+ cudaMemcpy(dev_d_H, d_H, memSizeF, cudaMemcpyHostToDevice);
	+ checkForCudaErrors("transferDarcyToGlobalDeviceMemory after first cudaMemc…
	+ cudaMemcpy(dev_d_H_new, d_H_new, memSizeF, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_V, d_V, memSizeF*3, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_dH, d_dH, memSizeF*3, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_K, d_K, memSizeF, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_T, d_T, memSizeF*3, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_Ss, d_Ss, memSizeF, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_W, d_W, memSizeF, cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_d_phi, d_phi, memSizeF, cudaMemcpyHostToDevice);
	+
	+ checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
	+ if (verbose == 1 && statusmsg == 1)
	+ std::cout << "Done" << std::endl;
	+}
	+
	+// Transfer from device
	+void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
	+{
	+ if (verbose == 1 && statusmsg == 1)
	+ std::cout << " Transfering darcy data from the device: ";
	+
	+ // number of cells
	+ //unsigned int ncells = d_nx*d_ny*d_nz; // without ghost nodes
	+ unsigned int ncells = (d_nx+2)*(d_ny+2)*(d_nz+2); // with ghost nodes
	+ unsigned int memSizeF = sizeof(Float) * ncells;
	+
	+ // Kinematic particle values
	+ cudaMemcpy(d_H, dev_d_H, memSizeF, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_H_new, dev_d_H_new, memSizeF, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_V, dev_d_V, memSizeF*3, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_dH, dev_d_dH, memSizeF*3, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_K, dev_d_K, memSizeF, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_T, dev_d_T, memSizeF*3, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_Ss, dev_d_Ss, memSizeF, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_W, dev_d_W, memSizeF, cudaMemcpyDeviceToHost);
	+ cudaMemcpy(d_phi, dev_d_phi, memSizeF, cudaMemcpyDeviceToHost);
	+
	+ checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory");
	+ if (verbose == 1 && statusmsg == 1)
	+ std::cout << "Done" << std::endl;
	+}
	+
	+// Get linear index from 3D grid position
	+__device__ unsigned int idx(
	+ const int x, const int y, const int z)
	+{
	+ // without ghost nodes
	+ //return x + dev_grid.num[0]*y + dev_grid.num[0]*dev_grid.num[1]*z;
	+
	+ // with ghost nodes
	+ // the ghost nodes are placed at x,y,z = -1 and WIDTH
	+ return (x+1) + (devC_grid.num[0]+2)*(y+1) +
	+ (devC_grid.num[0]+2)*(devC_grid.num[1]+2)*(z+1);
	+}
	+
	+__device__ void copyDarcyValsDev(
	+ unsigned int read, unsigned int write,
	+ Float* dev_d_H, Float* dev_d_H_new,
	+ Float3* dev_d_V, Float3* dev_d_dH,
	+ Float* dev_d_K, Float3* dev_d_T,
	+ Float* dev_d_Ss, Float* dev_d_W,
	+ Float* dev_d_phi)
	+{
	+ // Coalesced read
	+ const Float H = dev_d_H[read];
	+ const Float H_new = dev_d_H_new[read];
	+ const Float3 V = dev_d_V[read];
	+ const Float3 dH = dev_d_dH[read];
	+ const Float K = dev_d_K[read];
	+ const Float3 T = dev_d_T[read];
	+ const Float Ss = dev_d_Ss[read];
	+ const Float W = dev_d_W[read];
	+ const Float phi = dev_d_phi[read];
	+
	+ // Coalesced write
	+ __syncthreads();
	+ dev_d_H[write] = H;
	+ dev_d_H_new[write] = H_new;
	+ dev_d_V[write] = V;
	+ dev_d_dH[write] = dH;
	+ dev_d_K[write] = K;
	+ dev_d_T[write] = T;
	+ dev_d_Ss[write] = Ss;
	+ dev_d_W[write] = W;
	+ dev_d_phi[write] = phi;
	+}
	+
	+// Update ghost nodes from their parent cell values
	+// The edge (diagonal) cells are not written since they are note read
	+// Launch this kernel for all cells in the grid
	+__global__ void setDarcyGhostNodesDev(
	+ Float* dev_d_H, Float* dev_d_H_new,
	+ Float3* dev_d_V, Float3* dev_d_dH,
	+ Float* dev_d_K, Float3* dev_d_T,
	+ Float* dev_d_Ss, Float* dev_d_W,
	+ Float* dev_d_phi)
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // Grid dimensions
	+ const unsigned int nx = devC_grid.num[0];
	+ const unsigned int ny = devC_grid.num[1];
	+ const unsigned int nz = devC_grid.num[2];
	+
	+ // 1D thread index
	+ const unsigned int cellidx = idx(x,y,z);
	+
	+ // 1D position of ghost node
	+ unsigned int writeidx;
	+
	+ // check that we are not outside the fluid grid
	+ if (x < nx && y < ny && z < nz) {
	+
	+ if (x == 0) {
	+ writeidx = idx(nx,y,z);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+ if (x == nx-1) {
	+ writeidx = idx(-1,y,z);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+
	+ if (y == 0) {
	+ writeidx = idx(x,ny,z);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+ if (y == ny-1) {
	+ writeidx = idx(x,-1,z);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+
	+ if (z == 0) {
	+ writeidx = idx(x,y,nz);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+ if (z == nz-1) {
	+ writeidx = idx(x,y,-1);
	+ copyDarcyValsDev(cellidx, writeidx,
	+ dev_d_H, dev_d_H_new,
	+ dev_d_V, dev_d_dH,
	+ dev_d_K, dev_d_T,
	+ dev_d_Ss, dev_d_W,
	+ dev_d_phi);
	+ }
	+ }
	+}
	+
	+// Find the porosity in each cell
	+__global__ void findPorositiesDev(
	+ unsigned int* dev_cellStart,
	+ unsigned int* dev_cellEnd,
	+ Float4* dev_x_sorted,
	+ Float* dev_d_phi)
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // Grid dimensions
	+ const unsigned int nx = devC_grid.num[0];
	+ const unsigned int ny = devC_grid.num[1];
	+ const unsigned int nz = devC_grid.num[2];
	+
	+ // Cell dimensions
	+ const Float dx = devC_grid.L[0]/nx;
	+ const Float dy = devC_grid.L[1]/ny;
	+ const Float dz = devC_grid.L[2]/nz;
	+ const Float cell_volume = dx*dy*dz;
	+
	+ Float void_volume = cell_volume;
	+ Float4 xr; // particle pos. and radius
	+
	+ // check that we are not outside the fluid grid
	+ if (x < nx && y < ny && z < nz) {
	+
	+ // Calculate linear cell ID
	+ const unsigned int cellID = x + y*devC_grid.num[0]
	+ + (devC_grid.num[0] * devC_grid.num[1])*z;
	+
	+ // Lowest particle index in cell
	+ const unsigned int startIdx = dev_cellStart[cellID];
	+
	+ // Highest particle index in cell
	+ const unsigned int endIdx = dev_cellEnd[cellID];
	+
	+ // Iterate over cell particles
	+ for (unsigned int i = startIdx; i<endIdx; ++i) {
	+
	+ // Read particle position and radius
	+ __syncthreads();
	+ xr = dev_x_sorted[i];
	+
	+ // Subtract particle volume from void volume
	+ void_volume -= 4.0/3.0*M_PI*xr.w*xr.w*xr.w;
	+ }
	+
	+ // Make sure that the porosity is in the interval ]0.0;1.0[
	+ const Float phi = fmin(0.99, fmax(0.01, void_volume/cell_volume));
	+
	+ // Save porosity
	+ __syncthreads();
	+
	+ dev_d_phi[idx(x,y,z)] = phi;
	+ }
	+}
	+
	+
	+// Find cell transmissivities from hydraulic conductivities and cell dimensions
	+// Make sure to compute the porosities (d_phi) beforehand
	+// d_factor: Grain size factor for Kozeny-Carman relationship
	+__global__ void findDarcyTransmissivitiesDev(
	+ Float* dev_d_K,
	+ Float3* dev_d_T,
	+ Float* dev_d_phi,
	+ Float d_factor)
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // Grid dimensions
	+ const unsigned int nx = devC_grid.num[0];
	+ const unsigned int ny = devC_grid.num[1];
	+ const unsigned int nz = devC_grid.num[2];
	+
	+ // Grid sizes
	+ const Float d_dx = devC_grid.L[0]/nx;
	+ const Float d_dy = devC_grid.L[1]/ny;
	+ const Float d_dz = devC_grid.L[2]/nz;
	+
	+ // Density of the fluid [kg/m^3]
	+ const Float rho = 1000.0;
	+
	+ // Check that we are not outside the fluid grid
	+ if (x < nx && y < ny && z < nz) {
	+
	+ // 1D thread index
	+ const unsigned int cellidx = idx(x,y,z);
	+
	+ __syncthreads();
	+
	+ // Cell porosity [-]
	+ const Float phi = dev_d_phi[cellidx];
	+
	+ // Calculate permeability from the Kozeny-Carman relationship
	+ // Nelson 1994 eq. 1c
	+ // Boek 2012 eq. 16
	+ //k = a*phi*phi*phi/(1.0 - phi*phi);
	+ // Schwartz and Zhang 2003
	+ Float k = phi*phi*phi/((1.0-phi)*(1.0-phi)) * d_factor;
	+
	+
	+ __syncthreads();
	+
	+ // Save hydraulic conductivity [m/s]
	+ const Float K = k*rho*-devC_params.g[2]/devC_params.nu;
	+ //K = 0.5;
	+ dev_d_K[cellidx] = K;
	+
	+ // Hydraulic transmissivity [m2/s]
	+ Float3 T = {K*d_dx, K*d_dy, K*d_dz};
	+ dev_d_T[cellidx] = T;
	+
	+ }
	+}
	+
	+// Find the spatial gradient in e.g.pressures per cell
	+// using first order central differences
	+__global__ void findDarcyGradientsDev(
	+ Float* dev_scalarfield, // in
	+ Float3* dev_vectorfield) // out
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // Grid dimensions
	+ const unsigned int nx = devC_grid.num[0];
	+ const unsigned int ny = devC_grid.num[1];
	+ const unsigned int nz = devC_grid.num[2];
	+
	+ // Grid sizes
	+ const Float dx = devC_grid.L[0]/nx;
	+ const Float dy = devC_grid.L[1]/ny;
	+ const Float dz = devC_grid.L[2]/nz;
	+
	+ // 1D thread index
	+ const unsigned int cellidx = idx(x,y,z);
	+
	+ // Check that we are not outside the fluid grid
	+ Float3 gradient;
	+ if (x < nx && y < ny && z < nz) {
	+
	+ __syncthreads();
	+
	+ // x
	+ gradient.x =
	+ (dev_scalarfield[idx(x+1,y,z)] - dev_scalarfield[idx(x-1,y,z)])
	+ /(2.0*dx);
	+
	+ // y
	+ gradient.y =
	+ (dev_scalarfield[idx(x,y+1,z)] - dev_scalarfield[idx(x,y-1,z)])
	+ /(2.0*dy);
	+
	+ // z
	+ gradient.z =
	+ (dev_scalarfield[idx(x,y,z+1)] - dev_scalarfield[idx(x,y,z-1)])
	+ /(2.0*dz);
	+
	+ __syncthreads();
	+ dev_vectorfield[cellidx] = gradient;
	+ }
	+}
	+
	+// Arithmetic mean of two numbers
	+__device__ Float ameanDev(Float a, Float b) {
	+ return (a+b)*0.5;
	+}
	+
	+// Harmonic mean of two numbers
	+__device__ Float hmeanDev(Float a, Float b) {
	+ return (2.0*a*b)/(a+b);
	+}
	+
	+// Perform an explicit step.
	+__global__ void explDarcyStepDev(
	+ Float* dev_d_H,
	+ Float* dev_d_H_new,
	+ Float3* dev_d_T,
	+ Float* dev_d_Ss,
	+ Float* dev_d_W)
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // Grid dimensions
	+ const unsigned int nx = devC_grid.num[0];
	+ const unsigned int ny = devC_grid.num[1];
	+ const unsigned int nz = devC_grid.num[2];
	+
	+ // Grid sizes
	+ const Float dx = devC_grid.L[0]/nx;
	+ const Float dy = devC_grid.L[1]/ny;
	+ const Float dz = devC_grid.L[2]/nz;
	+
	+ // 1D thread index
	+ const unsigned int cellidx = idx(x,y,z);
	+
	+ // Check that we are not outside the fluid grid
	+ if (x < nx && y < ny && z < nz) {
	+
	+ // Explicit 3D finite difference scheme
	+ // new = old + production*timestep + gradient*timestep
	+
	+ // Enforce Dirichlet BC
	+ if (x == 0 || y == 0 || z == 0 ||
	+ x == nx-1 || y == ny-1 || z == nz-1) {
	+ __syncthreads();
	+ dev_d_H_new[cellidx] = dev_d_H[cellidx];
	+ } else {
	+
	+ // Cell hydraulic conductivity
	+ __syncthreads();
	+ //const Float K = dev_d_K[cellidx];
	+
	+ // Cell hydraulic transmissivities
	+ const Float3 T = dev_d_T[cellidx];
	+ //const Float Tx = K*dx;
	+ //const Float Ty = K*dy;
	+ //const Float Tz = K*dz;
	+
	+ // Harmonic mean of transmissivity
	+ // (in neg. and pos. direction along axis from cell)
	+ __syncthreads();
	+ const Float Tx_n = hmeanDev(T.x, dev_d_T[idx(x-1,y,z)].x);
	+ const Float Tx_p = hmeanDev(T.x, dev_d_T[idx(x+1,y,z)].x);
	+ const Float Ty_n = hmeanDev(T.y, dev_d_T[idx(x,y-1,z)].y);
	+ const Float Ty_p = hmeanDev(T.y, dev_d_T[idx(x,y+1,z)].y);
	+ const Float Tz_n = hmeanDev(T.z, dev_d_T[idx(x,y,z-1)].z);
	+ const Float Tz_p = hmeanDev(T.z, dev_d_T[idx(x,y,z+1)].z);
	+
	+ // Cell hydraulic storativity
	+ const Float S = dev_d_Ss[cellidx]*dx*dy*dz;
	+
	+ // Cell hydraulic head
	+ const Float H = dev_d_H[cellidx];
	+
	+ // Laplacian operator
	+ const Float deltaH = devC_dt/S *
	+ ( Tx_n * (dev_d_H[idx(x-1,y,z)] - H)/(dx*dx)
	+ + Tx_p * (dev_d_H[idx(x+1,y,z)] - H)/(dx*dx)
	+ + Ty_n * (dev_d_H[idx(x,y-1,z)] - H)/(dy*dy)
	+ + Ty_p * (dev_d_H[idx(x,y+1,z)] - H)/(dy*dy)
	+ + Tz_n * (dev_d_H[idx(x,y,z-1)] - H)/(dy*dz)
	+ + Tz_p * (dev_d_H[idx(x,y,z+1)] - H)/(dy*dz)
	+ + dev_d_W[cellidx] );
	+
	+ // Calculate new hydraulic pressure in cell
	+ __syncthreads();
	+ dev_d_H_new[cellidx] = H + deltaH;
	+ }
	+ }
	+}
	+
	+// Find cell velocity
	+__global__ void findDarcyVelocitiesDev(
	+ Float* dev_d_H,
	+ Float3* dev_d_dH,
	+ Float3* dev_d_V,
	+ Float* dev_d_phi,
	+ Float* dev_d_K)
	+{
	+ // 3D thread index
	+ const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	+ const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
	+ const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;
	+
	+ // 1D thread index
	+ const unsigned int cellidx = idx(x,y,z);
	+
	+ // Check that we are not outside the fluid grid
	+ if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) {
	+
	+ // Flux [m/s]: q = -k/nu * dH
	+ // Pore velocity [m/s]: v = q/n
	+
	+ // Dynamic viscosity
	+ Float nu = devC_params.nu;
	+
	+ __syncthreads();
	+ const Float3 dH = dev_d_dH[cellidx];
	+ const Float K = dev_d_K[cellidx];
	+ const Float phi = dev_d_phi[cellidx];
	+
	+ // Calculate flux
	+ // The sign might need to be reversed, depending on the
	+ // grid orientation
	+ Float3 q = MAKE_FLOAT3(
	+ -K/nu * dH.x,
	+ -K/nu * dH.y,
	+ -K/nu * dH.z);
	+
	+ // Calculate velocity
	+ Float3 v = MAKE_FLOAT3(
	+ v.x = q.x/phi,
	+ v.y = q.y/phi,
	+ v.z = q.z/phi);
	+
	+ // Save velocity
	+ __syncthreads();
	+ dev_d_V[cellidx] = v;
	+ }
	+}
	+
	+// Solve Darcy flow on a regular, cubic grid
	+/*void DEM::initDarcyDev(const Float cellsizemultiplier)
	+{
	+ if (params.nu <= 0.0) {
	+ std::cerr << "Error in initDarcy. The dymamic viscosity (params.nu), "
	+ << "should be larger than 0.0, but is " << params.nu << std::endl;
	+ exit(1);
	+ }
	+
	+ // Number of cells
	+ d_nx = floor(grid.num[0]*cellsizemultiplier);
	+ d_ny = floor(grid.num[1]*cellsizemultiplier);
	+ d_nz = floor(grid.num[2]*cellsizemultiplier);
	+
	+ // Cell size
	+ d_dx = grid.L[0]/d_nx;
	+ d_dy = grid.L[1]/d_ny;
	+ d_dz = grid.L[2]/d_nz;
	+
	+ if (verbose == 1) {
	+ std::cout << " - Fluid grid dimensions: "
	+ << d_nx << "*"
	+ << d_ny << "*"
	+ << d_nz << std::endl;
	+ std::cout << " - Fluid grid cell size: "
	+ << d_dx << "*"
	+ << d_dy << "*"
	+ << d_dz << std::endl;
	+ }
	+
	+ initDarcyMemDev();
	+ initDarcyVals();
	+ findDarcyTransmissivities();
	+
	+ checkDarcyTimestep();
	+
	+ transferDarcyToGlobalDeviceMemory(1);
	+}*/
	+
	+// Print final heads and free memory
	+void DEM::endDarcyDev()
	+{
	+ /*FILE* Kfile;
	+ if ((Kfile = fopen("d_K.txt","w"))) {
	+ printDarcyArray(Kfile, d_K);
	+ fclose(Kfile);
	+ } else {
	+ fprintf(stderr, "Error, could not open d_K.txt\n");
	+ }*/
	+ //printDarcyArray(stdout, d_phi, "d_phi");
	+ //printDarcyArray(stdout, d_K, "d_K");
	+ //printDarcyArray(stdout, d_H, "d_H");
	+ //printDarcyArray(stdout, d_V, "d_V");
	+ freeDarcyMemDev();
	+}
	+
	+// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4
	diff --git a/src/device.cu b/src/device.cu
	t@@ -27,7 +27,7 @@
	#include "integration.cuh"
	#include "raytracer.cuh"
	#include "latticeboltzmann.cuh"
	-//#include "darcy.cuh"
	+#include "darcy.cuh"


	// Wrapper function for initializing the CUDA components.
	t@@ -450,15 +450,20 @@ __host__ void DEM::transferToGlobalDeviceMemory(int stat…
	}

	// Fluid arrays
	- if (params.nu > 0.0 && darcy == 0) {
	+ if (params.nu > 0.0) {
	+ if (darcy == 0) {
	#ifdef LBM_GPU
	- cudaMemcpy( dev_f, f,
	- sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
	- cudaMemcpyHostToDevice);
	- cudaMemcpy( dev_v_rho, v_rho,
	- sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
	- cudaMemcpyHostToDevice);
	+ cudaMemcpy( dev_f, f,
	+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
	+ cudaMemcpyHostToDevice);
	+ cudaMemcpy( dev_v_rho, v_rho,
	+ sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
	+ cudaMemcpyHostToDevice);
	#endif
	+ } //else {
	+ //transferDarcyToGlobalDeviceMemory(1);
	+ //}
	+ // Darcy arrays aren't ready yet
	}

	checkForCudaErrors("End of transferToGlobalDeviceMemory");
	t@@ -530,16 +535,20 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
	sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);

	// Fluid arrays
	+ if (params.nu > 0.0) {
	+ if (darcy == 0) {
	#ifdef LBM_GPU
	- if (params.nu > 0.0 && darcy == 0) {
	cudaMemcpy( f, dev_f,
	sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19,
	cudaMemcpyDeviceToHost);
	cudaMemcpy(v_rho, dev_v_rho,
	sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2],
	cudaMemcpyDeviceToHost);
	- }
	#endif
	+ } else {
	+ transferDarcyFromGlobalDeviceMemory(0);
	+ }
	+ }

	checkForCudaErrors("End of transferFromGlobalDeviceMemory");
	}
	t@@ -580,13 +589,13 @@ __host__ void DEM::startTime()
	unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
	dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid

	- // Use 3D block and grid layout for Lattice-Boltzmann fluid calculations
	+ // Use 3D block and grid layout for fluid calculations
	dim3 dimBlockFluid(8, 8, 8); // 512 threads per block
	dim3 dimGridFluid(
	iDivUp(grid.num[0], dimBlockFluid.x),
	iDivUp(grid.num[1], dimBlockFluid.y),
	iDivUp(grid.num[2], dimBlockFluid.z));
	- if (dimGridFluid.z > 64) {
	+ if (dimGridFluid.z > 64 && params.nu > 0.0) {
	cerr << "Error: dimGridFluid.z > 64" << endl;
	exit(1);
	}
	t@@ -634,18 +643,32 @@ __host__ void DEM::startTime()
	fclose(fp);

	// Initialize fluid distribution array
	- if (params.nu > 0.0 && darcy == 0) {
	+ Float d_factor;
	+ if (params.nu > 0.0) {
	+ if (darcy == 0) {
	#ifdef LBM_GPU
	- initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
	- cudaThreadSynchronize();
	-#else
	+ initFluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f);
	+ cudaThreadSynchronize();
	+#endif
	+ } else if (darcy == 1) {
	#ifdef DARCY_GPU
	- initFluid(v_rho, f, grid.num[0], grid.num[1], grid.num[2]);
	+ const Float cellsizemultiplier = 1.0;
	+ initDarcy(cellsizemultiplier);
	+ initDarcyMemDev();
	+ transferDarcyToGlobalDeviceMemory(1);
	+
	+ // Representative grain radius
	+ const Float r_bar2 = meanRadius()*2.0;
	+ // Grain size factor for Kozeny-Carman relationship
	+ d_factor = r_bar2*r_bar2/180.0;
	#else
	- const Float cellsizemultiplier = 1.0;
	- initDarcy(cellsizemultiplier);
	-#endif
	+ const Float cellsizemultiplier = 1.0;
	+ initDarcy(cellsizemultiplier);
	#endif
	+ } else {
	+ std::cerr << "Error, darcy value (" << darcy
	+ << ") not understood." << std::endl;
	+ }
	}

	if (verbose == 1) {
	t@@ -677,6 +700,12 @@ __host__ void DEM::startTime()
	double t_summation = 0.0;
	double t_integrateWalls = 0.0;

	+ double t_findPorositiesDev = 0.0;
	+ double t_findDarcyTransmissivitiesDev = 0.0;
	+ double t_explDarcyStepDev = 0.0;
	+ double t_findDarcyGradientsDev = 0.0;
	+ double t_findDarcyVelocitiesDev = 0.0;
	+
	if (PROFILING == 1) {
	cudaEventCreate(&kernel_tic);
	cudaEventCreate(&kernel_toc);
	t@@ -866,17 +895,90 @@ __host__ void DEM::startTime()
	}

	// Solve darcy flow through grid
	- if (darcy == 1) {
	+ if (params.nu > 0.0 && darcy == 1) {

	#ifdef DARCY_GPU
	- std::cout << "GPU darcy" << std::endl;
	+ checkForCudaErrors("Before findPorositiesDev", iter);
	+ // Find cell porosities
	+ if (PROFILING == 1)
	+ startTimer(&kernel_tic);
	+ findPorositiesDev<<<dimGridFluid, dimBlockFluid>>>(
	+ dev_cellStart,
	+ dev_cellEnd,
	+ dev_x_sorted,
	+ dev_d_phi);
	+ cudaThreadSynchronize();
	+ if (PROFILING == 1)
	+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
	+ &t_findPorositiesDev);
	+ checkForCudaErrors("Post findPorositiesDev", iter);
	+
	+ // Find resulting cell transmissivities
	+ if (PROFILING == 1)
	+ startTimer(&kernel_tic);
	+ findDarcyTransmissivitiesDev<<<dimGridFluid, dimBlockFluid>>>(
	+ dev_d_K,
	+ dev_d_T,
	+ dev_d_phi,
	+ d_factor);
	+ cudaThreadSynchronize();
	+ if (PROFILING == 1)
	+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
	+ &t_findDarcyTransmissivitiesDev);
	+ checkForCudaErrors("Post findDarcyTransmissivitiesDev", iter);
	+
	+ // Perform explicit Darcy time step
	+ if (PROFILING == 1)
	+ startTimer(&kernel_tic);
	+ explDarcyStepDev<<<dimGridFluid, dimBlockFluid>>>(
	+ dev_d_H,
	+ dev_d_H_new,
	+ dev_d_T,
	+ dev_d_Ss,
	+ dev_d_W);
	+ cudaThreadSynchronize();
	+ if (PROFILING == 1)
	+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
	+ &t_explDarcyStepDev);
	+ checkForCudaErrors("Post explDarcyStepDev", iter);
	+
	+ // Flip-flop: swap dev_d_H and dev_d_H_new
	+ swapFloatArrays(dev_d_H, dev_d_H_new);
	+
	+ // Find the pressure gradients
	+ if (PROFILING == 1)
	+ startTimer(&kernel_tic);
	+ findDarcyGradientsDev<<<dimGridFluid, dimBlockFluid>>>(
	+ dev_d_H, dev_d_dH);
	+ cudaThreadSynchronize();
	+ if (PROFILING == 1)
	+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
	+ &t_findDarcyGradientsDev);
	+ checkForCudaErrors("Post findDarcyGradientsDev", iter);
	+
	+ // Find the Darcy flow velocities
	+ if (PROFILING == 1)
	+ startTimer(&kernel_tic);
	+ findDarcyVelocitiesDev<<<dimGridFluid, dimBlockFluid>>>(
	+ dev_d_H,
	+ dev_d_dH,
	+ dev_d_V,
	+ dev_d_phi,
	+ dev_d_K);
	+ cudaThreadSynchronize();
	+ if (PROFILING == 1)
	+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
	+ &t_findDarcyVelocitiesDev);
	+ checkForCudaErrors("Post findDarcyVelocitiesDev", iter);
	+
	+#else
	// Copy device data to host memory
	transferFromGlobalDeviceMemory();

	// Pause the CPU thread until all CUDA calls previously issued are…
	cudaThreadSynchronize();

	- // Perform explicit Darcy time step
	+ // Perform a Darcy time step on the CPU
	explDarcyStep();

	// Transfer data from host to device memory
	t@@ -884,9 +986,6 @@ __host__ void DEM::startTime()

	// Pause the CPU thread until all CUDA calls previously issued are…
	cudaThreadSynchronize();
	-#else
	- // Perform a Darcy time step on the CPU
	- explDarcyStep();
	#endif
	}

	t@@ -1086,6 +1185,22 @@ __host__ void DEM::startTime()
	<< "\t(" << 100.0*t_summation/t_sum << " %)\n"
	<< " - integrateWalls:\t" << t_integrateWalls/1000.0 << " s"
	<< "\t(" << 100.0*t_integrateWalls/t_sum << " %)\n";
	+ if (darcy == 1) {
	+ cout
	+ << " - findPorositiesDev:\t" << t_findPorositiesDev/1000.0 << " s"
	+ << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n"
	+ << " - findDarcyTransmissivitiesDev:\t" <<
	+ t_findDarcyTransmissivitiesDev/1000.0 << " s"
	+ << "\t(" << 100.0*t_findDarcyTransmissivitiesDev/t_sum << " %)\n"
	+ << " - explDarcyStepDev:\t" << t_explDarcyStepDev/1000.0 << " s"
	+ << "\t(" << 100.0*t_explDarcyStepDev/t_sum << " %)\n"
	+ << " - findDarcyGradientsDev:\t" << t_findDarcyGradientsDev/1000.0
	+ << " s"
	+ << "\t(" << 100.0*t_findDarcyGradientsDev/t_sum << " %)\n"
	+ << " - findDarcyVelocitiesDev:\t"
	+ << t_findDarcyVelocitiesDev/1000.0 << " s"
	+ << "\t(" << 100.0*t_findDarcyVelocitiesDev/t_sum << " %)\n";
	+ }
	}


	t@@ -1098,6 +1213,11 @@ __host__ void DEM::startTime()
	delete[] k.delta_t;

	#ifndef DARCY_GPU
	+ if (darcy == 1) {
	+ endDarcyDev();
	+ endDarcy();
	+ }
	+#else
	if (darcy == 1)
	endDarcy();
	#endif
	diff --git a/src/sphere.h b/src/sphere.h
	t@@ -230,6 +230,9 @@ class DEM {
	// Find darcy flow velocities from specific flux (q)
	void findDarcyVelocities();

	+ // Returns the mean particle radius
	+ Float meanRadius();
	+
	// Get linear (1D) index from 3D coordinate
	unsigned int idx(
	const unsigned int x,
	t@@ -238,9 +241,11 @@ class DEM {

	// Initialize Darcy values and arrays
	void initDarcy(const Float cellsizemultiplier = 1.0);
	+ void initDarcyDev(const Float cellsizemultiplier = 1.0);

	// Clean up Darcy arrays
	void endDarcy();
	+ void endDarcyDev();

	// Check whether the explicit integration is going to meet the
	// stability criteria