| tMerge pull request #8 from anders-dc/darcy - sphere - GPU-based 3D discrete el… | |
| git clone git://src.adamsgaard.dk/sphere | |
| Log | |
| Files | |
| Refs | |
| LICENSE | |
| --- | |
| commit 2dc04091730193f7b8df4263b0a00ea9f744c9c7 | |
| parent e6181dcce46e3632a3f4a18ab0aa4c5db4a008a1 | |
| Author: Anders Damsgaard <[email protected]> | |
| Date: Wed, 5 Nov 2014 15:25:07 +0100 | |
| Merge pull request #8 from anders-dc/darcy | |
| Darcy solver implemented | |
| Diffstat: | |
| M CMakeLists.txt | 4 +--- | |
| A output/cube-init.output00029.bin | 0 | |
| A output/cube-init.status.dat | 1 + | |
| M python/capillary-cohesion.py | 2 +- | |
| M python/collapse.py | 2 +- | |
| M python/cube-init.py | 2 +- | |
| A python/halfshear-darcy-starter.py | 69 ++++++++++++++++++++++++++++++ | |
| M python/segregation.py | 2 +- | |
| M python/shear-results-strain.py | 4 ++-- | |
| M python/shear-results.py | 10 +++++----- | |
| M python/shear-test.py | 2 +- | |
| M python/shear2-init.py | 2 +- | |
| M python/sphere.py | 806 +++++++++++++++++++++--------… | |
| M src/CMakeLists.txt | 6 +++--- | |
| M src/constants.h | 2 +- | |
| A src/darcy.cpp | 311 +++++++++++++++++++++++++++++… | |
| A src/darcy.cuh | 1025 +++++++++++++++++++++++++++++… | |
| M src/datatypes.h | 31 +++++++++++++++++++++++++++++… | |
| M src/debug.h | 8 ++++---- | |
| M src/device.cu | 1588 +++++++++++++++++++----------… | |
| M src/file_io.cpp | 337 +++++++++++++++++++++--------… | |
| M src/navierstokes.cpp | 4 ++-- | |
| M src/navierstokes.cuh | 116 +++++++++++++++++------------… | |
| M src/sphere.cpp | 69 +++++++++++++++++++++++++++--… | |
| M src/sphere.h | 49 +++++++++++++++++++++++++++++… | |
| M src/utility.cu | 6 +++++- | |
| M tests/CMakeLists.txt | 12 ++++++++++++ | |
| A tests/cfd_tests_darcy.py | 151 +++++++++++++++++++++++++++++… | |
| A tests/cfd_tests_darcy_particles.py | 165 +++++++++++++++++++++++++++++… | |
| A tests/cfd_tests_neumann_darcy.py | 41 +++++++++++++++++++++++++++++… | |
| A tests/fluid_particle_interaction_d… | 31 +++++++++++++++++++++++++++… | |
| A tests/highlighttext.py | 51 +++++++++++++++++++++++++++++… | |
| M tests/io_tests_fluid.py | 54 +++++++++++++++++++++++++++++… | |
| M tests/pytestutils.py | 5 +++-- | |
| 34 files changed, 3926 insertions(+), 1042 deletions(-) | |
| --- | |
| diff --git a/CMakeLists.txt b/CMakeLists.txt | |
| t@@ -30,9 +30,7 @@ enable_testing() | |
| # Set build type = Debug | |
| #set(CMAKE_BUILD_TYPE Debug) | |
| -#if (CUDA_FOUND) | |
| -# set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g -G) | |
| -#endif() | |
| +#set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g -G) | |
| # Set build type = Release | |
| set(CMAKE_BUILD_TYPE Release) | |
| diff --git a/output/cube-init.output00029.bin b/output/cube-init.output00029.bin | |
| Binary files differ. | |
| diff --git a/output/cube-init.status.dat b/output/cube-init.status.dat | |
| t@@ -0,0 +1 @@ | |
| +5.8000e+00 9.6667e+01 29 | |
| diff --git a/python/capillary-cohesion.py b/python/capillary-cohesion.py | |
| t@@ -27,7 +27,7 @@ sim.mu_s[0] = 0.0 | |
| sim.mu_d[0] = 0.0 | |
| sim.k_n[0] = 1.0e7 | |
| sim.k_t[0] = 1.0e7 | |
| -sim.generateRadii(psd='uni', radius_mean=1.0e-3, radius_variance=1.0e-4) | |
| +sim.generateRadii(psd='uni', mean=1.0e-3, variance=1.0e-4) | |
| sim.contactModel(1) | |
| sim.initRandomGridPos([12, 12, 10000]) | |
| sim.initTemporal(5.0, file_dt=0.01, epsilon=0.07) | |
| diff --git a/python/collapse.py b/python/collapse.py | |
| t@@ -21,7 +21,7 @@ sim_id = 'collapse' | |
| init = sphere.sim(np = np, nd = 3, nw = 0, sid = sim_id + '-init') | |
| # Set radii | |
| -init.generateRadii(radius_mean = 0.1) | |
| +init.generateRadii(mean = 0.1) | |
| # Use default params | |
| init.defaultParams( | |
| diff --git a/python/cube-init.py b/python/cube-init.py | |
| t@@ -3,7 +3,7 @@ import sphere | |
| init = sphere.sim('cube-init', np=1e2) | |
| -init.generateRadii(psd='uni', radius_mean=0.01, radius_variance=0.002) | |
| +init.generateRadii(psd='uni', mean=0.01, variance=0.002) | |
| init.periodicBoundariesXY() | |
| diff --git a/python/halfshear-darcy-starter.py b/python/halfshear-darcy-starter… | |
| t@@ -0,0 +1,69 @@ | |
| +#!/usr/bin/env python | |
| +import sphere | |
| +import numpy | |
| +import sys | |
| + | |
| +# launch with: | |
| +# $ ipython halfshear-darcy-starter.py <device> <fluid> <c_phi> <k_c> <sigma_0… | |
| + | |
| +device = int(sys.argv[1]) | |
| +wet = int(sys.argv[2]) | |
| +c_phi = float(sys.argv[3]) | |
| +k_c = float(sys.argv[4]) | |
| +sigma0 = float(sys.argv[5]) | |
| +mu = float(sys.argv[6]) | |
| + | |
| +if wet == 1: | |
| + fluid = True | |
| +else: | |
| + fluid = False | |
| + | |
| +sim = sphere.sim('halfshear-sigma0=' + str(sigma0), fluid=False) | |
| +print('Input: ' + sim.sid) | |
| +sim.readlast() | |
| + | |
| +sim.fluid = fluid | |
| +if fluid: | |
| + sim.id('halfshear-darcy-sigma0=' + str(sigma0) + '-k_c=' + str(k_c) + \ | |
| + '-mu=' + str(mu) + '-shear') | |
| +else: | |
| + sim.id('halfshear-sigma0=' + str(sigma0) + '-shear') | |
| + | |
| +sim.checkerboardColors(nx=6,ny=3,nz=6) | |
| +sim.cleanup() | |
| +sim.adjustUpperWall() | |
| +sim.zeroKinematics() | |
| + | |
| +sim.shear(1.0/20.0) | |
| + | |
| +if fluid: | |
| + #sim.num[2] *= 2 | |
| + sim.num[:] /= 2 | |
| + #sim.L[2] *= 2.0 | |
| + #sim.initFluid(mu = 1.787e-6, p = 600.0e3, cfd_solver = 1) | |
| + sim.initFluid(mu = 1.787e-6, p = 0.0, cfd_solver = 1) | |
| + sim.setFluidBottomNoFlow() | |
| + sim.setFluidTopFixedPressure() | |
| + #sim.setDEMstepsPerCFDstep(100) | |
| + sim.setMaxIterations(2e5) | |
| + sim.beta_f[0] = mu | |
| + sim.k_c[0] = k_c | |
| + | |
| +sim.initTemporal(total = 20.0, file_dt = 0.01, epsilon=0.07) | |
| +sim.w_devs[0] = sigma0 | |
| +sim.w_m[0] = numpy.abs(sigma0*sim.L[0]*sim.L[1]/sim.g[2]) | |
| +sim.mu_s[0] = 0.5 | |
| +sim.mu_d[0] = 0.5 | |
| +sim.setDampingNormal(0.0) | |
| +sim.setDampingTangential(0.0) | |
| + | |
| +# Fix lowermost particles | |
| +#dz = sim.L[2]/sim.num[2] | |
| +#I = numpy.nonzero(sim.x[:,2] < 1.5*dz) | |
| +#sim.fixvel[I] = 1 | |
| + | |
| +sim.run(dry=True) | |
| +sim.run(device=device) | |
| +#sim.writeVTKall() | |
| +#sim.visualize('walls') | |
| +#sim.visualize('fluid-pressure') | |
| diff --git a/python/segregation.py b/python/segregation.py | |
| t@@ -27,7 +27,7 @@ devslist = [120e3] | |
| init = sphere.sim(np = np, nd = 3, nw = 0, sid = sim_id + '-init') | |
| # Save radii | |
| -init.generateRadii(radius_mean = 0.08) | |
| +init.generateRadii(mean = 0.08) | |
| # Use default params | |
| init.defaultParams(gamma_n = 100.0, mu_s = 0.4, mu_d = 0.4) | |
| diff --git a/python/shear-results-strain.py b/python/shear-results-strain.py | |
| t@@ -14,8 +14,8 @@ import matplotlib.pyplot as plt | |
| from matplotlib.ticker import MaxNLocator | |
| sigma0 = 20000.0 | |
| -cvals = ['dry', 1.0, 0.1, 0.01] | |
| -#cvals = ['dry', 1.0, 0.1] | |
| +#cvals = ['dry', 1.0, 0.1, 0.01] | |
| +cvals = ['dry', 1.0, 0.1] | |
| #cvals = ['dry', 1.0] | |
| #step = 1999 | |
| diff --git a/python/shear-results.py b/python/shear-results.py | |
| t@@ -21,8 +21,8 @@ zflow = False | |
| #sigma0_list = numpy.array([1.0e3, 2.0e3, 4.0e3, 10.0e3, 20.0e3, 40.0e3]) | |
| #sigma0 = 10.0e3 | |
| sigma0 = float(sys.argv[1]) | |
| -#cvals = [1.0, 0.1] | |
| -cvals = [1.0, 0.1, 0.01] | |
| +cvals = [1.0, 0.1] | |
| +#cvals = [1.0, 0.1, 0.01] | |
| #cvals = [1.0] | |
| # return a smoothed version of in. The returned array is smaller than the | |
| t@@ -139,9 +139,9 @@ for c in numpy.arange(1,len(cvals)+1): | |
| #sid = 'shear-sigma0=' + str(sigma0) + '-c_phi=' + \ | |
| #str(c_phi) + '-c_v=' + str(c_v) + \ | |
| #'-hi_mu-lo_visc-hw' | |
| - #sid = 'halfshear-sigma0=' + str(sigma0) + '-c=' + str(c_v) + '-shear' | |
| - sid = 'halfshear-sigma0=' + str(sigma0) + '-c_v=' + str(c_v) +\ | |
| - '-c_a=0.0-velfac=1.0-shear' | |
| + sid = 'halfshear-sigma0=' + str(sigma0) + '-c=' + str(c_v) + '-shear' | |
| + #sid = 'halfshear-sigma0=' + str(sigma0) + '-c_v=' + str(c_v) +\ | |
| + #'-c_a=0.0-velfac=1.0-shear' | |
| if os.path.isfile('../output/' + sid + '.status.dat'): | |
| sim = sphere.sim(sid, fluid=fluid) | |
| diff --git a/python/shear-test.py b/python/shear-test.py | |
| t@@ -26,7 +26,7 @@ devslist = [80e3, 10e3, 20e3, 40e3, 60e3, 120e3] | |
| init = Spherebin(np = np, nd = 3, nw = 0, sid = sim_id + "-init") | |
| # Save radii | |
| -init.generateRadii(radius_mean = 0.02) | |
| +init.generateRadii(mean = 0.02) | |
| # Use default params | |
| init.defaultParams(gamma_n = 100.0, mu_s = 0.6, mu_d = 0.6) | |
| diff --git a/python/shear2-init.py b/python/shear2-init.py | |
| t@@ -2,7 +2,7 @@ | |
| import sphere | |
| sim = sphere.sim('init2', np=10000) | |
| -sim.generateRadii(psd='uni', radius_mean=0.02, radius_variance=0.01) | |
| +sim.generateRadii(psd='uni', mean=0.02, variance=0.01) | |
| sim.initRandomGridPos([12, 12, 1000]) | |
| sim.initTemporal(10.0, file_dt=0.05, epsilon=0.07) | |
| sim.gamma_n[0] = 1000.0 | |
| diff --git a/python/sphere.py b/python/sphere.py | |
| t@@ -19,7 +19,7 @@ numpy.seterr(all='warn', over='raise') | |
| # Sphere version number. This field should correspond to the value in | |
| # `../src/constants.h`. | |
| -VERSION=1.07 | |
| +VERSION=2.0 | |
| class sim: | |
| ''' | |
| t@@ -40,9 +40,18 @@ class sim: | |
| :type sid: str | |
| :param fluid: Setup fluid simulation (default = False) | |
| :type fluid: bool | |
| + :param cfd_solver: Fluid solver to use if fluid == True. 0: Navier-Stokes | |
| + (default), 1: Darcy. | |
| + :type cfd_solver: int | |
| ''' | |
| - def __init__(self, sid = 'unnamed', np = 0, nd = 3, nw = 0, fluid = False): | |
| + def __init__(self, | |
| + sid = 'unnamed', | |
| + np = 0, | |
| + nd = 3, | |
| + nw = 0, | |
| + fluid = False, | |
| + cfd_solver = 0): | |
| # Sphere version number | |
| self.version = numpy.ones(1, dtype=numpy.float64)*VERSION | |
| t@@ -272,6 +281,11 @@ class sim: | |
| if self.fluid: | |
| + # Fluid solver type | |
| + # 0: Navier Stokes (fluid with inertia) | |
| + # 1: Stokes-Darcy (fluid without inertia) | |
| + self.cfd_solver = numpy.zeros(1, dtype=numpy.int32) | |
| + | |
| # Fluid dynamic viscosity [N/(m/s)] | |
| self.mu = numpy.zeros(1, dtype=numpy.float64) | |
| t@@ -311,40 +325,73 @@ class sim: | |
| ## Solver parameters | |
| - # Smoothing parameter, should be in the range [0.0;1.0[. | |
| - # 0.0 = no smoothing. | |
| - self.gamma = numpy.array(0.0) | |
| + # Navier-Stokes | |
| + if self.cfd_solver[0] == 0: | |
| - # Under-relaxation parameter, should be in the range ]0.0;1.0]. | |
| - # 1.0 = no under-relaxation | |
| - self.theta = numpy.array(1.0) | |
| + # Smoothing parameter, should be in the range [0.0;1.0[. | |
| + # 0.0 = no smoothing. | |
| + self.gamma = numpy.array(0.0) | |
| - # Velocity projection parameter, should be in the range [0.0;1.0] | |
| - self.beta = numpy.array(0.0) | |
| + # Under-relaxation parameter, should be in the range ]0.0;1.0]. | |
| + # 1.0 = no under-relaxation | |
| + self.theta = numpy.array(1.0) | |
| - # Tolerance criteria for the normalized max. residual | |
| - self.tolerance = numpy.array(1.0e-8) | |
| + # Velocity projection parameter, should be in the range | |
| + # [0.0;1.0] | |
| + self.beta = numpy.array(0.0) | |
| - # The maximum number of iterations to perform per time step | |
| - self.maxiter = numpy.array(1e4) | |
| + # Tolerance criteria for the normalized max. residual | |
| + self.tolerance = numpy.array(1.0e-3) | |
| - # The number of DEM time steps to perform between CFD updates | |
| - self.ndem = numpy.array(1) | |
| + # The maximum number of iterations to perform per time step | |
| + self.maxiter = numpy.array(1e4) | |
| - # Porosity scaling factor | |
| - self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| + # The number of DEM time steps to perform between CFD updates | |
| + self.ndem = numpy.array(1) | |
| - # Fluid velocity scaling factor | |
| - self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| + # Porosity scaling factor | |
| + self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| - # DEM-CFD time scaling factor | |
| - self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64) | |
| + # Fluid velocity scaling factor | |
| + self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| - ## Interaction forces | |
| - self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + # DEM-CFD time scaling factor | |
| + self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64) | |
| + | |
| + ## Interaction forces | |
| + self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float… | |
| + | |
| + # Darcy | |
| + elif self.cfd_solver[0] == 1: | |
| + | |
| + # Tolerance criteria for the normalized max. residual | |
| + self.tolerance = numpy.array(1.0e-3) | |
| + | |
| + # The maximum number of iterations to perform per time step | |
| + self.maxiter = numpy.array(1e4) | |
| + | |
| + # The number of DEM time steps to perform between CFD updates | |
| + self.ndem = numpy.array(1) | |
| + | |
| + # Porosity scaling factor | |
| + self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| + | |
| + # Interaction forces | |
| + self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + | |
| + # Adiabatic fluid compressibility [1/Pa]. | |
| + # Fluid bulk modulus = 1/self.beta_f | |
| + self.beta_f = numpy.ones(1, dtype=numpy.float64)*4.5e-10 | |
| + | |
| + # Hydraulic permeability prefactor [m*m] | |
| + self.k_c = numpy.ones(1, dtype=numpy.float64)*4.6e-10 | |
| + | |
| + else: | |
| + raise Exception('Value of cfd_solver not understood (' + \ | |
| + str(self.cfd_solver[0]) + ')') | |
| # Particle color marker | |
| self.color = numpy.zeros(self.np, dtype=numpy.int32) | |
| t@@ -546,7 +593,10 @@ class sim: | |
| return 64 | |
| if self.fluid: | |
| - if (self.mu != other.mu): | |
| + if (self.cfd_solver != other.cfd_solver): | |
| + print(91) | |
| + return 91 | |
| + elif (self.mu != other.mu): | |
| print(65) | |
| return 65 | |
| elif ((self.v_f != other.v_f).any()): | |
| t@@ -585,44 +635,69 @@ class sim: | |
| elif (self.free_slip_top != other.free_slip_top): | |
| print(77) | |
| return 77 | |
| - elif (self.gamma != other.gamma): | |
| - print(78) | |
| - return 78 | |
| - elif (self.theta != other.theta): | |
| - print(79) | |
| - return 79 | |
| - elif (self.beta != other.beta): | |
| - print(80) | |
| - return 80 | |
| - elif (self.tolerance != other.tolerance): | |
| - print(81) | |
| - return 81 | |
| - elif (self.maxiter != other.maxiter): | |
| - print(82) | |
| - return 82 | |
| - elif (self.ndem != other.ndem): | |
| - print(83) | |
| - return 83 | |
| - elif (self.c_phi != other.c_phi): | |
| - print(84) | |
| - return(84) | |
| - elif (self.c_v != other.c_v): | |
| - print(85) | |
| - elif (self.dt_dem_fac != other.dt_dem_fac): | |
| - print(85) | |
| - return(85) | |
| - elif (self.f_d != other.f_d).any(): | |
| - print(86) | |
| - return(86) | |
| - elif (self.f_p != other.f_p).any(): | |
| - print(87) | |
| - return(87) | |
| - elif (self.f_v != other.f_v).any(): | |
| - print(88) | |
| - return(88) | |
| - elif (self.f_sum != other.f_sum).any(): | |
| - print(89) | |
| - return(89) | |
| + | |
| + if (self.cfd_solver == 0): | |
| + if (self.gamma != other.gamma): | |
| + print(78) | |
| + return 78 | |
| + elif (self.theta != other.theta): | |
| + print(79) | |
| + return 79 | |
| + elif (self.beta != other.beta): | |
| + print(80) | |
| + return 80 | |
| + elif (self.tolerance != other.tolerance): | |
| + print(81) | |
| + return 81 | |
| + elif (self.maxiter != other.maxiter): | |
| + print(82) | |
| + return 82 | |
| + elif (self.ndem != other.ndem): | |
| + print(83) | |
| + return 83 | |
| + elif (self.c_phi != other.c_phi): | |
| + print(84) | |
| + return(84) | |
| + elif (self.c_v != other.c_v): | |
| + print(85) | |
| + elif (self.dt_dem_fac != other.dt_dem_fac): | |
| + print(85) | |
| + return(85) | |
| + elif (self.f_d != other.f_d).any(): | |
| + print(86) | |
| + return(86) | |
| + elif (self.f_p != other.f_p).any(): | |
| + print(87) | |
| + return(87) | |
| + elif (self.f_v != other.f_v).any(): | |
| + print(88) | |
| + return(88) | |
| + elif (self.f_sum != other.f_sum).any(): | |
| + print(89) | |
| + return(89) | |
| + | |
| + if (self.cfd_solver == 1): | |
| + if (self.tolerance != other.tolerance): | |
| + print(81) | |
| + return 81 | |
| + elif (self.maxiter != other.maxiter): | |
| + print(82) | |
| + return 82 | |
| + elif (self.ndem != other.ndem): | |
| + print(83) | |
| + return 83 | |
| + elif (self.c_phi != other.c_phi): | |
| + print(84) | |
| + return(84) | |
| + elif (self.f_p != other.f_p).any(): | |
| + print(86) | |
| + return(86) | |
| + elif (self.beta_f != other.beta_f): | |
| + print(87) | |
| + return(87) | |
| + elif (self.k_c != other.k_c): | |
| + print(88) | |
| + return(88) | |
| if ((self.color != other.color)).any(): | |
| print(90) | |
| t@@ -631,15 +706,31 @@ class sim: | |
| # All equal | |
| return 0 | |
| - def id(self, sid): | |
| + def id(self, sid=''): | |
| ''' | |
| - Set the simulation id/name, which will be used to identify simulation | |
| - files in the output folders. | |
| + Returns or sets the simulation id/name, which is used to identify | |
| + simulation files in the output folders. | |
| - :param sid: The desired simulation id | |
| + :param sid: The desired simulation id. If left blank the current | |
| + simulation id will be returned. | |
| :type sid: str | |
| + :returns: The current simulation id if no new value is set. | |
| + :return type: str | |
| ''' | |
| - self.sid = sid | |
| + if sid == '': | |
| + return self.sid | |
| + else: | |
| + self.sid = sid | |
| + | |
| + def idAppend(self, string): | |
| + ''' | |
| + Append a string to the simulation id/name, which is used to identify | |
| + simulation files in the output folders. | |
| + | |
| + :param string: The string to append to the simulation id (`self.sid`). | |
| + :type string: str | |
| + ''' | |
| + self.sid += string | |
| def addParticle(self, | |
| x, | |
| t@@ -852,7 +943,7 @@ class sim: | |
| self.periodic = numpy.fromfile(fh, dtype=numpy.int32, count=1) | |
| # Per-particle vectors | |
| - for i in range(self.np): | |
| + for i in numpy.arange(self.np): | |
| self.x[i,:] =\ | |
| numpy.fromfile(fh, dtype=numpy.float64, count=self.nd) | |
| self.radius[i] =\ | |
| t@@ -865,7 +956,7 @@ class sim: | |
| self.xyzsum = numpy.fromfile(fh, dtype=numpy.float64,\ | |
| count=self.np*2).reshape(self.np,2) | |
| - for i in range(self.np): | |
| + for i in numpy.arange(self.np): | |
| self.vel[i,:] =\ | |
| numpy.fromfile(fh, dtype=numpy.float64, count=self.nd) | |
| self.fixvel[i] =\ | |
| t@@ -924,11 +1015,11 @@ class sim: | |
| self.w_devs = numpy.empty(self.nw, dtype=numpy.float64) | |
| self.wmode = numpy.fromfile(fh, dtype=numpy.int32, count=self.nw) | |
| - for i in range(self.nw): | |
| + for i in numpy.arange(self.nw): | |
| self.w_n[i,:] =\ | |
| numpy.fromfile(fh, dtype=numpy.float64, count=self.nd) | |
| self.w_x[i] = numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| - for i in range(self.nw): | |
| + for i in numpy.arange(self.nw): | |
| self.w_m[i] = numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| self.w_vel[i] = numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| self.w_force[i] =\ | |
| t@@ -946,7 +1037,7 @@ class sim: | |
| self.sigma_b = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32) | |
| - for i in range(self.nb0): | |
| + for i in numpy.arange(self.nb0): | |
| self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, | |
| count=1) | |
| self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, | |
| t@@ -963,6 +1054,13 @@ class sim: | |
| self.nb0 = numpy.zeros(1, dtype=numpy.uint32) | |
| if self.fluid: | |
| + | |
| + if self.version >= 2.0: | |
| + self.cfd_solver = numpy.fromfile(fh, dtype=numpy.int32, | |
| + count=1) | |
| + else: | |
| + self.cfd_solver = numpy.zeros(1, dtype=numpy.int32) | |
| + | |
| self.mu = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| self.v_f = numpy.empty( | |
| t@@ -978,9 +1076,9 @@ class sim: | |
| numpy.empty((self.num[0],self.num[1],self.num[2]), | |
| dtype=numpy.float64) | |
| - for z in range(self.num[2]): | |
| - for y in range(self.num[1]): | |
| - for x in range(self.num[0]): | |
| + for z in numpy.arange(self.num[2]): | |
| + for y in numpy.arange(self.num[1]): | |
| + for x in numpy.arange(self.num[0]): | |
| self.v_f[x,y,z,0] = \ | |
| numpy.fromfile(fh, dtype=numpy.float64,\ | |
| count=1) | |
| t@@ -1000,7 +1098,7 @@ class sim: | |
| numpy.fromfile(fh, dtype=numpy.float64,\ | |
| count=1) | |
| - if (self.version >= 0.36): | |
| + if self.version >= 0.36: | |
| self.rho_f =\ | |
| numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| self.p_mod_A =\ | |
| t@@ -1019,65 +1117,93 @@ class sim: | |
| self.free_slip_top =\ | |
| numpy.fromfile(fh, dtype=numpy.int32, count=1) | |
| - self.gamma = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - self.theta = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - self.beta = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - self.tolerance =\ | |
| - numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - self.maxiter = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| - if self.version >= 1.01: | |
| - self.ndem = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| - else: | |
| - self.ndem = 1 | |
| - | |
| - if self.version >= 1.04: | |
| - self.c_phi = \ | |
| + if self.version >= 2.0 and self.cfd_solver == 0: | |
| + self.gamma = \ | |
| numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - self.c_v =\ | |
| - numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| - if self.version == 1.06: | |
| - self.c_a =\ | |
| - numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| - elif self.version >= 1.07: | |
| - self.dt_dem_fac =\ | |
| - numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| + self.theta = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + self.beta = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + self.tolerance =\ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + self.maxiter = \ | |
| + numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| + if self.version >= 1.01: | |
| + self.ndem = \ | |
| + numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| else: | |
| - self.c_a = numpy.ones(1, dtype=numpy.float64) | |
| - else: | |
| - self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| - self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| + self.ndem = 1 | |
| - if self.version >= 1.05: | |
| - self.f_d = numpy.empty_like(self.x) | |
| + if self.version >= 1.04: | |
| + self.c_phi = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=… | |
| + self.c_v =\ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + if self.version == 1.06: | |
| + self.c_a =\ | |
| + numpy.fromfile(fh, \ | |
| + dtype=numpy.float64, count=1) | |
| + elif self.version >= 1.07: | |
| + self.dt_dem_fac =\ | |
| + numpy.fromfile(fh, \ | |
| + dtype=numpy.float64, count=1) | |
| + else: | |
| + self.c_a = numpy.ones(1, dtype=numpy.float64) | |
| + else: | |
| + self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| + self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| + | |
| + if self.version >= 1.05: | |
| + self.f_d = numpy.empty_like(self.x) | |
| + self.f_p = numpy.empty_like(self.x) | |
| + self.f_v = numpy.empty_like(self.x) | |
| + self.f_sum = numpy.empty_like(self.x) | |
| + | |
| + for i in numpy.arange(self.np[0]): | |
| + self.f_d[i,:] = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, | |
| + count=self.nd) | |
| + for i in numpy.arange(self.np[0]): | |
| + self.f_p[i,:] = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, | |
| + count=self.nd) | |
| + for i in numpy.arange(self.np[0]): | |
| + self.f_v[i,:] = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, | |
| + count=self.nd) | |
| + for i in numpy.arange(self.np[0]): | |
| + self.f_sum[i,:] = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, | |
| + count=self.nd) | |
| + else: | |
| + self.f_d = numpy.zeros((self.np, self.nd), | |
| + dtype=numpy.float64) | |
| + self.f_p = numpy.zeros((self.np, self.nd), | |
| + dtype=numpy.float64) | |
| + self.f_v = numpy.zeros((self.np, self.nd), | |
| + dtype=numpy.float64) | |
| + self.f_sum = numpy.zeros((self.np, self.nd), | |
| + dtype=numpy.float64) | |
| + | |
| + elif self.version >= 2.0 and self.cfd_solver == 1: | |
| + | |
| + self.tolerance = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + self.maxiter = \ | |
| + numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| + self.ndem = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
| + self.c_phi = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| self.f_p = numpy.empty_like(self.x) | |
| - self.f_v = numpy.empty_like(self.x) | |
| - self.f_sum = numpy.empty_like(self.x) | |
| - | |
| - for i in range(self.np[0]): | |
| - self.f_d[i,:] = \ | |
| - numpy.fromfile(fh, dtype=numpy.float64, | |
| - count=self.nd) | |
| - for i in range(self.np[0]): | |
| + for i in numpy.arange(self.np[0]): | |
| self.f_p[i,:] = \ | |
| numpy.fromfile(fh, dtype=numpy.float64, | |
| count=self.nd) | |
| - for i in range(self.np[0]): | |
| - self.f_v[i,:] = \ | |
| - numpy.fromfile(fh, dtype=numpy.float64, | |
| - count=self.nd) | |
| - for i in range(self.np[0]): | |
| - self.f_sum[i,:] = \ | |
| - numpy.fromfile(fh, dtype=numpy.float64, | |
| - count=self.nd) | |
| - else: | |
| - self.f_d = numpy.zeros((self.np, self.nd), | |
| - dtype=numpy.float64) | |
| - self.f_p = numpy.zeros((self.np, self.nd), | |
| - dtype=numpy.float64) | |
| - self.f_v = numpy.zeros((self.np, self.nd), | |
| - dtype=numpy.float64) | |
| - self.f_sum = numpy.zeros((self.np, self.nd), | |
| - dtype=numpy.float64) | |
| + self.beta_f = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| + | |
| + self.k_c = \ | |
| + numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
| if (self.version >= 1.02): | |
| self.color =\ | |
| t@@ -1131,14 +1257,14 @@ class sim: | |
| fh.write(self.periodic.astype(numpy.uint32)) | |
| # Per-particle vectors | |
| - for i in range(self.np): | |
| + for i in numpy.arange(self.np): | |
| fh.write(self.x[i,:].astype(numpy.float64)) | |
| fh.write(self.radius[i].astype(numpy.float64)) | |
| if (self.np[0] > 0): | |
| fh.write(self.xyzsum.astype(numpy.float64)) | |
| - for i in range(self.np): | |
| + for i in numpy.arange(self.np): | |
| fh.write(self.vel[i,:].astype(numpy.float64)) | |
| fh.write(self.fixvel[i].astype(numpy.float64)) | |
| t@@ -1177,13 +1303,13 @@ class sim: | |
| fh.write(self.V_b.astype(numpy.float64)) | |
| fh.write(self.nw.astype(numpy.uint32)) | |
| - for i in range(self.nw): | |
| + for i in numpy.arange(self.nw): | |
| fh.write(self.wmode[i].astype(numpy.int32)) | |
| - for i in range(self.nw): | |
| + for i in numpy.arange(self.nw): | |
| fh.write(self.w_n[i,:].astype(numpy.float64)) | |
| fh.write(self.w_x[i].astype(numpy.float64)) | |
| - for i in range(self.nw): | |
| + for i in numpy.arange(self.nw): | |
| fh.write(self.w_m[i].astype(numpy.float64)) | |
| fh.write(self.w_vel[i].astype(numpy.float64)) | |
| fh.write(self.w_force[i].astype(numpy.float64)) | |
| t@@ -1195,7 +1321,7 @@ class sim: | |
| fh.write(self.nb0.astype(numpy.uint32)) | |
| fh.write(self.sigma_b.astype(numpy.float64)) | |
| fh.write(self.tau_b.astype(numpy.float64)) | |
| - for i in range(self.nb0): | |
| + for i in numpy.arange(self.nb0): | |
| fh.write(self.bonds[i,0].astype(numpy.uint32)) | |
| fh.write(self.bonds[i,1].astype(numpy.uint32)) | |
| fh.write(self.bonds_delta_n.astype(numpy.float64)) | |
| t@@ -1204,10 +1330,12 @@ class sim: | |
| fh.write(self.bonds_omega_t.astype(numpy.float64)) | |
| if self.fluid: | |
| + | |
| + fh.write(self.cfd_solver.astype(numpy.int32)) | |
| fh.write(self.mu.astype(numpy.float64)) | |
| - for z in range(self.num[2]): | |
| - for y in range(self.num[1]): | |
| - for x in range(self.num[0]): | |
| + for z in numpy.arange(self.num[2]): | |
| + for y in numpy.arange(self.num[1]): | |
| + for x in numpy.arange(self.num[0]): | |
| fh.write(self.v_f[x,y,z,0].astype(numpy.float64)) | |
| fh.write(self.v_f[x,y,z,1].astype(numpy.float64)) | |
| fh.write(self.v_f[x,y,z,2].astype(numpy.float64)) | |
| t@@ -1225,25 +1353,44 @@ class sim: | |
| fh.write(self.free_slip_bot.astype(numpy.int32)) | |
| fh.write(self.free_slip_top.astype(numpy.int32)) | |
| - fh.write(self.gamma.astype(numpy.float64)) | |
| - fh.write(self.theta.astype(numpy.float64)) | |
| - fh.write(self.beta.astype(numpy.float64)) | |
| - fh.write(self.tolerance.astype(numpy.float64)) | |
| - fh.write(self.maxiter.astype(numpy.uint32)) | |
| - fh.write(self.ndem.astype(numpy.uint32)) | |
| - | |
| - fh.write(self.c_phi.astype(numpy.float64)) | |
| - fh.write(self.c_v.astype(numpy.float64)) | |
| - fh.write(self.dt_dem_fac.astype(numpy.float64)) | |
| - | |
| - for i in numpy.arange(self.np): | |
| - fh.write(self.f_d[i,:].astype(numpy.float64)) | |
| - for i in numpy.arange(self.np): | |
| - fh.write(self.f_p[i,:].astype(numpy.float64)) | |
| - for i in numpy.arange(self.np): | |
| - fh.write(self.f_v[i,:].astype(numpy.float64)) | |
| - for i in numpy.arange(self.np): | |
| - fh.write(self.f_sum[i,:].astype(numpy.float64)) | |
| + # Navier Stokes | |
| + if self.cfd_solver[0] == 0: | |
| + fh.write(self.gamma.astype(numpy.float64)) | |
| + fh.write(self.theta.astype(numpy.float64)) | |
| + fh.write(self.beta.astype(numpy.float64)) | |
| + fh.write(self.tolerance.astype(numpy.float64)) | |
| + fh.write(self.maxiter.astype(numpy.uint32)) | |
| + fh.write(self.ndem.astype(numpy.uint32)) | |
| + | |
| + fh.write(self.c_phi.astype(numpy.float64)) | |
| + fh.write(self.c_v.astype(numpy.float64)) | |
| + fh.write(self.dt_dem_fac.astype(numpy.float64)) | |
| + | |
| + for i in numpy.arange(self.np): | |
| + fh.write(self.f_d[i,:].astype(numpy.float64)) | |
| + for i in numpy.arange(self.np): | |
| + fh.write(self.f_p[i,:].astype(numpy.float64)) | |
| + for i in numpy.arange(self.np): | |
| + fh.write(self.f_v[i,:].astype(numpy.float64)) | |
| + for i in numpy.arange(self.np): | |
| + fh.write(self.f_sum[i,:].astype(numpy.float64)) | |
| + | |
| + # Darcy | |
| + elif self.cfd_solver[0] == 1: | |
| + | |
| + fh.write(self.tolerance.astype(numpy.float64)) | |
| + fh.write(self.maxiter.astype(numpy.uint32)) | |
| + fh.write(self.ndem.astype(numpy.uint32)) | |
| + fh.write(self.c_phi.astype(numpy.float64)) | |
| + for i in numpy.arange(self.np): | |
| + fh.write(self.f_p[i,:].astype(numpy.float64)) | |
| + fh.write(self.beta_f.astype(numpy.float64)) | |
| + fh.write(self.k_c.astype(numpy.float64)) | |
| + | |
| + else: | |
| + raise Exception('Value of cfd_solver not understood (' + \ | |
| + str(self.cfd_solver[0]) + ')') | |
| + | |
| fh.write(self.color.astype(numpy.int32)) | |
| t@@ -1385,7 +1532,7 @@ class sim: | |
| + 'NumberOfComponents="3" format="ascii">\n') | |
| fh.write(' ') | |
| for i in range(self.np): | |
| - fh.write('%f %f %f' % (self.x[i,0], self.x[i,1], self.x[i,2])) | |
| + fh.write('%f %f %f ' % (self.x[i,0], self.x[i,1], self.x[i,2])) | |
| fh.write('\n') | |
| fh.write(' </DataArray>\n') | |
| fh.write(' </Points>\n') | |
| t@@ -1423,27 +1570,30 @@ class sim: | |
| fh.write(' </DataArray>\n') | |
| if self.fluid: | |
| - # Fluid interaction force | |
| - fh.write(' <DataArray type="Float32" ' | |
| - + 'Name="Fluid force total" ' | |
| - + 'NumberOfComponents="3" format="ascii">\n') | |
| - fh.write(' ') | |
| - for i in range(self.np): | |
| - fh.write('%f %f %f ' % \ | |
| - (self.f_sum[i,0], self.f_sum[i,1], self.f_sum[i,2]… | |
| - fh.write('\n') | |
| - fh.write(' </DataArray>\n') | |
| - # Fluid drag force | |
| - fh.write(' <DataArray type="Float32" ' | |
| - + 'Name="Fluid drag force" ' | |
| - + 'NumberOfComponents="3" format="ascii">\n') | |
| - fh.write(' ') | |
| - for i in range(self.np): | |
| - fh.write('%f %f %f ' % \ | |
| - (self.f_d[i,0], self.f_d[i,1], self.f_d[i,2])) | |
| - fh.write('\n') | |
| - fh.write(' </DataArray>\n') | |
| + if self.cfd_solver == 0: # Navier Stokes | |
| + # Fluid interaction force | |
| + fh.write(' <DataArray type="Float32" ' | |
| + + 'Name="Fluid force total" ' | |
| + + 'NumberOfComponents="3" format="ascii">\n') | |
| + fh.write(' ') | |
| + for i in range(self.np): | |
| + fh.write('%f %f %f ' % \ | |
| + (self.f_sum[i,0], self.f_sum[i,1], \ | |
| + self.f_sum[i,2])) | |
| + fh.write('\n') | |
| + fh.write(' </DataArray>\n') | |
| + | |
| + # Fluid drag force | |
| + fh.write(' <DataArray type="Float32" ' | |
| + + 'Name="Fluid drag force" ' | |
| + + 'NumberOfComponents="3" format="ascii">\n') | |
| + fh.write(' ') | |
| + for i in range(self.np): | |
| + fh.write('%f %f %f ' % \ | |
| + (self.f_d[i,0], self.f_d[i,1], self.f_d[i,2])) | |
| + fh.write('\n') | |
| + fh.write(' </DataArray>\n') | |
| # Fluid pressure force | |
| fh.write(' <DataArray type="Float32" ' | |
| t@@ -1456,16 +1606,17 @@ class sim: | |
| fh.write('\n') | |
| fh.write(' </DataArray>\n') | |
| - # Fluid viscous force | |
| - fh.write(' <DataArray type="Float32" ' | |
| - + 'Name="Fluid viscous force" ' | |
| - + 'NumberOfComponents="3" format="ascii">\n') | |
| - fh.write(' ') | |
| - for i in range(self.np): | |
| - fh.write('%f %f %f ' % \ | |
| - (self.f_v[i,0], self.f_v[i,1], self.f_v[i,2])) | |
| - fh.write('\n') | |
| - fh.write(' </DataArray>\n') | |
| + if self.cfd_solver == 0: # Navier Stokes | |
| + # Fluid viscous force | |
| + fh.write(' <DataArray type="Float32" ' | |
| + + 'Name="Fluid viscous force" ' | |
| + + 'NumberOfComponents="3" format="ascii">\n') | |
| + fh.write(' ') | |
| + for i in range(self.np): | |
| + fh.write('%f %f %f ' % \ | |
| + (self.f_v[i,0], self.f_v[i,1], self.f_v[i,2])) | |
| + fh.write('\n') | |
| + fh.write(' </DataArray>\n') | |
| # fixvel | |
| fh.write(' <DataArray type="Float32" Name="FixedVel" ' | |
| t@@ -1709,6 +1860,17 @@ class sim: | |
| else: | |
| Re.SetNumberOfTuples(grid.GetNumberOfPoints()) | |
| + # Find permeabilities if the Darcy solver is used | |
| + if self.cfd_solver[0] == 1: | |
| + self.findPermeabilities() | |
| + k = vtk.vtkDoubleArray() | |
| + k.SetName("Permeability [m*m]") | |
| + k.SetNumberOfComponents(1) | |
| + if cell_centered: | |
| + k.SetNumberOfTuples(grid.GetNumberOfCells()) | |
| + else: | |
| + k.SetNumberOfTuples(grid.GetNumberOfPoints()) | |
| + | |
| # insert values | |
| for z in range(self.num[2]): | |
| for y in range(self.num[1]): | |
| t@@ -1719,6 +1881,8 @@ class sim: | |
| poros.SetValue(idx, self.phi[x,y,z]) | |
| dporos.SetValue(idx, self.dphi[x,y,z]) | |
| Re.SetValue(idx, self.Re[x,y,z]) | |
| + if self.cfd_solver[0] == 1: | |
| + k.SetValue(idx, self.k[x,y,z]) | |
| # add pres array to grid | |
| if cell_centered: | |
| t@@ -1727,12 +1891,16 @@ class sim: | |
| grid.GetCellData().AddArray(poros) | |
| grid.GetCellData().AddArray(dporos) | |
| grid.GetCellData().AddArray(Re) | |
| + if self.cfd_solver[0] == 1: | |
| + grid.GetCellData().AddArray(k) | |
| else: | |
| grid.GetPointData().AddArray(pres) | |
| grid.GetPointData().AddArray(vel) | |
| grid.GetPointData().AddArray(poros) | |
| grid.GetPointData().AddArray(dporos) | |
| grid.GetPointData().AddArray(Re) | |
| + if self.cfd_solver[0] == 1: | |
| + grid.GetPointData().AddArray(k) | |
| # write VTK XML image data file | |
| writer = vtk.vtkXMLImageDataWriter() | |
| t@@ -1804,9 +1972,9 @@ class sim: | |
| self.readbin(fn, verbose) | |
| def generateRadii(self, psd = 'logn', | |
| - radius_mean = 440e-6, | |
| - radius_variance = 8.8e-9, | |
| - histogram = True): | |
| + mean = 440e-6, | |
| + variance = 8.8e-9, | |
| + histogram = False): | |
| ''' | |
| Draw random particle radii from a selected probability distribution. | |
| The larger the variance of radii is, the slower the computations will | |
| t@@ -1820,26 +1988,29 @@ class sim: | |
| ``logn``, which is a log-normal probability distribution, suitable | |
| for approximating well-sorted, coarse sediments. The other possible | |
| value is ``uni``, which is a uniform distribution from | |
| - ``radius_mean-radius_variance`` to ``radius_mean+radius_variance``. | |
| + ``mean - variance`` to ``mean + variance``. | |
| :type psd: str | |
| - :param radius_mean: The mean radius [m] (default = 440e-6 m) | |
| - :type radius_mean: float | |
| - :param radius_variance: The variance in the probability distribution | |
| + :param mean: The mean radius [m] (default = 440e-6 m) | |
| + :type mean: float | |
| + :param variance: The variance in the probability distribution | |
| [m]. | |
| - :type radius_variance: float | |
| + :type variance: float | |
| See also: :func:`generateBimodalRadii()`. | |
| ''' | |
| if psd == 'logn': # Log-normal probability distribution | |
| mu = math.log(\ | |
| - (radius_mean**2)/math.sqrt(radius_variance+radius_mean**2)) | |
| - sigma = math.sqrt(math.log(radius_variance/(radius_mean**2)+1)) | |
| + (mean**2)/math.sqrt(variance+mean**2)) | |
| + sigma = math.sqrt(math.log(variance/(mean**2)+1)) | |
| self.radius = numpy.random.lognormal(mu, sigma, self.np) | |
| - if psd == 'uni': # Uniform distribution | |
| - radius_min = radius_mean - radius_variance | |
| - radius_max = radius_mean + radius_variance | |
| + elif psd == 'uni': # Uniform distribution | |
| + radius_min = mean - variance | |
| + radius_max = mean + variance | |
| self.radius = numpy.random.uniform(radius_min, radius_max, self.np) | |
| + else: | |
| + raise Exception('Particle size distribution type not understood (' | |
| + + str(psd) + '). Valid values are \'uni\' or \'logn\'') | |
| # Show radii as histogram | |
| if histogram: | |
| t@@ -2560,7 +2731,7 @@ class sim: | |
| self.mu_ws[0] = 0.0 | |
| self.mu_wd[0] = 0.0 | |
| - def largestFluidTimeStep(self, safety=0.01): | |
| + def largestFluidTimeStep(self, safety=0.5): | |
| ''' | |
| Finds and returns the largest time step in the fluid phase by von | |
| Neumann and Courant-Friedrichs-Lewy analysis given the current | |
| t@@ -2579,28 +2750,107 @@ class sim: | |
| :return type: float | |
| ''' | |
| - dx_min = numpy.min(self.L/self.num) | |
| - dt_min_von_neumann = 0.5*dx_min**2/self.mu[0] | |
| + if self.fluid: | |
| - # Normalized velocities | |
| - v_norm = numpy.empty(self.num[0]*self.num[1]*self.num[2]) | |
| - idx = 0 | |
| - for x in numpy.arange(self.num[0]): | |
| - for y in numpy.arange(self.num[1]): | |
| - for z in numpy.arange(self.num[2]): | |
| - v_norm[idx] = numpy.sqrt(self.v_f[x,y,z,:].dot(\ | |
| - self.v_f[x,y,z,:])) | |
| - idx += 1 | |
| + # Navier-Stokes | |
| + if self.cfd_solver[0] == 0: | |
| + dx_min = numpy.min(self.L/self.num) | |
| + dt_min_von_neumann = 0.5*dx_min**2/self.mu[0] | |
| - # Advection term. This term has to be reevaluated during the | |
| - # computations, as the fluid velocity changes. | |
| - v_max = numpy.amax(v_norm) | |
| - if v_max == 0: | |
| - v_max = 1.0e-7 | |
| + # Normalized velocities | |
| + v_norm = numpy.empty(self.num[0]*self.num[1]*self.num[2]) | |
| + idx = 0 | |
| + for x in numpy.arange(self.num[0]): | |
| + for y in numpy.arange(self.num[1]): | |
| + for z in numpy.arange(self.num[2]): | |
| + v_norm[idx] = numpy.sqrt(self.v_f[x,y,z,:].dot(\ | |
| + self.v_f[x,y,z,:])) | |
| + idx += 1 | |
| + | |
| + # Advection term. This term has to be reevaluated during the | |
| + # computations, as the fluid velocity changes. | |
| + v_max = numpy.amax(v_norm) | |
| + if v_max == 0: | |
| + v_max = 1.0e-7 | |
| + | |
| + dt_min_cfl = dx_min/v_max | |
| + | |
| + return numpy.min([dt_min_von_neumann, dt_min_cfl])*safety | |
| + | |
| + # Darcy | |
| + elif self.cfd_solver[0] == 1: | |
| + | |
| + # Determine on the base of the diffusivity coefficient | |
| + # components | |
| + self.cellSize() | |
| + self.hydraulicPermeability() | |
| + alpha_max = numpy.max(self.k/(self.beta_f*0.9*self.mu)) | |
| + return safety * 1.0/(2.0*alpha_max)*1.0/( | |
| + 1.0/(self.dx[0]**2) + \ | |
| + 1.0/(self.dx[1]**2) + \ | |
| + 1.0/(self.dx[2]**2)) | |
| + | |
| + ''' | |
| + # Determine value on the base of the hydraulic conductivity | |
| + g = numpy.max(numpy.abs(self.g)) | |
| + | |
| + # Bulk modulus of fluid | |
| + K = 1.0/self.beta_f[0] | |
| + | |
| + self.hydraulicDiffusivity() | |
| + self.cellSize() | |
| + | |
| + return safety * 1.0/(2.0*self.D)*1.0/( \ | |
| + 1.0/(self.dx[0]**2) + \ | |
| + 1.0/(self.dx[1]**2) + \ | |
| + 1.0/(self.dx[2]**2)) | |
| + ''' | |
| - dt_min_cfl = dx_min/v_max | |
| + def cellSize(self): | |
| + ''' | |
| + Calculate the particle sorting (and fluid) cell dimensions. | |
| + These values are stored in `self.dx` and are NOT returned. | |
| + ''' | |
| + self.dx = self.L/self.num | |
| + | |
| + def hydraulicConductivity(self): | |
| + ''' | |
| + Determine the hydraulic conductivity (K) [m/s] from the permeability | |
| + prefactor. This value is stored in `self.K_c`. This function only works | |
| + for the Darcy solver (`self.cfd_solver == 1`) | |
| + ''' | |
| + if self.cfd_solver[0] == 1: | |
| + self.K_c = self.k_c*self.rho_f*g/self.mu | |
| + else: | |
| + raise Exception('This function only works for the Darcy solver') | |
| + | |
| + def hydraulicPermeability(self, phi_min = 0.1, phi_max = 0.9): | |
| + ''' | |
| + Determine the hydraulic permeability (k) [m*m] from the Kozeny-Carman | |
| + relationship, using the permeability prefactor (`self.k_c`), and the | |
| + range of valid porosities set in `src/darcy.cuh`, by default in the | |
| + range 0.1 to 0.9. | |
| + | |
| + This function is only valid for the Darcy solver (`self.cfd_solver == | |
| + 1`). | |
| + ''' | |
| + if self.cfd_solver[0] == 1: | |
| + self.findPermeabilities() | |
| + else: | |
| + raise Exception('This function only works for the Darcy solver') | |
| - return numpy.min([dt_min_von_neumann, dt_min_cfl])*safety | |
| + def hydraulicDiffusivity(self): | |
| + ''' | |
| + Determine the hydraulic diffusivity (D) [m*m/s]. The result is stored … | |
| + `self.D`. This function only works for the Darcy solver | |
| + (`self.cfd_solver[0] == 1`) | |
| + ''' | |
| + if self.cfd_solver[0] == 1: | |
| + self.hydraulicConductivity() | |
| + phi_bar = numpy.mean(self.phi) | |
| + alpha = self.K_c/(self.rho_f*g*(self.k_n[0] + phi_bar*K)) | |
| + else: | |
| + raise Exception('This function only works for the Darcy solver') | |
| def initTemporal(self, total, | |
| current = 0.0, | |
| t@@ -2707,7 +2957,7 @@ class sim: | |
| self.initFluid() | |
| def initFluid(self, mu = 8.9e-4, rho = 1.0e3, p = 1.0, | |
| - hydrostatic = True): | |
| + hydrostatic = False, cfd_solver = 0): | |
| ''' | |
| Initialize the fluid arrays and the fluid viscosity. The default value | |
| of ``mu`` equals the dynamic viscosity of water at 25 degrees Celcius. | |
| t@@ -2725,6 +2975,9 @@ class sim: | |
| created if a gravitational acceleration along :math:`z` previously | |
| has been specified | |
| :type hydrostatic: bool | |
| + :param cfd_solver: Solver to use for the computational fluid dynamics. | |
| + Accepted values: 0 (Navier Stokes, default) and 1 (Darcy). | |
| + :type cfd_solver: int | |
| ''' | |
| self.fluid = True | |
| self.mu = numpy.ones(1, dtype=numpy.float64) * mu | |
| t@@ -2771,21 +3024,41 @@ class sim: | |
| self.free_slip_bot = numpy.ones(1, dtype=numpy.int32) | |
| self.free_slip_top = numpy.ones(1, dtype=numpy.int32) | |
| - self.gamma = numpy.array(0.0) | |
| - self.theta = numpy.array(1.0) | |
| - self.beta = numpy.array(0.0) | |
| - self.tolerance = numpy.array(1.0e-8) | |
| - self.maxiter = numpy.array(1e4) | |
| - self.ndem = numpy.array(1) | |
| + # Fluid solver type | |
| + # 0: Navier Stokes (fluid with inertia) | |
| + # 1: Stokes-Darcy (fluid without inertia) | |
| + self.cfd_solver = numpy.ones(1)*cfd_solver | |
| + | |
| + if self.cfd_solver[0] == 0: | |
| + self.gamma = numpy.array(0.0) | |
| + self.theta = numpy.array(1.0) | |
| + self.beta = numpy.array(0.0) | |
| + self.tolerance = numpy.array(1.0e-3) | |
| + self.maxiter = numpy.array(1e4) | |
| + self.ndem = numpy.array(1) | |
| + | |
| + self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| + self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| + self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64) | |
| + | |
| + self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| - self.c_v = numpy.ones(1, dtype=numpy.float64) | |
| - self.dt_dem_fac = numpy.ones(1, dtype=numpy.float64) | |
| + elif self.cfd_solver[0] == 1: | |
| + self.tolerance = numpy.array(1.0e-3) | |
| + self.maxiter = numpy.array(1e4) | |
| + self.ndem = numpy.array(1) | |
| + self.c_phi = numpy.ones(1, dtype=numpy.float64) | |
| + self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.beta_f = numpy.ones(1, dtype=numpy.float64)*4.5e-10 | |
| + self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + self.k_c = numpy.ones(1, dtype=numpy.float64)*4.6e-10 | |
| - self.f_d = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_p = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_v = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| - self.f_sum = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
| + else: | |
| + raise Exception('Value of cfd_solver not understood (' + \ | |
| + str(self.cfd_solver[0]) + ')') | |
| def setFluidBottomNoFlow(self): | |
| ''' | |
| t@@ -2847,6 +3120,18 @@ class sim: | |
| ''' | |
| self.bc_top[0] = 0 | |
| + def findPermeabilities(self): | |
| + ''' | |
| + Calculates the hydrological permeabilities from the Kozeny-Carman | |
| + relationship. These values are only relevant when the Darcy solver is | |
| + used (`self.cfd_solver = 1`). The permeability pre-factor `self.k_c` | |
| + and the assemblage porosities must be set beforehand. The former values | |
| + are set if a file from the `output/` folder is read using | |
| + `self.readbin`. | |
| + ''' | |
| + phi = numpy.clip(self.phi, 0.1, 0.9) | |
| + self.k = self.k_c * phi**3/(1.0 - phi**2) | |
| + | |
| def defaultParams(self, | |
| mu_s = 0.5, | |
| mu_d = 0.5, | |
| t@@ -2960,6 +3245,26 @@ class sim: | |
| # Debonding distance | |
| self.db[0] = (1.0 + theta/2.0) * self.V_b**(1.0/3.0) | |
| + def setStiffnessNormal(self, k_n): | |
| + ''' | |
| + Set the elastic stiffness (`k_n`) in the normal direction of the | |
| + contact. | |
| + | |
| + :param k_n: The elastic stiffness coefficient [N/m] | |
| + :type k_n: float | |
| + ''' | |
| + self.k_n[0] = k_n | |
| + | |
| + def setStiffnessTangential(self, k_t): | |
| + ''' | |
| + Set the elastic stiffness (`k_t`) in the tangential direction of the | |
| + contact. | |
| + | |
| + :param k_t: The elastic stiffness coefficient [N/m] | |
| + :type k_t: float | |
| + ''' | |
| + self.k_t[0] = k_t | |
| + | |
| def setDampingNormal(self, gamma, over_damping=False): | |
| ''' | |
| Set the dampening coefficient (gamma) in the normal direction of the | |
| t@@ -3324,18 +3629,27 @@ class sim: | |
| def bulkPorosity(self): | |
| ''' | |
| - Calculates the bulk porosity | |
| + Calculates the bulk porosity in the smallest axis-parallel cube. | |
| :returns: The bulk porosity, in [0:1] | |
| :return type: float | |
| ''' | |
| - if (self.nw == 0): | |
| - V_total = self.L[0] * self.L[1] * self.L[2] | |
| - elif (self.nw == 1): | |
| - V_total = self.L[0] * self.L[1] * self.w_x[0] | |
| - if (V_total <= 0.0): | |
| - raise Exception("Could not determine total volume") | |
| + min_x = numpy.min(self.x[:,0] - self.radius) | |
| + min_y = numpy.min(self.x[:,1] - self.radius) | |
| + min_z = numpy.min(self.x[:,2] - self.radius) | |
| + max_x = numpy.max(self.x[:,0] + self.radius) | |
| + max_y = numpy.max(self.x[:,1] + self.radius) | |
| + max_z = numpy.max(self.x[:,2] + self.radius) | |
| + | |
| + #if (self.nw == 0): | |
| + #V_total = self.L[0] * self.L[1] * self.L[2] | |
| + #elif (self.nw == 1): | |
| + #V_total = self.L[0] * self.L[1] * self.w_x[0] | |
| + #if (V_total <= 0.0): | |
| + #raise Exception("Could not determine total volume") | |
| + | |
| + V_total = (max_x - min_x)*(max_y - min_y)*(max_z - min_z) | |
| # Find the volume of solids | |
| V_solid = numpy.sum(V_sphere(self.radius)) | |
| t@@ -4677,7 +4991,7 @@ class sim: | |
| plt.ylabel('Jacobi iterations') | |
| plt.plot(conv[:,0], conv[:,1]) | |
| plt.grid() | |
| - plt.savefig('../output/' + self.sid + '-conv.' + graphics_format) | |
| + plt.savefig(self.sid + '-conv.' + graphics_format) | |
| plt.clf() | |
| plt.close(fig) | |
| t@@ -4814,7 +5128,7 @@ class sim: | |
| the required value of the maximum normalized residual for the fluid | |
| solver. | |
| - The default and recommended value is 1.0e-8. | |
| + The default and recommended value is 1.0e-3. | |
| :param tolerance: The tolerance criteria for the maximal normalized | |
| residual | |
| diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt | |
| t@@ -23,13 +23,13 @@ ENDIF (GPU_GENERATION EQUAL 1) | |
| # Rule to build executable program | |
| CUDA_ADD_EXECUTABLE(../sphere | |
| main.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp | |
| - navierstokes.cpp) | |
| + navierstokes.cpp darcy.cpp) | |
| CUDA_ADD_EXECUTABLE(../porosity | |
| porosity.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp | |
| - navierstokes.cpp) | |
| + navierstokes.cpp darcy.cpp) | |
| CUDA_ADD_EXECUTABLE(../forcechains | |
| forcechains.cpp file_io.cpp sphere.cpp device.cu utility.cu utility.cpp | |
| - navierstokes.cpp) | |
| + navierstokes.cpp darcy.cpp) | |
| #ADD_EXECUTABLE(unittests boost-unit-tests.cpp sphere.cpp) | |
| #TARGET_LINK_LIBRARIES(unittests | |
| diff --git a/src/constants.h b/src/constants.h | |
| t@@ -16,7 +16,7 @@ const Float PI = 3.14159265358979; | |
| const unsigned int ND = 3; | |
| // Define source code version | |
| -const double VERSION = 1.07; | |
| +const double VERSION = 2.00; | |
| // Max. number of contacts per particle | |
| //const int NC = 16; | |
| diff --git a/src/darcy.cpp b/src/darcy.cpp | |
| t@@ -0,0 +1,311 @@ | |
| +#include <iostream> | |
| +#include <cstdio> | |
| +#include <cstdlib> | |
| +#include <string> | |
| +#include <vector> | |
| + | |
| +#include "typedefs.h" | |
| +#include "datatypes.h" | |
| +#include "constants.h" | |
| +#include "sphere.h" | |
| +#include "utility.h" | |
| + | |
| +// 1: Enable color output in array printing functions, 0: Disable | |
| +const int color_output = 0; | |
| + | |
| +// Initialize memory | |
| +void DEM::initDarcyMem() | |
| +{ | |
| + // Number of cells | |
| + darcy.nx = grid.num[0]; | |
| + darcy.ny = grid.num[1]; | |
| + darcy.nz = grid.num[2]; | |
| + unsigned int ncells = darcyCells(); | |
| + //unsigned int ncells_st = darcyCellsVelocity(); | |
| + | |
| + darcy.p = new Float[ncells]; // hydraulic pressure | |
| + darcy.v = new Float3[ncells]; // hydraulic velocity | |
| + darcy.phi = new Float[ncells]; // porosity | |
| + darcy.dphi = new Float[ncells]; // porosity change | |
| + darcy.norm = new Float[ncells]; // normalized residual of epsilon | |
| + darcy.f_p = new Float4[np]; // pressure force on particles | |
| + darcy.k = new Float[ncells]; // hydraulic pressure | |
| +} | |
| + | |
| +unsigned int DEM::darcyCells() | |
| +{ | |
| + //return darcy.nx*darcy.ny*darcy.nz; // without ghost nodes | |
| + return (darcy.nx+2)*(darcy.ny+2)*(darcy.nz+2); // with ghost nodes | |
| +} | |
| + | |
| +// Returns the number of velocity nodes in a congruent padded grid. There are | |
| +// velocity nodes between the boundary points and the pressure ghost nodes, but | |
| +// not on the outer side of the ghost nodes | |
| +unsigned int DEM::darcyCellsVelocity() | |
| +{ | |
| + // Congruent padding for velocity grids. See "Cohen and Molemaker 'A fast | |
| + // double precision CFD code using CUDA'" for details | |
| + //return (darcy.nx+1)*(darcy.ny+1)*(darcy.nz+1); // without ghost nodes | |
| + return (darcy.nx+3)*(darcy.ny+3)*(darcy.nz+3); // with ghost nodes | |
| +} | |
| + | |
| +// Free memory | |
| +void DEM::freeDarcyMem() | |
| +{ | |
| + delete[] darcy.p; | |
| + delete[] darcy.v; | |
| + delete[] darcy.phi; | |
| + delete[] darcy.dphi; | |
| + delete[] darcy.norm; | |
| + delete[] darcy.f_p; | |
| + delete[] darcy.k; | |
| +} | |
| + | |
| +// 3D index to 1D index | |
| +unsigned int DEM::d_idx( | |
| + const int x, | |
| + const int y, | |
| + const int z) | |
| +{ | |
| + // without ghost nodes | |
| + //return x + d.nx*y + d.nx*d.ny*z; | |
| + | |
| + // with ghost nodes | |
| + // the ghost nodes are placed at x,y,z = -1 and WIDTH | |
| + return (x+1) + (darcy.nx+2)*(y+1) + (darcy.nx+2)*(darcy.ny+2)*(z+1); | |
| +} | |
| + | |
| +// 3D index to 1D index of cell-face velocity nodes. The cell-face velocities | |
| +// are placed at x = [0;nx], y = [0;ny], z = [0;nz]. | |
| +// The coordinate x,y,z corresponds to the lowest corner of cell(x,y,z). | |
| +unsigned int DEM::d_vidx( | |
| + const int x, | |
| + const int y, | |
| + const int z) | |
| +{ | |
| + //return x + (darcy.nx+1)*y + (darcy.nx+1)*(darcy.ny+1)*z; // without ghos… | |
| + return (x+1) + (darcy.nx+3)*(y+1) + (darcy.nx+3)*(darcy.ny+3)*(z+1); // wi… | |
| +} | |
| + | |
| +Float DEM::largestDarcyPermeability() | |
| +{ | |
| + Float k_max = 0.0; | |
| + for (unsigned int z=0; z<grid.num[2]; z++) | |
| + for (unsigned int y=0; y<grid.num[1]; y++) | |
| + for (unsigned int x=0; x<grid.num[0]; x++) | |
| + if (darcy.k[d_idx(x,y,z)] > k_max) | |
| + k_max = darcy.k[d_idx(x,y,z)]; | |
| + return k_max; | |
| +} | |
| + | |
| +Float DEM::smallestDarcyPorosity() | |
| +{ | |
| + Float phi_min = 10.0; | |
| + for (unsigned int z=0; z<grid.num[2]; z++) | |
| + for (unsigned int y=0; y<grid.num[1]; y++) | |
| + for (unsigned int x=0; x<grid.num[0]; x++) | |
| + if (darcy.phi[d_idx(x,y,z)] < phi_min) | |
| + phi_min = darcy.phi[d_idx(x,y,z)]; | |
| + return phi_min; | |
| +} | |
| + | |
| +// Determine if the FTCS (forward time, central space) solution of the Navier | |
| +// Stokes equations is unstable | |
| +void DEM::checkDarcyStability() | |
| +{ | |
| + // Cell dimensions | |
| + const Float dx = grid.L[0]/grid.num[0]; | |
| + const Float dy = grid.L[1]/grid.num[1]; | |
| + const Float dz = grid.L[2]/grid.num[2]; | |
| + | |
| + const Float alpha_max = largestDarcyPermeability() | |
| + /(darcy.beta_f*smallestDarcyPorosity()*darcy.mu); | |
| + | |
| + // Check the diffusion term using von Neumann stability analysis | |
| + //if (darcy.mu*time.dt/(dmin*dmin) > 0.5) { | |
| + if (time.dt >= 1.0/(2.0*alpha_max) * | |
| + 1.0/(1.0/(dx*dx) + 1.0/(dy*dy) + 1.0/(dz*dz))) { | |
| + std::cerr << "Error: The time step is too large to ensure stability in… | |
| + "the diffusive term of the fluid momentum equation.\n" | |
| + "Increase the viscosity, decrease the time step, and/or increase " | |
| + "the fluid grid cell size." << std::endl; | |
| + //exit(1); | |
| + } | |
| +} | |
| + | |
| +// Print array values to file stream (stdout, stderr, other file) | |
| +void DEM::printDarcyArray(FILE* stream, Float* arr) | |
| +{ | |
| + int x, y, z; | |
| + | |
| + // show ghost nodes | |
| + for (z = darcy.nz; z >= -1; z--) { // top to bottom | |
| + | |
| + fprintf(stream, "z = %d\n", z); | |
| + | |
| + for (y=-1; y<=darcy.ny; y++) { | |
| + for (x=-1; x<=darcy.nx; x++) { | |
| + | |
| + if (x > -1 && x < darcy.nx && | |
| + y > -1 && y < darcy.ny && | |
| + z > -1 && z < darcy.nz) { | |
| + fprintf(stream, "%f\t", arr[d_idx(x,y,z)]); | |
| + } else { // ghost node | |
| + if (color_output) { | |
| + fprintf(stream, "\x1b[30;1m%f\x1b[0m\t", | |
| + arr[d_idx(x,y,z)]); | |
| + } else { | |
| + fprintf(stream, "%f\t", arr[d_idx(x,y,z)]); | |
| + } | |
| + } | |
| + } | |
| + fprintf(stream, "\n"); | |
| + } | |
| + fprintf(stream, "\n"); | |
| + } | |
| +} | |
| + | |
| +// Overload printDarcyArray to add optional description | |
| +void DEM::printDarcyArray(FILE* stream, Float* arr, std::string desc) | |
| +{ | |
| + std::cout << "\n" << desc << ":\n"; | |
| + printDarcyArray(stream, arr); | |
| +} | |
| + | |
| +// Print array values to file stream (stdout, stderr, other file) | |
| +void DEM::printDarcyArray(FILE* stream, Float3* arr) | |
| +{ | |
| + int x, y, z; | |
| + for (z=0; z<darcy.nz; z++) { | |
| + for (y=0; y<darcy.ny; y++) { | |
| + for (x=0; x<darcy.nx; x++) { | |
| + fprintf(stream, "%f,%f,%f\t", | |
| + arr[d_idx(x,y,z)].x, | |
| + arr[d_idx(x,y,z)].y, | |
| + arr[d_idx(x,y,z)].z); | |
| + } | |
| + fprintf(stream, "\n"); | |
| + } | |
| + fprintf(stream, "\n"); | |
| + } | |
| +} | |
| + | |
| +// Overload printDarcyArray to add optional description | |
| +void DEM::printDarcyArray(FILE* stream, Float3* arr, std::string desc) | |
| +{ | |
| + std::cout << "\n" << desc << ":\n"; | |
| + printDarcyArray(stream, arr); | |
| +} | |
| + | |
| +// Returns the average value of the normalized residuals | |
| +double DEM::avgNormResDarcy() | |
| +{ | |
| + double norm_res_sum, norm_res; | |
| + | |
| + // do not consider the values of the ghost nodes | |
| + for (int z=0; z<grid.num[2]; ++z) { | |
| + for (int y=0; y<grid.num[1]; ++y) { | |
| + for (int x=0; x<grid.num[0]; ++x) { | |
| + norm_res = static_cast<double>(darcy.norm[d_idx(x,y,z)]); | |
| + if (norm_res != norm_res) { | |
| + std::cerr << "\nError: normalized residual is NaN (" | |
| + << norm_res << ") in cell " | |
| + << x << "," << y << "," << z << std::endl; | |
| + std::cerr << "\tt = " << time.current << ", iter = " | |
| + << int(time.current/time.dt) << std::endl; | |
| + std::cerr << "This often happens if the system has become " | |
| + "unstable." << std::endl; | |
| + exit(1); | |
| + } | |
| + norm_res_sum += norm_res; | |
| + } | |
| + } | |
| + } | |
| + return norm_res_sum/(grid.num[0]*grid.num[1]*grid.num[2]); | |
| +} | |
| + | |
| + | |
| +// Returns the average value of the normalized residuals | |
| +double DEM::maxNormResDarcy() | |
| +{ | |
| + double max_norm_res = -1.0e9; // initialized to a small number | |
| + double norm_res; | |
| + | |
| + // do not consider the values of the ghost nodes | |
| + for (int z=0; z<grid.num[2]; ++z) { | |
| + for (int y=0; y<grid.num[1]; ++y) { | |
| + for (int x=0; x<grid.num[0]; ++x) { | |
| + norm_res = fabs(static_cast<double>(darcy.norm[d_idx(x,y,z)])); | |
| + if (norm_res != norm_res) { | |
| + std::cerr << "\nError: normalized residual is NaN (" | |
| + << norm_res << ") in cell " | |
| + << x << "," << y << "," << z << std::endl; | |
| + std::cerr << "\tt = " << time.current << ", iter = " | |
| + << int(time.current/time.dt) << std::endl; | |
| + std::cerr << "This often happens if the system has become " | |
| + "unstable." << std::endl; | |
| + exit(1); | |
| + } | |
| + if (norm_res > max_norm_res) | |
| + max_norm_res = norm_res; | |
| + } | |
| + } | |
| + } | |
| + return max_norm_res; | |
| +} | |
| + | |
| +// Initialize fluid parameters | |
| +void DEM::initDarcy() | |
| +{ | |
| + // Cell size | |
| + darcy.dx = grid.L[0]/darcy.nx; | |
| + darcy.dy = grid.L[1]/darcy.ny; | |
| + darcy.dz = grid.L[2]/darcy.nz; | |
| + | |
| + if (verbose == 1) { | |
| + std::cout << " - Fluid grid dimensions: " | |
| + << darcy.nx << "*" | |
| + << darcy.ny << "*" | |
| + << darcy.nz << std::endl; | |
| + std::cout << " - Fluid grid cell size: " | |
| + << darcy.dx << "*" | |
| + << darcy.dy << "*" | |
| + << darcy.dz << std::endl; | |
| + } | |
| +} | |
| + | |
| +// Write values in scalar field to file | |
| +void DEM::writeDarcyArray(Float* array, const char* filename) | |
| +{ | |
| + FILE* file; | |
| + if ((file = fopen(filename,"w"))) { | |
| + printDarcyArray(file, array); | |
| + fclose(file); | |
| + } else { | |
| + fprintf(stderr, "Error, could not open %s.\n", filename); | |
| + } | |
| +} | |
| + | |
| +// Write values in vector field to file | |
| +void DEM::writeDarcyArray(Float3* array, const char* filename) | |
| +{ | |
| + FILE* file; | |
| + if ((file = fopen(filename,"w"))) { | |
| + printDarcyArray(file, array); | |
| + fclose(file); | |
| + } else { | |
| + fprintf(stderr, "Error, could not open %s.\n", filename); | |
| + } | |
| +} | |
| + | |
| + | |
| +// Print final heads and free memory | |
| +void DEM::endDarcy() | |
| +{ | |
| + // Write arrays to stdout/text files for debugging | |
| + //writeDarcyArray(darcy.phi, "ns_phi.txt"); | |
| + | |
| + freeDarcyMem(); | |
| +} | |
| + | |
| +// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 | |
| diff --git a/src/darcy.cuh b/src/darcy.cuh | |
| t@@ -0,0 +1,1025 @@ | |
| +// darcy.cuh | |
| +// CUDA implementation of Darcy porous flow | |
| + | |
| +#include <iostream> | |
| +#include <cuda.h> | |
| +//#include <cutil_math.h> | |
| +#include <helper_math.h> | |
| + | |
| +#include "vector_arithmetic.h" // for arbitrary precision vectors | |
| +#include "sphere.h" | |
| +#include "datatypes.h" | |
| +#include "utility.h" | |
| +#include "constants.cuh" | |
| +#include "debug.h" | |
| + | |
| +// Initialize memory | |
| +void DEM::initDarcyMemDev(void) | |
| +{ | |
| + // size of scalar field | |
| + unsigned int memSizeF = sizeof(Float)*darcyCells(); | |
| + | |
| + // size of cell-face arrays in staggered grid discretization | |
| + //unsigned int memSizeFface = sizeof(Float)*darcyCellsVelocity(); | |
| + | |
| + cudaMalloc((void**)&dev_darcy_dpdt, memSizeF); // Backwards Euler gradient | |
| + cudaMalloc((void**)&dev_darcy_p_old, memSizeF); // old pressure | |
| + cudaMalloc((void**)&dev_darcy_p, memSizeF); // hydraulic pressure | |
| + cudaMalloc((void**)&dev_darcy_p_new, memSizeF); // updated pressure | |
| + cudaMalloc((void**)&dev_darcy_v, memSizeF*3); // cell hydraulic velocity | |
| + //cudaMalloc((void**)&dev_darcy_vp_avg, memSizeF*3); // avg. particle velo… | |
| + //cudaMalloc((void**)&dev_darcy_d_avg, memSizeF); // avg. particle diameter | |
| + cudaMalloc((void**)&dev_darcy_phi, memSizeF); // cell porosity | |
| + cudaMalloc((void**)&dev_darcy_dphi, memSizeF); // cell porosity change | |
| + cudaMalloc((void**)&dev_darcy_norm, memSizeF); // normalized residual | |
| + cudaMalloc((void**)&dev_darcy_f_p, sizeof(Float4)*np); // pressure force | |
| + cudaMalloc((void**)&dev_darcy_k, memSizeF); // hydraulic permeabili… | |
| + cudaMalloc((void**)&dev_darcy_grad_k, memSizeF*3); // grad(permeability) | |
| + //cudaMalloc((void**)&dev_darcy_div_v_p, memSizeF3); // divergence(v_p) | |
| + | |
| + checkForCudaErrors("End of initDarcyMemDev"); | |
| +} | |
| + | |
| +// Free memory | |
| +void DEM::freeDarcyMemDev() | |
| +{ | |
| + cudaFree(dev_darcy_dpdt); | |
| + cudaFree(dev_darcy_p_old); | |
| + cudaFree(dev_darcy_p); | |
| + cudaFree(dev_darcy_p_new); | |
| + cudaFree(dev_darcy_v); | |
| + //cudaFree(dev_darcy_vp_avg); | |
| + //cudaFree(dev_darcy_d_avg); | |
| + cudaFree(dev_darcy_phi); | |
| + cudaFree(dev_darcy_dphi); | |
| + cudaFree(dev_darcy_norm); | |
| + cudaFree(dev_darcy_f_p); | |
| + cudaFree(dev_darcy_k); | |
| + cudaFree(dev_darcy_grad_k); | |
| + //cudaFree(dev_darcy_div_v_p); | |
| +} | |
| + | |
// Transfer to device
// Copy the host-side Darcy arrays to their device-side counterparts:
// pressure p, cell velocity v, porosity phi, porosity change dphi, and the
// per-particle pressure force f_p. The statusmsg parameter is currently
// unused because the verbose output below is commented out.
void DEM::transferDarcyToGlobalDeviceMemory(int statusmsg)
{
    checkForCudaErrors("Before attempting cudaMemcpy in "
            "transferDarcyToGlobalDeviceMemory");

    //if (verbose == 1 && statusmsg == 1)
    //std::cout << "  Transfering fluid data to the device:           ";

    // memory size for a scalar field
    // NOTE(review): unsigned int may overflow for very large grids — the
    // product sizeof(Float)*darcyCells() is assumed to fit; confirm.
    unsigned int memSizeF  = sizeof(Float)*darcyCells();

    //writeNSarray(ns.p, "ns.p.txt");

    cudaMemcpy(dev_darcy_p, darcy.p, memSizeF, cudaMemcpyHostToDevice);
    checkForCudaErrors("transferDarcytoGlobalDeviceMemory after first "
            "cudaMemcpy");
    // v is a vector field: three scalar components per cell
    cudaMemcpy(dev_darcy_v, darcy.v, memSizeF*3, cudaMemcpyHostToDevice);
    cudaMemcpy(dev_darcy_phi, darcy.phi, memSizeF, cudaMemcpyHostToDevice);
    cudaMemcpy(dev_darcy_dphi, darcy.dphi, memSizeF, cudaMemcpyHostToDevice);
    // one Float4 per particle
    cudaMemcpy(dev_darcy_f_p, darcy.f_p, sizeof(Float4)*np,
            cudaMemcpyHostToDevice);

    checkForCudaErrors("End of transferDarcyToGlobalDeviceMemory");
    //if (verbose == 1 && statusmsg == 1)
    //std::cout << "Done" << std::endl;
}
| + | |
// Transfer from device
// Copy the device-side Darcy arrays back to the host: pressure p, cell
// velocity v, porosity phi, porosity change dphi, per-particle pressure
// force f_p, and permeability k.
void DEM::transferDarcyFromGlobalDeviceMemory(int statusmsg)
{
    if (verbose == 1 && statusmsg == 1)
        std::cout << "  Transfering fluid data from the device:         ";

    // memory size for a scalar field
    unsigned int memSizeF  = sizeof(Float)*darcyCells();

    cudaMemcpy(darcy.p, dev_darcy_p, memSizeF, cudaMemcpyDeviceToHost);
    checkForCudaErrors("In transferDarcyFromGlobalDeviceMemory, dev_darcy_p", 0);
    // v is a vector field: three scalar components per cell
    cudaMemcpy(darcy.v, dev_darcy_v, memSizeF*3, cudaMemcpyDeviceToHost);
    cudaMemcpy(darcy.phi, dev_darcy_phi, memSizeF, cudaMemcpyDeviceToHost);
    cudaMemcpy(darcy.dphi, dev_darcy_dphi, memSizeF, cudaMemcpyDeviceToHost);
    cudaMemcpy(darcy.f_p, dev_darcy_f_p, sizeof(Float4)*np,
            cudaMemcpyDeviceToHost);
    cudaMemcpy(darcy.k, dev_darcy_k, memSizeF, cudaMemcpyDeviceToHost);

    checkForCudaErrors("End of transferDarcyFromGlobalDeviceMemory", 0);
    if (verbose == 1 && statusmsg == 1)
        std::cout << "Done" << std::endl;
}
| + | |
// Transfer the normalized residuals from device to host
// Used between Jacobi iterations to check for solver convergence.
void DEM::transferDarcyNormFromGlobalDeviceMemory()
{
    cudaMemcpy(darcy.norm, dev_darcy_norm, sizeof(Float)*darcyCells(),
            cudaMemcpyDeviceToHost);
    checkForCudaErrors("End of transferDarcyNormFromGlobalDeviceMemory");
}
| + | |
// Transfer the pressures from device to host
void DEM::transferDarcyPressuresFromGlobalDeviceMemory()
{
    cudaMemcpy(darcy.p, dev_darcy_p, sizeof(Float)*darcyCells(),
            cudaMemcpyDeviceToHost);
    // BUGFIX: the error label previously said
    // "transferDarcyNormFromGlobalDeviceMemory" (copy-paste), which made
    // CUDA error reports point at the wrong function.
    checkForCudaErrors("End of transferDarcyPressuresFromGlobalDeviceMemory");
}
| + | |
// Map a 3D cell position to the linear index of a scalar-field array.
// The array is padded with one layer of ghost nodes on every side, so the
// valid coordinate range is [-1; num[i]] along each axis; the offsets of
// +1 below shift the ghost layer to index 0.
__inline__ __device__ unsigned int d_idx(
    const int x, const int y, const int z)
{
    const unsigned int sx = devC_grid.num[0] + 2;  // padded x extent
    const unsigned int sy = devC_grid.num[1] + 2;  // padded y extent
    return (x+1) + sx*((y+1) + sy*(z+1));
}
| + | |
// Map a 3D node position in the staggered velocity grid to a linear array
// index. The velocity arrays carry one extra node per dimension (faces)
// plus a ghost layer on each side, giving extents of num[i]+3; the ghost
// nodes sit at x,y,z = -1 and WIDTH+1.
__inline__ __device__ unsigned int d_vidx(
    const int x, const int y, const int z)
{
    const unsigned int sx = devC_grid.num[0] + 3;  // padded x extent
    const unsigned int sy = devC_grid.num[1] + 3;  // padded y extent
    return (x+1) + sx*((y+1) + sy*(z+1));
}
| + | |
// The normalized residuals are given an initial value of 0, since the values
// at the Dirichlet boundaries aren't written during the iterations.
__global__ void setDarcyNormZero(
    Float* __restrict__ dev_darcy_norm)
{
    // position of this thread in the fluid grid
    const unsigned int ix = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int iy = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int iz = blockDim.z * blockIdx.z + threadIdx.z;

    // only threads inside the fluid grid write a value
    if (ix < devC_grid.num[0] &&
            iy < devC_grid.num[1] &&
            iz < devC_grid.num[2]) {
        __syncthreads();
        dev_darcy_norm[d_idx(ix, iy, iz)] = 0.0;
    }
}
| + | |
// Update a field in the ghost nodes from their parent cell values. The edge
// (diagonal) cells are not written since they are not read. Launch this kernel
// for all cells in the grid using
// setDarcyGhostNodes<datatype><<<.. , ..>>>( .. );
//
// The x and y boundaries are always treated as periodic. The z boundaries
// depend on bc_bot/bc_top: 0 = Dirichlet, 1 = Neumann, 2 = periodic.
 template<typename T>
__global__ void setDarcyGhostNodes(
    T* __restrict__ dev_scalarfield,  // in + out: ghost-padded field
    const int bc_bot,                 // in: lower z boundary condition
    const int bc_top)                 // in: upper z boundary condition
{
    // 3D thread index
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // Grid dimensions
    const unsigned int nx = devC_grid.num[0];
    const unsigned int ny = devC_grid.num[1];
    const unsigned int nz = devC_grid.num[2];

    // check that we are not outside the fluid grid
    if (x < nx && y < ny && z < nz) {

        const T val = dev_scalarfield[d_idx(x,y,z)];

        // NOTE(review): the writes below use idx(...) while the read above
        // uses d_idx(...). They are presumably equivalent mappings of the
        // ghost-padded grid (same +1 offset, same nx+2/ny+2 extents) —
        // confirm against the idx() definition in the Navier-Stokes solver.

        // x: periodic — copy the edge cell to the opposite ghost node
        if (x == 0)
            dev_scalarfield[idx(nx,y,z)] = val;
        if (x == nx-1)
            dev_scalarfield[idx(-1,y,z)] = val;

        // y: periodic
        if (y == 0)
            dev_scalarfield[idx(x,ny,z)] = val;
        if (y == ny-1)
            dev_scalarfield[idx(x,-1,z)] = val;

        // z: chosen by bc_bot/bc_top
        if (z == 0 && bc_bot == 0)
            dev_scalarfield[idx(x,y,-1)] = val;     // Dirichlet
        if (z == 0 && bc_bot == 1)
            dev_scalarfield[idx(x,y,-1)] = val;     // Neumann
        if (z == 0 && bc_bot == 2)
            dev_scalarfield[idx(x,y,nz)] = val;     // Periodic -z

        if (z == nz-1 && bc_top == 0)
            dev_scalarfield[idx(x,y,nz)] = val;     // Dirichlet
        // NOTE(review): nz-2 mirrors the second-to-last cell into the top
        // ghost node for the Neumann case; confirm this is intended rather
        // than nz-1 (the bottom Neumann case above uses the edge cell).
        if (z == nz-2 && bc_top == 1)
            dev_scalarfield[idx(x,y,nz)] = val;     // Neumann
        if (z == nz-1 && bc_top == 2)
            dev_scalarfield[idx(x,y,-1)] = val;     // Periodic +z
    }
}
| + | |
// Find the porosity in each cell on the base of a sphere, centered at the cell
// center.
//
// A sampling sphere of radius R (= 2 cell widths, see below) is placed at the
// cell center and the solid volume of every particle intersecting it is
// subtracted analytically: fully contained particles contribute their full
// volume, partially overlapping particles contribute the lens-shaped
// sphere-sphere intersection volume. The porosity is clamped to [0.01; 0.99],
// scaled by c_phi and stored; the change since the previous stored value is
// written to dev_darcy_dphi (forced to 0 on the first iteration). When no
// particles exist (np == 0), all cells are given phi = 0.999 and dphi = 0.
__global__ void findDarcyPorosities(
    const unsigned int* __restrict__ dev_cellStart, // in: first particle index per sorting cell
    const unsigned int* __restrict__ dev_cellEnd,   // in: one-past-last particle index per cell
    const Float4* __restrict__ dev_x_sorted,        // in: sorted particle positions, w = radius
    const unsigned int iteration,                   // in: current time step number
    const unsigned int np,                          // in: number of particles
    const Float c_phi,                              // in: porosity scaling factor
    Float* __restrict__ dev_darcy_phi,              // in + out: cell porosity
    Float* __restrict__ dev_darcy_dphi)             // out: cell porosity change
{
    // 3D thread index
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // Grid dimensions
    const unsigned int nx = devC_grid.num[0];
    const unsigned int ny = devC_grid.num[1];
    const unsigned int nz = devC_grid.num[2];

    // Cell dimensions
    const Float dx = devC_grid.L[0]/nx;
    const Float dy = devC_grid.L[1]/ny;
    const Float dz = devC_grid.L[2]/nz;

    // Cell sphere radius
    //const Float R = fmin(dx, fmin(dy,dz)) * 0.5; // diameter = cell width
    const Float R = fmin(dx, fmin(dy,dz));     // diameter = 2*cell width
    const Float cell_volume = 4.0/3.0*M_PI*R*R*R;

    Float void_volume = cell_volume;    // solid volume is subtracted below
    Float4 xr;  // particle pos. and radius

    // check that we are not outside the fluid grid
    if (x < nx && y < ny && z < nz) {

        if (np > 0) {

            // Cell sphere center position
            const Float3 X = MAKE_FLOAT3(
                    x*dx + 0.5*dx,
                    y*dy + 0.5*dy,
                    z*dz + 0.5*dz);

            Float d, r;         // center distance and particle radius
            Float phi = 1.00;   // initial value; overwritten after the search
            //Float4 v;
            unsigned int n = 0; // number of particles intersecting the sphere

            //Float3 v_avg = MAKE_FLOAT3(0.0, 0.0, 0.0);
            //Float d_avg = 0.0;

            // Read old porosity
            // NOTE(review): __syncthreads() is called inside a divergent
            // branch throughout this kernel — confirm all threads of each
            // warp take the same path for the launch configurations used.
            __syncthreads();
            Float phi_0 = dev_darcy_phi[d_idx(x,y,z)];

            // The cell 3d index
            const int3 gridPos = make_int3((int)x,(int)y,(int)z);

            // The neighbor cell 3d index
            int3 targetCell;

            // The distance modifier for particles across periodic boundaries
            Float3 dist, distmod;

            unsigned int cellID, startIdx, endIdx, i;

            // Iterate over 27 neighbor cells, R = cell width
            /*for (int z_dim=-1; z_dim<2; ++z_dim) // z-axis
                for (int y_dim=-1; y_dim<2; ++y_dim) // y-axis
                    for (int x_dim=-1; x_dim<2; ++x_dim) // x-axis*/

            // Iterate over 5*5*5 neighbor cells since R = 2*cell width
            for (int z_dim=-2; z_dim<3; ++z_dim) { // z-axis
                for (int y_dim=-2; y_dim<3; ++y_dim) { // y-axis
                    for (int x_dim=-2; x_dim<3; ++x_dim) { // x-axis

                        // Index of neighbor cell this iteration is looking at
                        targetCell = gridPos + make_int3(x_dim, y_dim, z_dim);

                        // Get distance modifier for interparticle
                        // vector, if it crosses a periodic boundary
                        distmod = MAKE_FLOAT3(0.0, 0.0, 0.0);
                        if (findDistMod(&targetCell, &distmod) != -1) {

                            // Calculate linear cell ID
                            cellID = targetCell.x
                                + targetCell.y * devC_grid.num[0]
                                + (devC_grid.num[0] * devC_grid.num[1])
                                * targetCell.z;

                            // Lowest particle index in cell
                            __syncthreads();
                            startIdx = dev_cellStart[cellID];

                            // Make sure cell is not empty
                            if (startIdx != 0xffffffff) {

                                // Highest particle index in cell
                                __syncthreads();
                                endIdx = dev_cellEnd[cellID];

                                // Iterate over cell particles
                                for (i=startIdx; i<endIdx; ++i) {

                                    // Read particle position and radius
                                    __syncthreads();
                                    xr = dev_x_sorted[i];
                                    //v = dev_vel_sorted[i];
                                    r = xr.w;

                                    // Find center distance
                                    dist = MAKE_FLOAT3(
                                            X.x - xr.x,
                                            X.y - xr.y,
                                            X.z - xr.z);
                                    dist += distmod;
                                    d = length(dist);

                                    // Lens shaped intersection: subtract the
                                    // sphere-sphere overlap volume
                                    if ((R - r) < d && d < (R + r)) {
                                        void_volume -=
                                            1.0/(12.0*d) * (
                                                    M_PI*(R + r - d)*(R + r - d)
                                                    *(d*d + 2.0*d*r - 3.0*r*r
                                                        + 2.0*d*R + 6.0*r*R
                                                        - 3.0*R*R) );
                                        //v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
                                        //d_avg += r+r;
                                        n++;
                                    }

                                    // Particle fully contained in cell sphere:
                                    // subtract its entire volume
                                    if (d <= R - r) {
                                        void_volume -= 4.0/3.0*M_PI*r*r*r;
                                        //v_avg += MAKE_FLOAT3(v.x, v.y, v.z);
                                        //d_avg += r+r;
                                        n++;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            //if (phi < 0.999) {
            //v_avg /= n;
            //d_avg /= n;
            //}

            // Make sure that the porosity is in the interval [0.0;1.0]
            phi = fmin(0.99, fmax(0.01, void_volume/cell_volume));
            //phi = void_volume/cell_volume;

            // porosity change since the previous stored value; zero on the
            // very first iteration where no valid old value exists
            Float dphi = phi - phi_0;
            if (iteration == 0)
                dphi = 0.0;

            // report values to stdout for debugging
            //printf("%d,%d,%d\tphi = %f dphi = %f\n", x,y,z, phi, dphi);
            //printf("%d,%d,%d\tphi = %f dphi = %f v_avg = %f,%f,%f d_avg = %f\n",
            //        x,y,z, phi, dphi, v_avg.x, v_avg.y, v_avg.z, d_avg);

            // Save porosity and porosity change, scaled by c_phi
            __syncthreads();
            //phi = 0.5; dphi = 0.0; // disable porosity effects
            const unsigned int cellidx = d_idx(x,y,z);
            dev_darcy_phi[cellidx]  = phi*c_phi;
            dev_darcy_dphi[cellidx] = dphi*c_phi;
            //dev_darcy_vp_avg[cellidx] = v_avg;
            //dev_darcy_d_avg[cellidx]  = d_avg;

#ifdef CHECK_FLUID_FINITE
            (void)checkFiniteFloat("phi", x, y, z, phi);
            (void)checkFiniteFloat("dphi", x, y, z, dphi);
            //(void)checkFiniteFloat3("v_avg", x, y, z, v_avg);
            //(void)checkFiniteFloat("d_avg", x, y, z, d_avg);
#endif
        } else {
            // no particles: treat the whole grid as nearly pure fluid

            __syncthreads();
            const unsigned int cellidx = d_idx(x,y,z);

            //Float phi = 0.5;
            //Float dphi = 0.0;
            //if (iteration == 20 && x == nx/2 && y == ny/2 && z == nz/2) {
            //phi = 0.4;
            //dphi = 0.1;
            //}
            //dev_darcy_phi[cellidx] = phi;
            //dev_darcy_dphi[cellidx] = dphi;
            dev_darcy_phi[cellidx]  = 0.999;
            dev_darcy_dphi[cellidx] = 0.0;

            //dev_darcy_vp_avg[cellidx] = MAKE_FLOAT3(0.0, 0.0, 0.0);
            //dev_darcy_d_avg[cellidx]  = 0.0;
        }
    }
}
| + | |
// Find the particle velocity divergence at each cell center from the
// average particle velocities stored on the faces of the staggered grid.
__global__ void findDarcyParticleVelocityDivergence(
    const Float* __restrict__ dev_darcy_v_p_x,  // in
    const Float* __restrict__ dev_darcy_v_p_y,  // in
    const Float* __restrict__ dev_darcy_v_p_z,  // in
    Float* __restrict__ dev_darcy_div_v_p)      // out
{
    // position of this thread in the fluid grid
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // grid extents
    const unsigned int nx = devC_grid.num[0];
    const unsigned int ny = devC_grid.num[1];
    const unsigned int nz = devC_grid.num[2];

    if (x < nx && y < ny && z < nz) {

        // face velocities on the negative and positive side of the cell
        __syncthreads();
        const Float xn = dev_darcy_v_p_x[d_vidx(x,y,z)];
        const Float xp = dev_darcy_v_p_x[d_vidx(x+1,y,z)];
        const Float yn = dev_darcy_v_p_y[d_vidx(x,y,z)];
        const Float yp = dev_darcy_v_p_y[d_vidx(x,y+1,z)];
        const Float zn = dev_darcy_v_p_z[d_vidx(x,y,z)];
        const Float zp = dev_darcy_v_p_z[d_vidx(x,y,z+1)];

        // cell spacing
        const Float dx = devC_grid.L[0]/nx;
        const Float dy = devC_grid.L[1]/ny;
        const Float dz = devC_grid.L[2]/nz;

        // first-order central difference of the face values
        const Float div_v_p =
            (xp - xn)/dx +
            (yp - yn)/dy +
            (zp - zn)/dz;

        __syncthreads();
        dev_darcy_div_v_p[d_idx(x,y,z)] = div_v_p;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat("div_v_p", x, y, z, div_v_p);
#endif
    }
}
| + | |
// Find particle-fluid interaction force as outlined by Zhou et al. 2010, and
// originally by Gidaspow 1992. All terms other than the pressure force are
// neglected. The buoyancy force is included.
// NOTE(review): despite the sentence above, the buoyancy term below is
// commented out, so no buoyancy force is actually applied here.
//
// For each particle the containing fluid cell is located, the local pressure
// gradient is estimated with central differences of the neighbor cells, and
// the force -grad(p)*V/(1 - phi) is accumulated into dev_force and also
// stored separately in dev_darcy_f_p.
__global__ void findDarcyPressureForce(
    const Float4* __restrict__ dev_x,           // in: particle position, w = radius
    const Float* __restrict__ dev_darcy_p,      // in: cell pressure
    const Float* __restrict__ dev_darcy_phi,    // in: cell porosity
    Float4* __restrict__ dev_force,             // in + out: accumulated particle force
    Float4* __restrict__ dev_darcy_f_p)         // out: fluid pressure force
{
    unsigned int i = threadIdx.x + blockIdx.x*blockDim.x; // Particle index

    if (i < devC_np) {

        // read particle information
        __syncthreads();
        const Float4 x = dev_x[i];

        // determine fluid cell containing the particle
        const unsigned int i_x =
            floor((x.x - devC_grid.origo[0])/(devC_grid.L[0]/devC_grid.num[0]));
        const unsigned int i_y =
            floor((x.y - devC_grid.origo[1])/(devC_grid.L[1]/devC_grid.num[1]));
        const unsigned int i_z =
            floor((x.z - devC_grid.origo[2])/(devC_grid.L[2]/devC_grid.num[2]));
        const unsigned int cellidx = d_idx(i_x, i_y, i_z);

        // determine cell dimensions
        const Float dx = devC_grid.L[0]/devC_grid.num[0];
        const Float dy = devC_grid.L[1]/devC_grid.num[1];
        const Float dz = devC_grid.L[2]/devC_grid.num[2];

        // read fluid information; the neighbor reads rely on the ghost-node
        // layer when the particle sits in a boundary cell
        __syncthreads();
        const Float phi = dev_darcy_phi[cellidx];
        const Float p_xn = dev_darcy_p[d_idx(i_x-1,i_y,i_z)];
        //const Float p = dev_darcy_p[cellidx];
        const Float p_xp = dev_darcy_p[d_idx(i_x+1,i_y,i_z)];
        const Float p_yn = dev_darcy_p[d_idx(i_x,i_y-1,i_z)];
        const Float p_yp = dev_darcy_p[d_idx(i_x,i_y+1,i_z)];
        const Float p_zn = dev_darcy_p[d_idx(i_x,i_y,i_z-1)];
        const Float p_zp = dev_darcy_p[d_idx(i_x,i_y,i_z+1)];

        // find particle volume (radius in x.w)
        const Float V = 4.0/3.0*M_PI*x.w*x.w*x.w;

        // determine pressure gradient from first order central difference
        const Float3 grad_p = MAKE_FLOAT3(
                (p_xp - p_xn)/(dx + dx),
                (p_yp - p_yn)/(dy + dy),
                (p_zp - p_zn)/(dz + dz));

        // find pressure gradient force plus buoyancy force.
        // buoyancy force = weight of displaced fluid
        // f_b = -rho_f*V*g
        const Float3 f_p = -1.0*grad_p*V/(1.0 - phi);
            //- devC_params.rho_f*V*MAKE_FLOAT3(
                    //devC_params.g[0],
                    //devC_params.g[1],
                    //devC_params.g[2]);

        /*printf("%d,%d,%d findPF:\n"
                "\tphi    = %f\n"
                "\tp      = %f\n"
                "\tgrad_p = % f, % f, % f\n"
                "\tf_p    = % f, % f, % f\n",
                i_x, i_y, i_z,
                phi, p,
                grad_p.x, grad_p.y, grad_p.z,
                f_p.x, f_p.y, f_p.z);*/

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat3("f_p", i_x, i_y, i_z, f_p);
#endif
        // save force
        __syncthreads();
        dev_force[i]     += MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
        dev_darcy_f_p[i]  = MAKE_FLOAT4(f_p.x, f_p.y, f_p.z, 0.0);
    }
}
| + | |
// Set the pressure at the top boundary to new_pressure
// Only threads in the z == nz-1 layer write; all other threads do nothing.
__global__ void setDarcyTopPressure(
    const Float new_pressure, Float* __restrict__ dev_darcy_p)
{
    // 3D thread index
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // check that the thread is located at the top boundary
    if (x < devC_grid.num[0] &&
            y < devC_grid.num[1] &&
            z == devC_grid.num[2]-1) {

        // NOTE(review): idx() is used here while the other Darcy kernels use
        // d_idx(); presumably both map the ghost-padded grid identically —
        // confirm against the idx() definition.
        const unsigned int cellidx = idx(x,y,z);

        // Write the new pressure the top boundary cells
        __syncthreads();
        dev_darcy_p[cellidx] = new_pressure;
    }
}
| + | |
// Set the pressure at the top wall to new_pressure
// Like setDarcyTopPressure, but writes at the cell layer of the dynamic top
// wall (wall0_iz) instead of the fixed grid top.
__global__ void setDarcyTopWallPressure(
    const Float new_pressure,       // in: pressure value to enforce
    const unsigned int wall0_iz,    // in: z cell index of the top wall
    Float* __restrict__ dev_darcy_p) // in + out: pressure field
{
    // 3D thread index
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // check that the thread is located at the top boundary
    if (x < devC_grid.num[0] &&
            y < devC_grid.num[1] &&
            z == wall0_iz) {

        // NOTE(review): idx() vs d_idx() — see setDarcyTopPressure.
        const unsigned int cellidx = idx(x,y,z);

        // Write the new pressure the top boundary cells
        __syncthreads();
        dev_darcy_p[cellidx] = new_pressure;
    }
}
| + | |
| + | |
// Compute each cell's hydraulic permeability from its porosity with the
// Kozeny-Carman relation, k = k_c * phi^3/(1 - phi)^2.
__global__ void findDarcyPermeabilities(
    const Float k_c,                            // in
    const Float* __restrict__ dev_darcy_phi,    // in
    Float* __restrict__ dev_darcy_k)            // out
{
    // position of this thread in the fluid grid
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    if (x < devC_grid.num[0] &&
            y < devC_grid.num[1] &&
            z < devC_grid.num[2]) {

        const unsigned int cellidx = d_idx(x,y,z);

        __syncthreads();
        Float phi = dev_darcy_phi[cellidx];

        // cap the porosity to avoid division by zero in (1 - phi)^2
        if (phi > 0.9999)
            phi = 0.9999;

        // Kozeny-Carman value, limited from above by the permeability of
        // gravel (K_gravel = 3.0e-2 m/s => k_gravel = 2.7e-9 m*m)
        const Float k = fmin(2.7e-9, k_c*pow(phi,3)/pow(1.0 - phi, 2));

        __syncthreads();
        dev_darcy_k[cellidx] = k;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat("k", x, y, z, k);
#endif
    }
}
| + | |
// Approximate grad(k) at every cell center by first-order central
// differences of the neighbor permeabilities. The result feeds the pressure
// diffusion term in updateDarcySolution.
__global__ void findDarcyPermeabilityGradients(
    const Float*  __restrict__ dev_darcy_k,   // in
    Float3* __restrict__ dev_darcy_grad_k)    // out
{
    // position of this thread in the fluid grid
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // cell counts and spacings per dimension
    const unsigned int nx = devC_grid.num[0];
    const unsigned int ny = devC_grid.num[1];
    const unsigned int nz = devC_grid.num[2];
    const Float dx = devC_grid.L[0]/nx;
    const Float dy = devC_grid.L[1]/ny;
    const Float dz = devC_grid.L[2]/nz;

    if (x < nx && y < ny && z < nz) {

        // the six neighbor permeabilities; ghost nodes supply the values
        // beyond the grid edges
        __syncthreads();
        const Float k_xn = dev_darcy_k[d_idx(x-1,y,z)];
        const Float k_xp = dev_darcy_k[d_idx(x+1,y,z)];
        const Float k_yn = dev_darcy_k[d_idx(x,y-1,z)];
        const Float k_yp = dev_darcy_k[d_idx(x,y+1,z)];
        const Float k_zn = dev_darcy_k[d_idx(x,y,z-1)];
        const Float k_zp = dev_darcy_k[d_idx(x,y,z+1)];

        // central difference over two cell widths per component
        const Float3 grad_k = MAKE_FLOAT3(
                (k_xp - k_xn)/(dx+dx),
                (k_yp - k_yn)/(dy+dy),
                (k_zp - k_zn)/(dz+dz));

        __syncthreads();
        dev_darcy_grad_k[d_idx(x,y,z)] = grad_k;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat3("grad_k", x, y, z, grad_k);
#endif
    }
}
| + | |
// Estimate the temporal pressure gradient per cell with the backwards Euler
// difference (p - p_old)/dt. The first two iterations report zero to mask
// the large gradients produced while the solver "warms up" towards a
// hydrostatic pressure distribution.
__global__ void findDarcyPressureChange(
    const Float* __restrict__ dev_darcy_p_old,
    const Float* __restrict__ dev_darcy_p,
    const unsigned int iter,
    Float* __restrict__ dev_darcy_dpdt)
{
    // position of this thread in the fluid grid
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    if (x < devC_grid.num[0] &&
            y < devC_grid.num[1] &&
            z < devC_grid.num[2]) {

        const unsigned int cellidx = d_idx(x,y,z);

        __syncthreads();
        const Float p_old = dev_darcy_p_old[cellidx];
        const Float p     = dev_darcy_p[cellidx];

        // zero during warm up, backwards Euler difference afterwards
        const Float dpdt = (iter < 2) ? 0.0 : (p - p_old)/devC_dt;

        __syncthreads();
        dev_darcy_dpdt[cellidx] = dpdt;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat("dpdt", x, y, z, dpdt);
#endif
    }
}
| + | |
// A single jacobi iteration where the pressure values are updated to
// dev_darcy_p_new. The explicit update solves the pressure diffusion
// equation
//   p_new = p_old + dt/(beta_f*phi*mu)*(k*laplace(p) + grad(k).grad(p))
//           - dphi/(beta_f*phi*(1 - phi))
// bc = 0: Dirichlet, 1: Neumann
__global__ void updateDarcySolution(
    const Float* __restrict__ dev_darcy_p_old,   // in: pressure at previous step
    //const Float* __restrict__ dev_darcy_dpdt,  // in
    const Float* __restrict__ dev_darcy_p,       // in: current pressure
    const Float* __restrict__ dev_darcy_k,       // in: cell permeability
    const Float* __restrict__ dev_darcy_phi,     // in: cell porosity
    const Float* __restrict__ dev_darcy_dphi,    // in: cell porosity change
    const Float3* __restrict__ dev_darcy_grad_k, // in: grad(permeability)
    const Float beta_f,                          // in: fluid compressibility
    const Float mu,                              // in: fluid dynamic viscosity
    const int bc_bot,                            // in: lower z boundary condition
    const int bc_top,                            // in: upper z boundary condition
    const unsigned int ndem,                     // in: unused in this kernel
    const unsigned int wall0_iz,                 // in: z index of dynamic top wall
    Float* __restrict__ dev_darcy_p_new,         // out: updated pressure
    Float* __restrict__ dev_darcy_norm)          // out: normalized residual
{
    // 3D thread index
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // Grid dimensions
    const unsigned int nx = devC_grid.num[0];
    const unsigned int ny = devC_grid.num[1];
    const unsigned int nz = devC_grid.num[2];

    // Cell size
    const Float dx = devC_grid.L[0]/nx;
    const Float dy = devC_grid.L[1]/ny;
    const Float dz = devC_grid.L[2]/nz;

    if (x < nx && y < ny && z < nz) {

        // 1D thread index
        const unsigned int cellidx = d_idx(x,y,z);

        // read cell values and the six-neighbor pressure stencil; ghost
        // nodes supply the values beyond the grid edges
        __syncthreads();
        const Float k       = dev_darcy_k[cellidx];
        const Float3 grad_k = dev_darcy_grad_k[cellidx];
        const Float phi     = dev_darcy_phi[cellidx];
        const Float dphi    = dev_darcy_dphi[cellidx];

        const Float p_old = dev_darcy_p_old[cellidx];

        const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)];
        const Float p    = dev_darcy_p[cellidx];
        const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)];
        const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)];
        const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)];
        Float p_zn = dev_darcy_p[d_idx(x,y,z-1)];
        Float p_zp = dev_darcy_p[d_idx(x,y,z+1)];

        // Neumann BCs: enforce a zero pressure gradient across the top and
        // bottom boundaries by mirroring the center value into the stencil.
        // BUGFIX: this override previously happened AFTER grad_p and
        // laplace_p were computed, so the Neumann condition had no effect
        // on the update (the assignments were dead stores).
        if (z == 0 && bc_bot == 1)
            p_zn = p;
        if (z == nz-1 && bc_top == 1)
            p_zp = p;

        // gradient approximated by first-order central difference
        const Float3 grad_p = MAKE_FLOAT3(
                (p_xp - p_xn)/(dx+dx),
                (p_yp - p_yn)/(dy+dy),
                (p_zp - p_zn)/(dz+dz));

        // Laplacian approximated by second-order central difference
        const Float laplace_p =
            (p_xp - (p+p) + p_xn)/(dx*dx) +
            (p_yp - (p+p) + p_yn)/(dy*dy) +
            (p_zp - (p+p) + p_zn)/(dz*dz);

        // explicit update of the pressure diffusion equation, with a
        // forcing term from the porosity change
        Float p_new = p_old
            + devC_dt/(beta_f*phi*mu)*(k*laplace_p + dot(grad_k, grad_p))
            - dphi/(beta_f*phi*(1.0 - phi));

        // Dirichlet BCs: keep fixed-pressure cells unchanged. wall0_iz is
        // larger than the grid if the top wall isn't dynamic, so the last
        // clause is then never true.
        if ((z == 0 && bc_bot == 0) ||
                (z == nz-1 && bc_top == 0) ||
                (z == wall0_iz))
            p_new = p;

        // normalized residual, avoid division by zero
        const Float res_norm = (p_new - p)/(p + 1.0e-16);

#ifdef REPORT_FORCING_TERMS
        // BUGFIX: this report previously referenced identifiers from a
        // commented-out formulation (dpdt, f, f_transient, f_forcing,
        // f_diff), which broke compilation whenever REPORT_FORCING_TERMS
        // was defined. Only values that exist are printed now.
        printf("\n%d,%d,%d updateDarcySolution\n"
                "p_old     = %e\n"
                "p         = %e\n"
                "p_new     = %e\n"
                "laplace_p = %e\n"
                "res_norm  = %e\n",
                x,y,z,
                p_old, p, p_new,
                laplace_p,
                res_norm);
#endif

        // save new pressure and the residual
        __syncthreads();
        dev_darcy_p_new[cellidx] = p_new;
        dev_darcy_norm[cellidx]  = res_norm;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat("p_new", x, y, z, p_new);
        checkFiniteFloat("res_norm", x, y, z, res_norm);
#endif
    }
}
| + | |
// Apply the per-cell pressure increments to the pressure field in place.
__global__ void findNewPressure(
    const Float* __restrict__ dev_darcy_dp,     // in
    Float* __restrict__ dev_darcy_p)            // in+out
{
    // position of this thread in the fluid grid
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
    const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z;

    // only threads inside the fluid grid participate
    if (x < devC_grid.num[0] &&
            y < devC_grid.num[1] &&
            z < devC_grid.num[2]) {

        const unsigned int cellidx = d_idx(x,y,z);
        const Float dp = dev_darcy_dp[cellidx];

        // add the increment to the stored pressure
        __syncthreads();
        dev_darcy_p[cellidx] += dp;

#ifdef CHECK_FLUID_FINITE
        checkFiniteFloat("dp", x, y, z, dp);
#endif
    }
}
| + | |
| +// Find cell velocities | |
| +__global__ void findDarcyVelocities( | |
| + const Float* __restrict__ dev_darcy_p, // in | |
| + const Float* __restrict__ dev_darcy_phi, // in | |
| + const Float* __restrict__ dev_darcy_k, // in | |
| + const Float mu, // in | |
| + Float3* __restrict__ dev_darcy_v) // out | |
| +{ | |
| + // 3D thread index | |
| + const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| + const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
| + const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| + | |
| + // Grid dimensions | |
| + const unsigned int nx = devC_grid.num[0]; | |
| + const unsigned int ny = devC_grid.num[1]; | |
| + const unsigned int nz = devC_grid.num[2]; | |
| + | |
| + // Cell size | |
| + const Float dx = devC_grid.L[0]/nx; | |
| + const Float dy = devC_grid.L[1]/ny; | |
| + const Float dz = devC_grid.L[2]/nz; | |
| + | |
| + // Check that we are not outside the fluid grid | |
| + if (x < nx && y < ny && z < nz) { | |
| + | |
| + const unsigned int cellidx = d_idx(x,y,z); | |
| + | |
| + __syncthreads(); | |
| + const Float p_xn = dev_darcy_p[d_idx(x-1,y,z)]; | |
| + const Float p_xp = dev_darcy_p[d_idx(x+1,y,z)]; | |
| + const Float p_yn = dev_darcy_p[d_idx(x,y-1,z)]; | |
| + const Float p_yp = dev_darcy_p[d_idx(x,y+1,z)]; | |
| + const Float p_zn = dev_darcy_p[d_idx(x,y,z-1)]; | |
| + const Float p_zp = dev_darcy_p[d_idx(x,y,z+1)]; | |
| + | |
| + const Float k = dev_darcy_k[cellidx]; | |
| + const Float phi = dev_darcy_phi[cellidx]; | |
| + | |
| + // approximate pressure gradient with first order central differences | |
| + const Float3 grad_p = MAKE_FLOAT3( | |
| + (p_xp - p_xn)/(dx + dx), | |
| + (p_yp - p_yn)/(dy + dy), | |
| + (p_zp - p_zn)/(dz + dz)); | |
| + | |
| + // Darcy flux [m/s]: q = -k/mu * grad(p) | |
| + // Pore velocity [m/s]: v = q/phi | |
| + | |
| + // calculate flux | |
| + //const Float3 q = -k/mu*grad_p; | |
| + | |
| + // calculate velocity | |
| + //const Float3 v = q/phi; | |
| + const Float3 v = (-k/mu * grad_p)/phi; | |
| + | |
| + // Save velocity | |
| + __syncthreads(); | |
| + dev_darcy_v[cellidx] = v; | |
| + } | |
| +} | |
| + | |
| +// Print final heads and free memory | |
| +void DEM::endDarcyDev() | |
| +{ | |
| + freeDarcyMemDev(); | |
| +} | |
| + | |
| +// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 | |
| diff --git a/src/datatypes.h b/src/datatypes.h | |
| t@@ -96,8 +96,6 @@ struct Params { | |
| Float tau_b; // Bond shear strength | |
| Float devs_A; // Amplitude of modulations in normal stress | |
| Float devs_f; // Frequency of modulations in normal stress | |
| - Float mu; // Fluid dynamic viscosity | |
| - Float rho_f; // Fluid density | |
| }; | |
| // Structure containing wall parameters | |
| t@@ -146,6 +144,35 @@ struct NavierStokes { | |
| Float4* f_p; // Pressure force on particles | |
| Float4* f_v; // Viscous force on particles | |
| Float4* f_sum; // Sum of fluid forces on particles | |
| + Float mu; // Fluid dynamic viscosity | |
| + Float rho_f; // Fluid density | |
| +}; | |
| + | |
| +struct Darcy { | |
| + int nx, ny, nz; // Number of cells in each dimension | |
| + Float dx, dy, dz; // Cell length in each dim | |
| + Float* p; // Cell hydraulic pressures | |
| + Float3* v; // Cell fluid velocity | |
| + Float* k; // Cell hydraulic permeability | |
| + Float* phi; // Cell porosity | |
| + Float* dphi; // Cell porosity change | |
| + Float* norm; // Normalized residual of epsilon updates | |
| + Float p_mod_A; // Pressure modulation amplitude at top | |
| + Float p_mod_f; // Pressure modulation frequency at top | |
| + Float p_mod_phi; // Pressure modulation phase at top | |
| + int bc_bot; // 0: Dirichlet, 1: Neumann | |
| + int bc_top; // 0: Dirichlet, 1: Neumann | |
| + int free_slip_bot; // 0: no, 1: yes | |
| + int free_slip_top; // 0: no, 1: yes | |
| + Float tolerance; // Solver parameter: Max residual tolerance | |
| + unsigned int maxiter; // Solver parameter: Max iterations to perform | |
| + unsigned int ndem; // Solver parameter: DEM time steps per CFD step | |
| + Float c_phi; // Porosity scaling coefficient | |
| + Float4* f_p; // Pressure force on particles | |
| + Float beta_f; // Adiabatic fluid compressibility | |
| + Float k_c; // Permeability prefactor in Kozeny-Carman eq. | |
| + Float mu; // Fluid dynamic viscosity | |
| + Float rho_f; // Fluid density | |
| }; | |
| // Image structure | |
| diff --git a/src/debug.h b/src/debug.h | |
| t@@ -20,7 +20,7 @@ const unsigned int nijacnorm = 10; | |
| // 0: False, 1: True | |
| const int write_res_log = 0; | |
| -// Report epsilon values during Jacobi iterations to stdout | |
| +// Report pressure (epsilon) values during Jacobi iterations to stdout | |
| //#define REPORT_EPSILON | |
| //#define REPORT_MORE_EPSILON | |
| t@@ -30,15 +30,15 @@ const int write_res_log = 0; | |
| const int write_conv_log = 1; | |
| // The interval between iteration number reporting in 'output/<sid>-conv.log' | |
| -//const int conv_log_interval = 10; | |
| -const int conv_log_interval = 4; | |
| +const int conv_log_interval = 10; | |
| +//const int conv_log_interval = 4; | |
| //const int conv_log_interval = 1; | |
| // Enable drag force and particle fluid coupling | |
| #define CFD_DEM_COUPLING | |
| // Check for nan/inf values in fluid solver kernels | |
| -#define CHECK_NS_FINITE | |
| +#define CHECK_FLUID_FINITE | |
| // Enable reporting of velocity prediction components to stdout | |
| //#define REPORT_V_P_COMPONENTS | |
| diff --git a/src/device.cu b/src/device.cu | |
| t@@ -24,6 +24,7 @@ | |
| #include "integration.cuh" | |
| #include "raytracer.cuh" | |
| #include "navierstokes.cuh" | |
| +#include "darcy.cuh" | |
| // Returns the number of cores per streaming multiprocessor, which is | |
| // a function of the device compute capability | |
| t@@ -170,40 +171,65 @@ __global__ void checkConstantValues(int* dev_equal, | |
| // Compare values between global- and constant | |
| // memory structures | |
| - if (dev_grid->origo[0] != devC_grid.origo[0] || | |
| - dev_grid->origo[1] != devC_grid.origo[1] || | |
| - dev_grid->origo[2] != devC_grid.origo[2] || | |
| - dev_grid->L[0] != devC_grid.L[0] || | |
| - dev_grid->L[1] != devC_grid.L[1] || | |
| - dev_grid->L[2] != devC_grid.L[2] || | |
| - dev_grid->num[0] != devC_grid.num[0] || | |
| - dev_grid->num[1] != devC_grid.num[1] || | |
| - dev_grid->num[2] != devC_grid.num[2] || | |
| - dev_grid->periodic != devC_grid.periodic) | |
| - *dev_equal = 1; // Not ok | |
| - | |
| - else if (dev_params->g[0] != devC_params.g[0] || | |
| - dev_params->g[1] != devC_params.g[1] || | |
| - dev_params->g[2] != devC_params.g[2] || | |
| - dev_params->k_n != devC_params.k_n || | |
| - dev_params->k_t != devC_params.k_t || | |
| - dev_params->k_r != devC_params.k_r || | |
| - dev_params->gamma_n != devC_params.gamma_n || | |
| - dev_params->gamma_t != devC_params.gamma_t || | |
| - dev_params->gamma_r != devC_params.gamma_r || | |
| - dev_params->mu_s != devC_params.mu_s || | |
| - dev_params->mu_d != devC_params.mu_d || | |
| - dev_params->mu_r != devC_params.mu_r || | |
| - dev_params->rho != devC_params.rho || | |
| - dev_params->contactmodel != devC_params.contactmodel || | |
| - dev_params->kappa != devC_params.kappa || | |
| - dev_params->db != devC_params.db || | |
| - dev_params->V_b != devC_params.V_b || | |
| - dev_params->lambda_bar != devC_params.lambda_bar || | |
| - dev_params->nb0 != devC_params.nb0 || | |
| - dev_params->mu != devC_params.mu || | |
| - dev_params->rho_f != devC_params.rho_f) | |
| + if (dev_grid->origo[0] != devC_grid.origo[0]) | |
| + *dev_equal = 1; | |
| + if (dev_grid->origo[1] != devC_grid.origo[1]) | |
| *dev_equal = 2; // Not ok | |
| + if (dev_grid->origo[2] != devC_grid.origo[2]) | |
| + *dev_equal = 3; // Not ok | |
| + if (dev_grid->L[0] != devC_grid.L[0]) | |
| + *dev_equal = 4; // Not ok | |
| + if (dev_grid->L[1] != devC_grid.L[1]) | |
| + *dev_equal = 5; // Not ok | |
| + if (dev_grid->L[2] != devC_grid.L[2]) | |
| + *dev_equal = 6; // Not ok | |
| + if (dev_grid->num[0] != devC_grid.num[0]) | |
| + *dev_equal = 7; // Not ok | |
| + if (dev_grid->num[1] != devC_grid.num[1]) | |
| + *dev_equal = 8; // Not ok | |
| + if (dev_grid->num[2] != devC_grid.num[2]) | |
| + *dev_equal = 9; // Not ok | |
| + if (dev_grid->periodic != devC_grid.periodic) | |
| + *dev_equal = 10; // Not ok | |
| + | |
| + if (dev_params->g[0] != devC_params.g[0]) | |
| + *dev_equal = 11; // Not ok | |
| + if (dev_params->g[1] != devC_params.g[1]) | |
| + *dev_equal = 12; // Not ok | |
| + if (dev_params->g[2] != devC_params.g[2]) | |
| + *dev_equal = 13; // Not ok | |
| + if (dev_params->k_n != devC_params.k_n) | |
| + *dev_equal = 14; // Not ok | |
| + if (dev_params->k_t != devC_params.k_t) | |
| + *dev_equal = 15; // Not ok | |
| + if (dev_params->k_r != devC_params.k_r) | |
| + *dev_equal = 16; // Not ok | |
| + if (dev_params->gamma_n != devC_params.gamma_n) | |
| + *dev_equal = 17; // Not ok | |
| + if (dev_params->gamma_t != devC_params.gamma_t) | |
| + *dev_equal = 18; // Not ok | |
| + if (dev_params->gamma_r != devC_params.gamma_r) | |
| + *dev_equal = 19; // Not ok | |
| + if (dev_params->mu_s != devC_params.mu_s) | |
| + *dev_equal = 20; // Not ok | |
| + if (dev_params->mu_d != devC_params.mu_d) | |
| + *dev_equal = 21; // Not ok | |
| + if (dev_params->mu_r != devC_params.mu_r) | |
| + *dev_equal = 22; // Not ok | |
| + if (dev_params->rho != devC_params.rho) | |
| + *dev_equal = 23; // Not ok | |
| + if (dev_params->contactmodel != devC_params.contactmodel) | |
| + *dev_equal = 24; // Not ok | |
| + if (dev_params->kappa != devC_params.kappa) | |
| + *dev_equal = 25; // Not ok | |
| + if (dev_params->db != devC_params.db) | |
| + *dev_equal = 26; // Not ok | |
| + if (dev_params->V_b != devC_params.V_b) | |
| + *dev_equal = 27; // Not ok | |
| + if (dev_params->lambda_bar != devC_params.lambda_bar) | |
| + *dev_equal = 28; // Not ok | |
| + if (dev_params->nb0 != devC_params.nb0) | |
| + *dev_equal = 29; // Not ok | |
| } | |
| // Copy the constant data components to device memory, | |
| t@@ -524,9 +550,12 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
| cudaFree(dev_walls_acc); | |
| // Fluid arrays | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1 && cfd_solver == 0) { | |
| freeNSmemDev(); | |
| } | |
| + if (fluid == 1 && cfd_solver == 1) { | |
| + freeDarcyMemDev(); | |
| + } | |
| //checkForCudaErrors("During cudaFree calls"); | |
| t@@ -604,11 +633,15 @@ __host__ void DEM::transferToGlobalDeviceMemory(int stat… | |
| sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
| // Fluid arrays | |
| - if (navierstokes == 1) { | |
| - transferNStoGlobalDeviceMemory(1); | |
| - } else if (navierstokes != 0) { | |
| - std::cerr << "Error: navierstokes value not understood (" | |
| - << navierstokes << ")" << std::endl; | |
| + if (fluid == 1) { | |
| + if (cfd_solver == 0) { | |
| + transferNStoGlobalDeviceMemory(1); | |
| + } else if (cfd_solver == 1) { | |
| + transferDarcyToGlobalDeviceMemory(1); | |
| + } else { | |
| + std::cerr << "Error: cfd_solver value not understood (" | |
| + << cfd_solver << ")" << std::endl; | |
| + } | |
| } | |
| checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
| t@@ -680,9 +713,13 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
| sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost); | |
| // Fluid arrays | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1 && cfd_solver == 0) { | |
| transferNSfromGlobalDeviceMemory(0); | |
| } | |
| + else if (fluid == 1 && cfd_solver == 1) { | |
| + transferDarcyFromGlobalDeviceMemory(0); | |
| + checkDarcyStability(); | |
| + } | |
| //checkForCudaErrors("End of transferFromGlobalDeviceMemory"); | |
| } | |
| t@@ -733,7 +770,7 @@ __host__ void DEM::startTime() | |
| iDivUp(grid.num[0], dimBlockFluid.x), | |
| iDivUp(grid.num[1], dimBlockFluid.y), | |
| iDivUp(grid.num[2], dimBlockFluid.z)); | |
| - if (dimGridFluid.z > 64 && navierstokes == 1) { | |
| + if (dimGridFluid.z > 64 && fluid == 1) { | |
| cerr << "Error: dimGridFluid.z > 64" << endl; | |
| exit(1); | |
| } | |
| t@@ -744,7 +781,7 @@ __host__ void DEM::startTime() | |
| iDivUp(grid.num[0]+1, dimBlockFluidFace.x), | |
| iDivUp(grid.num[1]+1, dimBlockFluidFace.y), | |
| iDivUp(grid.num[2]+1, dimBlockFluidFace.z)); | |
| - if (dimGridFluidFace.z > 64 && navierstokes == 1) { | |
| + if (dimGridFluidFace.z > 64 && fluid == 1) { | |
| cerr << "Error: dimGridFluidFace.z > 64" << endl; | |
| exit(1); | |
| } | |
| t@@ -766,7 +803,7 @@ __host__ void DEM::startTime() | |
| << dimBlock.x << "*" << dimBlock.y << "*" << dimBlock.z << "\n" | |
| << " - Shared memory required per block: " << smemSize << " bytes" | |
| << endl; | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1) { | |
| cout << " - Blocks per fluid grid: " | |
| << dimGridFluid.x << "*" << dimGridFluid.y << "*" << | |
| dimGridFluid.z << "\n" | |
| t@@ -832,6 +869,17 @@ __host__ void DEM::startTime() | |
| double t_jacobiIterationNS = 0.0; | |
| double t_updateNSvelocityPressure = 0.0; | |
| + double t_findDarcyPorosities = 0.0; | |
| + double t_setDarcyGhostNodes = 0.0; | |
| + double t_findDarcyPressureForce = 0.0; | |
| + double t_setDarcyTopPressure = 0.0; | |
| + double t_findDarcyPermeabilities = 0.0; | |
| + double t_findDarcyPermeabilityGradients = 0.0; | |
| + //double t_findDarcyPressureChange = 0.0; | |
| + double t_updateDarcySolution = 0.0; | |
| + double t_copyValues = 0.0; | |
| + double t_findDarcyVelocities = 0.0; | |
| + | |
| if (PROFILING == 1) { | |
| cudaEventCreate(&kernel_tic); | |
| cudaEventCreate(&kernel_toc); | |
| t@@ -844,7 +892,11 @@ __host__ void DEM::startTime() | |
| // Index of dynamic top wall (if it exists) | |
| unsigned int wall0_iz = 10000000; | |
| // weight of fluid between two cells in z direction | |
| - const Float dp_dz = fabs(params.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
| + Float dp_dz; | |
| + if (cfd_solver == 0) | |
| + dp_dz = fabs(ns.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
| + else if (cfd_solver == 1) | |
| + dp_dz = fabs(darcy.rho_f*params.g[2]*grid.L[2]/grid.num[2]); | |
| //std::cout << "dp_dz = " << dp_dz << std::endl; | |
| // Write a log file of the number of iterations it took before | |
| t@@ -1018,163 +1070,170 @@ __host__ void DEM::startTime() | |
| } | |
| } | |
| - // Solve Navier Stokes flow through the grid | |
| - if (navierstokes == 1) { | |
| - checkForCudaErrorsIter("Before findPorositiesDev", iter); | |
| - // Find cell porosities, average particle velocities, and average | |
| - // particle diameters. These are needed for predicting the fluid | |
| - // velocities | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| - findPorositiesVelocitiesDiametersSpherical | |
| - //findPorositiesVelocitiesDiametersSphericalGradient | |
| - <<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_cellStart, | |
| - dev_cellEnd, | |
| - dev_x_sorted, | |
| - dev_vel_sorted, | |
| - dev_ns_phi, | |
| - dev_ns_dphi, | |
| - dev_ns_vp_avg, | |
| - dev_ns_d_avg, | |
| - iter, | |
| - np, | |
| - ns.c_phi); | |
| - cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findPorositiesDev); | |
| - checkForCudaErrorsIter("Post findPorositiesDev", iter); | |
| + // Solve fluid flow through the grid | |
| + if (fluid == 1) { | |
| -#ifdef CFD_DEM_COUPLING | |
| - /*if (params.nu <= 0.0) { | |
| - std::cerr << "Error! The fluid needs a positive viscosity " | |
| - "value in order to simulate particle-fluid interaction." | |
| - << std::endl; | |
| - exit(1); | |
| - }*/ | |
| - if (iter == 0) { | |
| - // set cell center ghost nodes | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + // Navier-Stokes solution | |
| + if (cfd_solver == 0) { | |
| - // find cell face velocities | |
| - interpolateCenterToFace | |
| - <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v, | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z); | |
| + checkForCudaErrorsIter("Before findPorositiesDev", iter); | |
| + // Find cell porosities, average particle velocities, and aver… | |
| + // particle diameters. These are needed for predicting the flu… | |
| + // velocities | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findPorositiesVelocitiesDiametersSpherical | |
| + //findPorositiesVelocitiesDiametersSphericalGradient | |
| + <<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_cellStart, | |
| + dev_cellEnd, | |
| + dev_x_sorted, | |
| + dev_vel_sorted, | |
| + dev_ns_phi, | |
| + dev_ns_dphi, | |
| + dev_ns_vp_avg, | |
| + dev_ns_d_avg, | |
| + iter, | |
| + np, | |
| + ns.c_phi); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrors("Post interpolateCenterToFace"); | |
| - } | |
| - | |
| - setNSghostNodesFace<Float> | |
| - <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z, | |
| - ns.bc_bot, | |
| - ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodesFace", iter); | |
| - | |
| - findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z, | |
| - dev_ns_div_tau_x, | |
| - dev_ns_div_tau_y, | |
| - dev_ns_div_tau_z); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post findFaceDivTau", iter); | |
| - | |
| - setNSghostNodesFace<Float> | |
| - <<<dimGridFluidFace, dimBlockFluid>>>( | |
| - dev_ns_div_tau_x, | |
| - dev_ns_div_tau_y, | |
| - dev_ns_div_tau_z, | |
| - ns.bc_bot, | |
| - ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)", | |
| - iter); | |
| - | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_p, ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter); | |
| - | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_phi, ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findPorositiesDev); | |
| + checkForCudaErrorsIter("Post findPorositiesDev", iter); | |
| +#ifdef CFD_DEM_COUPLING | |
| + /*if (params.nu <= 0.0) { | |
| + std::cerr << "Error! The fluid needs a positive viscosity " | |
| + "value in order to simulate particle-fluid interaction." | |
| + << std::endl; | |
| + exit(1); | |
| + }*/ | |
| + if (iter == 0) { | |
| + // set cell center ghost nodes | |
| + setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + | |
| + // find cell face velocities | |
| + interpolateCenterToFace | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrors("Post interpolateCenterToFace"); | |
| + } | |
| - if (np > 0) { | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + ns.bc_bot, | |
| + ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodesFace", iter); | |
| - // Per particle, find the fluid-particle interaction force f_pf | |
| - // and apply it to the particle | |
| - findInteractionForce<<<dimGrid, dimBlock>>>( | |
| - dev_x, | |
| - dev_vel, | |
| - dev_ns_phi, | |
| - dev_ns_p, | |
| + findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| dev_ns_v_x, | |
| dev_ns_v_y, | |
| dev_ns_v_z, | |
| + ns.mu, | |
| dev_ns_div_tau_x, | |
| dev_ns_div_tau_y, | |
| - dev_ns_div_tau_z, | |
| - //ns.c_v, | |
| - dev_ns_f_pf, | |
| - dev_force, | |
| - dev_ns_f_d, | |
| - dev_ns_f_p, | |
| - dev_ns_f_v, | |
| - dev_ns_f_sum); | |
| + dev_ns_div_tau_z); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post findInteractionForce", iter); | |
| + checkForCudaErrorsIter("Post findFaceDivTau", iter); | |
| + | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluid>>>( | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z, | |
| + ns.bc_bot, | |
| + ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_div_tau)", | |
| + iter); | |
| setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| dev_ns_p, ns.bc_bot, ns.bc_top); | |
| cudaThreadSynchronize(); | |
| checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter); | |
| - // Apply fluid-particle interaction force to the fluid | |
| - applyInteractionForceToFluid<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_gridParticleIndex, | |
| - dev_cellStart, | |
| - dev_cellEnd, | |
| - dev_ns_f_pf, | |
| - dev_ns_F_pf); | |
| - //dev_ns_F_pf_x, | |
| - //dev_ns_F_pf_y, | |
| - //dev_ns_F_pf_z); | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_phi, ns.bc_bot, ns.bc_top); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post applyInteractionForceToFluid", | |
| - iter); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", iter); | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_F_pf, ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(F_pf)", iter); | |
| - } | |
| + | |
| + if (np > 0) { | |
| + | |
| + // Per particle, find the fluid-particle interaction force… | |
| + // and apply it to the particle | |
| + findInteractionForce<<<dimGrid, dimBlock>>>( | |
| + dev_x, | |
| + dev_vel, | |
| + dev_ns_phi, | |
| + dev_ns_p, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z, | |
| + //ns.c_v, | |
| + ns.mu, | |
| + ns.rho_f, | |
| + dev_ns_f_pf, | |
| + dev_force, | |
| + dev_ns_f_d, | |
| + dev_ns_f_p, | |
| + dev_ns_f_v, | |
| + dev_ns_f_sum); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post findInteractionForce", iter); | |
| + | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_p, ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(dev_ns_p)", i… | |
| + | |
| + // Apply fluid-particle interaction force to the fluid | |
| + applyInteractionForceToFluid<<<dimGridFluid, dimBlockFluid… | |
| + dev_gridParticleIndex, | |
| + dev_cellStart, | |
| + dev_cellEnd, | |
| + dev_ns_f_pf, | |
| + dev_ns_F_pf); | |
| + //dev_ns_F_pf_x, | |
| + //dev_ns_F_pf_y, | |
| + //dev_ns_F_pf_z); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post applyInteractionForceToFluid", | |
| + iter); | |
| + | |
| + setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_F_pf, ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(F_pf)", iter); | |
| + } | |
| #endif | |
| - if ((iter % ns.ndem) == 0) { | |
| - // Initial guess for the top epsilon values. These may be | |
| - // changed in setUpperPressureNS | |
| - // TODO: Check if this should only be set when top bc=Dirichlet | |
| - Float pressure = ns.p[idx(0,0,ns.nz-1)]; | |
| - Float pressure_new = pressure; // Dirichlet | |
| - Float epsilon_value = pressure_new - ns.beta*pressure; | |
| - setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| - epsilon_value); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSepsilonTop", iter); | |
| + if ((iter % ns.ndem) == 0) { | |
| + // Initial guess for the top epsilon values. These may be | |
| + // changed in setUpperPressureNS | |
| + // TODO: Check if this should only be set when top bc=Diri… | |
| + Float pressure = ns.p[idx(0,0,ns.nz-1)]; | |
| + Float pressure_new = pressure; // Dirichlet | |
| + Float epsilon_value = pressure_new - ns.beta*pressure; | |
| + setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + epsilon_value); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSepsilonTop", iter); | |
| #if defined(REPORT_EPSILON) || defined(REPORT_V_P_COMPONENTS) || defined(REPOR… | |
| std::cout | |
| t@@ -1182,503 +1241,811 @@ __host__ void DEM::startTime() | |
| << std::endl; | |
| #endif | |
| - // find cell containing top wall | |
| - if (walls.nw > 0 && walls.wmode[0] == 1) { | |
| - wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]); | |
| - setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| - wall0_iz, | |
| - epsilon_value, | |
| - dp_dz); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSepsilonAtTopWall", iter); | |
| + // find cell containing top wall | |
| + if (walls.nw > 0 && walls.wmode[0] == 1) { | |
| + wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]); | |
| + setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + wall0_iz, | |
| + epsilon_value, | |
| + dp_dz); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSepsilonAtTopWall", i… | |
| #ifdef REPORT_EPSILON | |
| - std::cout | |
| - << "\n###### EPSILON setNSepsilonAtTopWall " | |
| - << "######" << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| + std::cout | |
| + << "\n###### EPSILON setNSepsilonAtTopWall " | |
| + << "######" << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| #endif | |
| - } | |
| + } | |
| - // Modulate the pressures at the upper boundary cells | |
| - if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) && | |
| - ns.p_mod_f > 1.0e-7) { | |
| - // original pressure | |
| - Float new_pressure = ns.p[idx(0,0,ns.nz-1)] | |
| - + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current | |
| - + ns.p_mod_phi); | |
| - setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>( | |
| + // Modulate the pressures at the upper boundary cells | |
| + if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) && | |
| + ns.p_mod_f > 1.0e-7) { | |
| + // original pressure | |
| + Float new_pressure = ns.p[idx(0,0,ns.nz-1)] | |
| + + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current | |
| + + ns.p_mod_phi); | |
| + setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_p, | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + ns.beta, | |
| + new_pressure); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setUpperPressureNS", iter… | |
| + | |
| +#ifdef REPORT_MORE_EPSILON | |
| + std::cout | |
| + << "\n@@@@@@ TIME STEP " << iter << " @@@@@@" | |
| + << "\n###### EPSILON AFTER setUpperPressureNS " | |
| + << "######" << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| + } | |
| + | |
| + // Set the values of the ghost nodes in the grid | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_p, ns.bc_bot, ns.bc_top); | |
| + | |
| + //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + //dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + ns.bc_bot, ns.bc_top); | |
| + | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_phi, ns.bc_bot, ns.bc_top); | |
| + | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_dphi, ns.bc_bot, ns.bc_top); | |
| + | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_setNSghostNodesDev); | |
| + checkForCudaErrorsIter("Post setNSghostNodesDev", iter); | |
| + /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev … | |
| + << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon");*/ | |
| + | |
| + // interpolate velocities to cell centers which makes velo… | |
| + // prediction easier | |
| + interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_v); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post interpolateFaceToCenter", ite… | |
| + | |
| + // Set cell-center velocity ghost nodes | |
| + setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(v)", iter); | |
| + | |
| + // Find the divergence of phi*vi*v, needed for predicting … | |
| + // fluid velocities | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_phi, | |
| + dev_ns_v, | |
| + dev_ns_div_phi_vi_v); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findNSdivphiviv); | |
| + checkForCudaErrorsIter("Post findNSdivphiviv", iter); | |
| + | |
| + // Set cell-center ghost nodes | |
| + setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_div_phi_vi_v, ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodes(div_phi_vi_v)… | |
| + iter); | |
| + | |
| + // Predict the fluid velocities on the base of the old pre… | |
| + // field and ignoring the incompressibility constraint | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace… | |
| dev_ns_p, | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_phi, | |
| + dev_ns_dphi, | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z, | |
| + dev_ns_div_phi_vi_v, | |
| + ns.bc_bot, | |
| + ns.bc_top, | |
| ns.beta, | |
| - new_pressure); | |
| + dev_ns_F_pf, | |
| + ns.ndem, | |
| + wall0_iz, | |
| + ns.c_v, | |
| + ns.mu, | |
| + ns.rho_f, | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setUpperPressureNS", iter); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findPredNSvelocities); | |
| + checkForCudaErrorsIter("Post findPredNSvelocities", iter); | |
| + | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter( | |
| + "Post setNSghostNodesFace(dev_ns_v_p)", iter); | |
| + | |
| + interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + dev_ns_v_p); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post interpolateFaceToCenter", ite… | |
| + | |
| + | |
| + // In the first iteration of the sphere program, we'll nee… | |
| + // manually estimate the values of epsilon. In the subsequ… | |
| + // iterations, the previous values are used. | |
| + if (iter == 0) { | |
| + | |
| + // Define the first estimate of the values of epsilon. | |
| + // The initial guess depends on the value of ns.beta. | |
| + Float pressure = ns.p[idx(2,2,2)]; | |
| + Float pressure_new = pressure; // Guess p_current = p_… | |
| + Float epsilon_value = pressure_new - ns.beta*pressure; | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, epsilon_value); | |
| + cudaThreadSynchronize(); | |
| + | |
| + setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_… | |
| + cudaThreadSynchronize(); | |
| + | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
| + &t_setNSepsilon); | |
| + checkForCudaErrorsIter("Post setNSepsilonInterior", it… | |
| #ifdef REPORT_MORE_EPSILON | |
| - std::cout | |
| + std::cout | |
| + << "\n###### EPSILON AFTER setNSepsilonInterior " | |
| + << "######" << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| + | |
| + // Set the epsilon values at the lower boundary | |
| + pressure = ns.p[idx(0,0,0)]; | |
| + pressure_new = pressure; // Guess p_current = p_new | |
| + epsilon_value = pressure_new - ns.beta*pressure; | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + epsilon_value); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
| + &t_setNSdirichlet); | |
| + checkForCudaErrorsIter("Post setNSepsilonBottom", iter… | |
| + | |
| +#ifdef REPORT_MORE_EPSILON | |
| + std::cout | |
| + << "\n###### EPSILON AFTER setNSepsilonBottom " | |
| + << "######" << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| + | |
| + /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid… | |
| + dev_ns_epsilon); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrors("Post setNSghostNodesFloat(dev_ns… | |
| + iter);*/ | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>… | |
| + dev_ns_epsilon, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)… | |
| + iter); | |
| + | |
| +#ifdef REPORT_MORE_EPSILON | |
| + std::cout << | |
| + "\n###### EPSILON AFTER setNSghostNodes(epsilon) " | |
| + << "######" << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| + } | |
| + | |
| + // Solve the system of epsilon using a Jacobi iterative so… | |
| + // The average normalized residual is initialized to a lar… | |
| + // value. | |
| + //double avg_norm_res; | |
| + double max_norm_res; | |
| + | |
| + // Write a log file of the normalized residuals during the… | |
| + // iterations | |
| + std::ofstream reslog; | |
| + if (write_res_log == 1) | |
| + reslog.open("max_res_norm.dat"); | |
| + | |
| + // transfer normalized residuals from GPU to CPU | |
| +#ifdef REPORT_MORE_EPSILON | |
| + std::cout << "\n###### BEFORE FIRST JACOBI ITERATION #####… | |
| << "\n@@@@@@ TIME STEP " << iter << " @@@@@@" | |
| - << "\n###### EPSILON AFTER setUpperPressureNS " | |
| - << "######" << std::endl; | |
| + << std::endl; | |
| transferNSepsilonFromGlobalDeviceMemory(); | |
| printNSarray(stdout, ns.epsilon, "epsilon"); | |
| #endif | |
| - } | |
| - // Set the values of the ghost nodes in the grid | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| + for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) { | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_p, ns.bc_bot, ns.bc_top); | |
| + // Only grad(epsilon) changes during the Jacobi iterat… | |
| + // The remaining terms of the forcing function are only | |
| + // calculated during the first iteration. | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findNSforcing<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + dev_ns_phi, | |
| + dev_ns_dphi, | |
| + dev_ns_v_p, | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + nijac, | |
| + ns.ndem, | |
| + ns.c_v, | |
| + ns.rho_f, | |
| + dev_ns_f1, | |
| + dev_ns_f2, | |
| + dev_ns_f); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
| + &t_findNSforcing); | |
| + checkForCudaErrorsIter("Post findNSforcing", iter); | |
| + /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid… | |
| + dev_ns_f1, | |
| + dev_ns_f2, | |
| + dev_ns_f, | |
| + nijac); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrors("Post setNSghostNodesForcing", it… | |
| + | |
| + setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>… | |
| + dev_ns_epsilon, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)… | |
| + iter); | |
| - //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - //dev_ns_v, ns.bc_bot, ns.bc_top); | |
| +#ifdef REPORT_EPSILON | |
| + std::cout << "\n###### JACOBI ITERATION " | |
| + << nijac | |
| + << " after setNSghostNodes(epsilon,2) ######" | |
| + << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| - setNSghostNodesFace<Float> | |
| - <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - ns.bc_bot, ns.bc_top); | |
| + // Perform a single Jacobi iteration | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + dev_ns_norm, | |
| + dev_ns_f, | |
| + ns.bc_bot, | |
| + ns.bc_top, | |
| + ns.theta, | |
| + wall0_iz, | |
| + dp_dz); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
| + &t_jacobiIterationNS); | |
| + checkForCudaErrorsIter("Post jacobiIterationNS", iter); | |
| + | |
| + // set Dirichlet and Neumann BC at cells containing to… | |
| + /*if (walls.nw > 0 && walls.wmode[0] == 1) { | |
| + setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>… | |
| + dev_ns_epsilon, | |
| + dev_ns_epsilon_new, | |
| + wall0_iz, | |
| + epsilon_value, | |
| + dp_dz); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setNSepsilonAtTopWall", | |
| + iter); | |
| + }*/ | |
| + | |
| + // Copy new values to current values | |
| + copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon_new, | |
| + dev_ns_epsilon); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter | |
| + ("Post copyValues (epsilon_new->epsilon)", iter); | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_phi, ns.bc_bot, ns.bc_top); | |
| +#ifdef REPORT_EPSILON | |
| + std::cout << "\n###### JACOBI ITERATION " | |
| + << nijac << " after jacobiIterationNS ######" | |
| + << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_dphi, ns.bc_bot, ns.bc_top); | |
| + if (nijac % nijacnorm == 0) { | |
| - cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_setNSghostNodesDev); | |
| - checkForCudaErrorsIter("Post setNSghostNodesDev", iter); | |
| - /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev ####… | |
| - << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon");*/ | |
| + // Read the normalized residuals from the device | |
| + transferNSnormFromGlobalDeviceMemory(); | |
| - // interpolate velocities to cell centers which makes velocity | |
| - // prediction easier | |
| - interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z, | |
| - dev_ns_v); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post interpolateFaceToCenter", iter); | |
| + // Write the normalized residuals to the terminal | |
| + //printNSarray(stdout, ns.norm, "norm"); | |
| - // Set cell-center velocity ghost nodes | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v, ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(v)", iter); | |
| - | |
| - // Find the divergence of phi*vi*v, needed for predicting the | |
| - // fluid velocities | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| - findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_phi, | |
| - dev_ns_v, | |
| - dev_ns_div_phi_vi_v); | |
| - cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findNSdivphiviv); | |
| - checkForCudaErrorsIter("Post findNSdivphiviv", iter); | |
| + // Find the maximum value of the normalized residu… | |
| + max_norm_res = maxNormResNS(); | |
| - // Set cell-center ghost nodes | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_div_phi_vi_v, ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes(div_phi_vi_v)", | |
| - iter); | |
| + // Write the Jacobi iteration number and maximum v… | |
| + // of the normalized residual to the log file | |
| + if (write_res_log == 1) | |
| + reslog << nijac << '\t' << max_norm_res | |
| + << std::endl; | |
| + } | |
| - // Predict the fluid velocities on the base of the old pressure | |
| - // field and ignoring the incompressibility constraint | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| - findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_p, | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z, | |
| - dev_ns_phi, | |
| - dev_ns_dphi, | |
| - dev_ns_div_tau_x, | |
| - dev_ns_div_tau_y, | |
| - dev_ns_div_tau_z, | |
| - dev_ns_div_phi_vi_v, | |
| - ns.bc_bot, | |
| - ns.bc_top, | |
| - ns.beta, | |
| - dev_ns_F_pf, | |
| - ns.ndem, | |
| - wall0_iz, | |
| - ns.c_v, | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z); | |
| - cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findPredNSvelocities); | |
| - checkForCudaErrorsIter("Post findPredNSvelocities", iter); | |
| + if (max_norm_res < ns.tolerance) { | |
| - setNSghostNodesFace<Float> | |
| - <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter( | |
| - "Post setNSghostNodesFace(dev_ns_v_p)", iter); | |
| + if (write_conv_log == 1 && iter % conv_log_interva… | |
| + convlog << iter+1 << '\t' << nijac << std::end… | |
| - interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - dev_ns_v_p); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post interpolateFaceToCenter", iter); | |
| + setNSghostNodes<Float> | |
| + <<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter | |
| + ("Post setNSghostNodesEpsilon(4)", iter); | |
| + // Apply smoothing if requested | |
| + if (ns.gamma > 0.0) { | |
| + | |
| + smoothing<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + ns.gamma, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post smoothing", iter); | |
| + | |
| + setNSghostNodes<Float> | |
| + <<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter | |
| + ("Post setNSghostNodesEpsilon(4)", iter); | |
| + } | |
| - // In the first iteration of the sphere program, we'll need to | |
| - // manually estimate the values of epsilon. In the subsequent | |
| - // iterations, the previous values are used. | |
| - if (iter == 0) { | |
| +#ifdef REPORT_EPSILON | |
| + std::cout << "\n###### JACOBI ITERATION " | |
| + << nijac << " after smoothing ######" | |
| + << std::endl; | |
| + transferNSepsilonFromGlobalDeviceMemory(); | |
| + printNSarray(stdout, ns.epsilon, "epsilon"); | |
| +#endif | |
| - // Define the first estimate of the values of epsilon. | |
| - // The initial guess depends on the value of ns.beta. | |
| - Float pressure = ns.p[idx(2,2,2)]; | |
| - Float pressure_new = pressure; // Guess p_current = p_new | |
| - Float epsilon_value = pressure_new - ns.beta*pressure; | |
| + break; // solution has converged, exit Jacobi loop | |
| + } | |
| + | |
| + if (nijac >= ns.maxiter-1) { | |
| + | |
| + if (write_conv_log == 1) | |
| + convlog << iter+1 << '\t' << nijac << std::end… | |
| + | |
| + std::cerr << "\nIteration " << iter << ", time " | |
| + << iter*time.dt << " s: " | |
| + "Error, the epsilon solution in the fluid " | |
| + "calculations did not converge. Try increasing… | |
| + "value of 'ns.maxiter' (" << ns.maxiter | |
| + << ") or increase 'ns.tolerance' (" | |
| + << ns.tolerance << ")." << std::endl; | |
| + } | |
| + //break; // end after Jacobi first iteration | |
| + } // end Jacobi iteration loop | |
| + | |
| + if (write_res_log == 1) | |
| + reslog.close(); | |
| + | |
| + // Find the new pressures and velocities | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, epsilon_value); | |
| - cudaThreadSynchronize(); | |
| - setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm… | |
| + updateNSpressure<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_epsilon, | |
| + ns.beta, | |
| + dev_ns_p); | |
| cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post updateNSpressure", iter); | |
| + updateNSvelocity<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + dev_ns_phi, | |
| + dev_ns_epsilon, | |
| + ns.beta, | |
| + ns.bc_bot, | |
| + ns.bc_top, | |
| + ns.ndem, | |
| + ns.c_v, | |
| + ns.rho_f, | |
| + wall0_iz, | |
| + iter, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z); | |
| + cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_setNSepsilon); | |
| - checkForCudaErrorsIter("Post setNSepsilonInterior", iter); | |
| + &t_updateNSvelocityPressure); | |
| + checkForCudaErrorsIter("Post updateNSvelocity", iter); | |
| + | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + ns.bc_bot, ns.bc_top); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter( | |
| + "Post setNSghostNodesFace(dev_ns_v)", iter); | |
| + | |
| + interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_v); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post interpolateFaceToCenter", ite… | |
| + } // end iter % ns.dem == 0 | |
| + } // end cfd_solver == 0 | |
| -#ifdef REPORT_MORE_EPSILON | |
| + // Darcy solution | |
| + else if (cfd_solver == 1) { | |
| + | |
| +#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS) | |
| std::cout | |
| - << "\n###### EPSILON AFTER setNSepsilonInterior " | |
| - << "######" << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| + << "\n\n@@@@@@ TIME STEP " << iter << " @@@" | |
| + << std::endl; | |
| #endif | |
| - // Set the epsilon values at the lower boundary | |
| - pressure = ns.p[idx(0,0,0)]; | |
| - pressure_new = pressure; // Guess p_current = p_new | |
| - epsilon_value = pressure_new - ns.beta*pressure; | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyPorosities<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_cellStart, | |
| + dev_cellEnd, | |
| + dev_x_sorted, | |
| + iter, | |
| + np, | |
| + darcy.c_phi, | |
| + dev_darcy_phi, | |
| + dev_darcy_dphi); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyPorosities); | |
| + checkForCudaErrorsIter("Post findDarcyPorosities", iter); | |
| + | |
| + if (np > 0) { | |
| + | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| - epsilon_value); | |
| + setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
| cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_setNSdirichlet); | |
| - checkForCudaErrorsIter("Post setNSepsilonBottom", iter); | |
| - | |
| -#ifdef REPORT_MORE_EPSILON | |
| - std::cout | |
| - << "\n###### EPSILON AFTER setNSepsilonBottom " | |
| - << "######" << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| -#endif | |
| + &t_setDarcyGhostNodes); | |
| + checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
| + "dev_darcy_p) before findDarcyPressureForce", iter… | |
| - /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_eps… | |
| - iter);*/ | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.bc_bot, ns.bc_top); | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyPressureForce<<<dimGrid, dimBlock>>>( | |
| + dev_x, | |
| + dev_darcy_p, | |
| + dev_darcy_phi, | |
| + dev_force, | |
| + dev_darcy_f_p); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)", | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyPressureForce); | |
| + checkForCudaErrorsIter("Post findDarcyPressureForce", | |
| iter); | |
| + } | |
| -#ifdef REPORT_MORE_EPSILON | |
| - std::cout << | |
| - "\n###### EPSILON AFTER setNSghostNodes(epsilon) " | |
| - << "######" << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| -#endif | |
| + // Modulate the pressures at the upper boundary cells | |
| + if ((darcy.p_mod_A > 1.0e-5 || darcy.p_mod_A < -1.0e-5) && | |
| + darcy.p_mod_f > 1.0e-7) { | |
| + // original pressure | |
| + Float new_pressure = darcy.p[d_idx(0,0,darcy.nz-1)] //orig… | |
| + + darcy.p_mod_A*sin(2.0*M_PI*darcy.p_mod_f*time.current | |
| + + darcy.p_mod_phi); | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + setDarcyTopPressure<<<dimGridFluid, dimBlockFluid>>>( | |
| + new_pressure, | |
| + dev_darcy_p); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_setDarcyTopPressure); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setUpperPressureNS", iter); | |
| + } | |
| + | |
| + if (walls.nw > 0 && walls.wmode[0] == 1) { | |
| + wall0_iz = walls.nx->w/(grid.L[2]/grid.num[2]); | |
| + /*setDarcyTopWallPressure<<<dimGridFluid, dimBlockFluid>>>( | |
| + new_pressure, | |
| + wall0_iz, | |
| + dev_darcy_p); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setDarcyTopWallPressure", | |
| + iter);*/ | |
| + } | |
| + | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyPermeabilities<<<dimGridFluid, dimBlockFluid>>>( | |
| + darcy.k_c, dev_darcy_phi, dev_darcy_k); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyPermeabilities); | |
| + checkForCudaErrorsIter("Post findDarcyPermeabilities", | |
| + iter); | |
| + | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_k, darcy.bc_bot, darcy.bc_top); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_setDarcyGhostNodes); | |
| + checkForCudaErrorsIter("Post setDarcyGhostNodes(dev_darcy_k)", | |
| + iter); | |
| + | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyPermeabilityGradients<<<dimGridFluid, dimBlockFluid>>… | |
| + dev_darcy_k, dev_darcy_grad_k); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyPermeabilityGradients); | |
| + checkForCudaErrorsIter("Post findDarcyPermeabilityGradients", | |
| + iter); | |
| + | |
| + if (iter == 0) { | |
| + setDarcyNormZero<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_norm); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post setDarcyNormZero", iter); | |
| + | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p, | |
| + dev_darcy_p_old); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_copyValues); | |
| + checkForCudaErrorsIter("Post copyValues(p -> p_old)", | |
| + iter); | |
| } | |
| + /*if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyPressureChange<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p_old, | |
| + dev_darcy_p, | |
| + iter, | |
| + dev_darcy_dpdt); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyPressureChange); | |
| + checkForCudaErrorsIter("Post findDarcyPressureChange", iter);*/ | |
| + | |
| // Solve the system of epsilon using a Jacobi iterative solver. | |
| // The average normalized residual is initialized to a large | |
| // value. | |
| //double avg_norm_res; | |
| double max_norm_res; | |
| - // Write a log file of the normalized residuals during the Jac… | |
| - // iterations | |
| + // Write a log file of the normalized residuals during the | |
| + // Jacobi iterations | |
| std::ofstream reslog; | |
| if (write_res_log == 1) | |
| reslog.open("max_res_norm.dat"); | |
| - // transfer normalized residuals from GPU to CPU | |
| -#ifdef REPORT_MORE_EPSILON | |
| - std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######" | |
| - << "\n@@@@@@ TIME STEP " << iter << " @@@@@@" | |
| - << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| -#endif | |
| + for (unsigned int nijac = 0; nijac<darcy.maxiter; ++nijac) { | |
| - for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) { | |
| - | |
| - //printf("### Jacobi iteration %d\n", nijac); | |
| + if (nijac == 0) { | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p, | |
| + dev_darcy_p_old); | |
| + cudaThreadSynchronize(); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapse… | |
| + &t_copyValues); | |
| + checkForCudaErrorsIter("Post copyValues(p -> p_old)", | |
| + iter); | |
| + } | |
| - // Only grad(epsilon) changes during the Jacobi iterations. | |
| - // The remaining terms of the forcing function are only | |
| - // calculated during the first iteration. | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - findNSforcing<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_phi, | |
| - dev_ns_dphi, | |
| - dev_ns_v_p, | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - nijac, | |
| - ns.ndem, | |
| - ns.c_v, | |
| - dev_ns_f1, | |
| - dev_ns_f2, | |
| - dev_ns_f); | |
| + setDarcyGhostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
| cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findNSforcing); | |
| - checkForCudaErrorsIter("Post findNSforcing", iter); | |
| - /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_f1, | |
| - dev_ns_f2, | |
| - dev_ns_f, | |
| - nijac); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrors("Post setNSghostNodesForcing", iter);… | |
| - | |
| - setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)", | |
| - iter); | |
| - | |
| -#ifdef REPORT_EPSILON | |
| - std::cout << "\n###### JACOBI ITERATION " | |
| - << nijac | |
| - << " after setNSghostNodes(epsilon,2) ######" | |
| - << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| -#endif | |
| + &t_setDarcyGhostNodes); | |
| + checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
| + "dev_darcy_p) in Jacobi loop", iter); | |
| - // Perform a single Jacobi iteration | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| - dev_ns_norm, | |
| - dev_ns_f, | |
| - ns.bc_bot, | |
| - ns.bc_top, | |
| - ns.theta, | |
| + updateDarcySolution<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p_old, | |
| + //dev_darcy_dpdt, | |
| + dev_darcy_p, | |
| + dev_darcy_k, | |
| + dev_darcy_phi, | |
| + dev_darcy_dphi, | |
| + dev_darcy_grad_k, | |
| + darcy.beta_f, | |
| + darcy.mu, | |
| + darcy.bc_bot, | |
| + darcy.bc_top, | |
| + darcy.ndem, | |
| wall0_iz, | |
| - dp_dz); | |
| + dev_darcy_p_new, | |
| + dev_darcy_norm); | |
| cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_jacobiIterationNS); | |
| - checkForCudaErrorsIter("Post jacobiIterationNS", iter); | |
| - | |
| - // set Dirichlet and Neumann BC at cells containing top wa… | |
| - /*if (walls.nw > 0 && walls.wmode[0] == 1) { | |
| - setNSepsilonAtTopWall<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - dev_ns_epsilon_new, | |
| - wall0_iz, | |
| - epsilon_value, | |
| - dp_dz); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSepsilonAtTopWall", | |
| - iter); | |
| - }*/ | |
| + &t_updateDarcySolution); | |
| + checkForCudaErrorsIter("Post updateDarcySolution", iter); | |
| // Copy new values to current values | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| copyValues<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon_new, | |
| - dev_ns_epsilon); | |
| + dev_darcy_p_new, | |
| + dev_darcy_p); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter | |
| - ("Post copyValues (epsilon_new->epsilon)", iter); | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_copyValues); | |
| + checkForCudaErrorsIter("Post copyValues(p_new -> p)", iter… | |
| #ifdef REPORT_EPSILON | |
| std::cout << "\n###### JACOBI ITERATION " | |
| - << nijac << " after jacobiIterationNS ######" | |
| - << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| + << nijac << " after copyValues ######" << std::endl; | |
| + transferDarcyPressuresFromGlobalDeviceMemory(); | |
| + printDarcyArray(stdout, darcy.p, "p"); | |
| #endif | |
| if (nijac % nijacnorm == 0) { | |
| - | |
| // Read the normalized residuals from the device | |
| - transferNSnormFromGlobalDeviceMemory(); | |
| + transferDarcyNormFromGlobalDeviceMemory(); | |
| // Write the normalized residuals to the terminal | |
| - //printNSarray(stdout, ns.norm, "norm"); | |
| + //printDarcyArray(stdout, darcy.norm, "norm"); | |
| // Find the maximum value of the normalized residuals | |
| - max_norm_res = maxNormResNS(); | |
| + max_norm_res = maxNormResDarcy(); | |
| // Write the Jacobi iteration number and maximum value | |
| // of the normalized residual to the log file | |
| if (write_res_log == 1) | |
| reslog << nijac << '\t' << max_norm_res | |
| << std::endl; | |
| - } | |
| - if (max_norm_res < ns.tolerance) { | |
| + if (max_norm_res <= darcy.tolerance) { | |
| + if (write_conv_log == 1 | |
| + && iter % conv_log_interval == 0) | |
| + convlog << iter+1 << '\t' << nijac << std::end… | |
| - if (write_conv_log == 1 && iter % conv_log_interval ==… | |
| - convlog << iter << '\t' << nijac << std::endl; | |
| - | |
| - setNSghostNodes<Float> | |
| - <<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter | |
| - ("Post setNSghostNodesEpsilon(4)", iter); | |
| - | |
| - // Apply smoothing if requested | |
| - if (ns.gamma > 0.0) { | |
| - | |
| - smoothing<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.gamma, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post smoothing", iter); | |
| - | |
| - setNSghostNodes<Float> | |
| - <<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter | |
| - ("Post setNSghostNodesEpsilon(4)", iter); | |
| + break; // solution has converged, exit Jacobi loop | |
| } | |
| - | |
| -#ifdef REPORT_EPSILON | |
| - std::cout << "\n###### JACOBI ITERATION " | |
| - << nijac << " after smoothing ######" | |
| - << std::endl; | |
| - transferNSepsilonFromGlobalDeviceMemory(); | |
| - printNSarray(stdout, ns.epsilon, "epsilon"); | |
| -#endif | |
| - | |
| - break; // solution has converged, exit Jacobi loop | |
| } | |
| - if (nijac >= ns.maxiter-1) { | |
| + if (nijac == darcy.maxiter-1) { | |
| if (write_conv_log == 1) | |
| - convlog << iter << '\t' << nijac << std::endl; | |
| + convlog << iter+1 << '\t' << nijac << std::endl; | |
| std::cerr << "\nIteration " << iter << ", time " | |
| << iter*time.dt << " s: " | |
| - "Error, the epsilon solution in the fluid " | |
| - "calculations did not converge. Try increasing the… | |
| - "value of 'ns.maxiter' (" << ns.maxiter | |
| - << ") or increase 'ns.tolerance' (" | |
| - << ns.tolerance << ")." << std::endl; | |
| + "Error, the pressure solution in the fluid " | |
| + "calculations did not converge. Try increasing " | |
| + "the value of 'darcy.maxiter' (" | |
| + << darcy.maxiter | |
| + << ") or increase 'darcy.tolerance' (" | |
| + << darcy.tolerance << ")." << std::endl; | |
| } | |
| - //break; // end after Jacobi first iteration | |
| - } // end Jacobi iteration loop | |
| - if (write_res_log == 1) | |
| - reslog.close(); | |
| + if (write_res_log == 1) | |
| + reslog.close(); | |
| + | |
| + //break; // end after first iteration | |
| + } | |
| - // Find the new pressures and velocities | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - | |
| - updateNSpressure<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_epsilon, | |
| - ns.beta, | |
| - dev_ns_p); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post updateNSpressure", iter); | |
| - | |
| - updateNSvelocity<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - dev_ns_phi, | |
| - dev_ns_epsilon, | |
| - ns.beta, | |
| - ns.bc_bot, | |
| - ns.bc_top, | |
| - ns.ndem, | |
| - ns.c_v, | |
| - wall0_iz, | |
| - iter, | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z); | |
| + setDarcyGhostNodes<Float> <<<dimGridFluid, dimBlockFluid>>> | |
| + (dev_darcy_p, darcy.bc_bot, darcy.bc_top); | |
| cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_updateNSvelocityPressure); | |
| - checkForCudaErrorsIter("Post updateNSvelocity", iter); | |
| - | |
| - setNSghostNodesFace<Float> | |
| - <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| - dev_ns_v_p_x, | |
| - dev_ns_v_p_y, | |
| - dev_ns_v_p_z, | |
| - ns.bc_bot, ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter( | |
| - "Post setNSghostNodesFace(dev_ns_v)", iter); | |
| + &t_setDarcyGhostNodes); | |
| + checkForCudaErrorsIter("Post setDarcyGhostNodes(" | |
| + "dev_darcy_p) after Jacobi loop", iter); | |
| - interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v_x, | |
| - dev_ns_v_y, | |
| - dev_ns_v_z, | |
| - dev_ns_v); | |
| + if (PROFILING == 1) | |
| + startTimer(&kernel_tic); | |
| + findDarcyVelocities<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_darcy_p, | |
| + dev_darcy_phi, | |
| + dev_darcy_k, | |
| + darcy.mu, | |
| + dev_darcy_v); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post interpolateFaceToCenter", iter); | |
| - } // end iter % ns.dem == 0 | |
| - } // end navierstokes == 1 | |
| + if (PROFILING == 1) | |
| + stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| + &t_findDarcyVelocities); | |
| + checkForCudaErrorsIter("Post findDarcyVelocities", iter); | |
| + } | |
| + } | |
| + //break; // end after first iteration | |
| if (np > 0) { | |
| // Update particle kinematics | |
| t@@ -1777,7 +2144,7 @@ __host__ void DEM::startTime() | |
| checkForCudaErrorsIter("Beginning of file output section", iter); | |
| // v_x, v_y, v_z -> v | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1 && cfd_solver == 0) { | |
| interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| dev_ns_v_x, | |
| dev_ns_v_y, | |
| t@@ -1797,7 +2164,7 @@ __host__ void DEM::startTime() | |
| cudaThreadSynchronize(); | |
| // Check the numerical stability of the NS solver | |
| - if (navierstokes == 1) | |
| + if (fluid == 1 && cfd_solver == 0) | |
| checkNSstability(); | |
| // Write binary output file | |
| t@@ -1812,7 +2179,7 @@ __host__ void DEM::startTime() | |
| printNSarray(stdout, ns.epsilon, "epsilon");*/ | |
| // Write fluid arrays | |
| - /*if (navierstokes == 1) { | |
| + /*if (fluid == 1 && cfd_solver == 0) { | |
| sprintf(file,"output/%s.ns_phi.output%05d.bin", sid.c_str(), | |
| time.step_count); | |
| writeNSarray(ns.phi, file); | |
| t@@ -1904,7 +2271,12 @@ __host__ void DEM::startTime() | |
| t_findNSstressTensor + | |
| t_findNSdivphiviv + t_findNSdivtau + t_findPredNSvelocities + | |
| t_setNSepsilon + t_setNSdirichlet + t_setNSghostNodesDev + | |
| - t_findNSforcing + t_jacobiIterationNS + t_updateNSvelocityPressure; | |
| + t_findNSforcing + t_jacobiIterationNS + t_updateNSvelocityPressure… | |
| + t_findDarcyPorosities + t_setDarcyGhostNodes + | |
| + t_findDarcyPressureForce + t_setDarcyTopPressure + | |
| + t_findDarcyPermeabilities + t_findDarcyPermeabilityGradients + | |
| + //t_findDarcyPressureChange + | |
| + t_updateDarcySolution + t_copyValues + t_findDarcyVelocities; | |
| cout << "\nKernel profiling statistics:\n" | |
| << " - calcParticleCellID:\t\t" << t_calcParticleCellID/1000.0 | |
| t@@ -1931,34 +2303,67 @@ __host__ void DEM::startTime() | |
| << "\t(" << 100.0*t_summation/t_sum << " %)\n" | |
| << " - integrateWalls:\t\t" << t_integrateWalls/1000.0 << " s" | |
| << "\t(" << 100.0*t_integrateWalls/t_sum << " %)\n"; | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1 && cfd_solver == 0) { | |
| cout << " - findPorositiesDev:\t\t" << t_findPorositiesDev/1000.0 | |
| - << " s" << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n" | |
| - << " - findNSstressTensor:\t\t" << t_findNSstressTensor/1000.0 | |
| - << " s" << "\t(" << 100.0*t_findNSstressTensor/t_sum << " %)\n" | |
| - << " - findNSdivphiviv:\t\t" << t_findNSdivphiviv/1000.0 | |
| - << " s" << "\t(" << 100.0*t_findNSdivphiviv/t_sum << " %)\n" | |
| - << " - findNSdivtau:\t\t" << t_findNSdivtau/1000.0 | |
| - << " s" << "\t(" << 100.0*t_findNSdivtau/t_sum << " %)\n" | |
| - << " - findPredNSvelocities:\t" << t_findPredNSvelocities/1000.0 | |
| - << " s" << "\t(" << 100.0*t_findPredNSvelocities/t_sum << " %)\n" | |
| - << " - setNSepsilon:\t\t" << t_setNSepsilon/1000.0 | |
| - << " s" << "\t(" << 100.0*t_setNSepsilon/t_sum << " %)\n" | |
| - << " - setNSdirichlet:\t\t" << t_setNSdirichlet/1000.0 | |
| - << " s" << "\t(" << 100.0*t_setNSdirichlet/t_sum << " %)\n" | |
| - << " - setNSghostNodesDev:\t\t" << t_setNSghostNodesDev/1000.0 | |
| - << " s" << "\t(" << 100.0*t_setNSghostNodesDev/t_sum << " %)\n" | |
| - << " - findNSforcing:\t\t" << t_findNSforcing/1000.0 << " s" | |
| - << "\t(" << 100.0*t_findNSforcing/t_sum << " %)\n" | |
| - << " - jacobiIterationNS:\t\t" << t_jacobiIterationNS/1000.0 << "… | |
| - << "\t(" << 100.0*t_jacobiIterationNS/t_sum << " %)\n" | |
| - << " - updateNSvelocityPressure:\t" | |
| - << t_updateNSvelocityPressure/1000.0 << " s" | |
| - << "\t(" << 100.0*t_updateNSvelocityPressure/t_sum << " %)\n"; | |
| + << " s" << "\t(" << 100.0*t_findPorositiesDev/t_sum << " %)\n" | |
| + << " - findNSstressTensor:\t\t" << t_findNSstressTensor/1000.0 | |
| + << " s" << "\t(" << 100.0*t_findNSstressTensor/t_sum << " %)\n" | |
| + << " - findNSdivphiviv:\t\t" << t_findNSdivphiviv/1000.0 | |
| + << " s" << "\t(" << 100.0*t_findNSdivphiviv/t_sum << " %)\n" | |
| + << " - findNSdivtau:\t\t" << t_findNSdivtau/1000.0 | |
| + << " s" << "\t(" << 100.0*t_findNSdivtau/t_sum << " %)\n" | |
| + << " - findPredNSvelocities:\t" << | |
| + t_findPredNSvelocities/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findPredNSvelocities/t_sum << " %)\n" | |
| + << " - setNSepsilon:\t\t" << t_setNSepsilon/1000.0 | |
| + << " s" << "\t(" << 100.0*t_setNSepsilon/t_sum << " %)\n" | |
| + << " - setNSdirichlet:\t\t" << t_setNSdirichlet/1000.0 | |
| + << " s" << "\t(" << 100.0*t_setNSdirichlet/t_sum << " %)\n" | |
| + << " - setNSghostNodesDev:\t\t" << t_setNSghostNodesDev/1000.0 | |
| + << " s" << "\t(" << 100.0*t_setNSghostNodesDev/t_sum << " %)\n" | |
| + << " - findNSforcing:\t\t" << t_findNSforcing/1000.0 << " s" | |
| + << "\t(" << 100.0*t_findNSforcing/t_sum << " %)\n" | |
| + << " - jacobiIterationNS:\t\t" << t_jacobiIterationNS/1000.0 | |
| + << " s" | |
| + << "\t(" << 100.0*t_jacobiIterationNS/t_sum << " %)\n" | |
| + << " - updateNSvelocityPressure:\t" | |
| + << t_updateNSvelocityPressure/1000.0 << " s" | |
| + << "\t(" << 100.0*t_updateNSvelocityPressure/t_sum << " %)\n"; | |
| + } else if (fluid == 1 && cfd_solver == 1) { | |
| + cout << " - findDarcyPorosities:\t" << | |
| + t_findDarcyPorosities/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findDarcyPorosities/t_sum << " %)\n" | |
| + << " - setDarcyGhostNodes:\t\t" << | |
| + t_setDarcyGhostNodes/1000.0 << " s" << "\t(" << | |
| + 100.0*t_setDarcyGhostNodes/t_sum << " %)\n" | |
| + << " - findDarcyPressureForce:\t" << | |
| + t_findDarcyPressureForce/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findDarcyPressureForce/t_sum << " %)\n" | |
| + << " - setDarcyTopPressure:\t" << | |
| + t_setDarcyTopPressure/1000.0 << " s" << "\t(" << | |
| + 100.0*t_setDarcyTopPressure/t_sum << " %)\n" | |
| + << " - findDarcyPermeabilities:\t" << | |
| + t_findDarcyPermeabilities/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findDarcyPermeabilities/t_sum << " %)\n" | |
| + << " - findDarcyPermeabilityGrads:\t" << | |
| + t_findDarcyPermeabilityGradients/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findDarcyPermeabilityGradients/t_sum << " %)\n" | |
| + //<< " - findDarcyPressureChange:\t" << | |
| + //t_findDarcyPressureChange/1000.0 << " s" << "\t(" << | |
| + //100.0*t_findDarcyPressureChange/t_sum << " %)\n" | |
| + << " - updateDarcySolution:\t" << | |
| + t_updateDarcySolution/1000.0 << " s" << "\t(" << | |
| + 100.0*t_updateDarcySolution/t_sum << " %)\n" | |
| + << " - copyValues:\t\t\t" << | |
| + t_copyValues/1000.0 << " s" << "\t(" << | |
| + 100.0*t_copyValues/t_sum << " %)\n" | |
| + << " - findDarcyVelocities:\t" << | |
| + t_findDarcyVelocities/1000.0 << " s" << "\t(" << | |
| + 100.0*t_findDarcyVelocities/t_sum << " %)" << std::endl; | |
| } | |
| } | |
| - // Free GPU device memory | |
| + // Free GPU device memory | |
| freeGlobalDeviceMemory(); | |
| checkForCudaErrorsIter("After freeGlobalDeviceMemory()", iter); | |
| t@@ -1967,8 +2372,11 @@ __host__ void DEM::startTime() | |
| delete[] k.distmod; | |
| delete[] k.delta_t; | |
| - if (navierstokes == 1) { | |
| - endNS(); | |
| + if (fluid == 1) { | |
| + if (cfd_solver == 0) | |
| + endNS(); | |
| + else if (cfd_solver == 1) | |
| + endDarcy(); | |
| } | |
| cudaDeviceReset(); | |
| diff --git a/src/file_io.cpp b/src/file_io.cpp | |
| t@@ -254,73 +254,135 @@ void DEM::readbin(const char *target) | |
| if (verbose == 1) | |
| cout << "Done\n"; | |
| - if (navierstokes == 1) { // Navier Stokes flow | |
| + // Simulate fluid | |
| + if (fluid == 1) { | |
| - initNSmem(); | |
| + ifs.read(as_bytes(cfd_solver), sizeof(int)); | |
| - ifs.read(as_bytes(params.mu), sizeof(params.mu)); | |
| + if (cfd_solver < 0 || cfd_solver > 1) { | |
| + std::cerr << "Value of cfd_solver not understood (" | |
| + << cfd_solver << ")" << std::endl; | |
| + exit(1); | |
| + } | |
| + | |
| + if (cfd_solver == 0) { // Navier Stokes flow | |
| + | |
| + initNSmem(); | |
| + | |
| + ifs.read(as_bytes(ns.mu), sizeof(Float)); | |
| - if (verbose == 1) | |
| - cout << " - Reading fluid values:\t\t\t "; | |
| + if (verbose == 1) | |
| + cout << " - Reading fluid values:\t\t\t "; | |
| - for (z = 0; z<grid.num[2]; ++z) { | |
| - for (y = 0; y<grid.num[1]; ++y) { | |
| - for (x = 0; x<grid.num[0]; ++x) { | |
| - i = idx(x,y,z); | |
| - ifs.read(as_bytes(ns.v[i].x), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.v[i].y), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.v[i].z), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.p[i]), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.phi[i]), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.dphi[i]), sizeof(Float)); | |
| + for (z = 0; z<grid.num[2]; ++z) { | |
| + for (y = 0; y<grid.num[1]; ++y) { | |
| + for (x = 0; x<grid.num[0]; ++x) { | |
| + i = idx(x,y,z); | |
| + ifs.read(as_bytes(ns.v[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.v[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.v[i].z), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.p[i]), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.phi[i]), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.dphi[i]), sizeof(Float)); | |
| + } | |
| } | |
| } | |
| - } | |
| - ifs.read(as_bytes(params.rho_f), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.p_mod_A), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.p_mod_f), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.p_mod_phi), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.rho_f), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.p_mod_A), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.p_mod_f), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.p_mod_phi), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.bc_bot), sizeof(int)); | |
| - ifs.read(as_bytes(ns.bc_top), sizeof(int)); | |
| - ifs.read(as_bytes(ns.free_slip_bot), sizeof(int)); | |
| - ifs.read(as_bytes(ns.free_slip_top), sizeof(int)); | |
| + ifs.read(as_bytes(ns.bc_bot), sizeof(int)); | |
| + ifs.read(as_bytes(ns.bc_top), sizeof(int)); | |
| + ifs.read(as_bytes(ns.free_slip_bot), sizeof(int)); | |
| + ifs.read(as_bytes(ns.free_slip_top), sizeof(int)); | |
| - ifs.read(as_bytes(ns.gamma), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.theta), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.beta), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.tolerance), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.maxiter), sizeof(unsigned int)); | |
| - ifs.read(as_bytes(ns.ndem), sizeof(unsigned int)); | |
| + ifs.read(as_bytes(ns.gamma), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.theta), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.beta), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.tolerance), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.maxiter), sizeof(unsigned int)); | |
| + ifs.read(as_bytes(ns.ndem), sizeof(unsigned int)); | |
| - ifs.read(as_bytes(ns.c_phi), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.c_v), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.dt_dem_fac), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.c_phi), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.c_v), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.dt_dem_fac), sizeof(Float)); | |
| - for (i = 0; i<np; ++i) { | |
| - ifs.read(as_bytes(ns.f_d[i].x), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_d[i].y), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_d[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ifs.read(as_bytes(ns.f_p[i].x), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_p[i].y), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_p[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ifs.read(as_bytes(ns.f_v[i].x), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_v[i].y), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_v[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ifs.read(as_bytes(ns.f_sum[i].x), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_sum[i].y), sizeof(Float)); | |
| - ifs.read(as_bytes(ns.f_sum[i].z), sizeof(Float)); | |
| - } | |
| + for (i = 0; i<np; ++i) { | |
| + ifs.read(as_bytes(ns.f_d[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_d[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_d[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ifs.read(as_bytes(ns.f_p[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_p[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_p[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ifs.read(as_bytes(ns.f_v[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_v[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_v[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ifs.read(as_bytes(ns.f_sum[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_sum[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(ns.f_sum[i].z), sizeof(Float)); | |
| + } | |
| + | |
| + if (verbose == 1) | |
| + cout << "Done" << std::endl; | |
| - if (verbose == 1) | |
| - cout << "Done" << std::endl; | |
| + } else if (cfd_solver == 1) { // Darcy flow | |
| + | |
| + initDarcyMem(); | |
| + | |
| + ifs.read(as_bytes(darcy.mu), sizeof(Float)); | |
| + | |
| + if (verbose == 1) | |
| + cout << " - Reading fluid values:\t\t\t "; | |
| + | |
| + for (z = 0; z<darcy.nz; ++z) { | |
| + for (y = 0; y<darcy.ny; ++y) { | |
| + for (x = 0; x<darcy.nx; ++x) { | |
| + i = d_idx(x,y,z); | |
| + ifs.read(as_bytes(darcy.v[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.v[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.v[i].z), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.p[i]), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.phi[i]), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.dphi[i]), sizeof(Float)); | |
| + } | |
| + } | |
| + } | |
| + | |
| + ifs.read(as_bytes(darcy.rho_f), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.p_mod_A), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.p_mod_f), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.p_mod_phi), sizeof(Float)); | |
| + | |
| + ifs.read(as_bytes(darcy.bc_bot), sizeof(int)); | |
| + ifs.read(as_bytes(darcy.bc_top), sizeof(int)); | |
| + ifs.read(as_bytes(darcy.free_slip_bot), sizeof(int)); | |
| + ifs.read(as_bytes(darcy.free_slip_top), sizeof(int)); | |
| + | |
| + ifs.read(as_bytes(darcy.tolerance), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.maxiter), sizeof(unsigned int)); | |
| + ifs.read(as_bytes(darcy.ndem), sizeof(unsigned int)); | |
| + ifs.read(as_bytes(darcy.c_phi), sizeof(Float)); | |
| + | |
| + for (i = 0; i<np; ++i) { | |
| + ifs.read(as_bytes(darcy.f_p[i].x), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.f_p[i].y), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.f_p[i].z), sizeof(Float)); | |
| + } | |
| + | |
| + ifs.read(as_bytes(darcy.beta_f), sizeof(Float)); | |
| + ifs.read(as_bytes(darcy.k_c), sizeof(Float)); | |
| + | |
| + if (verbose == 1) | |
| + cout << "Done" << std::endl; | |
| + } | |
| } | |
| for (i = 0; i<np; ++i) | |
| t@@ -484,73 +546,128 @@ void DEM::writebin(const char *target) | |
| ofs.write(as_bytes(k.bonds_omega[i].z), sizeof(Float)); | |
| } | |
| - if (navierstokes == 1) { // Navier Stokes flow | |
| + if (fluid == 1) { | |
| - ofs.write(as_bytes(params.mu), sizeof(params.mu)); | |
| + ofs.write(as_bytes(cfd_solver), sizeof(int)); | |
| - int x, y, z; | |
| - for (z=0; z<ns.nz; z++) { | |
| - for (y=0; y<ns.ny; y++) { | |
| - for (x=0; x<ns.nx; x++) { | |
| - i = idx(x,y,z); | |
| + if (cfd_solver == 0) { // Navier Stokes flow | |
| + | |
| + ofs.write(as_bytes(ns.mu), sizeof(Float)); | |
| + | |
| + int x, y, z; | |
| + for (z=0; z<ns.nz; z++) { | |
| + for (y=0; y<ns.ny; y++) { | |
| + for (x=0; x<ns.nx; x++) { | |
| + i = idx(x,y,z); | |
| - // Interpolated cell-center velocities | |
| - ofs.write(as_bytes(ns.v[i].x), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.v[i].y), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.v[i].z), sizeof(Float)); | |
| + // Interpolated cell-center velocities | |
| + ofs.write(as_bytes(ns.v[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.v[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.v[i].z), sizeof(Float)); | |
| - // Cell-face velocities | |
| - //ofs.write(as_bytes(ns.v_x[vidx(x,y,z)]), sizeof(Floa… | |
| - //ofs.write(as_bytes(ns.v_y[vidx(x,y,z)]), sizeof(Floa… | |
| - //ofs.write(as_bytes(ns.v_z[vidx(x,y,z)]), sizeof(Floa… | |
| + // Cell-face velocities | |
| + //ofs.write(as_bytes(ns.v_x[vidx(x,y,z)]), sizeof(… | |
| + //ofs.write(as_bytes(ns.v_y[vidx(x,y,z)]), sizeof(… | |
| + //ofs.write(as_bytes(ns.v_z[vidx(x,y,z)]), sizeof(… | |
| - ofs.write(as_bytes(ns.p[i]), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.phi[i]), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.dphi[i]), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.p[i]), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.phi[i]), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.dphi[i]), sizeof(Float)); | |
| + } | |
| } | |
| } | |
| - } | |
| - ofs.write(as_bytes(params.rho_f), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.p_mod_A), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.p_mod_f), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.p_mod_phi), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.rho_f), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.p_mod_A), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.p_mod_f), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.p_mod_phi), sizeof(Float)); | |
| + | |
| + ofs.write(as_bytes(ns.bc_bot), sizeof(int)); | |
| + ofs.write(as_bytes(ns.bc_top), sizeof(int)); | |
| + ofs.write(as_bytes(ns.free_slip_bot), sizeof(int)); | |
| + ofs.write(as_bytes(ns.free_slip_top), sizeof(int)); | |
| + | |
| + ofs.write(as_bytes(ns.gamma), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.theta), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.beta), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.tolerance), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.maxiter), sizeof(unsigned int)); | |
| + ofs.write(as_bytes(ns.ndem), sizeof(unsigned int)); | |
| + | |
| + ofs.write(as_bytes(ns.c_phi), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.c_v), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.dt_dem_fac), sizeof(Float)); | |
| + | |
| + for (i = 0; i<np; ++i) { | |
| + ofs.write(as_bytes(ns.f_d[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_d[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_d[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ofs.write(as_bytes(ns.f_p[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_p[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_p[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ofs.write(as_bytes(ns.f_v[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_v[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_v[i].z), sizeof(Float)); | |
| + } | |
| + for (i = 0; i<np; ++i) { | |
| + ofs.write(as_bytes(ns.f_sum[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_sum[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(ns.f_sum[i].z), sizeof(Float)); | |
| + } | |
| + | |
| + } else if (cfd_solver == 1) { // Darcy flow | |
| - ofs.write(as_bytes(ns.bc_bot), sizeof(int)); | |
| - ofs.write(as_bytes(ns.bc_top), sizeof(int)); | |
| - ofs.write(as_bytes(ns.free_slip_bot), sizeof(int)); | |
| - ofs.write(as_bytes(ns.free_slip_top), sizeof(int)); | |
| + ofs.write(as_bytes(darcy.mu), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.gamma), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.theta), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.beta), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.tolerance), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.maxiter), sizeof(unsigned int)); | |
| - ofs.write(as_bytes(ns.ndem), sizeof(unsigned int)); | |
| + int x, y, z; | |
| + for (z=0; z<darcy.nz; z++) { | |
| + for (y=0; y<darcy.ny; y++) { | |
| + for (x=0; x<darcy.nx; x++) { | |
| + i = d_idx(x,y,z); | |
| - ofs.write(as_bytes(ns.c_phi), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.c_v), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.dt_dem_fac), sizeof(Float)); | |
| + // Interpolated cell-center velocities | |
| + ofs.write(as_bytes(darcy.v[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.v[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.v[i].z), sizeof(Float)); | |
| - for (i = 0; i<np; ++i) { | |
| - ofs.write(as_bytes(ns.f_d[i].x), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_d[i].y), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_d[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ofs.write(as_bytes(ns.f_p[i].x), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_p[i].y), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_p[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ofs.write(as_bytes(ns.f_v[i].x), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_v[i].y), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_v[i].z), sizeof(Float)); | |
| - } | |
| - for (i = 0; i<np; ++i) { | |
| - ofs.write(as_bytes(ns.f_sum[i].x), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_sum[i].y), sizeof(Float)); | |
| - ofs.write(as_bytes(ns.f_sum[i].z), sizeof(Float)); | |
| + // Cell-face velocities | |
| + //ofs.write(as_bytes(darcy.v_x[vidx(x,y,z)]), size… | |
| + //ofs.write(as_bytes(darcy.v_y[vidx(x,y,z)]), size… | |
| + //ofs.write(as_bytes(darcy.v_z[vidx(x,y,z)]), size… | |
| + | |
| + ofs.write(as_bytes(darcy.p[i]), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.phi[i]), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.dphi[i]), sizeof(Float)); | |
| + } | |
| + } | |
| + } | |
| + | |
| + ofs.write(as_bytes(darcy.rho_f), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.p_mod_A), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.p_mod_f), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.p_mod_phi), sizeof(Float)); | |
| + | |
| + ofs.write(as_bytes(darcy.bc_bot), sizeof(int)); | |
| + ofs.write(as_bytes(darcy.bc_top), sizeof(int)); | |
| + ofs.write(as_bytes(darcy.free_slip_bot), sizeof(int)); | |
| + ofs.write(as_bytes(darcy.free_slip_top), sizeof(int)); | |
| + | |
| + ofs.write(as_bytes(darcy.tolerance), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.maxiter), sizeof(unsigned int)); | |
| + ofs.write(as_bytes(darcy.ndem), sizeof(unsigned int)); | |
| + ofs.write(as_bytes(darcy.c_phi), sizeof(Float)); | |
| + | |
| + for (i = 0; i<np; ++i) { | |
| + ofs.write(as_bytes(darcy.f_p[i].x), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.f_p[i].y), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.f_p[i].z), sizeof(Float)); | |
| + } | |
| + ofs.write(as_bytes(darcy.beta_f), sizeof(Float)); | |
| + ofs.write(as_bytes(darcy.k_c), sizeof(Float)); | |
| } | |
| } | |
| diff --git a/src/navierstokes.cpp b/src/navierstokes.cpp | |
| t@@ -122,7 +122,7 @@ void DEM::checkNSstability() | |
| const Float dmin = fmin(dx, fmin(dy, dz)); | |
| // Check the diffusion term using von Neumann stability analysis | |
| - if (params.mu*time.dt/(dmin*dmin) > 0.5) { | |
| + if (ns.mu*time.dt/(dmin*dmin) > 0.5) { | |
| std::cerr << "Error: The time step is too large to ensure stability in… | |
| "the diffusive term of the fluid momentum equation.\n" | |
| "Decrease the viscosity, decrease the time step, and/or increase " | |
| t@@ -279,7 +279,7 @@ double DEM::maxNormResNS() | |
| for (int z=0; z<grid.num[2]; ++z) { | |
| for (int y=0; y<grid.num[1]; ++y) { | |
| for (int x=0; x<grid.num[0]; ++x) { | |
| - norm_res = static_cast<double>(ns.norm[idx(x,y,z)]); | |
| + norm_res = fabs(static_cast<double>(ns.norm[idx(x,y,z)])); | |
| if (norm_res != norm_res) { | |
| std::cerr << "\nError: normalized residual is NaN (" | |
| << norm_res << ") in cell " | |
| diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh | |
| t@@ -1309,7 +1309,7 @@ __global__ void findPorositiesVelocitiesDiametersSpheric… | |
| dev_ns_vp_avg[cellidx] = v_avg; | |
| dev_ns_d_avg[cellidx] = d_avg; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("phi", x, y, z, phi); | |
| (void)checkFiniteFloat("dphi", x, y, z, dphi); | |
| (void)checkFiniteFloat3("v_avg", x, y, z, v_avg); | |
| t@@ -1526,7 +1526,7 @@ __global__ void findPorositiesVelocitiesDiametersSpheric… | |
| dev_ns_vp_avg[cellidx] = v_avg; | |
| dev_ns_d_avg[cellidx] = d_avg; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("phi", x, y, z, phi); | |
| (void)checkFiniteFloat("dphi", x, y, z, dphi); | |
| (void)checkFiniteFloat3("v_avg", x, y, z, v_avg); | |
| t@@ -1578,7 +1578,7 @@ __global__ void setUpperPressureNS( | |
| dev_ns_epsilon_new[cellidx] = epsilon; | |
| dev_ns_p[cellidx] = new_pressure; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("epsilon", x, y, z, epsilon); | |
| (void)checkFiniteFloat("new_pressure", x, y, z, new_pressure); | |
| #endif | |
| t@@ -1715,7 +1715,7 @@ __device__ Float3 divergence_tensor( | |
| (t_yz_yp - t_yz_yn)/dy + | |
| (t_zz_zp - t_zz_zn)/dz); | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("div_tensor", x, y, z, div_tensor); | |
| #endif | |
| return div_tensor; | |
| t@@ -1755,7 +1755,7 @@ __global__ void findNSgradientsDev( | |
| __syncthreads(); | |
| dev_vectorfield[cellidx] = grad; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("grad", x, y, z, grad); | |
| #endif | |
| } | |
| t@@ -1799,7 +1799,7 @@ __global__ void findvvOuterProdNS( | |
| dev_ns_v_prod[cellidx6+4] = v.y*v.z; | |
| dev_ns_v_prod[cellidx6+5] = v.z*v.z; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("v_prod[0]", x, y, z, v.x*v.x); | |
| (void)checkFiniteFloat("v_prod[1]", x, y, z, v.x*v.y); | |
| (void)checkFiniteFloat("v_prod[2]", x, y, z, v.x*v.z); | |
| t@@ -1815,6 +1815,7 @@ __global__ void findvvOuterProdNS( | |
| // values in 3D. | |
| __global__ void findNSstressTensor( | |
| const Float3* __restrict__ dev_ns_v, // in | |
| + const Float mu, // in | |
| Float* __restrict__ dev_ns_tau) // out | |
| { | |
| // 3D thread index | |
| t@@ -1860,17 +1861,17 @@ __global__ void findNSstressTensor( | |
| const Float3 zn = dev_ns_v[idx(x,y,z-1)]; | |
| // The diagonal stress tensor components | |
| - const Float tau_xx = 2.0*devC_params.mu*(xp.x - xn.x)/(2.0*dx); | |
| - const Float tau_yy = 2.0*devC_params.mu*(yp.y - yn.y)/(2.0*dy); | |
| - const Float tau_zz = 2.0*devC_params.mu*(zp.z - zn.z)/(2.0*dz); | |
| + const Float tau_xx = 2.0*mu*(xp.x - xn.x)/(2.0*dx); | |
| + const Float tau_yy = 2.0*mu*(yp.y - yn.y)/(2.0*dy); | |
| + const Float tau_zz = 2.0*mu*(zp.z - zn.z)/(2.0*dz); | |
| // The off-diagonal stress tensor components | |
| const Float tau_xy = | |
| - devC_params.mu*((yp.x - yn.x)/(2.0*dy) + (xp.y - xn.y)/(2.0*dx)); | |
| + mu*((yp.x - yn.x)/(2.0*dy) + (xp.y - xn.y)/(2.0*dx)); | |
| const Float tau_xz = | |
| - devC_params.mu*((zp.x - zn.x)/(2.0*dz) + (xp.z - xn.z)/(2.0*dx)); | |
| + mu*((zp.x - zn.x)/(2.0*dz) + (xp.z - xn.z)/(2.0*dx)); | |
| const Float tau_yz = | |
| - devC_params.mu*((zp.y - zn.y)/(2.0*dz) + (yp.z - yn.z)/(2.0*dy)); | |
| + mu*((zp.y - zn.y)/(2.0*dz) + (yp.z - yn.z)/(2.0*dy)); | |
| /* | |
| if (x == 0 && y == 0 && z == 0) | |
| t@@ -1892,7 +1893,7 @@ __global__ void findNSstressTensor( | |
| dev_ns_tau[cellidx6+4] = tau_yz; | |
| dev_ns_tau[cellidx6+5] = tau_zz; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("tau_xx", x, y, z, tau_xx); | |
| (void)checkFiniteFloat("tau_xy", x, y, z, tau_xy); | |
| (void)checkFiniteFloat("tau_xz", x, y, z, tau_xz); | |
| t@@ -2036,7 +2037,7 @@ __global__ void findNSdivphiviv( | |
| //printf("div(phi*v*v) [%d,%d,%d] = %f, %f, %f\n", x,y,z, | |
| //div_phi_vi_v.x, div_phi_vi_v.y, div_phi_vi_v.z); | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("div_phi_vi_v", x, y, z, div_phi_vi_v); | |
| #endif | |
| } | |
| t@@ -2175,7 +2176,7 @@ __global__ void findNSdivphitau( | |
| __syncthreads(); | |
| dev_ns_div_phi_tau[cellidx] = div_phi_tau; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("div_phi_tau", x, y, z, div_phi_tau); | |
| #endif | |
| } | |
| t@@ -2263,7 +2264,7 @@ __global__ void findNSdivphivv( | |
| __syncthreads(); | |
| dev_ns_div_phi_v_v[cellidx] = div; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("div_phi_v_v", x, y, z, div); | |
| #endif | |
| } | |
| t@@ -2290,6 +2291,8 @@ __global__ void findPredNSvelocities( | |
| const unsigned int ndem, // in | |
| const unsigned int wall0_iz, // in | |
| const Float c_v, // in | |
| + const Float mu, // in | |
| + const Float rho_f, // in | |
| Float* __restrict__ dev_ns_v_p_x, // out | |
| Float* __restrict__ dev_ns_v_p_y, // out | |
| Float* __restrict__ dev_ns_v_p_z) // out | |
| t@@ -2326,7 +2329,7 @@ __global__ void findPredNSvelocities( | |
| //printf("v in v* [%d,%d,%d] = %f, %f, %f\n", x,y,z, v.x, v.y, v.z); | |
| Float3 div_tau = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| - if (devC_params.mu > 0.0) { | |
| + if (mu > 0.0) { | |
| div_tau = MAKE_FLOAT3( | |
| dev_ns_div_tau_x[fidx], | |
| dev_ns_div_tau_y[fidx], | |
| t@@ -2362,7 +2365,7 @@ __global__ void findPredNSvelocities( | |
| // The particle-fluid interaction force should only be incoorporated if | |
| // there is a fluid viscosity | |
| Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn; | |
| - if (devC_params.mu > 0.0 && devC_np > 0) { | |
| + if (mu > 0.0 && devC_np > 0) { | |
| f_i_c = dev_ns_F_pf[cellidx]; | |
| f_i_xn = dev_ns_F_pf[idx(x-1,y,z)]; | |
| f_i_yn = dev_ns_F_pf[idx(x,y-1,z)]; | |
| t@@ -2379,7 +2382,6 @@ __global__ void findPredNSvelocities( | |
| amean(f_i_c.z, f_i_zn.z)); | |
| const Float dt = ndem*devC_dt; | |
| - const Float rho = devC_params.rho_f; | |
| // The pressure gradient is not needed in Chorin's projection method | |
| // (ns.beta=0), so only has to be looked up in pressure-dependant | |
| t@@ -2396,10 +2398,10 @@ __global__ void findPredNSvelocities( | |
| (p - p_yn)/dy, | |
| (p - p_zn)/dz); | |
| #ifdef SET_1 | |
| - pressure_term = -beta*dt/(rho*phi)*grad_p; | |
| + pressure_term = -beta*dt/(rho_f*phi)*grad_p; | |
| #endif | |
| #ifdef SET_2 | |
| - pressure_term = -beta*dt/rho*grad_p; | |
| + pressure_term = -beta*dt/rho_f*grad_p; | |
| #endif | |
| } | |
| t@@ -2409,16 +2411,16 @@ __global__ void findPredNSvelocities( | |
| amean(div_phi_vi_v_zn.x, div_phi_vi_v_c.z)); | |
| // Determine the terms of the predicted velocity change | |
| - const Float3 interaction_term = -dt/(rho*phi)*f_i; | |
| + const Float3 interaction_term = -dt/(rho_f*phi)*f_i; | |
| const Float3 gravity_term = MAKE_FLOAT3( | |
| devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt; | |
| const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi; | |
| const Float3 porosity_term = -1.0*v*dphi/phi; | |
| #ifdef SET_1 | |
| - const Float3 diffusion_term = dt/(rho*phi)*div_tau; | |
| + const Float3 diffusion_term = dt/(rho_f*phi)*div_tau; | |
| #endif | |
| #ifdef SET_2 | |
| - const Float3 diffusion_term = dt/rho*div_tau; | |
| + const Float3 diffusion_term = dt/rho_f*div_tau; | |
| #endif | |
| // Predict new velocity and add scaling parameters | |
| t@@ -2481,7 +2483,7 @@ __global__ void findPredNSvelocities( | |
| dev_ns_v_p_y[fidx] = v_p.y; | |
| dev_ns_v_p_z[fidx] = v_p.z; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat3("v_p", x, y, z, v_p); | |
| #endif | |
| } | |
| t@@ -2503,6 +2505,7 @@ __global__ void findNSforcing( | |
| const unsigned int nijac, // in | |
| const unsigned int ndem, // in | |
| const Float c_v, // in | |
| + const Float rho_f, // in | |
| Float* __restrict__ dev_ns_f1, // out | |
| Float3* __restrict__ dev_ns_f2, // out | |
| Float* __restrict__ dev_ns_f) // out | |
| t@@ -2554,14 +2557,14 @@ __global__ void findNSforcing( | |
| // Find forcing function terms | |
| #ifdef SET_1 | |
| - const Float t1 = devC_params.rho_f*phi*div_v_p/(c_v*dt); | |
| - const Float t2 = devC_params.rho_f*dot(grad_phi, v_p)/(c_v*dt); | |
| - const Float t4 = devC_params.rho_f*dphi/(dt*dt*c_v); | |
| + const Float t1 = rho_f*phi*div_v_p/(c_v*dt); | |
| + const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt); | |
| + const Float t4 = rho_f*dphi/(dt*dt*c_v); | |
| #endif | |
| #ifdef SET_2 | |
| - const Float t1 = devC_params.rho_f*div_v_p/(c_v*dt); | |
| - const Float t2 = devC_params.rho_f*dot(grad_phi, v_p)/(c_v*dt*phi); | |
| - const Float t4 = devC_params.rho_f*dphi/(dt*dt*phi*c_v); | |
| + const Float t1 = rho_f*div_v_p/(c_v*dt); | |
| + const Float t2 = rho_f*dot(grad_phi, v_p)/(c_v*dt*phi); | |
| + const Float t4 = rho_f*dphi/(dt*dt*phi*c_v); | |
| #endif | |
| f1 = t1 + t2 + t4; | |
| f2 = grad_phi/phi; // t3/grad(epsilon) | |
| t@@ -2603,7 +2606,7 @@ __global__ void findNSforcing( | |
| __syncthreads(); | |
| dev_ns_f[cellidx] = f; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("f", x, y, z, f); | |
| #endif | |
| } | |
| t@@ -2666,7 +2669,7 @@ __global__ void smoothing( | |
| " e_zn = %f, e_zp = %f\n", x,y,z, e_xn, e_xp, | |
| e_yn, e_yp, e_zn, e_zp);*/ | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("e_smooth", x, y, z, e_smooth); | |
| #endif | |
| } | |
| t@@ -2775,7 +2778,7 @@ __global__ void jacobiIterationNS( | |
| dev_ns_epsilon_new[cellidx] = e_relax; | |
| dev_ns_norm[cellidx] = res_norm; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| (void)checkFiniteFloat("e_new", x, y, z, e_new); | |
| (void)checkFiniteFloat("e_relax", x, y, z, e_relax); | |
| //(void)checkFiniteFloat("res_norm", x, y, z, res_norm); | |
| t@@ -2861,12 +2864,13 @@ __global__ void findNormalizedResiduals( | |
| // Find the normalized residual value. A small value is added to the | |
| // denominator to avoid a divide by zero. | |
| - const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-16); | |
| + //const Float res_norm = (e_new - e)*(e_new - e)/(e_new*e_new + 1.0e-1… | |
| + const Float res_norm = (e_new - e)/(e + 1.0e-16); | |
| __syncthreads(); | |
| dev_ns_norm[cellidx] = res_norm; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| checkFiniteFloat("res_norm", x, y, z, res_norm); | |
| #endif | |
| } | |
| t@@ -2902,7 +2906,7 @@ __global__ void updateNSpressure( | |
| __syncthreads(); | |
| dev_ns_p[cellidx] = p; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| checkFiniteFloat("p", x, y, z, p); | |
| #endif | |
| } | |
| t@@ -2919,6 +2923,7 @@ __global__ void updateNSvelocity( | |
| const int bc_top, // in | |
| const unsigned int ndem, // in | |
| const Float c_v, // in | |
| + const Float rho_f, // in | |
| const unsigned int wall0_iz, // in | |
| const unsigned int iter, // in | |
| Float* __restrict__ dev_ns_v_x, // out | |
| t@@ -2978,10 +2983,10 @@ __global__ void updateNSvelocity( | |
| // Find new velocity | |
| Float3 v = v_p | |
| #ifdef SET_1 | |
| - - c_v*ndem*devC_dt/(phi*devC_params.rho_f)*grad_epsilon; | |
| + - c_v*ndem*devC_dt/(phi*rho_f)*grad_epsilon; | |
| #endif | |
| #ifdef SET_2 | |
| - - c_v*ndem*devC_dt/devC_params.rho_f*grad_epsilon; | |
| + - c_v*ndem*devC_dt/rho_f*grad_epsilon; | |
| #endif | |
| if ((z == 0 && bc_bot == 1) || (z > nz-1 && bc_top == 1)) | |
| t@@ -3043,7 +3048,7 @@ __global__ void updateNSvelocity( | |
| dev_ns_v_y[cellidx] = v.y; | |
| dev_ns_v_z[cellidx] = v.z; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| checkFiniteFloat3("v", x, y, z, v); | |
| #endif | |
| } | |
| t@@ -3112,7 +3117,7 @@ __global__ void findAvgParticleVelocityDiameter( | |
| dev_ns_vp_avg[cellidx] = v_avg; | |
| dev_ns_d_avg[cellidx] = d_avg; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| checkFiniteFloat3("v_avg", x, y, z, v_avg); | |
| checkFiniteFloat("d_avg", x, y, z, d_avg); | |
| #endif | |
| t@@ -3144,6 +3149,8 @@ __global__ void findInteractionForce( | |
| const Float* __restrict__ dev_ns_div_tau_x,// in | |
| const Float* __restrict__ dev_ns_div_tau_y,// in | |
| const Float* __restrict__ dev_ns_div_tau_z,// in | |
| + const Float mu, // in | |
| + const Float rho_f, // in | |
| //const Float c_v, // in | |
| Float3* __restrict__ dev_ns_f_pf, // out | |
| Float4* __restrict__ dev_force, // out | |
| t@@ -3209,7 +3216,7 @@ __global__ void findInteractionForce( | |
| const Float v_rel_length = length(v_rel); | |
| const Float V_p = dx*dy*dz - phi*dx*dy*dz; | |
| - const Float Re = devC_params.rho_f*d*phi*v_rel_length/devC_params.mu; | |
| + const Float Re = rho_f*d*phi*v_rel_length/mu; | |
| Float Cd = pow(0.63 + 4.8/pow(Re, 0.5), 2.0); | |
| Float chi = 3.7 - 0.65*exp(-pow(1.5 - log10(Re), 2.0)/2.0); | |
| t@@ -3219,7 +3226,7 @@ __global__ void findInteractionForce( | |
| } | |
| // Drag force | |
| - const Float3 f_d = 0.125*Cd*devC_params.rho_f*M_PI*d*d*phi*phi | |
| + const Float3 f_d = 0.125*Cd*rho_f*M_PI*d*d*phi*phi | |
| *v_rel_length*v_rel*pow(phi, -chi); | |
| // Pressure gradient force | |
| t@@ -3233,7 +3240,7 @@ __global__ void findInteractionForce( | |
| __syncthreads(); | |
| const Float3 f_pf = f_d + f_p + f_v; | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| /* | |
| printf("\nfindInteractionForce %d [%d,%d,%d]\n" | |
| "\tV_p = %f Re=%f Cd=%f chi=%f\n" | |
| t@@ -3325,7 +3332,7 @@ __global__ void applyInteractionForceToFluid( | |
| const Float3 F_pf = fi/(dx*dy*dz); | |
| -#ifdef CHECK_NS_FINITE | |
| +#ifdef CHECK_FLUID_FINITE | |
| checkFiniteFloat3("F_pf", x, y, z, F_pf); | |
| #endif | |
| //printf("F_pf [%d,%d,%d] = %f,%f,%f\n", x,y,z, F_pf.x, F_pf.y, F_pf.z… | |
| t@@ -3418,12 +3425,13 @@ __global__ void interpolateFaceToCenter( | |
| // Warning: The grid-corner values will be invalid, along with the non-normal | |
| // components of the ghost nodes | |
| __global__ void findFaceDivTau( | |
| - const Float* __restrict__ dev_ns_v_x, | |
| - const Float* __restrict__ dev_ns_v_y, | |
| - const Float* __restrict__ dev_ns_v_z, | |
| - Float* __restrict__ dev_ns_div_tau_x, | |
| - Float* __restrict__ dev_ns_div_tau_y, | |
| - Float* __restrict__ dev_ns_div_tau_z) | |
| + const Float* __restrict__ dev_ns_v_x, // in | |
| + const Float* __restrict__ dev_ns_v_y, // in | |
| + const Float* __restrict__ dev_ns_v_z, // in | |
| + const Float mu, // in | |
| + Float* __restrict__ dev_ns_div_tau_x, // out | |
| + Float* __restrict__ dev_ns_div_tau_y, // out | |
| + Float* __restrict__ dev_ns_div_tau_z) // out | |
| { | |
| // 3D thread index | |
| const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| t@@ -3472,17 +3480,17 @@ __global__ void findFaceDivTau( | |
| const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)]; | |
| const Float div_tau_x = | |
| - devC_params.mu*( | |
| + mu*( | |
| (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) + | |
| (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) + | |
| (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz)); | |
| const Float div_tau_y = | |
| - devC_params.mu*( | |
| + mu*( | |
| (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) + | |
| (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) + | |
| (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz)); | |
| const Float div_tau_z = | |
| - devC_params.mu*( | |
| + mu*( | |
| (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) + | |
| (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) + | |
| (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz)); | |
| diff --git a/src/sphere.cpp b/src/sphere.cpp | |
| t@@ -21,7 +21,7 @@ DEM::DEM(const std::string inputbin, | |
| const int transferConstMem, | |
| const int fluidFlow, | |
| const int device) | |
| -: verbose(verbosity), navierstokes(fluidFlow), device(device) | |
| +: verbose(verbosity), fluid(fluidFlow), device(device) | |
| { | |
| using std::cout; | |
| using std::cerr; | |
| t@@ -50,8 +50,16 @@ DEM::DEM(const std::string inputbin, | |
| if (dry == 1) | |
| exit(0); | |
| - if (navierstokes == 1) { | |
| - initNS(); | |
| + if (fluid == 1) { | |
| + if (cfd_solver == 0) | |
| + initNS(); | |
| + else if (cfd_solver == 1) | |
| + initDarcy(); | |
| + else { | |
| + std::cerr << "DEM::DEM Error: Value of cfd_solver not understood (" | |
| + << cfd_solver << ")" << std::endl; | |
| + exit(1); | |
| + } | |
| } | |
| if (initCuda == 1) { | |
| t@@ -64,8 +72,17 @@ DEM::DEM(const std::string inputbin, | |
| transferToConstantDeviceMemory(); | |
| } | |
| - if (navierstokes == 1) { | |
| - initNSmemDev(); | |
| + if (fluid == 1) { | |
| + if (cfd_solver == 0) | |
| + initNSmemDev(); | |
| + else if (cfd_solver == 1) | |
| + initDarcyMemDev(); | |
| + else { | |
| + std::cerr | |
| + << "DEM::DEM Error: Value of cfd_solver not understood (" | |
| + << cfd_solver << ")" << std::endl; | |
| + exit(1); | |
| + } | |
| } | |
| // Allocate device memory for particle variables, | |
| t@@ -224,8 +241,8 @@ void DEM::checkValues(void) | |
| checkIfNaN(x, "position", i); | |
| checkIfNaN(vel, "velocity", i); | |
| - checkIfNaN(angpos, "angular position", i); | |
| - checkIfNaN(angvel, "angular velocity", i); | |
| + //checkIfNaN(angpos, "angular position", i); | |
| + //checkIfNaN(angvel, "angular velocity", i); | |
| // Check that all particles are inside of the grid | |
| if (x.x < grid.origo[0] || | |
| t@@ -281,6 +298,44 @@ void DEM::checkValues(void) | |
| cerr << "Error: rho = " << params.rho << " kg/m3" << endl; | |
| exit(1); | |
| } | |
| + | |
| + if (fluid == 1) { | |
| + | |
| + // Navier-Stokes tests | |
| + if (cfd_solver == 0) { | |
| + if (ns.rho_f <= 0.0) { | |
| + cerr << "Error: rho = " << params.rho << " kg/m3" << endl; | |
| + exit(1); | |
| + } | |
| + } | |
| + | |
| + // Darcy tests | |
| + else if (cfd_solver == 1) { | |
| + if (darcy.rho_f <= 0.0) { | |
| + cerr << "Error: rho_f = " << darcy.rho_f << " kg/m3" << endl; | |
| + exit(1); | |
| + } | |
| + | |
| + if (darcy.mu <= 0.0) { | |
| + cerr << "Error: mu = " << darcy.mu << " Pa s" << endl; | |
| + exit(1); | |
| + } | |
| + | |
| + if (darcy.beta_f <= 0.0) { | |
| + cerr << "Error: beta_f = " << darcy.beta_f << " 1/Pa" << endl; | |
| + exit(1); | |
| + } | |
| + | |
| + if (darcy.k_c <= 0.0) { | |
| + cerr << "Error: k_c = " << darcy.k_c << " m*m" << endl; | |
| + exit(1); | |
| + } | |
| + } else { | |
| + cerr << "Solver type value not understood (cfd_solver = " | |
| + << cfd_solver << endl; | |
| + exit(1); | |
| + } | |
| + } | |
| } | |
| diff --git a/src/sphere.h b/src/sphere.h | |
| t@@ -173,7 +173,8 @@ class DEM { | |
| Float4 *dev_v_rho; // Device equivalent | |
| //// Porous flow | |
| - int navierstokes; // 0: no, 1: yes | |
| + int fluid; // 0: no, 1: yes | |
| + int cfd_solver; // 0: Navier Stokes, 1: Darcy | |
| // Navier Stokes values, host | |
| NavierStokes ns; | |
| t@@ -262,7 +263,7 @@ class DEM { | |
| unsigned int idx(const int x, const int y, const int z); // pres. nodes | |
| unsigned int vidx(const int x, const int y, const int z); // vel. nodes | |
| - // Initialize Darcy values and arrays | |
| + // Initialize Navier Stokes values and arrays | |
| void initNS(); | |
| // Clean up Navier Stokes arrays | |
| t@@ -289,6 +290,50 @@ class DEM { | |
| void transferNSepsilonFromGlobalDeviceMemory(); | |
| void transferNSepsilonNewFromGlobalDeviceMemory(); | |
| + // Darcy values, host | |
| + Darcy darcy; | |
| + | |
| + // Darcy values, device | |
| + Float* dev_darcy_p_old; // Previous cell hydraulic pressure | |
| + Float* dev_darcy_dpdt; // Cell hydraulic pressure change rate (dp/dt) | |
| + Float* dev_darcy_p; // Cell hydraulic pressure | |
| + Float* dev_darcy_p_new; // Updated cell hydraulic pressure | |
| + Float3* dev_darcy_v; // Cell fluid velocity | |
| + Float* dev_darcy_phi; // Cell porosity | |
| + Float* dev_darcy_dphi; // Cell porosity change | |
| + Float* dev_darcy_norm; // Normalized residual of epsilon values | |
| + Float4* dev_darcy_f_p; // Pressure gradient force on particles | |
| + Float* dev_darcy_k; // Cell hydraulic permeability | |
| + Float3* dev_darcy_grad_k; // Spatial gradient of permeability | |
| + | |
| + // Darcy functions | |
| + void initDarcyMem(); | |
| + Float largestDarcyPermeability(); | |
| + Float smallestDarcyPorosity(); | |
| + void initDarcyMemDev(); | |
| + unsigned int darcyCells(); | |
| + unsigned int darcyCellsVelocity(); | |
| + void transferDarcyToGlobalDeviceMemory(int statusmsg); | |
| + void transferDarcyFromGlobalDeviceMemory(int statusmsg); | |
| + void transferDarcyNormFromGlobalDeviceMemory(); | |
| + void transferDarcyPressuresFromGlobalDeviceMemory(); | |
| + void freeDarcyMem(); | |
| + void freeDarcyMemDev(); | |
| + unsigned int d_idx(const int x, const int y, const int z); | |
| + unsigned int d_vidx(const int x, const int y, const int z); | |
| + void checkDarcyStability(); | |
| + void printDarcyArray(FILE* stream, Float* arr); | |
| + void printDarcyArray(FILE* stream, Float* arr, std::string desc); | |
| + void printDarcyArray(FILE* stream, Float3* arr); | |
| + void printDarcyArray(FILE* stream, Float3* arr, std::string desc); | |
| + double avgNormResDarcy(); | |
| + double maxNormResDarcy(); | |
| + void initDarcy(); | |
| + void writeDarcyArray(Float* arr, const char* filename); | |
| + void writeDarcyArray(Float3* arr, const char* filename); | |
| + void endDarcy(); | |
| + void endDarcyDev(); | |
| + | |
| public: | |
| // Values and functions accessible from the outside | |
| diff --git a/src/utility.cu b/src/utility.cu | |
| t@@ -15,10 +15,14 @@ void DEM::diagnostics() | |
| checkValues(); | |
| // Clean up memory before exiting | |
| - if (navierstokes == 1) { | |
| + if (fluid == 1 && cfd_solver == 0) { | |
| freeNSmemDev(); | |
| freeNSmem(); | |
| } | |
| + if (fluid == 1 && cfd_solver == 1) { | |
| + freeDarcyMemDev(); | |
| + freeDarcyMem(); | |
| + } | |
| freeGlobalDeviceMemory(); | |
| // CPU memory freed upon object destruction | |
| } | |
| diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt | |
| t@@ -32,3 +32,15 @@ add_test(cfd_tests_neumann-c_v=0.1 ${PYTHON_EXECUTABLE} | |
| add_test(fluid_particle_interaction ${PYTHON_EXECUTABLE} | |
| ${CMAKE_CURRENT_BINARY_DIR}/fluid_particle_interaction.py) | |
| + | |
| +add_test(cfd_tests_darcy ${PYTHON_EXECUTABLE} | |
| + ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_darcy.py) | |
| + | |
| +add_test(cfd_tests_neumann_darcy ${PYTHON_EXECUTABLE} | |
| + ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_neumann_darcy.py) | |
| + | |
| +add_test(fluid_particle_interaction_darcy ${PYTHON_EXECUTABLE} | |
| + ${CMAKE_CURRENT_BINARY_DIR}/fluid_particle_interaction_darcy.py) | |
| + | |
| +add_test(cfd_tests_darcy_particles ${PYTHON_EXECUTABLE} | |
| + ${CMAKE_CURRENT_BINARY_DIR}/cfd_tests_darcy_particles.py) | |
| diff --git a/tests/cfd_tests_darcy.py b/tests/cfd_tests_darcy.py | |
| t@@ -0,0 +1,151 @@ | |
| +#!/usr/bin/env python | |
| +from pytestutils import * | |
| + | |
| +import sphere | |
| +import sys | |
| +import numpy | |
| +import matplotlib.pyplot as plt | |
| + | |
| +print("### CFD tests - Dirichlet BCs ###") | |
| + | |
| +# Iteration and conservation of mass test | |
| +# No gravity, no pressure gradients => no flow | |
| +orig = sphere.sim(np = 0, nd = 3, nw = 0, sid = "cfdtest", fluid = True) | |
| +cleanup(orig) | |
| +orig.defaultParams() | |
| +#orig.defineWorldBoundaries([1.0,1.0,1.0], dx=0.1) | |
| +orig.defineWorldBoundaries([0.4,0.3,0.4], dx=0.1) | |
| +orig.initFluid(cfd_solver = 1) | |
| +#orig.initFluid(mu = 8.9e-4) | |
| +orig.initTemporal(total = 0.2, file_dt = 0.01, dt = 1.0e-7) | |
| +#orig.g[2] = -10.0 | |
| +orig.time_file_dt = orig.time_dt*0.99 | |
| +orig.time_total = orig.time_dt*10 | |
| +#orig.run(dry=True) | |
| +orig.run(verbose=False) | |
| +#orig.run(verbose=True) | |
| +py = sphere.sim(sid = orig.sid, fluid = True) | |
| + | |
| +ones = numpy.ones((orig.num)) | |
| +py.readlast(verbose = False) | |
| +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:") | |
| + | |
| +# Convergence rate (1/3) | |
| +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log") | |
| +compare(it[:,1].sum(), 0.0, "Convergence rate (1/3):\t") | |
| + | |
| +# Fluid flow should be very small | |
| +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + print(numpy.min(py.v_f)) | |
| + print(numpy.mean(py.v_f)) | |
| + print(numpy.max(py.v_f)) | |
| + raise Exception("Failed") | |
| + | |
| + | |
| +# Add pressure gradient | |
| +# This test passes with BETA=0.0 and tolerance=1.0e-9 | |
| +orig.p_f[:,:,-1] = 1.1 | |
| +#orig.setTolerance(1.0e-8) | |
| +orig.cleanup() | |
| +#orig.time_file_dt = orig.time_dt*0.99 | |
| +#orig.time_total = orig.time_dt*1 | |
| +orig.run(verbose=False) | |
| +#orig.run(verbose=True) | |
| +py.readlast(verbose = False) | |
| +ideal_grad_p_z = numpy.linspace(orig.p_f[0,0,0], orig.p_f[0,0,-1], orig.num[2]) | |
| +#orig.writeVTKall() | |
| +compareNumpyArraysClose(numpy.zeros((1,orig.num[2])),\ | |
| + ideal_grad_p_z - py.p_f[0,0,:],\ | |
| + "Pressure gradient:\t", tolerance=1.0e-1) | |
| + #"Pressure gradient:\t", tolerance=1.0e-2) | |
| + | |
| +# Fluid flow direction, opposite of gradient (i.e. towards -z) | |
| +if ((py.v_f[:,:,:,2] < 0.0).all() and (py.v_f[:,:,:,0:1] < 1.0e-7).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + raise Exception("Failed") | |
| + | |
| +# Convergence rate (2/3) | |
| +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log") | |
| +if ((it[0:6,1] < 1000).all() and (it[6:,1] < 20).all()): | |
| + print("Convergence rate (2/3):\t" + passed()) | |
| +else: | |
| + print("Convergence rate (2/3):\t" + failed()) | |
| +#''' | |
| + | |
| +# Long test | |
| +''' | |
| +#orig.p_f[:,:,-1] = 1.1 | |
| +orig.time_total[0] = 0.1 | |
| +orig.time_file_dt[0] = orig.time_total[0]/10.0 | |
| +orig.run(verbose=False) | |
| +py.readlast(verbose = False) | |
| +ideal_grad_p_z = numpy.linspace(orig.p_f[0,0,0], orig.p_f[0,0,-1], orig.num[2]) | |
| +#py.writeVTKall() | |
| +compareNumpyArraysClose(numpy.zeros((1,orig.num[2])),\ | |
| + ideal_grad_p_z - py.p_f[0,0,:],\ | |
| + "Pressure gradient (long test):", tolerance=1.0e-2) | |
| + | |
| +# Fluid flow direction, opposite of gradient (i.e. towards -z) | |
| +if ((py.v_f[:,:,:,2] < 0.0).all() and (py.v_f[:,:,:,0:1] < 1.0e-7).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + | |
| +# Convergence rate (3/3) | |
| +# This test passes with BETA=0.0 and tolerance=1.0e-9 | |
| +it = numpy.loadtxt("../output/" + orig.sid + "-conv.log") | |
| +if (it[0,1] < 700 and it[1,1] < 250 and (it[2:,1] < 20).all()): | |
| + print("Convergence rate (3/3):\t" + passed()) | |
| +else: | |
| + print("Convergence rate (3/3):\t" + failed()) | |
| +''' | |
| + | |
| +''' | |
| +# Slow pressure modulation test | |
| +orig.cleanup() | |
| +orig.time_total[0] = 1.0e-1 | |
| +orig.time_file_dt[0] = 0.101*orig.time_total[0] | |
| +orig.setFluidPressureModulation(A=1.0, f=1.0/orig.time_total[0]) | |
| +#orig.plotPrescribedFluidPressures() | |
| +orig.run(verbose=False) | |
| +#py.readlast() | |
| +#py.plotConvergence() | |
| +#py.plotFluidDiffAdvPresZ() | |
| +#py.writeVTKall() | |
| +for it in range(1,py.status()): # gradient should be smooth in all output files | |
| + py.readstep(it, verbose=False) | |
| + ideal_grad_p_z =\ | |
| + numpy.linspace(py.p_f[0,0,0], py.p_f[0,0,-1], py.num[2]) | |
| + compareNumpyArraysClose(numpy.zeros((1,py.num[2])),\ | |
| + ideal_grad_p_z - py.p_f[0,0,:],\ | |
| + 'Slow pressure modulation (' + | |
| + str(it+1) + '/' + str(py.status()) + '):', tolerance=1.0e-1) | |
| +''' | |
| + | |
| +#''' | |
| +# Fast pressure modulation test | |
| +orig.time_total[0] = 1.0e-2 | |
| +orig.time_file_dt[0] = 0.101*orig.time_total[0] | |
| +orig.setFluidPressureModulation(A=1.0, f=1.0/orig.time_total[0]) | |
| +#orig.plotPrescribedFluidPressures() | |
| +orig.run(verbose=False) | |
| +#py.plotConvergence() | |
| +#py.plotFluidDiffAdvPresZ() | |
| +#py.writeVTKall() | |
| +for it in range(1,py.status()+1): # gradient should be smooth in all output fi… | |
| + py.readstep(it, verbose=False) | |
| + #py.plotFluidDiffAdvPresZ() | |
| + ideal_grad_p_z =\ | |
| + numpy.linspace(py.p_f[0,0,0], py.p_f[0,0,-1], py.num[2]) | |
| + compareNumpyArraysClose(numpy.zeros((1,py.num[2])),\ | |
| + ideal_grad_p_z - py.p_f[0,0,:],\ | |
| + 'Fast pressure modulation (' + | |
| + str(it) + '/' + str(py.status()) + '):', tolerance=5.0e-1) | |
| +#''' | |
| + | |
| +cleanup(orig) | |
| diff --git a/tests/cfd_tests_darcy_particles.py b/tests/cfd_tests_darcy_particl… | |
| t@@ -0,0 +1,165 @@ | |
| +#!/usr/bin/env python | |
| +from pytestutils import * | |
| +import sphere | |
| +import numpy | |
| + | |
| +#''' | |
| +print("### Steady state, no gravity, no forcing, Dirichlet+Dirichlet BCs") | |
| +orig = sphere.sim('darcy_particles', np = 1000) | |
| +orig.cleanup() | |
| +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_var… | |
| +orig.defaultParams() | |
| +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5) | |
| +orig.initRandomGridPos([20, 20, 200]) | |
| +orig.initTemporal(total=0.005, file_dt=0.001) | |
| +orig.initFluid(cfd_solver=1) | |
| +#orig.p_f[5,3,2] *= 1.5 | |
| +#orig.k_c[0] = 4.6e-15 | |
| +orig.k_c[0] = 4.6e-10 | |
| +#orig.g[2] = -10.0 | |
| +orig.setStiffnessNormal(36.4e9) | |
| +orig.setStiffnessTangential(36.4e9/3.0) | |
| +orig.run(verbose=False) | |
| +#orig.writeVTKall() | |
| +py = sphere.sim(sid = orig.sid, fluid = True) | |
| +py.readlast(verbose=False) | |
| + | |
| +ones = numpy.ones((orig.num)) | |
| +py.readlast(verbose = False) | |
| +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:") | |
| + | |
| +# Fluid flow should be very small | |
| +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + print(numpy.min(py.v_f)) | |
| + print(numpy.mean(py.v_f)) | |
| + print(numpy.max(py.v_f)) | |
| + raise Exception("Failed") | |
| + | |
| + | |
| + | |
| +print("### Steady state, no gravity, no forcing, Neumann+Dirichlet BCs") | |
| +orig = sphere.sim('darcy_particles', np = 1000) | |
| +orig.cleanup() | |
| +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_var… | |
| +orig.defaultParams() | |
| +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5) | |
| +orig.initRandomGridPos([20, 20, 200]) | |
| +orig.initTemporal(total=0.005, file_dt=0.001) | |
| +orig.initFluid(cfd_solver=1) | |
| +#orig.p_f[5,3,2] *= 1.5 | |
| +#orig.k_c[0] = 4.6e-15 | |
| +orig.k_c[0] = 4.6e-10 | |
| +orig.setFluidBottomNoFlow() | |
| +#orig.g[2] = -10.0 | |
| +orig.setStiffnessNormal(36.4e9) | |
| +orig.setStiffnessTangential(36.4e9/3.0) | |
| +orig.run(verbose=False) | |
| +#orig.writeVTKall() | |
| +py = sphere.sim(sid = orig.sid, fluid = True) | |
| +py.readlast(verbose=False) | |
| + | |
| +ones = numpy.ones((orig.num)) | |
| +py.readlast(verbose = False) | |
| +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:") | |
| + | |
| +# Fluid flow should be very small | |
| +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + print(numpy.min(py.v_f)) | |
| + print(numpy.mean(py.v_f)) | |
| + print(numpy.max(py.v_f)) | |
| + raise Exception("Failed") | |
| + | |
| + | |
| + | |
| +print("### Steady state, no gravity, no forcing, Neumann+Neumann BCs") | |
| +orig = sphere.sim('darcy_particles', np = 1000) | |
| +orig.cleanup() | |
| +#orig.generateRadii(histogram=False, psd='uni', radius_mean=5.0e-4, radius_var… | |
| +orig.defaultParams() | |
| +orig.generateRadii(psd='uni', mean=5.0e-2, variance=5.0e-5) | |
| +orig.initRandomGridPos([20, 20, 200]) | |
| +orig.initTemporal(total=0.005, file_dt=0.001) | |
| +orig.initFluid(cfd_solver=1) | |
| +#orig.p_f[5,3,2] *= 1.5 | |
| +#orig.k_c[0] = 4.6e-15 | |
| +orig.k_c[0] = 4.6e-10 | |
| +orig.setFluidBottomNoFlow() | |
| +orig.setFluidTopNoFlow() | |
| +#orig.g[2] = -10.0 | |
| +orig.setStiffnessNormal(36.4e9) | |
| +orig.setStiffnessTangential(36.4e9/3.0) | |
| +orig.run(verbose=False) | |
| +#orig.writeVTKall() | |
| +py = sphere.sim(sid = orig.sid, fluid = True) | |
| +py.readlast(verbose=False) | |
| + | |
| +ones = numpy.ones((orig.num)) | |
| +py.readlast(verbose = False) | |
| +compareNumpyArrays(ones, py.p_f, "Conservation of pressure:") | |
| + | |
| +# Fluid flow should be very small | |
| +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + print(numpy.min(py.v_f)) | |
| + print(numpy.mean(py.v_f)) | |
| + print(numpy.max(py.v_f)) | |
| + raise Exception("Failed") | |
| +#''' | |
| + | |
| + | |
| +print("### Fluidization test: Transient, gravity, Dirichlet+Dirichlet BCs") | |
| +#orig = sphere.sim('diffusivity-relax', fluid=False) | |
| +orig = sphere.sim('cube-init', fluid=False) | |
| +orig.readlast(verbose=False) | |
| +orig.num[2] /= 2 | |
| +orig.L[2] /= 2.0 | |
| +orig.id('darcy_fluidization') | |
| +orig.cleanup() | |
| +orig.initTemporal(total=0.005, file_dt=0.001) | |
| +orig.initFluid(cfd_solver=1) | |
| +orig.g[2] = -10.0 | |
| +#orig.k_c[0] = numpy.mean(orig.radius)**2/540.0 | |
| + | |
| +mean_porosity = orig.bulkPorosity() | |
| +fluidize_pressure = numpy.abs((orig.rho - orig.rho_f) \ | |
| + *(1.0 - mean_porosity)*numpy.abs(orig.g[2])) | |
| + | |
| +fluid_pressure_gradient = numpy.array([0.9, 2.0]) | |
| + | |
| +for i in numpy.arange(fluid_pressure_gradient.size): | |
| + | |
| + orig.id('fluidization-' + str(fluid_pressure_gradient[i])) | |
| + # set pressure gradient | |
| + dpdz = fluid_pressure_gradient[i] * fluidize_pressure | |
| + dp = dpdz * orig.L[2] | |
| + base_p = 0.0 | |
| + orig.p_f[:,:,0] = base_p + dp # high pressure at bottom | |
| + orig.p_f[:,:,-1] = base_p # low pressure at top | |
| + | |
| + orig.run(verbose=False) | |
| + orig.writeVTKall() | |
| + py = sphere.sim(sid = orig.sid, fluid = True) | |
| + py.readlast(verbose=False) | |
| + | |
| + """print('Mean particle velocity: ' | |
| + + str(numpy.mean(py.vel[:,0])) + ', ' | |
| + + str(numpy.mean(py.vel[:,1])) + ', ' | |
| + + str(numpy.mean(py.vel[:,2])) + ' m/s')""" | |
| + | |
| + z_vel_threshold = 0.001 | |
| + if fluid_pressure_gradient[i] < 1.0: | |
| + test(numpy.mean(py.vel[:,2]) < z_vel_threshold, | |
| + 'Fluidization (' + str(fluid_pressure_gradient[i]) + '):\t') | |
| + elif fluid_pressure_gradient[i] > 1.0: | |
| + test(numpy.mean(py.vel[:,2]) > z_vel_threshold, | |
| + 'Fluidization (' + str(fluid_pressure_gradient[i]) + '):\t') | |
| + | |
| + | |
| diff --git a/tests/cfd_tests_neumann_darcy.py b/tests/cfd_tests_neumann_darcy.py | |
| t@@ -0,0 +1,41 @@ | |
| +#!/usr/bin/env python | |
| +from pytestutils import * | |
| + | |
| +import sphere | |
| +import sys | |
| +import numpy | |
| +import matplotlib.pyplot as plt | |
| + | |
| +print('### CFD tests - Dirichlet/Neumann BCs ###') | |
| + | |
| +print('''# Neumann bottom, Dirichlet top BC. | |
| +# No gravity, no pressure gradients => no flow''') | |
| + | |
| +orig = sphere.sim("neumann", fluid = True) | |
| +cleanup(orig) | |
| +orig.defaultParams(mu_s = 0.4, mu_d = 0.4) | |
| +orig.defineWorldBoundaries([0.4, 0.4, 1], dx = 0.1) | |
| +orig.initFluid(cfd_solver = 1) | |
| +orig.initTemporal(total = 0.05, file_dt = 0.005, dt = 1.0e-4) | |
| +py = sphere.sim(sid = orig.sid, fluid = True) | |
| +orig.bc_bot[0] = 1 # No-flow BC at bottom (Neumann) | |
| +#orig.run(dry=True) | |
| +orig.run(verbose=False) | |
| +#orig.writeVTKall() | |
| +py.readlast(verbose = False) | |
| +ones = numpy.ones((orig.num)) | |
| +py.readlast(verbose = False) | |
| +compareNumpyArraysClose(ones, py.p_f, "Conservation of pressure:", | |
| + tolerance = 1.0e-5) | |
| + | |
| +# Fluid flow along z should be very small | |
| +if ((numpy.abs(py.v_f[:,:,:,:]) < 1.0e-6).all()): | |
| + print("Flow field:\t\t" + passed()) | |
| +else: | |
| + print("Flow field:\t\t" + failed()) | |
| + print(numpy.min(py.v_f)) | |
| + print(numpy.mean(py.v_f)) | |
| + print(numpy.max(py.v_f)) | |
| + raise Exception("Failed") | |
| + | |
| +orig.cleanup() | |
| diff --git a/tests/fluid_particle_interaction_darcy.py b/tests/fluid_particle_i… | |
| t@@ -0,0 +1,31 @@ | |
| +#!/usr/bin/env python | |
| +import sphere | |
| +from pytestutils import * | |
| + | |
| +sim = sphere.sim('fluid_particle_interaction', fluid=True) | |
| +sim.cleanup() | |
| + | |
| +sim.defineWorldBoundaries([1.0, 1.0, 1.0], dx = 0.1) | |
| +sim.initFluid(cfd_solver = 1) | |
| + | |
| + | |
| +# No gravity, pressure gradient enforced by Dirichlet boundaries. | |
| +# The particle should be sucked towards the low pressure | |
| +print('# Test 1: Test pressure gradient force') | |
| +sim.p_f[:,:,0] = 1.0 | |
| +sim.p_f[:,:,-1] = 1.1 | |
| +sim.addParticle([0.5, 0.5, 0.5], 0.01) | |
| +sim.initTemporal(total=0.001, file_dt=0.0001) | |
| +#sim.time_file_dt[0] = sim.time_dt[0] | |
| +#sim.time_total[0] = sim.time_dt[0] | |
| + | |
| +#sim.run(verbose=False) | |
| +sim.run() | |
| +#sim.run(dry=True) | |
| +#sim.run(cudamemcheck=True) | |
| +#sim.writeVTKall() | |
| + | |
| +sim.readlast() | |
| +test(sim.vel[0,2] < 0.0, 'Particle velocity:') | |
| + | |
| +sim.cleanup() | |
| diff --git a/tests/highlighttext.py b/tests/highlighttext.py | |
| t@@ -0,0 +1,51 @@ | |
| +#!/usr/bin/env python | |
| +# ANSI color coded text if the terminal supports it. No external requirements. | |
| +# Inspired by | |
| +# http://korbinin.blogspot.dk/2012/10/color-text-output-from-python.html | |
| + | |
| +import sys | |
| + | |
| +def highlight(string, fg, bold=False): | |
| + ''' | |
| + Return `string` with ANSI color codes. | |
| + | |
| + :param string: String to colorize. | |
| + :type string: str | |
| + :param fg: Foreground text color. Possible values: 'g', 'green', 'r', 're… | |
| + 'y', 'yellow', 'b', 'blue', 'm' or 'magenta' | |
| + :type fg: str | |
| + :param bold: Bold text | |
| + :type bold: bool | |
| + | |
| + :returns: ANSI formatted string, can be `print()`'ed directly. | |
| + :return type: str | |
| + ''' | |
| + | |
| + attr = [] | |
| + if sys.stdout.isatty(): | |
| + | |
| + if fg == 'g' or fg == 'green': | |
| + attr.append('32') | |
| + | |
| + elif fg == 'r' or fg == 'red': | |
| + attr.append('31') | |
| + | |
| + elif fg == 'y' or fg == 'yellow': | |
| + attr.append('33') | |
| + | |
| + elif fg == 'b' or fg == 'blue': | |
| + attr.append('34') | |
| + | |
| + elif fg == 'm' or fg == 'magenta': | |
| + attr.append('35') | |
| + | |
| + else: | |
| + raise Exception('Error: Foreground color `fg` value not understood… | |
| + | |
| + if bold: | |
| + attr.append('1') | |
| + | |
| + return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) | |
| + | |
| + else: | |
| + return string | |
| diff --git a/tests/io_tests_fluid.py b/tests/io_tests_fluid.py | |
| t@@ -3,10 +3,11 @@ from pytestutils import * | |
| import sphere | |
| #### Input/output tests #### | |
| -print("### Fluid input/output tests ###") | |
| +print("### Fluid input/output tests - Navier Stokes CFD solver ###") | |
| # Generate data in python | |
| orig = sphere.sim(np=100, sid="test-initgrid-fluid", fluid=True) | |
| +orig.cleanup() | |
| orig.generateRadii(histogram=False, radius_mean=1.0) | |
| orig.defaultParams() | |
| orig.initRandomGridPos() | |
| @@ -22,7 +23,56 @@ py.readbin("../input/" + orig.sid + ".bin", verbose=False) | |
| compare(orig, py, "Python IO:") | |
| # Test C++ IO routines | |
| -orig.run() | |
| +orig.run(verbose=False) | |
| +#orig.run(dry=True) | |
| +#orig.run(verbose=True, hideinputfile=False, cudamemcheck=True) | |
| +cpp = sphere.sim(fluid=True) | |
| +cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False) | |
| +compare(orig, cpp, "C++ IO: ") | |
| + | |
| +# Test CUDA IO routines | |
| +cuda = sphere.sim(fluid=True) | |
| +cuda.readbin("../output/" + orig.sid + ".output00001.bin", verbose=False) | |
| +cuda.time_current = orig.time_current | |
| +cuda.time_step_count = orig.time_step_count | |
| +compareNumpyArraysClose(orig.v_f, cuda.v_f, "cuda.v_f:", tolerance=1e-5) | |
| +cuda.v_f = orig.v_f | |
| +#compareNumpyArraysClose(orig.p_f, cuda.p_f, "cuda.p_f:", tolerance=0.1) | |
| +cuda.p_f = orig.p_f | |
| +if numpy.allclose(orig.x, cuda.x, 0.01): | |
| + cuda.x = orig.x # ignore small changes | |
| +if numpy.max(numpy.abs(cuda.vel - orig.vel)) < 1.0e-5: | |
| + cuda.vel = orig.vel # ignore small changes | |
| + cuda.xyzsum = orig.xyzsum | |
| + cuda.force = orig.force | |
| +compare(orig, cuda, "CUDA IO: ") | |
| + | |
| + | |
| + | |
| +#### Input/output tests #### | |
| +print("### Fluid input/output tests - Darcy CFD solver ###") | |
| + | |
| +# Generate data in python | |
| +orig = sphere.sim(np=100, sid="test-initgrid-fluid", fluid=True) | |
| +orig.cleanup() | |
| +orig.generateRadii(histogram=False, radius_mean=1.0) | |
| +orig.defaultParams() | |
| +orig.initRandomGridPos() | |
| + | |
| +orig.initFluid(cfd_solver = 1) | |
| +orig.setMaxIterations(10) | |
| +orig.initTemporal(current=0.0, total=0.0) | |
| +orig.time_total=2.0*orig.time_dt | |
| +orig.time_file_dt = orig.time_dt | |
| +orig.writebin(verbose=False) | |
| + | |
| +# Test Python IO routines | |
| +py = sphere.sim(fluid=True) | |
| +py.readbin("../input/" + orig.sid + ".bin", verbose=False) | |
| +compare(orig, py, "Python IO:") | |
| + | |
| +# Test C++ IO routines | |
| +orig.run(verbose=False) | |
| #orig.run(dry=True) | |
| #orig.run(verbose=True, hideinputfile=False, cudamemcheck=True) | |
| cpp = sphere.sim(fluid=True) | |
| diff --git a/tests/pytestutils.py b/tests/pytestutils.py | |
| @@ -1,14 +1,15 @@ | |
| #!/usr/bin/env python | |
| from sphere import * | |
| +from highlighttext import highlight | |
| import subprocess | |
| import sys | |
| def passed(): | |
| - return "\tPassed" | |
| + return "\t" + highlight("Passed", "green") | |
| def failed(): | |
| - return "\tFailed" | |
| + return "\t" + highlight("Failed", "red", True) | |
| def test(statement, string): | |
| if (statement == True): |