GopherProxy

	tSeveral important bugfixes - sphere - GPU-based 3D discrete element method alg…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 4261e0c247da8874c98b9047d674d8f708d4509d
	parent 44c5a7bf26b6ee3834079f25c6ada71ac1f0eafc
	Author: Anders Damsgaard Christensen <[email protected]>
	Date: Mon, 16 Apr 2012 14:52:51 +0200

	Several important bugfixes

	Diffstat:
	M mfiles/freadbin.m \| 4 +++-
	M mfiles/fwritebin.m \| 1 +
	M mfiles/initsetup.m \| 3 ++-
	M python/sphere.py \| 486 ++++++++++++++++++++++++-----…
	D python/test.py \| 15 ---------------
	M raytracer/main.cpp \| 343 ++++++++++++++++++-----------…
	M raytracer/rt_kernel.h \| 2 +-
	M src/Makefile \| 2 +-
	M src/constants.cuh \| 8 +++-----
	M src/datatypes.h \| 12 ++++--------
	M src/device.cu \| 374 +++++++++++++++++++----------…
	M src/file_io.cpp \| 6 ++----
	M src/main.cpp \| 26 +++++++++-----------------

	13 files changed, 839 insertions(+), 443 deletions(-)
	---
	diff --git a/mfiles/freadbin.m b/mfiles/freadbin.m
	t@@ -32,7 +32,8 @@ function [p, grids, time, params, walls] = freadbin(path, fn)
	p.k_r = zeros(p.np, 1); % Rolling stiffness
	p.gamma_s = zeros(p.np, 1); % Shear viscosity
	p.gamma_r = zeros(p.np, 1); % Rolling viscosity
	- p.mu_s = zeros(p.np, 1); % Inter-particle contact shear friction…
	+ p.mu_s = zeros(p.np, 1); % Inter-particle contact static shear f…
	+ p.mu_d = zeros(p.np, 1); % Inter-particle contact dynamic shear …
	p.mu_r = zeros(p.np, 1); % Inter-particle contact rolling fricti…
	p.C = zeros(p.np, 1); % Inter-particle cohesion
	p.E = zeros(p.np, 1); % Young's modulus
	t@@ -71,6 +72,7 @@ function [p, grids, time, params, walls] = freadbin(path, fn)
	p.gamma_s(j) = fread(fid, 1, 'float');
	p.gamma_r(j) = fread(fid, 1, 'float');
	p.mu_s(j) = fread(fid, 1, 'float');
	+ p.mu_d(j) = fread(fid, 1, 'float');
	p.mu_r(j) = fread(fid, 1, 'float');
	p.C(j) = fread(fid, 1, 'float');
	p.E(j) = fread(fid, 1, 'float');
	diff --git a/mfiles/fwritebin.m b/mfiles/fwritebin.m
	t@@ -51,6 +51,7 @@ function fwritebin(path, fn, p, grids, time, params, walls)
	fwrite(fid, p.gamma_s(j), 'float');
	fwrite(fid, p.gamma_r(j), 'float');
	fwrite(fid, p.mu_s(j), 'float');
	+ fwrite(fid, p.mu_d(j), 'float');
	fwrite(fid, p.mu_r(j), 'float');
	fwrite(fid, p.C(j), 'float');
	fwrite(fid, p.E(j), 'float');
	diff --git a/mfiles/initsetup.m b/mfiles/initsetup.m
	t@@ -75,7 +75,8 @@ if visualize == 1
	end

	% Friction angles
	-ang_s = 25; % Angle of shear resistance
	+ang_s = 30; % Angle of static shear resistance
	+ang_d = 25; % Angle of dynamic shear resistance
	ang_r = 35; % Angle of rolling resistance

	% Other particle parameters
	diff --git a/python/sphere.py b/python/sphere.py
	t@@ -1,7 +1,10 @@
	#!/usr/bin/env python2.7
	import math
	import numpy
	+import matplotlib as mpl
	+mpl.use('Agg')
	import matplotlib.pyplot as plt
	+from matplotlib.font_manager import FontProperties
	import subprocess

	numpy.seterr(all='warn', over='raise')
	t@@ -43,14 +46,12 @@ class Spherebin:
	self.k_n = numpy.zeros(self.np, dtype=numpy.float32)
	self.k_s = numpy.zeros(self.np, dtype=numpy.float32)
	self.k_r = numpy.zeros(self.np, dtype=numpy.float32)
	+ self.gamma_n = numpy.zeros(self.np, dtype=numpy.float32)
	self.gamma_s = numpy.zeros(self.np, dtype=numpy.float32)
	self.gamma_r = numpy.zeros(self.np, dtype=numpy.float32)
	self.mu_s = numpy.zeros(self.np, dtype=numpy.float32)
	+ self.mu_d = numpy.zeros(self.np, dtype=numpy.float32)
	self.mu_r = numpy.zeros(self.np, dtype=numpy.float32)
	- self.C = numpy.zeros(self.np, dtype=numpy.float32)
	- self.E = numpy.zeros(self.np, dtype=numpy.float32)
	- self.K = numpy.zeros(self.np, dtype=numpy.float32)
	- self.nu = numpy.zeros(self.np, dtype=numpy.float32)
	self.es_dot = numpy.zeros(self.np, dtype=numpy.float32)
	self.es = numpy.zeros(self.np, dtype=numpy.float32)
	self.p = numpy.zeros(self.np, dtype=numpy.float32)
	t@@ -66,7 +67,7 @@ class Spherebin:
	self.shearmodel = numpy.zeros(1, dtype=numpy.uint32)

	# Wall data
	- self.nw = numpy.ones(1, dtype=numpy.uint32)
	+ self.nw = numpy.ones(1, dtype=numpy.uint32) * nw
	self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float32).reshape(s…
	self.w_x = numpy.zeros(self.nw, dtype=numpy.float32)
	self.w_m = numpy.zeros(self.nw, dtype=numpy.float32)
	t@@ -112,14 +113,12 @@ class Spherebin:
	self.k_n = numpy.zeros(self.np, dtype=numpy.float32)
	self.k_s = numpy.zeros(self.np, dtype=numpy.float32)
	self.k_r = numpy.zeros(self.np, dtype=numpy.float32)
	+ self.gamma_n = numpy.zeros(self.np, dtype=numpy.float32)
	self.gamma_s = numpy.zeros(self.np, dtype=numpy.float32)
	self.gamma_r = numpy.zeros(self.np, dtype=numpy.float32)
	self.mu_s = numpy.zeros(self.np, dtype=numpy.float32)
	+ self.mu_d = numpy.zeros(self.np, dtype=numpy.float32)
	self.mu_r = numpy.zeros(self.np, dtype=numpy.float32)
	- self.C = numpy.zeros(self.np, dtype=numpy.float32)
	- self.E = numpy.zeros(self.np, dtype=numpy.float32)
	- self.K = numpy.zeros(self.np, dtype=numpy.float32)
	- self.nu = numpy.zeros(self.np, dtype=numpy.float32)
	self.es_dot = numpy.zeros(self.np, dtype=numpy.float32)
	self.es = numpy.zeros(self.np, dtype=numpy.float32)
	self.p = numpy.zeros(self.np, dtype=numpy.float32)
	t@@ -148,14 +147,12 @@ class Spherebin:
	self.k_n[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.k_s[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.k_r[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	+ self.gamma_n[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.gamma_s[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.gamma_r[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.mu_s[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	+ self.mu_d[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.mu_r[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	- self.C[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	- self.E[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	- self.K[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	- self.nu[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.es_dot[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.es[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	self.p[j] = numpy.fromfile(fh, dtype=numpy.float32, count=1)
	t@@ -170,6 +167,7 @@ class Spherebin:

	# Wall data
	self.nw = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
	+
	self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float32).reshape…
	self.w_x = numpy.zeros(self.nw, dtype=numpy.float32)
	self.w_m = numpy.zeros(self.nw, dtype=numpy.float32)
	t@@ -245,14 +243,12 @@ class Spherebin:
	fh.write(self.k_n[j].astype(numpy.float32))
	fh.write(self.k_s[j].astype(numpy.float32))
	fh.write(self.k_r[j].astype(numpy.float32))
	+ fh.write(self.gamma_n[j].astype(numpy.float32))
	fh.write(self.gamma_s[j].astype(numpy.float32))
	fh.write(self.gamma_r[j].astype(numpy.float32))
	fh.write(self.mu_s[j].astype(numpy.float32))
	+ fh.write(self.mu_d[j].astype(numpy.float32))
	fh.write(self.mu_r[j].astype(numpy.float32))
	- fh.write(self.C[j].astype(numpy.float32))
	- fh.write(self.E[j].astype(numpy.float32))
	- fh.write(self.K[j].astype(numpy.float32))
	- fh.write(self.nu[j].astype(numpy.float32))
	fh.write(self.es_dot[j].astype(numpy.float32))
	fh.write(self.es[j].astype(numpy.float32))
	fh.write(self.p[j].astype(numpy.float32))
	t@@ -310,22 +306,26 @@ class Spherebin:

	# Show radii as histogram
	if histogram == 1:
	+ fig = plt.figure(figsize=(15,10), dpi=300)
	+ figtitle = 'Particle size distribution, {0} particles'.format(self.np[0])
	+ fig.text(0.5,0.95,figtitle,horizontalalignment='center',fontproperties=F…
	bins = 20
	# Create histogram
	plt.hist(self.radius, bins)
	# Plot
	- plt.title('Particle size distribution, {0} particles'.format(self.np))
	plt.xlabel('Radii [m]')
	plt.ylabel('Count')
	plt.axis('tight')
	- plt.show()
	+ fig.savefig('psd.png')
	+ fig.clf()


	- # Initialize particle positions to non-overlapping configuration
	- def initsetup(self, g = numpy.array([0.0, 0.0, -9.80665]),
	- gridnum = numpy.array([12, 12, 36]),
	- periodic = 1,
	- shearmodel = 1):
	+ # Initialize particle positions to completely random, non-overlapping config…
	+ # This method is very compute intensive at high particle numbers.
	+ def initRandomPos(self, g = numpy.array([0.0, 0.0, -9.80665]),
	+ gridnum = numpy.array([12, 12, 36]),
	+ periodic = 1,
	+ shearmodel = 2):
	""" Initialize particle positions in loose, cubic configuration.
	Radii must be set beforehand.
	xynum is the number of rows in both x- and y- directions.
	t@@ -358,7 +358,7 @@ class Spherebin:
	delta = self.x[i] - self.x[j]
	delta_len = math.sqrt(numpy.dot(delta,delta)) \
	- (self.radius[i] + self.radius[j])
	- if (delta_len < 0):
	+ if (delta_len < 0.0):
	overlaps = True
	print "\rFinding non-overlapping particle positions, {0} % complete".for…

	t@@ -372,9 +372,222 @@ class Spherebin:
	self.w_vel[0] = 0.0
	self.w_force[0] = 0.0
	self.w_devs[0] = 0.0
	- self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
	+ #self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
	+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
	+
	+ # Initialize particle positions to regular, grid-like, non-overlapping confi…
	+ def initGridPos(self, g = numpy.array([0.0, 0.0, -9.80665]),
	+ gridnum = numpy.array([12, 12, 36]),
	+ periodic = 1,
	+ shearmodel = 2):
	+ """ Initialize particle positions in loose, cubic configuration.
	+ Radii must be set beforehand.
	+ xynum is the number of rows in both x- and y- directions.
	+ """
	+ self.g = g
	+ self.periodic[0] = periodic
	+
	+ # Calculate cells in grid
	+ self.num = gridnum
	+
	+ # World size
	+ r_max = numpy.amax(self.radius)
	+ cellsize = 2.1 * r_max
	+ self.L = self.num * cellsize
	+
	+ # Check whether there are enough grid cells
	+ if ((self.num[0]self.num[1]self.num[2]-(2**3)) < self.np):
	+ print "Error! The grid is not sufficiently large."
	+ raise NameError('Error! The grid is not sufficiently large.')
	+
	+ gridpos = numpy.zeros(self.nd, dtype=numpy.uint32)
	+
	+ # Make sure grid is sufficiently large if every second level is moved
	+ if (self.periodic[0] == 1):
	+ self.num[0] -= 1
	+ self.num[1] -= 1
	+
	+ # Check whether there are enough grid cells
	+ if ((self.num[0]self.num[1]self.num[2]-(233)) < self.np):
	+ print "Error! The grid is not sufficiently large."
	+ raise NameError('Error! The grid is not sufficiently large.')
	+
	+
	+ # Particle positions randomly distributed without overlap
	+ for i in range(self.np):
	+
	+ # Find position in 3d mesh from linear index
	+ gridpos[0] = (i % (self.num[0]))
	+ gridpos[1] = numpy.floor(i/(self.num[0])) % (self.num[0])
	+ gridpos[2] = numpy.floor(i/((self.num[0])*(self.num[1]))) #\
	+ #% ((self.num[0])*(self.num[1]))
	+
	+ for d in range(self.nd):
	+ self.x[i,d] = gridpos[d] * cellsize + 0.5*cellsize
	+
	+ if (self.periodic[0] == 1): # Allow pushing every 2.nd level out of late…
	+ # Offset every second level
	+ if (gridpos[2] % 2):
	+ self.x[i,0] += 0.5*cellsize
	+ self.x[i,1] += 0.5*cellsize
	+
	+ self.shearmodel[0] = shearmodel
	+
	+ # Readjust grid to correct size
	+ if (self.periodic[0] == 1):
	+ self.num[0] += 1
	+ self.num[1] += 1
	+
	+ # Initialize upper wall
	+ self.w_n[0,2] = -1.0
	+ self.w_x[0] = self.L[2]
	+ self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
	+ self.w_vel[0] = 0.0
	+ self.w_force[0] = 0.0
	+ self.w_devs[0] = 0.0
	+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
	+
	+
	+ # Initialize particle positions to non-overlapping configuration
	+ # in grid, with a certain element of randomness
	+ def initRandomGridPos(self, g = numpy.array([0.0, 0.0, -9.80665]),
	+ gridnum = numpy.array([12, 12, 32]),
	+ periodic = 1,
	+ shearmodel = 2):
	+ """ Initialize particle positions in loose, cubic configuration.
	+ Radii must be set beforehand.
	+ xynum is the number of rows in both x- and y- directions.
	+ """
	+ self.g = g
	+ self.periodic[0] = periodic
	+
	+ # Calculate cells in grid
	+ coarsegrid = numpy.floor(gridnum/2)
	+
	+ # World size
	+ r_max = numpy.amax(self.radius)
	+ cellsize = 2.1 * r_max * 2 # Cells in grid 2*size to make space for random…
	+
	+ # Check whether there are enough grid cells
	+ if (((coarsegrid[0]-1)(coarsegrid[1]-1)(coarsegrid[2]-1)) < self.np):
	+ print "Error! The grid is not sufficiently large."
	+ raise NameError('Error! The grid is not sufficiently large.')
	+
	+ gridpos = numpy.zeros(self.nd, dtype=numpy.uint32)
	+
	+ # Particle positions randomly distributed without overlap
	+ for i in range(self.np):

	+ # Find position in 3d mesh from linear index
	+ gridpos[0] = (i % (coarsegrid[0]))
	+ gridpos[1] = numpy.floor(i/(coarsegrid[0])) % (coarsegrid[0])
	+ gridpos[2] = numpy.floor(i/((coarsegrid[0])*(coarsegrid[1])))
	+
	+ # Place particles in grid structure, and randomly adjust the positions
	+ # within the oversized cells (uniform distribution)
	+ for d in range(self.nd):
	+ r = self.radius[i]*1.05
	+ self.x[i,d] = gridpos[d] * cellsize \
	+ + ((cellsize-r) - r) * numpy.random.random_sample() + r

	+ self.shearmodel[0] = shearmodel
	+
	+ # Calculate new grid with cell size equal to max. particle diameter
	+ x_max = numpy.max(self.x[:,0] + self.radius)
	+ y_max = numpy.max(self.x[:,1] + self.radius)
	+ z_max = numpy.max(self.x[:,2] + self.radius)
	+ cellsize = 2.1 * r_max
	+ # Adjust size of world
	+ self.num[0] = numpy.ceil(x_max/cellsize)
	+ self.num[1] = numpy.ceil(y_max/cellsize)
	+ self.num[2] = numpy.ceil(z_max/cellsize)
	+ self.L = self.num * cellsize
	+
	+ # Initialize upper wall
	+ self.w_n[0,2] = -1.0
	+ self.w_x[0] = self.L[2]
	+ self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
	+ self.w_vel[0] = 0.0
	+ self.w_force[0] = 0.0
	+ self.w_devs[0] = 0.0
	+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
	+
	+ # Adjust grid and upper wall for consolidation
	+ def consolidate(self, deviatoric_stress = 10e3,
	+ periodic = 1):
	+ """ Setup consolidation experiment. Specify the upper wall
	+ deviatoric stress in Pascal, default value is 10 kPa.
	+ """
	+
	+ # Compute new grid, scaled to fit max. and min. particle positions
	+ z_min = numpy.min(self.x[:,2] - self.radius)
	+ z_max = numpy.max(self.x[:,2] + self.radius)
	+ cellsize = self.L[0] / self.num[0]
	+ z_adjust = 1.1 # Overheightening of grid. 1.0 = no overheightening
	+ self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
	+ self.L[2] = (z_max-z_min)*z_adjust
	+
	+ # Initialize upper wall
	+ self.w_n[0,2] = -1.0
	+ self.w_x[0] = self.L[2]
	+ self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
	+ self.w_vel[0] = 0.0
	+ self.w_force[0] = 0.0
	+ self.w_devs[0] = deviatoric_stress
	+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
	+
	+
	+ # Adjust grid and upper wall for shear, and fix boundary particle velocities
	+ def shear(self, deviatoric_stress = 10e3,
	+ shear_strain_rate = 1,
	+ periodic = 1):
	+ """ Setup shear experiment. Specify the upper wall
	+ deviatoric stress in Pascal, default value is 10 kPa.
	+ The shear strain rate is the shear length divided by the
	+ initial height per second.
	+ """
	+
	+ # Compute new grid, scaled to fit max. and min. particle positions
	+ z_min = numpy.min(self.x[:,2] - self.radius)
	+ z_max = numpy.max(self.x[:,2] + self.radius)
	+ cellsize = self.L[0] / self.num[0]
	+ z_adjust = 1.3 # Overheightening of grid. 1.0 = no overheightening
	+ self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize)
	+ self.L[2] = (z_max-z_min)*z_adjust
	+
	+ # Initialize upper wall
	+ self.w_devs[0] = deviatoric_stress
	+
	+ # Zero kinematics
	+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float32).reshape(s…
	+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float32).reshape(s…
	+
	+ #fixheight = 2*cellsize
	+ fixheight = cellsize
	+
	+ # Fix horizontal velocity to 0.0 of lowermost particles
	+ I = numpy.nonzero(self.x[:,2] < (z_min + fixheight)) # Find indices of low…
	+ self.fixvel[I] = 1;
	+ self.angvel[I,0] = 0.0;
	+ self.angvel[I,1] = 0.0;
	+ self.angvel[I,2] = 0.0;
	+ self.vel[I,0] = 0.0; # x-dim
	+ self.vel[I,1] = 0.0; # y-dim
	+
	+ # Fix horizontal velocity to specific value of uppermost particles
	+ I = numpy.nonzero(self.x[:,2] > (z_max - fixheight)) # Find indices of low…
	+ self.fixvel[I] = 1;
	+ self.angvel[I,0] = 0.0;
	+ self.angvel[I,1] = 0.0;
	+ self.angvel[I,2] = 0.0;
	+ self.vel[I,0] = (z_max-z_min)*shear_strain_rate
	+ self.vel[I,1] = 0.0; # y-dim
	+
	+ # Zero x-axis displacement
	+ self.xsum = numpy.zeros(self.np, dtype=numpy.float32)
	+
	+
	+
	def initTemporal(self, total,
	current = 0.0,
	file_dt = 0.01,
	t@@ -395,14 +608,16 @@ class Spherebin:
	self.time_file_dt[0] = file_dt
	self.time_step_count[0] = 0

	- def defaultparams(self, ang_s = 25,
	- ang_r = 35,
	+ def defaultParams(self, ang_s = 20,
	+ ang_d = 15,
	+ ang_r = 0,
	rho = 3600,
	- k_n = 4e5,
	- k_s = 4e5,
	+ k_n = 1e9,
	+ k_s = 6.6e8,
	k_r = 4e6,
	- gamma_s = 4e2,
	- gamma_r = 4e2,
	+ gamma_n = 1e3,
	+ gamma_s = 1e3,
	+ gamma_r = 2e3,
	capillaryCohesion = 0):
	""" Initialize particle parameters to default values.
	Radii must be set prior to calling this function.
	t@@ -422,26 +637,28 @@ class Spherebin:
	# Contact rolling elastic stiffness (for shearmodel = 2), N/m
	self.k_r = numpy.ones(self.np, dtype=numpy.float32) * k_r

	- # Contact shear viscosity (for shearmodel = 1), Ns/m
	+ # Contact normal viscosity. Critical damping: 2sqrt(mk_n).
	+ # Normal force component elastic if nu = 0.0.
	+ #self.gamma_n = numpy.ones(self.np, dtype=numpy.float32) \
	+ # * nu_frac * 2.0 * math.sqrt(4.0/3.0 * math.pi * numpy.amin(…
	+ # * self.rho[0] * self.k_n[0])
	+ self.gamma_n = numpy.ones(self.np, dtype=numpy.float32) * gamma_n
	+
	+ # Contact shear viscosity, Ns/m
	self.gamma_s = numpy.ones(self.np, dtype=numpy.float32) * gamma_s

	- # Contact rolling visscosity (for shearmodel = 1), Ns/m?
	+ # Contact rolling viscosity, Ns/m?
	self.gamma_r = numpy.ones(self.np, dtype=numpy.float32) * gamma_r

	- # Contact shear friction coefficient
	+ # Contact static shear friction coefficient
	self.mu_s = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.rad…

	+ # Contact dynamic shear friction coefficient
	+ self.mu_d = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.rad…
	+
	# Contact rolling friction coefficient
	self.mu_r = numpy.ones(self.np, dtype=numpy.float32) * numpy.tan(numpy.rad…

	- r_min = numpy.amin(self.radius)
	-
	- # Poisson's ratio. Critical damping: 2sqrt(mk_n).
	- # Normal force component elastic if nu = 0.0.
	- self.nu = numpy.ones(self.np, dtype=numpy.float32) \
	- * 0.1 * 2.0 * math.sqrt(4.0/3.0 * math.pi * r_min**3 \
	- * self.rho[0] * self.k_n[0])
	-
	# Global parameters
	# if 1 = all particles have the same values for the physical parameters
	self.globalparams[0] = 1
	t@@ -522,6 +739,12 @@ def visualize(project, method = 'energy', savefig = False…

	lastfile = status(project)

	+ ### Plotting
	+ fig = plt.figure(figsize=(15,10),dpi=300)
	+ figtitle = "{0}, simulation {1}".format(method, project)
	+ fig.text(0.5,0.95,figtitle,horizontalalignment='center',fontproperties=FontP…
	+
	+
	if method == 'energy':

	# Allocate arrays
	t@@ -547,38 +770,50 @@ def visualize(project, method = 'energy', savefig = Fals…

	t = numpy.linspace(0.0, sb.time_current, lastfile+1)

	- # Plotting
	- plt.subplot(2,3,1)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Total potential energy [J]')
	- plt.plot(t, Epot, '+-')
	-
	- plt.subplot(2,3,2)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Total kinetic energy [J]')
	- plt.plot(t, Ekin, '+-')
	-
	- plt.subplot(2,3,3)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Total rotational energy [J]')
	- plt.plot(t, Erot, '+-')
	-
	- plt.subplot(2,3,4)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Shear energy rate [W]')
	- plt.plot(t, Es_dot, '+-')
	-
	- plt.subplot(2,3,5)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Total shear energy [J]')
	- plt.plot(t, Es, '+-')
	-
	- plt.subplot(2,3,6)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Total energy [J]')
	- plt.plot(t, Esum, '+-')
	-
	- #plt.show()
	+ # Potential energy
	+ ax1 = plt.subplot2grid((2,3),(0,0))
	+ ax1.set_xlabel('Time [s]')
	+ ax1.set_ylabel('Total potential energy [J]')
	+ ax1.plot(t, Epot, '+-')
	+
	+ # Kinetic energy
	+ ax2 = plt.subplot2grid((2,3),(0,1))
	+ ax2.set_xlabel('Time [s]')
	+ ax2.set_ylabel('Total kinetic energy [J]')
	+ ax2.plot(t, Ekin, '+-')
	+
	+ # Rotational energy
	+ ax3 = plt.subplot2grid((2,3),(0,2))
	+ ax3.set_xlabel('Time [s]')
	+ ax3.set_ylabel('Total rotational energy [J]')
	+ ax3.plot(t, Erot, '+-')
	+
	+ # Shear energy rate
	+ ax4 = plt.subplot2grid((2,3),(1,0))
	+ ax4.set_xlabel('Time [s]')
	+ ax4.set_ylabel('Shear energy rate [W]')
	+ ax4.plot(t, Es_dot, '+-')
	+
	+ # Shear energy
	+ ax5 = plt.subplot2grid((2,3),(1,1))
	+ ax5.set_xlabel('Time [s]')
	+ ax5.set_ylabel('Total shear energy [J]')
	+ ax5.plot(t, Es, '+-')
	+
	+ # Total energy
	+ #ax6 = plt.subplot2grid((2,3),(1,2))
	+ #ax6.set_xlabel('Time [s]')
	+ #ax6.set_ylabel('Total energy [J]')
	+ #ax6.plot(t, Esum, '+-')
	+
	+ # Combined view
	+ ax6 = plt.subplot2grid((2,3),(1,2))
	+ ax6.set_xlabel('Time [s]')
	+ ax6.set_ylabel('Energy [J]')
	+ ax6.plot(t, Epot, '+-g')
	+ ax6.plot(t, Ekin, '+-b')
	+ ax6.plot(t, Erot, '+-r')
	+ ax6.legend(('$\sum E_{pot}$','$\sum E_{kin}$','$\sum E_{rot}$'), 'upper ri…

	elif method == 'walls':

	t@@ -590,43 +825,92 @@ def visualize(project, method = 'energy', savefig = Fals…

	# Allocate arrays on first run
	if (i == 0):
	- wforce = numpy.zeros(lastfile+1, sb.nw[0])
	- wvel = numpy.zeros(lastfile+1, sb.nw[0])
	- wpos = numpy.zeros(lastfile+1, sb.nw[0])
	- wdevs = numpy.zeros(lastfile+1, sb.nw[0])
	-
	- wforce[i] = sb.w_force
	- wvel[i] = sb.w_vel
	- wpos[i] = sb.w_x
	- wdevs[i] = sb.w_devs
	+ wforce = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).resha…
	+ wvel = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).resha…
	+ wpos = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).resha…
	+ wdevs = numpy.zeros((lastfile+1)*sb.nw[0], dtype=numpy.float32).resha…
	+
	+ wforce[i] = sb.w_force[0]
	+ wvel[i] = sb.w_vel[0]
	+ wpos[i] = sb.w_x[0]
	+ wdevs[i] = sb.w_devs[0]

	t = numpy.linspace(0.0, sb.time_current, lastfile+1)

	# Plotting
	- plt.subplot(2,2,1)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Position [m]')
	- plt.plot(t, wpos, '+-')
	+ ax1 = plt.subplot2grid((2,2),(0,0))
	+ ax1.set_xlabel('Time [s]')
	+ ax1.set_ylabel('Position [m]')
	+ ax1.plot(t, wpos, '+-')
	+
	+ ax2 = plt.subplot2grid((2,2),(0,1))
	+ ax2.set_xlabel('Time [s]')
	+ ax2.set_ylabel('Velocity [m/s]')
	+ ax2.plot(t, wvel, '+-')

	- plt.subplot(2,2,2)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Velocity [m/s]')
	- plt.plot(t, wvel, '+-')
	+ ax3 = plt.subplot2grid((2,2),(1,0))
	+ ax3.set_xlabel('Time [s]')
	+ ax3.set_ylabel('Force [N]')
	+ ax3.plot(t, wforce, '+-')

	- plt.subplot(2,2,3)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Force [N]')
	- plt.plot(t, wforce, '+-')
	+ ax4 = plt.subplot2grid((2,2),(1,1))
	+ ax4.set_xlabel('Time [s]')
	+ ax4.set_ylabel('Deviatoric stress [Pa]')
	+ ax4.plot(t, wdevs, '+-')

	- plt.subplot(2,2,4)
	- plt.xlabel('Time [s]')
	- plt.ylabel('Deviatoric stress [Pa]')
	- plt.plot(t, wdevs, '+-')
	+
	+ elif method == 'shear':
	+
	+ sb = Spherebin()
	+ # Read stress values from project binaries
	+ for i in range(lastfile+1):
	+
	+ fn = "../output/{0}.output{1}.bin".format(project, i)
	+ sb.readbin(fn, verbose = False)
	+
	+ # First iteration: Allocate arrays and find constant values
	+ if (i == 0):
	+ xdisp = numpy.zeros(lastfile+1, dtype=numpy.float32) # Shear displ…
	+ sigma = numpy.zeros(lastfile+1, dtype=numpy.float32) # Normal stre…
	+ tau = numpy.zeros(lastfile+1, dtype=numpy.float32) # Shear stress
	+ dilation = numpy.zeros(lastfile+1, dtype=numpy.float32) # Upper wall …
	+
	+ fixvel = numpy.nonzero(sb.fixvel > 0.0)
	+ #fixvel_upper = numpy.nonzero(sb.vel[fixvel,0] > 0.0)
	+ shearvel = sb.vel[fixvel,0].max()
	+ w_x0 = sb.w_x[0]
	+ A = sb.L[0] * sb.L[1] # Upper surface area
	+
	+ # Summation of shear stress contributions
	+ for j in fixvel[0]:
	+ if (sb.vel[j,0] > 0.0):
	+ tau[i] += -sb.force[j,0]
	+
	+ xdisp[i] = sb.time_current[0] * shearvel
	+ sigma[i] = sb.w_force[0] / A
	+ sigma[i] = sb.w_devs[0]
	+ #tau[i] = sb.force[fixvel_upper,0].sum() / A
	+ dilation[i] = sb.w_x[0] - w_x0
	+
	+ # Plot stresses
	+ ax1 = plt.subplot2grid((2,1),(0,0))
	+ ax1.set_xlabel('Shear distance [m]')
	+ ax1.set_ylabel('Stress [Pa]')
	+ ax1.plot(xdisp, sigma, '+-g')
	+ ax1.plot(xdisp, tau, '+-r')
	+ #plt.legend('$\sigma`$','$\tau$')
	+
	+ # Plot dilation
	+ ax2 = plt.subplot2grid((2,1),(1,0))
	+ ax2.set_xlabel('Shear distance [m]')
	+ ax2.set_ylabel('Dilation [m]')
	+ ax2.plot(xdisp, dilation, '+-')


	# Optional save of figure
	if (savefig == True):
	- plt.savefig("{0}-{1}.{2}".format(project, method, outformat))
	+ fig.savefig("{0}-{1}.{2}".format(project, method, outformat))
	+ fig.clf()
	else:
	plt.show()

	diff --git a/python/test.py b/python/test.py
	t@@ -1,15 +0,0 @@
	-#!/usr/bin/env python
	-
	-# Import sphere functionality
	-from sphere import *
	-
	-#render("../input/1e3-init-pyout.bin")
	-
	-# New class
	-init = Spherebin(np = 1e3, nd = 3)
	-init.generateRadii(psd = 'uni', histogram = 0)
	-init.defaultparams()
	-init.initsetup()
	-init.initTemporal(total = 1.5)
	-init.writebin("../input/1e3-pyinit.bin")
	-render("../input/1e3-pyinit.bin", out = "~/Desktop/init")
	diff --git a/raytracer/main.cpp b/raytracer/main.cpp
	t@@ -15,8 +15,8 @@ int main(const int argc, const char* argv[])
	if (argc < 6 \|\| argc > 8 ){
	cout << "Usage: " << argv[0] << " <GPU or CPU> <sphere-binary.bin> <width>…
	<< "or\n"
	- << "Usage: " << argv[0] << " GPU <sphere-binary.bin> <width> <height>…
	- << "<pressure \| pressure50 \| es \| es_dot> <max value in color range>…
	+ << "Usage: " << argv[0] << " GPU <sphere-binary.bin \| colorbar> <widt…
	+ << " <pressure \| pressure50 \| es \| es_dot> <max value in color range…
	return 1;
	}

	t@@ -24,113 +24,6 @@ int main(const int argc, const char* argv[])
	<< "This is the SPHERE raytracer\n"
	<< "----------------------------\n";

	-
	- cout << "Reading binary: " << argv[2] << "\n";
	-
	- FILE* fin;
	- if ((fin = fopen(argv[2], "rb")) == NULL) {
	- cout << " Error encountered while trying to open binary '"
	- << argv[2] << "'\n";
	- return 1;
	- }
	-
	- // Read the number of dimensions and particles
	- unsigned int nd, np;
	- (void)fread(&nd, sizeof(nd), 1, fin);
	- if (nd != 3) {
	- cout << " Input binary contains " << nd
	- << "D data, this raytracer is made for 3D data\n";
	- return 1;
	- }
	-
	- (void)fread(&np, sizeof(np), 1, fin);
	- cout << " Number of particles: " << np << "\n";
	-
	- // Allocate particle structure array
	- cout << " Allocating host memory\n";
	- float4* p = new float4[np];
	- float* es_dot = new float[np];
	- float* es = new float[np];
	- float* pres = new float[np];
	- for (unsigned int i=0; i<np; i++) {
	- pres[i] = 0.0f; // Initialize values to zero
	- es_dot[i] = 0.0f;
	- }
	-
	- // Read temporal information
	- float dt, file_dt;
	- double current, total;
	- unsigned int step_count;
	- (void)fread(&dt, sizeof(dt), 1, fin);
	- (void)fread(&current, sizeof(current), 1, fin);
	- (void)fread(&total, sizeof(total), 1, fin);
	- (void)fread(&file_dt, sizeof(file_dt), 1, fin);
	- (void)fread(&step_count, sizeof(step_count), 1, fin);
	-
	- // Canonical coordinate system origo
	- f3 origo;
	- (void)fread(&origo, sizeof(float)*3, 1, fin);
	-
	- // Canonical coordinate system dimensions
	- f3 L;
	- (void)fread(&L, sizeof(float)*3, 1, fin);
	-
	- // Skip over irrelevant data
	- float blankf;
	- //f3 blankf3;
	- unsigned int blankui;
	- for (int j=0; j<3; j++) // Skip over grid.num data
	- (void)fread(&blankui, sizeof(blankui), 1, fin);
	-
	-
	- // Load data into particle array
	- for (unsigned int i=0; i<np; i++) {
	- (void)fread(&p[i].x, sizeof(float), 1, fin);
	- for (int j=0; j<4; j++)
	- (void)fread(&blankf, sizeof(blankf), 1, fin);
	-
	- (void)fread(&p[i].y, sizeof(float), 1, fin);
	- for (int j=0; j<4; j++)
	- (void)fread(&blankf, sizeof(blankf), 1, fin);
	-
	- (void)fread(&p[i].z, sizeof(float), 1, fin);
	- for (int j=0; j<4; j++)
	- (void)fread(&blankf, sizeof(blankf), 1, fin);
	- }
	- for (unsigned int i=0; i<np; i++) {
	- (void)fread(&blankf, sizeof(blankf), 1, fin); // fixvel
	- (void)fread(&blankf, sizeof(blankf), 1, fin); // xsum
	- (void)fread(&p[i].w, sizeof(float), 1, fin);
	- for (int j=0; j<12; j++)
	- (void)fread(&blankf, sizeof(blankf), 1, fin);
	- (void)fread(&es_dot[i], sizeof(float), 1, fin);
	- (void)fread(&es[i], sizeof(float), 1, fin);
	- (void)fread(&pres[i], sizeof(float), 1, fin);
	- }
	-
	- fclose(fin); // Binary read complete
	-
	- cout << " Spatial dimensions: "
	- << L.x << "" << L.y << "" << L.z << " m\n";
	-
	- // Eye position and focus point
	- f3 eye = {2.5fL.x, -4.2L.y, 2.0f*L.z};
	- f3 lookat = {0.45fL.x, 0.5fL.y, 0.4f*L.z};
	- if (L.z > (L.x + L.y)/1.5f) { // Render only bottom of world (for initsetup'…
	- eye.x = 2.5f*L.x;
	- eye.y = -6.2*L.y;
	- eye.z = 0.12*L.z;
	- lookat.x = 0.45f*L.x;
	- lookat.y = 0.5f*L.y;
	- lookat.z = 0.012f*L.z;
	- }
	-
	- // Screen width in world coordinates
	- //float imgw = 1.4f*L.x;
	- //float imgw = pow(L.xL.yL.z, 0.32f);
	- //float imgw = sqrt(L.xL.y)1.2f; // Adjust last float to capture entire he…
	- float imgw = L.x*1.35f;
	-
	// Allocate memory for image
	unsigned int width = atoi(argv[3]);
	unsigned int height = atoi(argv[4]);
	t@@ -141,52 +34,212 @@ int main(const int argc, const char* argv[])
	rgb* img;
	img = new rgb [height*width];

	- // Determine visualization mode
	- int visualize = 0; // 0: ordinary view
	- float max_val = 0;
	- if (argc == 8) {
	- if(strcmp(argv[6],"pressure") == 0)
	- visualize = 1; // 1: pressure view
	- if(strcmp(argv[6],"es_dot") == 0)
	- visualize = 2; // 2: es_dot view
	- if(strcmp(argv[6],"es") == 0)
	- visualize = 3; // 3: es view
	- if(strcmp(argv[6],"pressure50") == 0)
	- visualize = 4; // 4: pressure50 view
	-
	- // Read max. value specified in command args.
	- max_val = atof(argv[7]);
	- }
	+ // Render colorbar image
	+ if (strcmp(argv[2],"colorbar") == 0) {
	+
	+ for (unsigned int y=0; y<height; y++) {
	+ for (unsigned int x=0; x<width; x++) {
	+
	+ // Colormap value is relative to width position
	+ float ratio = (float) (x+1)/width;
	+
	+ // Determine Blue-White-Red color components
	+ float red = fmin(1.0f, 0.209fratioratioratio - 2.49fratio*ratio …
	+ float green = fmin(1.0f, -2.44fratioratio + 2.15f*ratio + 0.369f);
	+ float blue = fmin(1.0f, -2.21fratioratio + 1.61f*ratio + 0.573f);
	+
	+ // Write pixel value to image array
	+ //img[x + ywidth].r = red 250.f;
	+ img[x + ywidth].r = red 250.f;
	+ img[x + ywidth].g = green 250.f;
	+ img[x + ywidth].b = blue 250.f;
	+
	+ }
	+ }
	+ } else { // Render sphere binary
	+
	+ cout << "Reading binary: " << argv[2] << "\n";
	+
	+ FILE* fin;
	+ if ((fin = fopen(argv[2], "rb")) == NULL) {
	+ cout << " Error encountered while trying to open binary '"
	+ << argv[2] << "'\n";
	+ return 1;
	+ }

	- if (strcmp(argv[1],"GPU") == 0) {
	-
	- // Call cuda wrapper
	- if (rt(p, np, img, width, height, origo, L, eye, lookat, imgw, visualize, …
	- cout << "\nError in rt()\n";
	+ // Read the number of dimensions and particles
	+ unsigned int nd, np;
	+ (void)fread(&nd, sizeof(nd), 1, fin);
	+ if (nd != 3) {
	+ cout << " Input binary contains " << nd
	+ << "D data, this raytracer is made for 3D data\n";
	return 1;
	}
	- } else if (strcmp(argv[1],"CPU") == 0) {
	-
	- // Call CPU wrapper
	- if (rt_cpu(p, np, img, width, height, origo, L, eye, lookat, imgw) != 0) {
	- cout << "\nError in rt_cpu()\n";
	+
	+ (void)fread(&np, sizeof(np), 1, fin);
	+ cout << " Number of particles: " << np << "\n";
	+
	+ // Allocate particle structure array
	+ cout << " Allocating host memory\n";
	+ float4* p = new float4[np];
	+ float* fixvel = new float[np];
	+ float* es_dot = new float[np];
	+ float* es = new float[np];
	+ float* pres = new float[np];
	+ for (unsigned int i=0; i<np; i++) {
	+ fixvel[i] = 0.0f;
	+ pres[i] = 0.0f; // Initialize values to zero
	+ es_dot[i] = 0.0f;
	+ es[i] = 0.0f;
	+ }
	+
	+ // Read temporal information
	+ float dt, file_dt;
	+ double current, total;
	+ unsigned int step_count;
	+ (void)fread(&dt, sizeof(dt), 1, fin);
	+ (void)fread(&current, sizeof(current), 1, fin);
	+ (void)fread(&total, sizeof(total), 1, fin);
	+ (void)fread(&file_dt, sizeof(file_dt), 1, fin);
	+ (void)fread(&step_count, sizeof(step_count), 1, fin);
	+
	+ // Canonical coordinate system origo
	+ f3 origo;
	+ (void)fread(&origo, sizeof(float)*3, 1, fin);
	+
	+ // Canonical coordinate system dimensions
	+ f3 L;
	+ (void)fread(&L, sizeof(float)*3, 1, fin);
	+
	+ // Skip over irrelevant data
	+ float blankf;
	+ //f3 blankf3;
	+ unsigned int blankui;
	+ for (int j=0; j<3; j++) // Skip over grid.num data
	+ (void)fread(&blankui, sizeof(blankui), 1, fin);
	+
	+
	+ // Load data into particle array
	+ for (unsigned int i=0; i<np; i++) {
	+ (void)fread(&p[i].x, sizeof(float), 1, fin);
	+ for (int j=0; j<4; j++)
	+ (void)fread(&blankf, sizeof(blankf), 1, fin);
	+
	+ (void)fread(&p[i].y, sizeof(float), 1, fin);
	+ for (int j=0; j<4; j++)
	+ (void)fread(&blankf, sizeof(blankf), 1, fin);
	+
	+ (void)fread(&p[i].z, sizeof(float), 1, fin);
	+ for (int j=0; j<4; j++)
	+ (void)fread(&blankf, sizeof(blankf), 1, fin);
	+ }
	+ for (unsigned int i=0; i<np; i++) {
	+ //(void)fread(&blankf, sizeof(blankf), 1, fin); // fixvel
	+ (void)fread(&fixvel[i], sizeof(float), 1, fin); // fixvel
	+ (void)fread(&blankf, sizeof(blankf), 1, fin); // xsum
	+ (void)fread(&p[i].w, sizeof(float), 1, fin); // radius
	+ for (int j=0; j<10; j++)
	+ (void)fread(&blankf, sizeof(blankf), 1, fin);
	+ (void)fread(&es_dot[i], sizeof(float), 1, fin);
	+ (void)fread(&es[i], sizeof(float), 1, fin);
	+ (void)fread(&pres[i], sizeof(float), 1, fin);
	+ }
	+
	+ fclose(fin); // Binary read complete
	+
	+ cout << " Spatial dimensions: "
	+ << L.x << "" << L.y << "" << L.z << " m\n";
	+
	+ // Eye position and focus point
	+ //f3 eye = {0.5fL.x, -4.2L.y, 0.5f*L.z};
	+ f3 eye = {2.5fL.x, -4.2L.y, 2.0f*L.z};
	+ f3 lookat = {0.45fL.x, 0.5fL.y, 0.45f*L.z};
	+ if (L.z > (L.x + L.y)/1.5f) { // Render only bottom of world (for initsetu…
	+ eye.x = 1.1f*L.x;
	+ eye.y = 15.1*L.y;
	+ eye.z = 1.1*L.z;
	+ /lookat.x = 0.45fL.x;
	+ lookat.y = 0.5f*L.y;
	+ lookat.z = 0.30fL.z;/
	+ }
	+
	+ // Screen width in world coordinates
	+ //float imgw = 1.4f*L.x;
	+ //float imgw = pow(L.xL.yL.z, 0.32f);
	+ //float imgw = sqrt(L.xL.y)1.2f; // Adjust last float to capture entire …
	+ float imgw = L.x*1.35f;
	+
	+ // Determine visualization mode
	+ int visualize = 0; // 0: ordinary view
	+ float max_val = 0;
	+ if (argc == 8) {
	+ if(strcmp(argv[6],"pressure") == 0)
	+ visualize = 1; // 1: pressure view
	+ if(strcmp(argv[6],"es_dot") == 0)
	+ visualize = 2; // 2: es_dot view
	+ if(strcmp(argv[6],"es") == 0)
	+ visualize = 3; // 3: es view
	+ if(strcmp(argv[6],"pressure50") == 0)
	+ visualize = 4; // 4: pressure50 view
	+
	+ // Read max. value specified in command args.
	+ max_val = atof(argv[7]);
	+ }
	+
	+ // Render colorbar image
	+ if (strcmp(argv[2],"colorbar") == 0) {
	+
	+ for (unsigned int x=0; x<width; x++) {
	+ for (unsigned int y=0; y<height; y++) {
	+
	+ // Colormap value is relative to width position
	+ float ratio = (float)x/width;
	+
	+ // Determine Blue-White-Red color components
	+ float red = fmin(1.0f, 0.209fratioratioratio - 2.49fratio*rati…
	+ float green = fmin(1.0f, -2.44fratioratio + 2.15f*ratio + 0.369f);
	+ float blue = fmin(1.0f, -2.21fratioratio + 1.61f*ratio + 0.573f);
	+
	+ // Write pixel value to image array
	+ img[yheight + x].r = (unsigned char) red 255;
	+ img[yheight + x].g = (unsigned char) green 255;
	+ img[yheight + x].b = (unsigned char) blue 255;
	+
	+ }
	+ }
	+ }
	+
	+ if (strcmp(argv[1],"GPU") == 0) {
	+
	+ // Call cuda wrapper
	+ if (rt(p, np, img, width, height, origo, L, eye, lookat, imgw, visualize…
	+ cout << "\nError in rt()\n";
	+ return 1;
	+ }
	+ } else if (strcmp(argv[1],"CPU") == 0) {
	+
	+ // Call CPU wrapper
	+ if (rt_cpu(p, np, img, width, height, origo, L, eye, lookat, imgw) != 0)…
	+ cout << "\nError in rt_cpu()\n";
	+ return 1;
	+ }
	+ } else {
	+ cout << "Please specify CPU or GPU for execution\n";
	return 1;
	}
	- } else {
	- cout << "Please specify CPU or GPU for execution\n";
	- return 1;
	- }

	+ // Free dynamically allocated memory
	+ delete [] p;
	+ delete [] fixvel;
	+ delete [] pres;
	+ delete [] es;
	+ delete [] es_dot;
	+
	+ }

	// Write final image to PPM file
	image_to_ppm(img, argv[5], width, height);

	-
	- // Free dynamically allocated memory
	- delete [] p;
	- delete [] pres;
	- delete [] es;
	- delete [] es_dot;
	delete [] img;

	cout << "Terminating successfully\n\n";
	diff --git a/raytracer/rt_kernel.h b/raytracer/rt_kernel.h
	t@@ -26,6 +26,6 @@ int rt(float4* p, const unsigned int np,
	rgb* img, const unsigned int width, const unsigned int height,
	f3 origo, f3 L, f3 eye, f3 lookat, float imgw,
	const int visualize, const float max_val,
	- float* pres, float* es_dot, float* es);
	+ float* fixvel, float* pres, float* es_dot, float* es);

	#endif
	diff --git a/src/Makefile b/src/Makefile
	t@@ -82,5 +82,5 @@ backup:

	clean:
	$(RM) $(OBJECTS)
	- $(RM) $(EXECUTABLE)
	+ $(RM) ../sphere_*

	diff --git a/src/constants.cuh b/src/constants.cuh
	t@@ -14,15 +14,13 @@ __constant__ unsigned int devC_shearmodel; // Shear force …
	__constant__ float devC_k_n; // Material normal stiffness
	__constant__ float devC_k_s; // Material shear stiffness
	__constant__ float devC_k_r; // Material rolling stiffness
	+__constant__ float devC_gamma_n; // Material normal viscosity
	__constant__ float devC_gamma_s; // Material shear viscosity
	__constant__ float devC_gamma_r; // Material rolling viscos…
	-__constant__ float devC_mu_s; // Material shear friction coef…
	+__constant__ float devC_mu_s; // Material static shear fricti…
	+__constant__ float devC_mu_d; // Material dynamic shear frict…
	__constant__ float devC_mu_r; // Material rolling friction …
	-__constant__ float devC_C; // Material cohesion
	__constant__ float devC_rho; // Material density
	-__constant__ float devC_E; // Material Young's modulus
	-__constant__ float devC_K; // Material bulk modulus
	-__constant__ float devC_nu; // Material viscosity
	__constant__ float devC_kappa; // Capillary bond prefactor
	__constant__ float devC_db; // Debonding distance
	__constant__ float devC_V_b; // Liquid volume of capillary …
	diff --git a/src/datatypes.h b/src/datatypes.h
	t@@ -35,15 +35,13 @@ struct Particles {
	float *k_n;
	float *k_s;
	float *k_r;
	+ float *gamma_n;
	float *gamma_s;
	float *gamma_r;
	float *mu_s;
	+ float *mu_d;
	float *mu_r;
	- float *C;
	float *rho;
	- float *E;
	- float *K;
	- float *nu;
	float *es_dot;
	float *es;
	float *p;
	t@@ -80,15 +78,13 @@ struct Params {
	float k_n;
	float k_s;
	float k_r;
	+ float gamma_n;
	float gamma_s;
	float gamma_r;
	float mu_s;
	+ float mu_d;
	float mu_r;
	- float C;
	float rho;
	- float E;
	- float K;
	- float nu;
	float kappa;
	float db;
	float V_b;
	diff --git a/src/device.cu b/src/device.cu
	t@@ -152,65 +152,71 @@ __device__ float contactLinear_wall(float3* N, float3* T…
	float4* dev_vel_sorted, float4* dev_angvel…
	float3 n, float delta, float wvel)
	{
	- // Fetch velocities from global memory
	- float4 linvel_a_tmp = dev_vel_sorted[idx_a];
	- float4 angvel_a_tmp = dev_angvel_sorted[idx_a];
	+ // Fetch particle velocities from global memory
	+ const float4 linvel_tmp = dev_vel_sorted[idx_a];
	+ const float4 angvel_tmp = dev_angvel_sorted[idx_a];

	// Convert velocities to three-component vectors
	- float3 linvel_a = make_float3(linvel_a_tmp.x,
	- linvel_a_tmp.y,
	- linvel_a_tmp.z);
	- float3 angvel_a = make_float3(angvel_a_tmp.x,
	- angvel_a_tmp.y,
	- angvel_a_tmp.z);
	+ const float3 linvel = make_float3(linvel_tmp.x,
	+ linvel_tmp.y,
	+ linvel_tmp.z);
	+ const float3 angvel = make_float3(angvel_tmp.x,
	+ angvel_tmp.y,
	+ angvel_tmp.z);

	// Store the length of the angular velocity for later use
	- float angvel_length = length(angvel_a);
	+ const float angvel_length = length(angvel);

	// Contact velocity is the sum of the linear and
	// rotational components
	- float3 vel = linvel_a + radius_a * cross(n, angvel_a) + wvel;
	+ const float3 vel = linvel + radius_a * cross(n, angvel) + wvel;

	// Normal component of the contact velocity
	- float vel_n = dot(vel, n);
	+ const float vel_n = dot(vel, n);

	// The tangential velocity is the contact velocity
	// with the normal component subtracted
	- float3 vel_t = vel - n * (dot(vel, n));
	- float vel_t_length = length(vel_t);
	+ const float3 vel_t = vel - n * (dot(vel, n));
	+ const float vel_t_length = length(vel_t);

	// Calculate elastic normal component
	//float3 f_n = -devC_k_n * delta * n;

	// Normal force component: Elastic - viscous damping
	- float3 f_n = (-devC_k_n * delta - devC_nu * vel_n) * n;
	+ float3 f_n = (-devC_k_n * delta - devC_gamma_n * vel_n) * n;

	// Make sure the viscous damping doesn't exceed the elastic component,
	// i.e. the damping factor doesn't exceed the critical damping, 2sqrt(mk_n)
	if (dot(f_n, n) < 0.0f)
	f_n = make_float3(0.0f, 0.0f, 0.0f);

	- float f_n_length = length(f_n); // Save length for later use
	+ const float f_n_length = length(f_n); // Save length for later use

	// Initialize vectors
	- float3 f_s = make_float3(0.0f, 0.0f, 0.0f);
	+ float3 f_t = make_float3(0.0f, 0.0f, 0.0f);
	float3 T_res = make_float3(0.0f, 0.0f, 0.0f);

	// Check that the tangential velocity is high enough to avoid
	// divide by zero (producing a NaN)
	if (vel_t_length > 0.f) {

	- // Shear force component
	- // Limited by Mohr Coulomb failure criterion
	- f_s = -1.0f * fmin(devC_gamma_s * vel_t_length,
	- devC_mu_s * f_n_length)
	- * vel_t/vel_t_length;
	+ const float f_t_visc = devC_gamma_s * vel_t_length; // Tangential force b…
	+ const float f_t_limit = devC_mu_s * f_n_length; // Max. friction

	- // Shear energy production rate [W]
	- *es_dot += -dot(vel_t, f_s);
	+ // If the shear force component exceeds the friction,
	+ // the particle slips and energy is dissipated
	+ if (f_t_visc < f_t_limit) {
	+ f_t = -1.0f * f_t_visc * vel_t/vel_t_length;
	+
	+ } else { // Dynamic friction, friction failure
	+ f_t = -1.0f * f_t_limit * vel_t/vel_t_length;
	+
	+ // Shear energy production rate [W]
	+ //*es_dot += -dot(vel_t, f_t);
	+ }
	}

	- if (angvel_length > 0.f) {
	+/* if (angvel_length > 0.f) {
	// Apply rolling resistance (Zhou et al. 1999)
	//T_res = -angvel_a/angvel_length * devC_mu_r * radius_a * f_n_length;

	t@@ -218,13 +224,14 @@ __device__ float contactLinear_wall(float3* N, float3* T…
	T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_length,
	devC_mu_r * radius_a * f_n_length)
	* angvel_a/angvel_length;
	- }
	+ }*/

	// Total force from wall
	- *N += f_n + f_s;
	+ *N += f_n + f_t;
	+// *N += f_n;

	// Total torque from wall
	- T += -radius_a cross(n, f_s) + T_res;
	+ T += -radius_a cross(n, f_t) + T_res;

	// Pressure excerted onto particle from this contact
	p += f_n_length / (4.0f PI * radius_a*radius_a);
	t@@ -247,49 +254,49 @@ __device__ void contactLinearViscous(float3* N, float3* …
	{

	// Allocate variables and fetch missing time=t values for particle A and B
	- float4 vel_a = dev_vel_sorted[idx_a];
	- float4 vel_b = dev_vel_sorted[idx_b];
	- float4 angvel4_a = dev_angvel_sorted[idx_a];
	- float4 angvel4_b = dev_angvel_sorted[idx_b];
	+ const float4 vel_a = dev_vel_sorted[idx_a];
	+ const float4 vel_b = dev_vel_sorted[idx_b];
	+ const float4 angvel4_a = dev_angvel_sorted[idx_a];
	+ const float4 angvel4_b = dev_angvel_sorted[idx_b];

	// Convert to float3's
	- float3 angvel_a = make_float3(angvel4_a.x, angvel4_a.y, angvel4_a.z);
	- float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
	+ const float3 angvel_a = make_float3(angvel4_a.x, angvel4_a.y, angvel4_a.z);
	+ const float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);

	// Force between grain pair decomposed into normal- and tangential part
	- float3 f_n, f_s, f_c, T_res;
	+ float3 f_n, f_t, f_c, T_res;

	// Normal vector of contact
	- float3 n_ab = x_ab/x_ab_length;
	+ const float3 n_ab = x_ab/x_ab_length;

	// Relative contact interface velocity, w/o rolling
	- float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x,
	- vel_a.y - vel_b.y,
	- vel_a.z - vel_b.z);
	+ const float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x,
	+ vel_a.y - vel_b.y,
	+ vel_a.z - vel_b.z);

	// Relative contact interface velocity of particle surfaces at
	// the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10)
	- float3 vel_ab = vel_ab_linear
	- + radius_a * cross(n_ab, angvel_a)
	- + radius_b * cross(n_ab, angvel_b);
	+ const float3 vel_ab = vel_ab_linear
	+ + radius_a * cross(n_ab, angvel_a)
	+ + radius_b * cross(n_ab, angvel_b);

	// Relative contact interface rolling velocity
	- float3 angvel_ab = angvel_a - angvel_b;
	- float angvel_ab_length = length(angvel_ab);
	+ const float3 angvel_ab = angvel_a - angvel_b;
	+ const float angvel_ab_length = length(angvel_ab);

	// Normal component of the relative contact interface velocity
	- float vel_n_ab = dot(vel_ab_linear, n_ab);
	+ const float vel_n_ab = dot(vel_ab_linear, n_ab);

	// Tangential component of the relative contact interface velocity
	// Hinrichsen and Wolf 2004, eq. 13.9
	- float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
	- float vel_t_ab_length = length(vel_t_ab);
	+ const float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
	+ const float vel_t_ab_length = length(vel_t_ab);

	// Compute the normal stiffness of the contact
	//float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b);

	// Calculate rolling radius
	- float R_bar = (radius_a + radius_b) / 2.0f;
	+ const float R_bar = (radius_a + radius_b) / 2.0f;

	// Normal force component: linear-elastic approximation (Augier 2009, eq. 3)
	// with velocity dependant damping
	t@@ -311,20 +318,20 @@ __device__ void contactLinearViscous(float3* N, float3* …
	//f_n = -k_n_ab * delta_ab * n_ab;

	// Normal force component: Elastic - viscous damping
	- f_n = (-devC_k_n * delta_ab - devC_nu * vel_n_ab) * n_ab;
	+ f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;

	// Make sure the viscous damping doesn't exceed the elastic component,
	// i.e. the damping factor doesn't exceed the critical damping, 2sqrt(mk_n)
	if (dot(f_n, n_ab) < 0.0f)
	f_n = make_float3(0.0f, 0.0f, 0.0f);

	- float f_n_length = length(f_n);
	+ const float f_n_length = length(f_n);

	// Add max. capillary force
	f_c = -kappa * sqrtf(radius_a * radius_b) * n_ab;

	// Initialize force vectors to zero
	- f_s = make_float3(0.0f, 0.0f, 0.0f);
	+ f_t = make_float3(0.0f, 0.0f, 0.0f);
	T_res = make_float3(0.0f, 0.0f, 0.0f);

	// Shear force component: Nonlinear relation
	t@@ -332,16 +339,31 @@ __device__ void contactLinearViscous(float3* N, float3* …
	// to the normal force
	if (vel_t_ab_length > 0.f) {

	- // Shear force
	- f_s = -1.0f * fmin(devC_gamma_s * vel_t_ab_length,
	- devC_mu_s * length(f_n-f_c))
	- * vel_t_ab/vel_t_ab_length;
	+ // Tangential force by viscous model
	+ const float f_t_visc = devC_gamma_s * vel_t_ab_length;
	+
	+ // Determine max. friction
	+ float f_t_limit;
	+ if (vel_t_ab_length > 0.001f) { // Dynamic
	+ f_t_limit = devC_mu_d * length(f_n-f_c);
	+ } else { // Static
	+ f_t_limit = devC_mu_s * length(f_n-f_c);
	+ }
	+
	+ // If the shear force component exceeds the friction,
	+ // the particle slips and energy is dissipated
	+ if (f_t_visc < f_t_limit) { // Static
	+ f_t = -1.0f * f_t_visc * vel_t_ab/vel_t_ab_length;
	+
	+ } else { // Dynamic, friction failure
	+ f_t = -1.0f * f_t_limit * vel_t_ab/vel_t_ab_length;

	- // Shear friction production rate [W]
	- *es_dot += -dot(vel_t_ab, f_s);
	+ // Shear friction production rate [W]
	+ //*es_dot += -dot(vel_t_ab, f_t);
	+ }
	}

	- if (angvel_ab_length > 0.f) {
	+/* if (angvel_ab_length > 0.f) {
	// Apply rolling resistance (Zhou et al. 1999)
	//T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);

	t@@ -350,11 +372,11 @@ __device__ void contactLinearViscous(float3* N, float3* …
	devC_mu_r * R_bar * f_n_length)
	* angvel_ab/angvel_ab_length;
	}
	-
	+*/

	// Add force components from this collision to total force for particle
	- *N += f_n + f_s + f_c;
	- T += -R_bar cross(n_ab, f_s) + T_res;
	+ *N += f_n + f_t + f_c;
	+ T += -R_bar cross(n_ab, f_t) + T_res;

	// Pressure excerted onto the particle from this contact
	p += f_n_length / (4.0f PI * radius_a*radius_a);
	t@@ -378,88 +400,133 @@ __device__ void contactLinear(float3* N, float3* T,
	{

	// Allocate variables and fetch missing time=t values for particle A and B
	- float4 vel_b = dev_vel[idx_b_orig];
	- float4 angvel4_b = dev_angvel[idx_b_orig];
	+ const float4 vel_b = dev_vel[idx_b_orig];
	+ const float4 angvel4_b = dev_angvel[idx_b_orig];

	// Fetch previous sum of shear displacement for the contact pair
	- float4 delta_t0 = dev_delta_t[mempos];
	+ const float4 delta_t0_4 = dev_delta_t[mempos];
	+
	+ const float3 delta_t0_uncor = make_float3(delta_t0_4.x,
	+ delta_t0_4.y,
	+ delta_t0_4.z);

	// Convert to float3
	- float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);
	+ const float3 angvel_b = make_float3(angvel4_b.x, angvel4_b.y, angvel4_b.z);

	// Force between grain pair decomposed into normal- and tangential part
	- float3 f_n, f_s, f_c, T_res;
	+ float3 f_n, f_t, f_c, T_res;

	// Normal vector of contact
	- float3 n_ab = x_ab/x_ab_length;
	+ const float3 n_ab = x_ab/x_ab_length;

	// Relative contact interface velocity, w/o rolling
	- float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x,
	- vel_a.y - vel_b.y,
	- vel_a.z - vel_b.z);
	+ const float3 vel_ab_linear = make_float3(vel_a.x - vel_b.x,
	+ vel_a.y - vel_b.y,
	+ vel_a.z - vel_b.z);

	// Relative contact interface velocity of particle surfaces at
	// the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10)
	- float3 vel_ab = vel_ab_linear
	- + radius_a * cross(n_ab, angvel_a)
	- + radius_b * cross(n_ab, angvel_b);
	+ const float3 vel_ab = vel_ab_linear
	+ + radius_a * cross(n_ab, angvel_a)
	+ + radius_b * cross(n_ab, angvel_b);

	// Relative contact interface rolling velocity
	- float3 angvel_ab = angvel_a - angvel_b;
	- float angvel_ab_length = length(angvel_ab);
	+ const float3 angvel_ab = angvel_a - angvel_b;
	+ const float angvel_ab_length = length(angvel_ab);

	// Normal component of the relative contact interface velocity
	- float vel_n_ab = dot(vel_ab_linear, n_ab);
	+ const float vel_n_ab = dot(vel_ab_linear, n_ab);

	// Tangential component of the relative contact interface velocity
	// Hinrichsen and Wolf 2004, eq. 13.9
	- float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
	+ const float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab));
	+ const float vel_t_ab_length = length(vel_t_ab);

	- // Add tangential displacement to total tangential displacement
	- float3 delta_t = make_float3(delta_t0.x, delta_t0.y, delta_t0.z) + vel_t_ab…
	- float delta_t_length = length(delta_t);
	+ // Correct tangential displacement vector, which is
	+ // necessary if the tangential plane rotated
	+ const float3 delta_t0 = delta_t0_uncor - (n_ab * dot(delta_t0_uncor, n_ab));
	+ const float delta_t0_length = length(delta_t0);
	+
	+ // New tangential displacement vector
	+ float3 delta_t;

	// Compute the normal stiffness of the contact
	//float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b);

	// Calculate rolling radius
	- float R_bar = (radius_a + radius_b) / 2.0f;
	+ const float R_bar = (radius_a + radius_b) / 2.0f;

	// Normal force component: Elastic
	//f_n = -devC_k_n * delta_ab * n_ab;

	// Normal force component: Elastic - viscous damping
	- f_n = (-devC_k_n * delta_ab - devC_nu * vel_n_ab) * n_ab;
	+ f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab;

	// Make sure the viscous damping doesn't exceed the elastic component,
	// i.e. the damping factor doesn't exceed the critical damping, 2sqrt(mk_n)
	if (dot(f_n, n_ab) < 0.0f)
	f_n = make_float3(0.0f, 0.0f, 0.0f);

	- float f_n_length = length(f_n);
	+ const float f_n_length = length(f_n);

	// Add max. capillary force
	f_c = -devC_kappa * sqrtf(radius_a * radius_b) * n_ab;

	// Initialize force vectors to zero
	- f_s = make_float3(0.0f, 0.0f, 0.0f);
	+ f_t = make_float3(0.0f, 0.0f, 0.0f);
	T_res = make_float3(0.0f, 0.0f, 0.0f);

	- // Shear force component: Nonlinear relation
	- // Coulomb's law of friction limits the tangential force to less or equal
	- // to the normal force
	- if (delta_t_length > 0.f) {
	+ // Apply a tangential force if the previous tangential displacement
	+ // is non-zero, or the current sliding velocity is non-zero.
	+ if (delta_t0_length > 0.f \|\| vel_t_ab_length > 0.f) {
	+
	+ // Shear force: Visco-Elastic, limited by Coulomb friction
	+ float3 f_t_elast = -1.0f * devC_k_s * delta_t0;
	+ float3 f_t_visc = -1.0f * devC_gamma_s * vel_t_ab;
	+
	+ float f_t_limit;
	+
	+ if (vel_t_ab_length > 0.001f) { // Dynamic friciton
	+ f_t_limit = devC_mu_d * length(f_n-f_c);
	+ } else { // Static friction
	+ f_t_limit = devC_mu_s * length(f_n-f_c);
	+ }
	+
	+ // Tangential force before friction limit correction
	+ f_t = f_t_elast + f_t_visc;
	+ float f_t_length = length(f_t);
	+
	+ // If failure criterion is not met, contact is viscous-linear elastic.
	+ // If failure criterion is met, contact force is limited,
	+ // resulting in a slip and energy dissipation
	+ if (f_t_length > f_t_limit) { // Dynamic case
	+
	+ // Frictional force is reduced to equal the limit
	+ f_t *= f_t_limit/f_t_length;

	- // Shear force: Elastic, limited by Mohr-Coulomb
	- f_s = -1.0f * fmin(devC_k_s * delta_t_length,
	- devC_mu_s * length(f_n-f_c))
	- * delta_t/delta_t_length;
	+ // A slip event zeros the displacement vector
	+ //delta_t = make_float3(0.0f, 0.0f, 0.0f);

	- // Shear friction production rate [W]
	- *es_dot += -dot(vel_t_ab, f_s);
	+ // In a slip event, the tangential spring is adjusted to a
	+ // length which is consistent with Coulomb's equation
	+ // (Hinrichsen and Wolf, 2004)
	+ delta_t = -1.0f/devC_k_s * f_t + devC_gamma_s * vel_t_ab;
	+
	+ // Shear friction heat production rate:
	+ // The energy lost from the tangential spring is dissipated as heat
	+ //*es_dot += -dot(vel_t_ab, f_t);
	+ es_dot += length(delta_t0 - delta_t) devC_k_s / devC_dt; // Seen in E…
	+
	+ } else { // Static case
	+
	+ // No correction of f_t is required
	+
	+ // Add tangential displacement to total tangential displacement
	+ delta_t = delta_t0 + vel_t_ab * devC_dt;
	+ }
	}

	- /*if (angvel_ab_length > 0.f) {
	+ if (angvel_ab_length > 0.f) {
	// Apply rolling resistance (Zhou et al. 1999)
	//T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n);

	t@@ -467,11 +534,11 @@ __device__ void contactLinear(float3* N, float3* T,
	T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length,
	devC_mu_r * R_bar * f_n_length)
	* angvel_ab/angvel_ab_length;
	- }*/
	+ }

	// Add force components from this collision to total force for particle
	- *N += f_n + f_s + f_c;
	- T += -R_bar cross(n_ab, f_s) + T_res;
	+ *N += f_n + f_t + f_c;
	+ T += -R_bar cross(n_ab, f_t) + T_res;

	// Pressure excerted onto the particle from this contact
	p += f_n_length / (4.0f PI * radius_a*radius_a);
	t@@ -533,7 +600,7 @@ __device__ void bondLinear(float3* N, float3* T, float* es…
	//float vel_t_ab_length = length(vel_t_ab);

	float3 f_n = make_float3(0.0f, 0.0f, 0.0f);
	- float3 f_s = make_float3(0.0f, 0.0f, 0.0f);
	+ float3 f_t = make_float3(0.0f, 0.0f, 0.0f);

	// Mean radius
	float R_bar = (radius_a + radius_b)/2.0f;
	t@@ -543,15 +610,15 @@ __device__ void bondLinear(float3* N, float3* T, float* …

	if (length(vel_t_ab) > 0.f) {
	// Shear force component: Viscous
	- f_s = -1.0f * devC_gamma_s * vel_t_ab;
	+ f_t = -1.0f * devC_gamma_s * vel_t_ab;

	// Shear friction production rate [W]
	- *es_dot += -dot(vel_t_ab, f_s);
	+ //*es_dot += -dot(vel_t_ab, f_t);
	}

	// Add force components from this bond to total force for particle
	- *N += f_n + f_s;
	- T += -R_bar cross(n_ab, f_s);
	+ *N += f_n + f_t;
	+ T += -R_bar cross(n_ab, f_t);

	// Pressure excerted onto the particle from this bond
	p += length(f_n) / (4.0f PI * radius_a*radius_a);
	t@@ -763,7 +830,8 @@ __device__ void findContactsInCell(int3 targetCell,
	unsigned int* dev_cellEnd,
	unsigned int* dev_gridParticleIndex,
	int* nc,
	- unsigned int* dev_contacts)
	+ unsigned int* dev_contacts,
	+ float4* dev_distmod)
	{
	// Variable containing modifier for interparticle
	// vector, if it crosses a periodic boundary
	t@@ -897,11 +965,15 @@ __device__ void findContactsInCell(int3 targetCell,

	// Write the interparticle vector and radius of particle B
	//dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4…
	+ dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(d…

	// Increment contact counter
	++*nc;
	}

	+ // Write the inter-particle position vector correction and radius of p…
	+ //dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(d…
	+
	// Check wether particles are bonded together
	/*if (bonds.x == idx_b \|\| bonds.y == idx_b \|\|
	bonds.z == idx_b \|\| bonds.w == idx_b) {
	t@@ -927,7 +999,8 @@ __global__ void topology(unsigned int* dev_cellStart,
	unsigned int* dev_cellEnd, // Input: Particles in…
	unsigned int* dev_gridParticleIndex, // Input: Unsort…
	float4* dev_x_sorted, float* dev_radius_sorted,
	- unsigned int* dev_contacts)
	+ unsigned int* dev_contacts,
	+ float4* dev_distmod)
	{
	// Thread index equals index of particle A
	unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
	t@@ -958,7 +1031,7 @@ __global__ void topology(unsigned int* dev_cellStart,
	dev_x_sorted, dev_radius_sorted,
	dev_cellStart, dev_cellEnd,
	dev_gridParticleIndex,
	- &nc, dev_contacts);
	+ &nc, dev_contacts, dev_distmod);
	}
	}
	}
	t@@ -983,6 +1056,7 @@ __global__ void interact(unsigned int* dev_gridParticleIn…
	float4* dev_w_nx, float4* dev_w_mvfd,
	float* dev_w_force, //uint4* dev_bonds_sorted,
	unsigned int* dev_contacts,
	+ float4* dev_distmod,
	float4* dev_delta_t)
	{
	// Thread index equals index of particle A
	t@@ -1027,6 +1101,7 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	unsigned int idx_b_orig, mempos;
	float delta_n, x_ab_length;
	float4 x_b;
	+ float4 distmod;
	float radius_b;
	float3 x_ab;
	float4 vel_a = dev_vel_sorted[idx_a];
	t@@ -1039,11 +1114,16 @@ __global__ void interact(unsigned int* dev_gridParticl…
	mempos = (unsigned int)(idx_a_orig * devC_nc + i);
	__syncthreads();
	idx_b_orig = dev_contacts[mempos];
	+ distmod = dev_distmod[mempos];
	x_b = dev_x[idx_b_orig];
	- radius_b = dev_radius[idx_b_orig];
	- x_ab = make_float3(x_a.x - x_b.x,
	- x_a.y - x_b.y,
	- x_a.z - x_b.z);
	+ //radius_b = dev_radius[idx_b_orig];
	+ radius_b = distmod.w;
	+
	+ // Inter-particle vector, corrected for periodic boundaries
	+ x_ab = make_float3(x_a.x - x_b.x + distmod.x,
	+ x_a.y - x_b.y + distmod.y,
	+ x_a.z - x_b.z + distmod.z);
	+
	x_ab_length = length(x_ab);
	delta_n = x_ab_length - (radius_a + radius_b);

	t@@ -1055,16 +1135,16 @@ __global__ void interact(unsigned int* dev_gridParticl…
	//cuPrintf("\nProcessing contact, idx_a_orig = %u, idx_b_orig = %u…
	// idx_a_orig, idx_b_orig, i, delta_n);
	contactLinear(&N, &T, &es_dot, &p,
	- idx_a_orig,
	- idx_b_orig,
	- vel_a,
	- dev_vel,
	- angvel_a,
	- dev_angvel,
	- radius_a, radius_b,
	- x_ab, x_ab_length,
	- delta_n, dev_delta_t,
	- mempos);
	+ idx_a_orig,
	+ idx_b_orig,
	+ vel_a,
	+ dev_vel,
	+ angvel_a,
	+ dev_angvel,
	+ radius_a, radius_b,
	+ x_ab, x_ab_length,
	+ delta_n, dev_delta_t,
	+ mempos);
	} else {
	__syncthreads();
	// Remove this contact (there is no particle with index=np)
	t@@ -1074,7 +1154,7 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	}
	} else {
	__syncthreads();
	- dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	+ dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	}
	} // Contact loop end

	t@@ -1266,18 +1346,24 @@ __global__ void integrate(float4* dev_x_sorted, float4…
	acc.z += devC_g[2];
	//acc.z -= 9.82f;

	- // Only update velocity (and position) if the horizontal velocity is not f…
	+ // Update angular velocity
	+ angvel.x += angacc.x * dt;
	+ angvel.y += angacc.y * dt;
	+ angvel.z += angacc.z * dt;
	+
	+ // Check if particle has a fixed horizontal velocity
	if (vel.w > 0.0f) {

	- // Zero horizontal acceleration
	+ // Zero horizontal acceleration and disable
	+ // gravity to counteract segregation.
	+ // Particles may move in the z-dimension,
	+ // to allow for dilation.
	acc.x = 0.0f;
	acc.y = 0.0f;
	-
	- // Update vertical linear velocity
	- vel.z += acc.z * dt;
	+ acc.z -= devC_g[2];

	// Zero the angular acceleration and -velocity
	- angacc = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	+ angvel = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	// Update linear velocity
	t@@ -1285,11 +1371,6 @@ __global__ void integrate(float4* dev_x_sorted, float4*…
	vel.y += acc.y * dt;
	vel.z += acc.z * dt;

	- // Update angular velocity
	- angvel.x += angacc.x * dt;
	- angvel.y += angacc.y * dt;
	- angvel.z += angacc.z * dt;
	-
	// Update position. First-order Euler's scheme:
	//x.x += vel.x * dt;
	//x.y += vel.y * dt;
	t@@ -1490,27 +1571,23 @@ __host__ void transferToConstantMemory(Particles* p,
	params->k_n = p->k_n[0];
	params->k_s = p->k_s[0];
	params->k_r = p->k_r[0];
	+ params->gamma_n = p->gamma_n[0];
	params->gamma_s = p->gamma_s[0];
	params->gamma_r = p->gamma_r[0];
	params->mu_s = p->mu_s[0];
	+ params->mu_d = p->mu_d[0];
	params->mu_r = p->mu_r[0];
	- params->C = p->C[0];
	params->rho = p->rho[0];
	- params->E = p->E[0];
	- params->K = p->K[0];
	- params->nu = p->nu[0];
	cudaMemcpyToSymbol("devC_k_n", &params->k_n, sizeof(float));
	cudaMemcpyToSymbol("devC_k_s", &params->k_s, sizeof(float));
	cudaMemcpyToSymbol("devC_k_r", &params->k_r, sizeof(float));
	+ cudaMemcpyToSymbol("devC_gamma_n", &params->gamma_n, sizeof(float));
	cudaMemcpyToSymbol("devC_gamma_s", &params->gamma_s, sizeof(float));
	cudaMemcpyToSymbol("devC_gamma_r", &params->gamma_r, sizeof(float));
	cudaMemcpyToSymbol("devC_mu_s", &params->mu_s, sizeof(float));
	+ cudaMemcpyToSymbol("devC_mu_d", &params->mu_d, sizeof(float));
	cudaMemcpyToSymbol("devC_mu_r", &params->mu_r, sizeof(float));
	- cudaMemcpyToSymbol("devC_C", &params->C, sizeof(float));
	cudaMemcpyToSymbol("devC_rho", &params->rho, sizeof(float));
	- cudaMemcpyToSymbol("devC_E", &params->E, sizeof(float));
	- cudaMemcpyToSymbol("devC_K", &params->K, sizeof(float));
	- cudaMemcpyToSymbol("devC_nu", &params->nu, sizeof(float));
	cudaMemcpyToSymbol("devC_kappa", &params->kappa, sizeof(float));
	cudaMemcpyToSymbol("devC_db", &params->db, sizeof(float));
	cudaMemcpyToSymbol("devC_V_b", &params->V_b, sizeof(float));
	t@@ -1591,6 +1668,8 @@ __host__ void gpuMain(float4* host_x,

	// Particle contact bookkeeping
	unsigned int* dev_contacts;
	+ // Particle pair distance correction across periodic boundaries
	+ float4* dev_distmod;
	// x,y,z contains the interparticle vector, corrected if contact
	// is across a periodic boundary.
	float4* dev_delta_t; // Accumulated shear distance of contact
	t@@ -1630,6 +1709,7 @@ __host__ void gpuMain(float4* host_x,

	// Particle contact bookkeeping arrays
	cudaMalloc((void*)&dev_contacts, sizeof(unsigned int)p->np*NC); // Max NC …
	+ cudaMalloc((void*)&dev_distmod, sizeof(float4)p->np*NC);
	cudaMalloc((void*)&dev_delta_t, sizeof(float4)p->np*NC);

	// Wall arrays
	t@@ -1668,10 +1748,12 @@ __host__ void gpuMain(float4* host_x,
	cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)p->npNC, cudaMemcpyHostT…
	delete[] npu;

	- // Create array of 0.0 values on the host and transfer these to the shear di…
	+ // Create array of 0.0 values on the host and transfer these to the distance
	+ // modifier and shear displacement arrays
	float4* zerosf4 = new float4[p->np*NC];
	for (unsigned int i=0; i<(p->np*NC); ++i)
	zerosf4[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	+ cudaMemcpy(dev_distmod, zerosf4, sizeof(float4)p->npNC, cudaMemcpyHostToDe…
	cudaMemcpy(dev_delta_t, zerosf4, sizeof(float4)p->npNC, cudaMemcpyHostToDe…
	delete[] zerosf4;

	t@@ -1827,7 +1909,8 @@ __host__ void gpuMain(float4* host_x,
	dev_gridParticleIndex,
	dev_x_sorted,
	dev_radius_sorted,
	- dev_contacts);
	+ dev_contacts,
	+ dev_distmod);

	// Empty cuPrintf() buffer to console
	//cudaThreadSynchronize();
	t@@ -1835,7 +1918,7 @@ __host__ void gpuMain(float4* host_x,

	// Synchronization point
	cudaThreadSynchronize();
	- checkForCudaErrors("Post topology. Possibly caused by numerical instabil…
	+ checkForCudaErrors("Post topology: One or more particles moved outside t…
	}


	t@@ -1851,7 +1934,8 @@ __host__ void gpuMain(float4* host_x,
	dev_es_dot, dev_es, dev_p,
	dev_w_nx, dev_w_mvfd, dev_w_force,
	//dev_bonds_sorted,
	- dev_contacts,
	+ dev_contacts,
	+ dev_distmod,
	dev_delta_t);

	// Empty cuPrintf() buffer to console
	t@@ -1886,6 +1970,7 @@ __host__ void gpuMain(float4* host_x,
	cudaThreadSynchronize();
	checkForCudaErrors("Post integrateWalls");

	+
	// Update timers and counters
	time->current += time->dt;
	filetimeclock += time->dt;
	t@@ -1997,6 +2082,7 @@ __host__ void gpuMain(float4* host_x,
	//cudaFree(dev_bonds);
	//cudaFree(dev_bonds_sorted);
	cudaFree(dev_contacts);
	+ cudaFree(dev_distmod);
	cudaFree(dev_delta_t);

	// Cell-related arrays
	diff --git a/src/file_io.cpp b/src/file_io.cpp
	t@@ -85,14 +85,12 @@ int fwritebin(char target, Particles p, float4 *host_x, …
	fwrite(&p->k_n[j], sizeof(p->k_n[j]), 1, fp);
	fwrite(&p->k_s[j], sizeof(p->k_s[j]), 1, fp);
	fwrite(&p->k_r[j], sizeof(p->k_r[j]), 1, fp);
	+ fwrite(&p->gamma_n[j], sizeof(p->gamma_n[j]), 1, fp);
	fwrite(&p->gamma_s[j], sizeof(p->gamma_s[j]), 1, fp);
	fwrite(&p->gamma_r[j], sizeof(p->gamma_r[j]), 1, fp);
	fwrite(&p->mu_s[j], sizeof(p->mu_s[j]), 1, fp);
	+ fwrite(&p->mu_d[j], sizeof(p->mu_d[j]), 1, fp);
	fwrite(&p->mu_r[j], sizeof(p->mu_r[j]), 1, fp);
	- fwrite(&p->C[j], sizeof(p->C[j]), 1, fp);
	- fwrite(&p->E[j], sizeof(p->E[j]), 1, fp);
	- fwrite(&p->K[j], sizeof(p->K[j]), 1, fp);
	- fwrite(&p->nu[j], sizeof(p->nu[j]), 1, fp);
	fwrite(&p->es_dot[j], sizeof(p->es_dot[j]), 1, fp);
	fwrite(&p->es[j], sizeof(p->es[j]), 1, fp);
	fwrite(&p->p[j], sizeof(p->p[j]), 1, fp);
	diff --git a/src/main.cpp b/src/main.cpp
	t@@ -156,14 +156,12 @@ int main(int argc, char *argv[])
	p.k_n = new float[p.np]; // Particle normal stiffnesses
	p.k_s = new float[p.np]; // Particle shear stiffness…
	p.k_r = new float[p.np]; // Particle rolling stiffne…
	+ p.gamma_n = new float[p.np]; // Particle normal viscosity
	p.gamma_s = new float[p.np]; // Particle shear viscosity
	p.gamma_r = new float[p.np]; // Particle rolling viscosity
	- p.mu_s = new float[p.np]; // Inter-particle shear contact friction …
	+ p.mu_s = new float[p.np]; // Inter-particle static shear contact fr…
	+ p.mu_d = new float[p.np]; // Inter-particle dynamic shear co…
	p.mu_r = new float[p.np]; // Inter-particle rolling contact …
	- p.C = new float[p.np]; // Inter-particle tensile strengths (cohe…
	- p.E = new float[p.np]; // Youngs modulus for each particle
	- p.K = new float[p.np]; // Bulk modulus for each particle
	- p.nu = new float[p.np]; // Poisson's ratio for each particle
	p.es_dot = new float[p.np]; // Rate of shear energy dissipation
	p.es = new float[p.np]; // Total shear energy dissipation
	p.p = new float[p.np]; // Pressure excerted onto par…
	t@@ -247,21 +245,17 @@ int main(int argc, char *argv[])
	return 1;
	if (fread(&p.k_r[j], sizeof(p.k_r[j]), 1, fp) != 1)
	return 1;
	+ if (fread(&p.gamma_n[j], sizeof(p.gamma_n[j]), 1, fp) != 1)
	+ return 1;
	if (fread(&p.gamma_s[j], sizeof(p.gamma_s[j]), 1, fp) != 1)
	return 1;
	if (fread(&p.gamma_r[j], sizeof(p.gamma_r[j]), 1, fp) != 1)
	return 1;
	if (fread(&p.mu_s[j], sizeof(p.mu_s[j]), 1, fp) != 1)
	return 1;
	- if (fread(&p.mu_r[j], sizeof(p.mu_r[j]), 1, fp) != 1)
	- return 1;
	- if (fread(&p.C[j], sizeof(p.C[j]), 1, fp) != 1)
	+ if (fread(&p.mu_d[j], sizeof(p.mu_d[j]), 1, fp) != 1)
	return 1;
	- if (fread(&p.E[j], sizeof(p.E[j]), 1, fp) != 1)
	- return 1;
	- if (fread(&p.K[j], sizeof(p.K[j]), 1, fp) != 1)
	- return 1;
	- if (fread(&p.nu[j], sizeof(p.nu[j]), 1, fp) != 1)
	+ if (fread(&p.mu_r[j], sizeof(p.mu_r[j]), 1, fp) != 1)
	return 1;
	if (fread(&p.es_dot[j], sizeof(p.es_dot[j]), 1, fp) != 1)
	return 1;
	t@@ -431,15 +425,13 @@ int main(int argc, char *argv[])
	delete[] p.k_n;
	delete[] p.k_s;
	delete[] p.k_r;
	+ delete[] p.gamma_n;
	delete[] p.gamma_s;
	delete[] p.gamma_r;
	delete[] p.mu_s;
	+ delete[] p.mu_d;
	delete[] p.mu_r;
	- delete[] p.C;
	delete[] p.rho;
	- delete[] p.E;
	- delete[] p.K;
	- delete[] p.nu;
	delete[] p.es_dot;
	delete[] p.es;
	delete[] p.p;