GopherProxy

	tIt compiles without errors. Still need to update sphere.py to new file format …
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit c7e1dd22dc1892e430ab40a74fcb8df9de98c9d7
	parent ad86704c71d456826b62ba4d8b1e23dc9f420467
	Author: Anders Damsgaard <[email protected]>
	Date: Sat, 27 Oct 2012 00:37:45 +0200

	It compiles without errors. Still need to update sphere.py to new file format

	Diffstat:
	M src/constants.cuh \| 1 +
	M src/contactmodels.cuh \| 16 ++++++++--------
	M src/contactsearch.cuh \| 54 +++++++++++++++++------------…
	D src/datatypes.cuh \| 18 ------------------
	M src/device.cu \| 220 +++++++++++++++--------------…
	M src/integration.cuh \| 27 +++++++++++++--------------
	M src/sorting.cuh \| 8 ++++----
	M src/sphere.cpp \| 16 +++++++++++++++-
	M src/sphere.h \| 5 ++++-

	9 files changed, 181 insertions(+), 184 deletions(-)
	---
	diff --git a/src/constants.cuh b/src/constants.cuh
	t@@ -9,6 +9,7 @@
	// Constant memory size: 64 kb
	__constant__ unsigned int devC_nd; // Number of dimensions
	__constant__ unsigned int devC_np; // Number of particles
	+__constant__ unsigned int devC_nw; // Number of dynamic walls
	__constant__ int devC_nc; // Max. number of contacts a particle can…
	__constant__ Float devC_dt; // Computational time step length

	diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
	t@@ -44,7 +44,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Fl…
	//Float3 f_n = -devC_params.k_n * delta * n;

	// Normal force component: Elastic - viscous damping
	- Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_wn * vel_n) * n;
	+ Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_n * vel_n) * n;

	// Make sure the viscous damping doesn't exceed the elastic component,
	// i.e. the damping factor doesn't exceed the critical damping, 2sqrt(mk_n)
	t@@ -61,7 +61,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Fl…
	// divide by zero (producing a NaN)
	if (vel_t_length > 0.f) {

	- Float f_t_visc = devC_params.gamma_wt * vel_t_length; // Tangential force…
	+ Float f_t_visc = devC_params.gamma_t * vel_t_length; // Tangential force …
	Float f_t_limit = devC_params.mu_s * f_n_length; // Max. friction

	// If the shear force component exceeds the friction,
	t@@ -382,15 +382,15 @@ __device__ void contactLinear(Float3* F, Float3* T,
	// Shear friction heat production rate:
	// The energy lost from the tangential spring is dissipated as heat
	//*es_dot += -dot(vel_t_ab, f_t);
	- es_dot += length(delta_t0 - delta_t) devC_params.k_t / devC_params.dt…
	- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
	+ es_dot += length(delta_t0 - delta_t) devC_params.k_t / devC_dt; // Se…
	+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;

	} else { // Static case

	// No correction of f_t is required

	// Add tangential displacement to total tangential displacement
	- delta_t = delta_t0 + vel_t_ab * devC_params.dt;
	+ delta_t = delta_t0 + vel_t_ab * devC_dt;
	}
	}

	t@@ -555,15 +555,15 @@ __device__ void contactHertz(Float3* F, Float3* T,
	// Shear friction heat production rate:
	// The energy lost from the tangential spring is dissipated as heat
	//*es_dot += -dot(vel_t_ab, f_t);
	- es_dot += length(delta_t0 - delta_t) devC_params.k_t / devC_params.dt…
	- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt;
	+ es_dot += length(delta_t0 - delta_t) devC_params.k_t / devC_dt; // Se…
	+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt;

	} else { // Static case

	// No correction of f_t is required

	// Add tangential displacement to total tangential displacement
	- delta_t = delta_t0 + vel_t_ab * devC_params.dt;
	+ delta_t = delta_t0 + vel_t_ab * devC_dt;
	}
	}

	diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
	t@@ -14,7 +14,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)
	// Check whether x- and y boundaries are to be treated as periodic
	// 1: x- and y boundaries periodic
	// 2: x boundaries periodic
	- if (devC_params.periodic == 1) {
	+ if (devC_grid.periodic == 1) {

	// Periodic x-boundary
	if (targetCell->x < 0) {
	t@@ -38,7 +38,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)


	// Only x-boundaries are periodic
	- } else if (devC_params.periodic == 2) {
	+ } else if (devC_grid.periodic == 2) {

	// Periodic x-boundary
	if (targetCell->x < 0) {
	t@@ -77,7 +77,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod)

	// Find overlaps between particle no. 'idx' and particles in cell 'gridpos'.
	// Contacts are processed as soon as they are identified.
	-// Used for shearmodel=1, where contact history is not needed.
	+// Used for contactmodel=1, where contact history is not needed.
	// Kernel executed on device, and callable from device only.
	// Function is called from interact().
	__device__ void findAndProcessContactsInCell(int3 targetCell,
	t@@ -175,7 +175,7 @@ __device__ void findAndProcessContactsInCell(int3 targetCe…

	// Find overlaps between particle no. 'idx' and particles in cell 'gridpos'
	// Write the indexes of the overlaps in array contacts[].
	-// Used for shearmodel=2, where bookkeeping of contact history is necessary.
	+// Used for contactmodel=2, where bookkeeping of contact history is necessary.
	// Kernel executed on device, and callable from device only.
	// Function is called from topology().
	__device__ void findContactsInCell(int3 targetCell,
	t@@ -257,7 +257,7 @@ __device__ void findContactsInCell(int3 targetCell,
	for (int i=0; i < devC_nc; ++i) {
	__syncthreads();
	cidx = dev_contacts[(unsigned int)(idx_a_orig*devC_nc+i)];
	- if (cidx == devC_params.np) // Write to position of now-deleted co…
	+ if (cidx == devC_np) // Write to position of now-deleted contact
	cpos = i;
	else if (cidx == idx_b_orig) { // Write to position of same contact
	cpos = i;
	t@@ -320,7 +320,7 @@ __global__ void topology(unsigned int* dev_cellStart,
	{
	// Thread index equals index of particle A
	unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;
	- if (idx_a < devC_params.np) {
	+ if (idx_a < devC_np) {
	// Fetch particle data in global read
	Float4 x_a = dev_x_sorted[idx_a];
	Float radius_a = x_a.w;
	t@@ -356,13 +356,13 @@ __global__ void topology(unsigned int* dev_cellStart,


	// For a single particle:
	-// If shearmodel=1:
	+// If contactmodel=1:
	// Search for neighbors to particle 'idx' inside the 27 closest cells,
	// and compute the resulting force and torque on it.
	-// If shearmodel=2:
	+// If contactmodel=2:
	// Process contacts saved in dev_contacts by topology(), and compute
	// the resulting force and torque on it.
	-// For all shearmodels:
	+// For all contactmodels:
	// Collide with top- and bottom walls, save resulting force on upper wall.
	// Kernel is executed on device, and is callable from host only.
	// Function is called from mainGPU loop.
	t@@ -371,15 +371,19 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	unsigned int* dev_cellEnd,
	Float4* dev_x,
	Float4* dev_x_sorted,
	- Float4* dev_vel_sorted, Float4* dev_angvel_sorted,
	- Float4* dev_vel, Float4* dev_angvel,
	- Float4* dev_force, Float4* dev_torque,
	+ Float4* dev_vel_sorted,
	+ Float4* dev_angvel_sorted,
	+ Float4* dev_vel,
	+ Float4* dev_angvel,
	+ Float4* dev_force,
	+ Float4* dev_torque,
	Float* dev_es_dot,
	Float* dev_ev_dot,
	Float* dev_es,
	Float* dev_ev,
	Float* dev_p,
	- Float4* dev_w_nx, Float4* dev_w_mvfd,
	+ Float4* dev_w_nx,
	+ Float4* dev_w_mvfd,
	Float* dev_w_force, //uint4* dev_bonds_sorted,
	unsigned int* dev_contacts,
	Float4* dev_distmod,
	t@@ -388,7 +392,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	// Thread index equals index of particle A
	unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x;

	- if (idx_a < devC_params.np) {
	+ if (idx_a < devC_np) {

	// Fetch particle data in global read
	unsigned int idx_a_orig = dev_gridParticleIndex[idx_a];
	t@@ -424,7 +428,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	Float3 T = MAKE_FLOAT3(0.0f, 0.0f, 0.0f);

	// Apply linear elastic, frictional contact model to registered contacts
	- if (devC_params.shearmodel == 2 \|\| devC_params.shearmodel == 3) {
	+ if (devC_params.contactmodel == 2 \|\| devC_params.contactmodel == 3) {
	unsigned int idx_b_orig, mempos;
	Float delta_n, x_ab_length, radius_b;
	Float3 x_ab;
	t@@ -452,11 +456,11 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	delta_n = x_ab_length - (radius_a + radius_b);


	- if (idx_b_orig != (unsigned int)devC_params.np) {
	+ if (idx_b_orig != (unsigned int)devC_np) {

	// Process collision if the particles are overlapping
	if (delta_n < 0.0f) {
	- if (devC_params.shearmodel == 2) {
	+ if (devC_params.contactmodel == 2) {
	contactLinear(&F, &T, &es_dot, &ev_dot, &p,
	idx_a_orig,
	idx_b_orig,
	t@@ -468,7 +472,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	x_ab, x_ab_length,
	delta_n, dev_delta_t,
	mempos);
	- } else if (devC_params.shearmodel == 3) {
	+ } else if (devC_params.contactmodel == 3) {
	contactHertz(&F, &T, &es_dot, &ev_dot, &p,
	idx_a_orig,
	idx_b_orig,
	t@@ -484,7 +488,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	} else {
	__syncthreads();
	// Remove this contact (there is no particle with index=np)
	- dev_contacts[mempos] = devC_params.np;
	+ dev_contacts[mempos] = devC_np;
	// Zero sum of shear displacement in this position
	dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	}
	t@@ -498,8 +502,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd…


	// Find contacts and process collisions immidiately for
	- // shearmodel 1 (visco-frictional).
	- } else if (devC_params.shearmodel == 1) {
	+ // contactmodel 1 (visco-frictional).
	+ } else if (devC_params.contactmodel == 1) {

	int3 gridPos;
	int3 targetPos;
	t@@ -553,7 +557,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	}


	- if (devC_params.periodic == 0) {
	+ if (devC_grid.periodic == 0) {

	// Left wall
	delta_w = x_a.x - radius_a - origo.x;
	t@@ -591,7 +595,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	w_n, delta_w, 0.0f);
	}

	- } else if (devC_params.periodic == 2) {
	+ } else if (devC_grid.periodic == 2) {

	// Front wall
	delta_w = x_a.y - radius_a - origo.y;
	t@@ -622,8 +626,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	dev_torque[orig_idx] = MAKE_FLOAT4(T.x, T.y, T.z, 0.0f);
	dev_es_dot[orig_idx] = es_dot;
	dev_ev_dot[orig_idx] = ev_dot;
	- dev_es[orig_idx] += es_dot * devC_params.dt;
	- dev_ev[orig_idx] += ev_dot * devC_params.dt;
	+ dev_es[orig_idx] += es_dot * devC_dt;
	+ dev_ev[orig_idx] += ev_dot * devC_dt;
	dev_p[orig_idx] = p;
	dev_w_force[orig_idx] = w_force;
	}
	diff --git a/src/datatypes.cuh b/src/datatypes.cuh
	t@@ -1,18 +0,0 @@
	-// datatypes.cuh -- Device structure templates and function prototypes
	-
	-// Avoiding multiple inclusions of header file
	-#ifndef DATATYPES_CUH_
	-#define DATATYPES_CUH_
	-
	-#include "datatypes.h"
	-#include "vector_functions.h"
	-
	-unsigned int iDivUp(unsigned int a, unsigned int b);
	-void checkForCudaErrors(const char* checkpoint_description);
	-void checkForCudaErrors(const char* checkpoint_description, const unsigned int…
	-
	-// Device constant memory structures
	-__constant__ Params devC_params;
	-__constant__ Grid devC_grid;
	-
	-#endif
	diff --git a/src/device.cu b/src/device.cu
	t@@ -97,8 +97,7 @@ __global__ void checkConstantValues(int* dev_equal,

	// Compare values between global- and constant
	// memory structures
	- if (dev_grid->nd != devC_grid.nd \|\|
	- dev_grid->origo[0] != devC_grid.origo[0] \|\|
	+ if (dev_grid->origo[0] != devC_grid.origo[0] \|\|
	dev_grid->origo[1] != devC_grid.origo[1] \|\|
	dev_grid->origo[2] != devC_grid.origo[2] \|\|
	dev_grid->L[0] != devC_grid.L[0] \|\|
	t@@ -106,35 +105,28 @@ __global__ void checkConstantValues(int* dev_equal,
	dev_grid->L[2] != devC_grid.L[2] \|\|
	dev_grid->num[0] != devC_grid.num[0] \|\|
	dev_grid->num[1] != devC_grid.num[1] \|\|
	- dev_grid->num[2] != devC_grid.num[2])
	+ dev_grid->num[2] != devC_grid.num[2] \|\|
	+ dev_grid->periodic != devC_grid.periodic)
	*dev_equal = 1; // Not ok

	- else if (dev_params->global != devC_params.global \|\|
	- dev_params->g[0] != devC_params.g[0] \|\|
	+
	+ else if (dev_params->g[0] != devC_params.g[0] \|\|
	dev_params->g[1] != devC_params.g[1] \|\|
	dev_params->g[2] != devC_params.g[2] \|\|
	- dev_params->dt != devC_params.dt \|\|
	- dev_params->np != devC_params.np \|\|
	- dev_params->nw != devC_params.nw \|\|
	- dev_params->wmode[0] != devC_params.wmode[0] \|\|
	dev_params->k_n != devC_params.k_n \|\|
	dev_params->k_t != devC_params.k_t \|\|
	dev_params->k_r != devC_params.k_r \|\|
	dev_params->gamma_n != devC_params.gamma_n \|\|
	dev_params->gamma_t != devC_params.gamma_t \|\|
	dev_params->gamma_r != devC_params.gamma_r \|\|
	- dev_params->gamma_wn != devC_params.gamma_wn \|\|
	- dev_params->gamma_wt != devC_params.gamma_wt \|\|
	- dev_params->gamma_wr != devC_params.gamma_wr \|\|
	dev_params->mu_s != devC_params.mu_s \|\|
	dev_params->mu_d != devC_params.mu_d \|\|
	dev_params->mu_r != devC_params.mu_r \|\|
	dev_params->rho != devC_params.rho \|\|
	+ dev_params->contactmodel != devC_params.contactmodel \|\|
	dev_params->kappa != devC_params.kappa \|\|
	dev_params->db != devC_params.db \|\|
	- dev_params->V_b != devC_params.V_b \|\|
	- dev_params->periodic != devC_params.periodic \|\|
	- dev_params->shearmodel != devC_params.shearmodel)
	+ dev_params->V_b != devC_params.V_b)
	*dev_equal = 2; // Not ok

	}
	t@@ -155,8 +147,8 @@ __host__ void DEM::checkConstantMemory()
	cudaMalloc((void**)&dev_params, sizeof(Params));

	// Copy structure data from host to global device memory
	- cudaMemcpy(dev_grid, grid, sizeof(Grid), cudaMemcpyHostToDevice);
	- cudaMemcpy(dev_params, params, sizeof(Params), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_grid, &grid, sizeof(Grid), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_params, &params, sizeof(Params), cudaMemcpyHostToDevice);

	// Compare values between global and constant memory
	// structures on the device.
	t@@ -197,17 +189,18 @@ __host__ void DEM::transferToConstantDeviceMemory()

	cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd));
	cudaMemcpyToSymbol("devC_np", &np, sizeof(np));
	+ cudaMemcpyToSymbol("devC_nw", &walls.nw, sizeof(unsigned int));
	cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int));
	cudaMemcpyToSymbol("devC_dt", &time.dt, sizeof(Float));
	- cudaMemcpyToSymbol(devC_grid, grid, sizeof(Grid));
	- cudaMemcpyToSymbol(devC_params, params, sizeof(Params));
	+ cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid));
	+ cudaMemcpyToSymbol(devC_params, &params, sizeof(Params));

	checkForCudaErrors("After transferring to device constant memory");

	if (verbose == 1)
	cout << "Done\n";

	- //checkConstantMemory();
	+ checkConstantMemory();
	}


	t@@ -223,35 +216,35 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
	std::cout << " Allocating device memory: ";

	// Particle arrays
	- cudaMalloc((void**)&dev_k.x, memSizeF4);
	- cudaMalloc((void**)&dev_sort.x_sorted, memSizeF4);
	- cudaMalloc((void**)&dev_k.vel, memSizeF4);
	- cudaMalloc((void**)&dev_sort.vel_sorted, memSizeF4);
	- cudaMalloc((void**)&dev_k.angvel, memSizeF4);
	- cudaMalloc((void**)&dev_sort.angvel_sorted, memSizeF4);
	- cudaMalloc((void**)&dev_k.acc, memSizeF4);
	+ cudaMalloc((void**)&dev_k->x, memSizeF4);
	+ cudaMalloc((void**)&dev_sort->x_sorted, memSizeF4);
	+ cudaMalloc((void**)&dev_k->vel, memSizeF4);
	+ cudaMalloc((void**)&dev_sort->vel_sorted, memSizeF4);
	+ cudaMalloc((void**)&dev_k->angvel, memSizeF4);
	+ cudaMalloc((void**)&dev_sort->angvel_sorted, memSizeF4);
	+ cudaMalloc((void**)&dev_k->acc, memSizeF4);
	k.acc = new Float4[np];
	- cudaMalloc((void**)&dev_k.angacc, memSizeF4);
	+ cudaMalloc((void**)&dev_k->angacc, memSizeF4);
	k.angacc = new Float4[np];
	- cudaMalloc((void**)&dev_k.force, memSizeF4);
	- cudaMalloc((void**)&dev_k.torque, memSizeF4);
	- cudaMalloc((void**)&dev_k.angpos, memSizeF4);
	- cudaMalloc((void**)&dev_e.es_dot, memSizeF);
	- cudaMalloc((void**)&dev_e.ev_dot, memSizeF);
	- cudaMalloc((void**)&dev_e.es, memSizeF);
	- cudaMalloc((void**)&dev_e.ev, memSizeF);
	- cudaMalloc((void**)&dev_e.p, memSizeF);
	+ cudaMalloc((void**)&dev_k->force, memSizeF4);
	+ cudaMalloc((void**)&dev_k->torque, memSizeF4);
	+ cudaMalloc((void**)&dev_k->angpos, memSizeF4);
	+ cudaMalloc((void**)&dev_e->es_dot, memSizeF);
	+ cudaMalloc((void**)&dev_e->ev_dot, memSizeF);
	+ cudaMalloc((void**)&dev_e->es, memSizeF);
	+ cudaMalloc((void**)&dev_e->ev, memSizeF);
	+ cudaMalloc((void**)&dev_e->p, memSizeF);

	// Cell-related arrays
	- cudaMalloc((void*)&dev_sort.gridParticleCellID, sizeof(unsigned int)np);
	- cudaMalloc((void*)&dev_sort.gridParticleIndex, sizeof(unsigned int)np);
	- cudaMalloc((void*)&dev_sort.cellStart, sizeof(unsigned int)grid.num[0]*gri…
	- cudaMalloc((void*)&dev_sort.cellEnd, sizeof(unsigned int)grid.num[0]*grid.…
	+ cudaMalloc((void*)&dev_sort->gridParticleCellID, sizeof(unsigned int)np);
	+ cudaMalloc((void*)&dev_sort->gridParticleIndex, sizeof(unsigned int)np);
	+ cudaMalloc((void*)&dev_sort->cellStart, sizeof(unsigned int)grid.num[0]*gr…
	+ cudaMalloc((void*)&dev_sort->cellEnd, sizeof(unsigned int)grid.num[0]*grid…

	// Particle contact bookkeeping arrays
	- cudaMalloc((void*)&dev_k.contacts, sizeof(unsigned int)np*NC); // Max NC c…
	- cudaMalloc((void*)&dev_k.distmod, sizeof(Float4)np*NC);
	- cudaMalloc((void*)&dev_k.delta_t, sizeof(Float4)np*NC);
	+ cudaMalloc((void*)&dev_k->contacts, sizeof(unsigned int)np*NC); // Max NC …
	+ cudaMalloc((void*)&dev_k->distmod, sizeof(Float4)np*NC);
	+ cudaMalloc((void*)&dev_k->delta_t, sizeof(Float4)np*NC);

	// Host contact bookkeeping arrays
	k.contacts = new unsigned int[np*NC];
	t@@ -262,9 +255,9 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
	k.delta_t = new Float4[np*NC];

	// Wall arrays
	- cudaMalloc((void*)&dev_walls.nx, sizeof(Float4)walls.nw);
	- cudaMalloc((void*)&dev_walls.mvfd, sizeof(Float4)walls.nw);
	- cudaMalloc((void*)&dev_walls.force, sizeof(Float)walls.nw*np);
	+ cudaMalloc((void*)&dev_walls->nx, sizeof(Float4)walls.nw);
	+ cudaMalloc((void*)&dev_walls->mvfd, sizeof(Float4)walls.nw);
	+ cudaMalloc((void*)&dev_walls->force, sizeof(Float)walls.nw*np);
	// dev_w_force_partial allocated later

	checkForCudaErrors("End of allocateGlobalDeviceMemory");
	t@@ -306,7 +299,7 @@ __host__ void DEM::freeGlobalDeviceMemory()
	cudaFree(dev_walls->nx);
	cudaFree(dev_walls->mvfd);
	cudaFree(dev_walls->force);
	- cudaFree(dev_w_force_partial);
	+ //cudaFree(dev_w_force_partial);

	if (verbose == 1)
	printf("Done\n");
	t@@ -323,19 +316,19 @@ __host__ void DEM::transferToGlobalDeviceMemory()
	cudaMemcpy(dev_e, e, sizeof(Energies), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_time, time, sizeof(Time), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_walls, walls, sizeof(Walls), cudaMemcpyHostToDevice);*/
	- cudaMemcpy(dev_k, k, sizeof(k), cudaMemcpyHostToDevice);
	- cudaMemcpy(dev_e, e, sizeof(e), cudaMemcpyHostToDevice);
	- cudaMemcpy(dev_time, time, sizeof(time), cudaMemcpyHostToDevice);
	- cudaMemcpy(dev_walls, walls, sizeof(walls), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_k, &k, sizeof(k), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_e, &e, sizeof(e), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_time, &time, sizeof(time), cudaMemcpyHostToDevice);
	+ cudaMemcpy(dev_walls, &walls, sizeof(walls), cudaMemcpyHostToDevice);

	checkForCudaErrors("End of transferToGlobalDeviceMemory");
	if (verbose == 1)
	std::cout << "Done\n";
	}

	-__host__ void DEM::transferToGlobalDeviceMemory()
	+__host__ void DEM::transferFromGlobalDeviceMemory()
	{
	- cout << " Transfering data to the device: ";
	+ std::cout << " Transfering data to the device: ";

	// Copy structure data from host to global device memory
	cudaMemcpy(&k, dev_k, sizeof(k), cudaMemcpyDeviceToHost);
	t@@ -409,7 +402,7 @@ __host__ void DEM::startTime()
	long iter = 0;

	// Create first status.dat
	- sprintf(file,"output/%s.status.dat", inputbin);
	+ sprintf(file,"output/%s.status.dat", sid);
	fp = fopen(file, "w");
	fprintf(fp,"%2.4e %2.4e %d\n",
	time.current,
	t@@ -418,7 +411,7 @@ __host__ void DEM::startTime()
	fclose(fp);

	// Write first output data file: output0.bin, thus testing writing of bin fi…
	- sprintf(file,"output/%s.output0.bin", inputbin);
	+ sprintf(file,"output/%s.output0.bin", sid);
	writebin(file);

	if (verbose == 1) {
	t@@ -426,7 +419,7 @@ __host__ void DEM::startTime()
	<< " IMPORTANT: Do not close this terminal, doing so will \n"
	<< " terminate this SPHERE process. Follow the \n"
	<< " progress by executing:\n"
	- << " $ ./sphere_status " << inputbin << "\n\n";
	+ << " $ ./sphere_status " << sid << "\n\n";
	}


	t@@ -475,9 +468,9 @@ __host__ void DEM::startTime()
	// in the fine, uniform and homogenous grid.
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort.gridParticleCellID,
	- dev_sort.gridParticleIndex,
	- dev_k.x);
	+ calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort->gridParticleCellID,
	+ dev_sort->gridParticleIndex,
	+ dev_k->x);

	// Synchronization point
	cudaThreadSynchronize();
	t@@ -489,9 +482,9 @@ __host__ void DEM::startTime()
	// Sort particle (key, particle ID) pairs by hash key with Thrust radix so…
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort.gridParticleCellID),
	- thrust::device_ptr<uint>(dev_sort.gridParticleCellID +…
	- thrust::device_ptr<uint>(dev_sort.gridParticleIndex));
	+ thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort->gridParticleCellID),
	+ thrust::device_ptr<uint>(dev_sort->gridParticleCellID …
	+ thrust::device_ptr<uint>(dev_sort->gridParticleIndex));
	cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implic…
	if (PROFILING == 1)
	stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_thrustsort);
	t@@ -501,7 +494,7 @@ __host__ void DEM::startTime()
	// Zero cell array values by setting cellStart to its highest possible val…
	// specified with pointer value 0xffffffff, which for a 32 bit unsigned int
	// is 4294967295.
	- cudaMemset(dev_sort.cellStart, 0xffffffff,
	+ cudaMemset(dev_sort->cellStart, 0xffffffff,
	grid.num[0]grid.num[1]grid.num[2]*sizeof(unsigned int));
	cudaThreadSynchronize();
	checkForCudaErrors("Post cudaMemset");
	t@@ -510,15 +503,15 @@ __host__ void DEM::startTime()
	// coherent memory access. Save ordered configurations in new arrays (*_so…
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort.cellStart,
	- dev_sort.cellEnd,
	- dev_sort.gridParticleCellID…
	- dev_sort.gridParticleIndex,
	- dev_k.x, dev_k.vel,
	- dev_k.angvel,
	- dev_sort.x_sorted,
	- dev_sort.vel_sorted,
	- dev_sort.angvel_sorted);
	+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort->cellStart,
	+ dev_sort->cellEnd,
	+ dev_sort->gridParticleCellI…
	+ dev_sort->gridParticleIndex,
	+ dev_k->x, dev_k->vel,
	+ dev_k->angvel,
	+ dev_sort->x_sorted,
	+ dev_sort->vel_sorted,
	+ dev_sort->angvel_sorted);

	// Synchronization point
	cudaThreadSynchronize();
	t@@ -533,12 +526,12 @@ __host__ void DEM::startTime()
	// For each particle: Search contacts in neighbor cells
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- topology<<<dimGrid, dimBlock>>>(dev_sort.cellStart,
	- dev_sort.cellEnd,
	- dev_sort.gridParticleIndex,
	- dev_sort.x_sorted,
	- dev_k.contacts,
	- dev_k.distmod);
	+ topology<<<dimGrid, dimBlock>>>(dev_sort->cellStart,
	+ dev_sort->cellEnd,
	+ dev_sort->gridParticleIndex,
	+ dev_sort->x_sorted,
	+ dev_k->contacts,
	+ dev_k->distmod);


	// Synchronization point
	t@@ -552,26 +545,28 @@ __host__ void DEM::startTime()
	// For each particle: Process collisions and compute resulting forces.
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- interact<<<dimGrid, dimBlock>>>(dev_sort.gridParticleIndex,
	- dev_sort.cellStart,
	- dev_sort.cellEnd,
	- dev_k.x,
	- dev_sort.x_sorted,
	- dev_sort.vel_sorted,
	- dev_sort.angvel_sorted,
	- dev_k.vel,
	- dev_k.angvel,
	- dev_k.force,
	- dev_k.torque,
	- dev_e.es_dot,
	- dev_e.ev_dot,
	- dev_e.es, dev_e.ev, dev_e.p,
	- dev_walls.nx,
	- dev_walls.mvfd,
	- dev_walls.force,
	- dev_k.contacts,
	- dev_k.distmod,
	- dev_k.delta_t);
	+ interact<<<dimGrid, dimBlock>>>(dev_sort->gridParticleIndex,
	+ dev_sort->cellStart,
	+ dev_sort->cellEnd,
	+ dev_k->x,
	+ dev_sort->x_sorted,
	+ dev_sort->vel_sorted,
	+ dev_sort->angvel_sorted,
	+ dev_k->vel,
	+ dev_k->angvel,
	+ dev_k->force,
	+ dev_k->torque,
	+ dev_e->es_dot,
	+ dev_e->ev_dot,
	+ dev_e->es,
	+ dev_e->ev,
	+ dev_e->p,
	+ dev_walls->nx,
	+ dev_walls->mvfd,
	+ dev_walls->force,
	+ dev_k->contacts,
	+ dev_k->distmod,
	+ dev_k->delta_t);


	// Synchronization point
	t@@ -583,16 +578,16 @@ __host__ void DEM::startTime()
	// Update particle kinematics
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- integrate<<<dimGrid, dimBlock>>>(dev_sort.x_sorted,
	- dev_sort.vel_sorted,
	- dev_sort.angvel_sorted,
	- dev_k.x,
	- dev_k.vel,
	- dev_k.angvel,
	- dev_k.force,
	- dev_k.torque,
	- dev_k.angpos,
	- dev_sort.gridParticleIndex);
	+ integrate<<<dimGrid, dimBlock>>>(dev_sort->x_sorted,
	+ dev_sort->vel_sorted,
	+ dev_sort->angvel_sorted,
	+ dev_k->x,
	+ dev_k->vel,
	+ dev_k->angvel,
	+ dev_k->force,
	+ dev_k->torque,
	+ dev_k->angpos,
	+ dev_sort->gridParticleIndex);

	cudaThreadSynchronize();
	if (PROFILING == 1)
	t@@ -601,7 +596,7 @@ __host__ void DEM::startTime()
	// Summation of forces on wall
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- summation<<<dimGrid, dimBlock>>>(dev_walls.force, dev_w_force_partial);
	+ summation<<<dimGrid, dimBlock>>>(dev_walls->force, dev_w_force_partial);

	// Synchronization point
	cudaThreadSynchronize();
	t@@ -612,8 +607,7 @@ __host__ void DEM::startTime()
	// Update wall kinematics
	if (PROFILING == 1)
	startTimer(&kernel_tic);
	- integrateWalls<<< 1, walls.nw>>>(dev_walls.nx,
	- dev_walls.mvfd,
	+ integrateWalls<<< 1, walls.nw>>>(dev_walls,
	dev_w_force_partial,
	blocksPerGrid);

	t@@ -651,7 +645,7 @@ __host__ void DEM::startTime()

	// Write binary output file
	time.step_count += 1;
	- sprintf(file,"output/%s.output%d.bin", inputbin, time.step_count);
	+ sprintf(file,"output/%s.output%d.bin", sid, time.step_count);
	writebin(file);


	t@@ -686,7 +680,7 @@ __host__ void DEM::startTime()
	}

	// Update status.dat at the interval of filetime
	- sprintf(file,"output/%s.status.dat", inputbin);
	+ sprintf(file,"output/%s.status.dat", sid);
	fp = fopen(file, "w");
	fprintf(fp,"%2.4e %2.4e %d\n",
	time.current,
	diff --git a/src/integration.cuh b/src/integration.cuh
	t@@ -15,7 +15,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_…
	{
	unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id

	- if (idx < devC_params.np) { // Condition prevents block size error
	+ if (idx < devC_np) { // Condition prevents block size error

	// Copy data to temporary arrays to avoid any potential read-after-write,
	// write-after-read, or write-after-write hazards.
	t@@ -35,7 +35,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_…
	Float radius = x.w;

	// Coherent read from constant memory to registers
	- Float dt = devC_params.dt;
	+ Float dt = devC_dt;
	Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_gr…
	Float3 L = MAKE_FLOAT3(devC_grid.L[0], devC_grid.L[1], devC_grid.L[2]);
	Float rho = devC_params.rho;
	t@@ -106,7 +106,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
	x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;

	// Move particle across boundary if it is periodic
	- if (devC_params.periodic == 1) {
	+ if (devC_grid.periodic == 1) {
	if (x.x < origo.x)
	x.x += L.x;
	if (x.x > L.x)
	t@@ -115,7 +115,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
	x.y += L.y;
	if (x.y > L.y)
	x.y -= L.y;
	- } else if (devC_params.periodic == 2) {
	+ } else if (devC_grid.periodic == 2) {
	if (x.x < origo.x)
	x.x += L.x;
	if (x.x > L.x)
	t@@ -142,7 +142,7 @@ __global__ void summation(Float* in, Float *out)
	unsigned int cacheIdx = threadIdx.x;

	Float temp = 0.0f;
	- while (idx < devC_params.np) {
	+ while (idx < devC_np) {
	temp += in[idx];
	idx += blockDim.x * gridDim.x;
	}
	t@@ -168,20 +168,19 @@ __global__ void summation(Float* in, Float *out)
	}

	// Update wall positions
	-__global__ void integrateWalls(Float4* dev_w_nx,
	- Float4* dev_w_mvfd,
	+__global__ void integrateWalls(Walls* dev_walls,
	Float* dev_w_force_partial,
	unsigned int blocksPerGrid)
	{
	unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id

	- if (idx < devC_params.nw) { // Condition prevents block size error
	+ if (idx < devC_nw) { // Condition prevents block size error

	// Copy data to temporary arrays to avoid any potential read-after-write,
	// write-after-read, or write-after-write hazards.
	- Float4 w_nx = dev_w_nx[idx];
	- Float4 w_mvfd = dev_w_mvfd[idx];
	- int wmode = devC_params.wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
	+ Float4 w_nx = dev_walls->nx[idx];
	+ Float4 w_mvfd = dev_walls->mvfd[idx];
	+ int wmode = dev_walls->wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
	Float acc;

	if (wmode == 0) // Wall fixed: do nothing
	t@@ -193,7 +192,7 @@ __global__ void integrateWalls(Float4* dev_w_nx,
	w_mvfd.z += dev_w_force_partial[i];
	}

	- Float dt = devC_params.dt;
	+ Float dt = devC_dt;

	// Normal load = Deviatoric stress times wall surface area,
	// directed downwards.
	t@@ -216,8 +215,8 @@ __global__ void integrateWalls(Float4* dev_w_nx,
	w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;

	// Store data in global memory
	- dev_w_nx[idx] = w_nx;
	- dev_w_mvfd[idx] = w_mvfd;
	+ dev_walls->nx[idx] = w_nx;
	+ dev_walls->mvfd[idx] = w_mvfd;
	}
	} // End of integrateWalls(...)

	diff --git a/src/sorting.cuh b/src/sorting.cuh
	t@@ -25,7 +25,7 @@ __global__ void calcParticleCellID(unsigned int* dev_gridPar…
	//unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
	unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;

	- if (idx < devC_params.np) { // Condition prevents block size error
	+ if (idx < devC_np) { // Condition prevents block size error

	//volatile Float4 x = dev_x[idx]; // Ensure coalesced read
	Float4 x = dev_x[idx]; // Ensure coalesced read
	t@@ -69,7 +69,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
	unsigned int cellID;

	// Read cellID data and store it in shared memory (shared_data)
	- if (idx < devC_params.np) { // Condition prevents block size error
	+ if (idx < devC_np) { // Condition prevents block size error
	cellID = dev_gridParticleCellID[idx];

	// Load hash data into shared memory, allowing access to neighbor particle…
	t@@ -86,7 +86,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
	__syncthreads();

	// Find lowest and highest particle index in each cell
	- if (idx < devC_params.np) { // Condition prevents block size error
	+ if (idx < devC_np) { // Condition prevents block size error
	// If this particle has a different cell index to the previous particle, i…
	// particle in the cell -> Store the index of this particle in the cell.
	// The previous particle must be the last particle in the previous cell.
	t@@ -97,7 +97,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart,
	}

	// Check wether the thread is the last one
	- if (idx == (devC_params.np - 1))
	+ if (idx == (devC_np - 1))
	dev_cellEnd[cellID] = idx + 1;


	diff --git a/src/sphere.cpp b/src/sphere.cpp
	t@@ -1,6 +1,7 @@
	#include <iostream>
	#include <cstdio>
	#include <cstdlib>
	+#include <cstring>

	#include "typedefs.h"
	#include "datatypes.h"
	t@@ -9,7 +10,7 @@

	// Constructor: Reads an input binary, and optionally checks
	// and reports the values
	-DEM::DEM(const char *inputbin,
	+DEM::DEM(char *inputbin,
	const int verbosity,
	const int checkVals)
	: verbose(verbosity)
	t@@ -17,6 +18,19 @@ DEM::DEM(const char *inputbin,
	using std::cout;
	using std::cerr;

	+ // Get basename from input file
	+ char *name = inputbin;
	+ char *sid = name;
	+ while (*name) {
	+ if (*name++ == '/') {
	+ sid = name;
	+ }
	+ } // sid is now everything after the last '/' char
	+ char *lastdot = strrchr(sid, '.');
	+ if (lastdot != NULL)
	+ *lastdot = '\0'; // Put in string-end char. at last dot
	+
	+
	// Initialize CUDA
	initializeGPU();

	diff --git a/src/sphere.h b/src/sphere.h
	t@@ -10,6 +10,9 @@ class DEM {
	// Values and functions only accessible from the class internally
	private:

	+ // Simulation ID
	+ char *sid;
	+
	// Output level
	int verbose;

	t@@ -70,7 +73,7 @@ class DEM {
	public:

	// Constructor, some parameters with default values
	- DEM(const char *inputbin,
	+ DEM(char *inputbin,
	const int verbosity = 1,
	const int checkVals = 1);