GopherProxy

	tElastc shearmodel fixed - sphere - GPU-based 3D discrete element method algori…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 29f60d6d74f37ea5f08849e81702f314cafc6166
	parent 65ad78a8f1f926d405fc84504351359b5b0f5a81
	Author: Anders Damsgaard Christensen <[email protected]>
	Date: Thu, 23 Feb 2012 12:18:04 +0100

	Elastc shearmodel fixed

	Diffstat:
	M src/device.cu \| 72 ++++++++++++++++-------------…

	1 file changed, 37 insertions(+), 35 deletions(-)
	---
	diff --git a/src/device.cu b/src/device.cu
	t@@ -11,7 +11,7 @@
	#include "datatypes.cuh"
	#include "constants.cuh"

	-#include "cuPrintf.cu"
	+//#include "cuPrintf.cu"


	// Returns the cellID containing the particle, based cubic grid
	t@@ -763,8 +763,7 @@ __device__ void findContactsInCell(int3 targetCell,
	unsigned int* dev_cellEnd,
	unsigned int* dev_gridParticleIndex,
	int* nc,
	- unsigned int* dev_contacts,
	- float4* dev_x_ab_r_b)
	+ unsigned int* dev_contacts)
	{
	// Variable containing modifier for interparticle
	// vector, if it crosses a periodic boundary
	t@@ -890,15 +889,14 @@ __device__ void findContactsInCell(int3 targetCell,
	}

	__syncthreads();
	- //cuPrintf("\nOverlap: idx_a = %u (orig=%u), idx_b = %u (orig=%u), c…
	- // idx_a, idx_a_orig, idx_b, idx_b_orig, cpos);

	// Write the particle index to the relevant position,
	// no matter if it already is there or not (concurrency of write)
	dev_contacts[(unsigned int)(idx_a_orig*devC_nc+cpos)] = idx_b_orig;

	+
	// Write the interparticle vector and radius of particle B
	- dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4(…
	+ //dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_float4…

	// Increment contact counter
	++*nc;
	t@@ -929,8 +927,7 @@ __global__ void topology(unsigned int* dev_cellStart,
	unsigned int* dev_cellEnd, // Input: Particles in…
	unsigned int* dev_gridParticleIndex, // Input: Unsort…
	float4* dev_x_sorted, float* dev_radius_sorted,
	- unsigned int* dev_contacts,
	- float4* dev_x_ab_r_b)
	+ unsigned int* dev_contacts)
	{
	// Thread index equals index of particle A
	unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x;
	t@@ -961,7 +958,7 @@ __global__ void topology(unsigned int* dev_cellStart,
	dev_x_sorted, dev_radius_sorted,
	dev_cellStart, dev_cellEnd,
	dev_gridParticleIndex,
	- &nc, dev_contacts, dev_x_ab_r_b);
	+ &nc, dev_contacts);
	}
	}
	}
	t@@ -977,6 +974,7 @@ __global__ void topology(unsigned int* dev_cellStart,
	__global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsort…
	unsigned int* dev_cellStart,
	unsigned int* dev_cellEnd,
	+ float4* dev_x, float* dev_radius,
	float4* dev_x_sorted, float* dev_radius_sorted,
	float4* dev_vel_sorted, float4* dev_angvel_sorted,
	float4* dev_vel, float4* dev_angvel,
	t@@ -985,7 +983,6 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
	float4* dev_w_nx, float4* dev_w_mvfd,
	float* dev_w_force, //uint4* dev_bonds_sorted,
	unsigned int* dev_contacts,
	- float4* dev_x_ab_r_b,
	float4* dev_delta_t)
	{
	// Thread index equals index of particle A
	t@@ -1029,7 +1026,8 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	if (devC_shearmodel == 2) {
	unsigned int idx_b_orig, mempos;
	float delta_n, x_ab_length;
	- float4 x_ab_r_b;
	+ float4 x_b;
	+ float radius_b;
	float3 x_ab;
	float4 vel_a = dev_vel_sorted[idx_a];
	float4 angvel4_a = dev_angvel_sorted[idx_a];
	t@@ -1037,19 +1035,23 @@ __global__ void interact(unsigned int* dev_gridParticl…

	// Loop over all possible contacts, and remove contacts
	// that no longer are valid (delta_n > 0.0)
	- for (int i = 0; i<devC_nc; ++i) {
	- mempos = idx_a_orig * devC_nc + i;
	+ for (int i = 0; i<(int)devC_nc; ++i) {
	+ mempos = (unsigned int)(idx_a_orig * devC_nc + i);
	+ __syncthreads();
	idx_b_orig = dev_contacts[mempos];
	- x_ab_r_b = dev_x_ab_r_b[mempos];
	- x_ab = make_float3(x_ab_r_b.x,
	- x_ab_r_b.y,
	- x_ab_r_b.z);
	+ x_b = dev_x[idx_b_orig];
	+ radius_b = dev_radius[idx_b_orig];
	+ x_ab = make_float3(x_a.x - x_b.x,
	+ x_a.y - x_b.y,
	+ x_a.z - x_b.z);
	x_ab_length = length(x_ab);
	- delta_n = x_ab_length - (radius_a + x_ab_r_b.w);
	+ delta_n = x_ab_length - (radius_a + radius_b);

	- // Process collision if the particles are overlapping
	- if (delta_n < 0.0f) {
	- if (idx_b_orig != devC_np) {
	+
	+ if (idx_b_orig != (unsigned int)devC_np) {
	+
	+ // Process collision if the particles are overlapping
	+ if (delta_n < 0.0f) {
	//cuPrintf("\nProcessing contact, idx_a_orig = %u, idx_b_orig = %u…
	// idx_a_orig, idx_b_orig, i, delta_n);
	contactLinear(&N, &T, &es_dot, &p,
	t@@ -1059,18 +1061,23 @@ __global__ void interact(unsigned int* dev_gridParticl…
	dev_vel,
	angvel_a,
	dev_angvel,
	- radius_a, x_ab_r_b.w,
	+ radius_a, radius_b,
	x_ab, x_ab_length,
	delta_n, dev_delta_t,
	mempos);
	+ } else {
	+ __syncthreads();
	+ // Remove this contact (there is no particle with index=np)
	+ dev_contacts[mempos] = devC_np;
	+ // Zero sum of shear displacement in this position
	+ dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	}
	} else {
	- // Remove this contact (there is no particle with index=np)
	- dev_contacts[mempos] = devC_np;
	- // Zero sum of shear displacement in this position
	+ __syncthreads();
	dev_delta_t[mempos] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	}
	- }
	+ } // Contact loop end
	+
	} else if (devC_shearmodel == 1) {

	int3 gridPos;
	t@@ -1586,8 +1593,6 @@ __host__ void gpuMain(float4* host_x,
	unsigned int* dev_contacts;
	// x,y,z contains the interparticle vector, corrected if contact
	// is across a periodic boundary.
	- // w contains radius of particle b.
	- float4* dev_x_ab_r_b;
	float4* dev_delta_t; // Accumulated shear distance of contact

	// Particle memory size
	t@@ -1625,7 +1630,6 @@ __host__ void gpuMain(float4* host_x,

	// Particle contact bookkeeping arrays
	cudaMalloc((void*)&dev_contacts, sizeof(unsigned int)p->np*NC); // Max NC …
	- cudaMalloc((void*)&dev_x_ab_r_b, sizeof(float4)p->np*NC);
	cudaMalloc((void*)&dev_delta_t, sizeof(float4)p->np*NC);

	// Wall arrays
	t@@ -1668,7 +1672,6 @@ __host__ void gpuMain(float4* host_x,
	float4* zerosf4 = new float4[p->np*NC];
	for (unsigned int i=0; i<(p->np*NC); ++i)
	zerosf4[i] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
	- cudaMemcpy(dev_x_ab_r_b, zerosf4, sizeof(float4)p->npNC, cudaMemcpyHostToD…
	cudaMemcpy(dev_delta_t, zerosf4, sizeof(float4)p->npNC, cudaMemcpyHostToDe…
	delete[] zerosf4;

	t@@ -1824,8 +1827,7 @@ __host__ void gpuMain(float4* host_x,
	dev_gridParticleIndex,
	dev_x_sorted,
	dev_radius_sorted,
	- dev_contacts,
	- dev_x_ab_r_b);
	+ dev_contacts);

	// Empty cuPrintf() buffer to console
	//cudaThreadSynchronize();
	t@@ -1836,10 +1838,12 @@ __host__ void gpuMain(float4* host_x,
	checkForCudaErrors("Post topology. Possibly caused by numerical instabil…
	}

	+
	// For each particle: Process collisions and compute resulting forces.
	interact<<<dimGrid, dimBlock>>>(dev_gridParticleIndex,
	dev_cellStart,
	dev_cellEnd,
	+ dev_x, dev_radius,
	dev_x_sorted, dev_radius_sorted,
	dev_vel_sorted, dev_angvel_sorted,
	dev_vel, dev_angvel,
	t@@ -1847,7 +1851,7 @@ __host__ void gpuMain(float4* host_x,
	dev_es_dot, dev_es, dev_p,
	dev_w_nx, dev_w_mvfd, dev_w_force,
	//dev_bonds_sorted,
	- dev_contacts, dev_x_ab_r_b,
	+ dev_contacts,
	dev_delta_t);

	// Empty cuPrintf() buffer to console
	t@@ -1858,7 +1862,6 @@ __host__ void gpuMain(float4* host_x,
	cudaThreadSynchronize();
	checkForCudaErrors("Post interact - often caused if particles move outside…

	-
	// Update particle kinematics
	integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, dev_vel_sorted,
	dev_angvel_sorted, dev_radius_sorted,
	t@@ -1994,7 +1997,6 @@ __host__ void gpuMain(float4* host_x,
	//cudaFree(dev_bonds);
	//cudaFree(dev_bonds_sorted);
	cudaFree(dev_contacts);
	- cudaFree(dev_x_ab_r_b);
	cudaFree(dev_delta_t);

	// Cell-related arrays