GopherProxy

	tShearmodel=2 refinements - sphere - GPU-based 3D discrete element method algor…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 77d03f69783d2598c0920041d85d8d722c2c1868
	parent 5e1012c4c642e3a1e740547d141434bfebe3146d
	Author: Anders Damsgaard <[email protected]>
	Date: Mon, 15 Oct 2012 14:29:37 +0200

	Shearmodel=2 refinements

	Diffstat:
	M python/sphere.py \| 101 ++++++++++++++++++++++-------…
	M src/constants.cuh \| 2 +-
	M src/contactsearch.cuh \| 6 +++---
	M src/datatypes.h \| 11 ++++++++---
	M src/device.cu \| 48 +++++++++++++++++++++++++++++…
	M src/integration.cuh \| 7 +++++--
	M src/main.cpp \| 9 ++++++---

	7 files changed, 142 insertions(+), 42 deletions(-)
	---
	diff --git a/python/sphere.py b/python/sphere.py
	t@@ -43,7 +43,7 @@ class Spherebin:
	self.fixvel = numpy.zeros(self.np, dtype=numpy.float64)
	self.xsum = numpy.zeros(self.np, dtype=numpy.float64)
	self.radius = numpy.ones(self.np, dtype=numpy.float64)
	- self.rho = numpy.ones(self.np, dtype=numpy.float64)
	+ self.rho = numpy.ones(self.np, dtype=numpy.float64) * 2600.0
	self.k_n = numpy.ones(self.np, dtype=numpy.float64) * 1.16e9
	self.k_t = numpy.ones(self.np, dtype=numpy.float64) * 1.16e9
	self.k_r = numpy.zeros(self.np, dtype=numpy.float64)
	t@@ -62,18 +62,19 @@ class Spherebin:
	self.bonds = numpy.ones(self.np*4, dtype=numpy.uint32).reshape(self.np,4…

	# Constant, single-value physical parameters
	- self.globalparams = numpy.zeros(1, dtype=numpy.int32)
	- self.g = numpy.array([0.0, 0.0, -9.80], dtype=numpy.float64)
	+ self.globalparams = numpy.ones(1, dtype=numpy.int32)
	+ self.g = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64)
	self.kappa = numpy.zeros(1, dtype=numpy.float64)
	self.db = numpy.zeros(1, dtype=numpy.float64)
	self.V_b = numpy.zeros(1, dtype=numpy.float64)
	- self.shearmodel = numpy.ones(1, dtype=numpy.uint32)
	+ self.shearmodel = numpy.ones(1, dtype=numpy.uint32) * 2

	# Wall data
	self.nw = numpy.ones(1, dtype=numpy.uint32) * nw
	self.wmode = numpy.zeros(self.nw, dtype=numpy.int32)
	self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float64).reshape(s…
	- self.w_x = numpy.zeros(self.nw, dtype=numpy.float64)
	+ self.w_n[nw-1,nd-1] = -1.0
	+ self.w_x = numpy.ones(self.nw, dtype=numpy.float64)
	self.w_m = numpy.zeros(self.nw, dtype=numpy.float64)
	self.w_vel = numpy.zeros(self.nw, dtype=numpy.float64)
	self.w_force = numpy.zeros(self.nw, dtype=numpy.float64)
	t@@ -81,9 +82,9 @@ class Spherebin:

	# x- and y-boundary behavior
	self.periodic = numpy.zeros(1, dtype=numpy.uint32)
	- self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1e3
	- self.gamma_ws = numpy.ones(1, dtype=numpy.float64) * 1e3
	- self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1e3
	+ self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3
	+ self.gamma_ws = numpy.ones(1, dtype=numpy.float64) * 1.0e3
	+ self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1.0e3


	# Read binary data
	t@@ -390,7 +391,7 @@ class Spherebin:
	self.shearmodel[0] = shearmodel

	# Initialize upper wall
	- self.wmode[0] = 0 # 0: devs, 1: vel
	+ self.wmode[0] = 0 # 0: fixed, 1: devs, 2: vel
	self.w_n[0,2] = -1.0
	self.w_x[0] = self.L[2]
	self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
	t@@ -400,23 +401,34 @@ class Spherebin:
	#self.nw[0] = numpy.ones(1, dtype=numpy.uint32) * 1
	self.nw = numpy.ones(1, dtype=numpy.uint32) * 1

	- # Fit cell number according to world dimensions
	- def fitNum(self):
	- """ Create cells. cellsize is increased from 2*r_max until it fits
	- the horizontal grid.
	- Call after setting self.L, self.num and self.radius
	+
	+ # Generate grid based on particle positions
	+ def initGrid(self):
	+ """ Initialize grid suitable for the particle positions set previously.
	+ The margin parameter adjusts the distance (in no. of max. radii)
	+ from the particle boundaries.
	"""
	+
	+
	+ # Cell configuration
	cellsize_min = 2.1 * numpy.amax(self.radius)
	self.num[0] = numpy.ceil((self.L[0]-self.origo[0])/cellsize_min)
	self.num[1] = numpy.ceil((self.L[1]-self.origo[1])/cellsize_min)
	self.num[2] = numpy.ceil((self.L[2]-self.origo[2])/cellsize_min)

	+ if (self.num[0] < 4 or self.num[1] < 4 or self.num[2] < 4):
	+ print("Error: The grid must be at least 3 cells in each direction")
	+ print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], self.nu…
	+
	+ # Put upper wall at top boundary
	+ self.w_x[0] = self.L[0]
	+

	# Generate grid based on particle positions
	- def initGrid(self, g = numpy.array([0.0, 0.0, -9.80665]),
	- margin = numpy.array([2.0, 2.0, 2.0]),
	- periodic = 1,
	- shearmodel = 2):
	+ def initGridAndWorldsize(self, g = numpy.array([0.0, 0.0, -9.80665]),
	+ margin = numpy.array([2.0, 2.0, 2.0]),
	+ periodic = 1,
	+ shearmodel = 2):
	""" Initialize grid suitable for the particle positions set previously.
	The margin parameter adjusts the distance (in no. of max. radii)
	from the particle boundaries.
	t@@ -438,8 +450,13 @@ class Spherebin:
	numpy.amax(self.x[:,2] + self.radius[:])]) \
	+ margin*r_max

	- # Fit cell size to world dimensions
	- fitNum()
	+ cellsize_min = 2.1 * r_max
	+ self.num[0] = numpy.ceil((self.L[0]-self.origo[0])/cellsize_min)
	+ self.num[1] = numpy.ceil((self.L[1]-self.origo[1])/cellsize_min)
	+ self.num[2] = numpy.ceil((self.L[2]-self.origo[2])/cellsize_min)
	+
	+ if (self.num[0] < 4 or self.num[1] < 4 or self.num[2]):
	+ print("Error: The grid must be at least 3 cells in each direction")

	self.shearmodel[0] = shearmodel

	t@@ -603,7 +620,7 @@ class Spherebin:
	self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s…

	# Initialize upper wall
	- self.wmode[0] = 0
	+ self.wmode[0] = 1 # devs
	self.w_n[0,2] = -1.0
	self.w_x[0] = self.L[2]
	self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
	t@@ -633,7 +650,7 @@ class Spherebin:
	self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s…

	# Initialize upper wall
	- self.wmode[0] = 1
	+ self.wmode[0] = 2 # strain rate controlled
	self.w_n[0,2] = -1.0
	self.w_x[0] = self.L[2]
	self.w_m[0] = self.rho[0] * self.np * math.pi * (cellsize/2.0)**3
	t@@ -929,31 +946,57 @@ class Spherebin:


	def render(binary,
	- out = '~/img_out/rt-out',
	+ out = '../img_out/out',
	graphicsformat = 'jpg',
	resolution = numpy.array([800, 800]),
	workhorse = 'GPU',
	method = 'pressure',
	max_val = 4e3,
	- rt_bin = '../raytracer/rt'):
	+ rt_bin = '../raytracer/rt',
	+ verbose=True):
	""" Render target binary using the sphere raytracer.
	"""

	+ stdout = ""
	+ if (verbose == False):
	+ stdout = " > /dev/null"
	+
	# Use raytracer to render the scene into a temporary PPM file
	if workhorse == 'GPU':
	- subprocess.call(rt_bin + " GPU {0} {1} {2} {3}.ppm {4} {5}" \
	- .format(binary, resolution[0], resolution[1], out, method, max_val), s…
	+ subprocess.call(rt_bin + " GPU {0} {1} {2} {3}.ppm {4} {5}"\
	+ .format(binary, resolution[0], resolution[1], out, method, max_val) + …
	if workhorse == 'CPU':
	- subprocess.call(rt_bin + " CPU {0} {1} {2} {3}.ppm" \
	- .format(binary, resolution[0], resolution[1], out), shell=True)
	+ subprocess.call(rt_bin + " CPU {0} {1} {2} {3}.ppm"\
	+ .format(binary, resolution[0], resolution[1], out) + stdout, shell=Tru…

	# Use ImageMagick's convert command to change the graphics format
	- subprocess.call("convert {0}.ppm {0}.{1}" \
	+ subprocess.call("convert {0}.ppm {0}.{1}"\
	.format(out, graphicsformat), shell=True)

	# Delete temporary PPM file
	subprocess.call("rm {0}.ppm".format(out), shell=True)

	+def renderAll(project,
	+ rt_bin = "~/code/sphere/raytracer/rt",
	+ out_folder = "../img_out",
	+ graphics_format = "png",
	+ workhorse = "GPU",
	+ method = "pressure",
	+ max_val = 10e3,
	+ resolution = numpy.array([800, 800])):
	+
	+ lastfile = status(project)
	+
	+ for i in range(lastfile+1):
	+ # Input binary filename
	+ fn = "../output/{0}.output{1}.bin".format(project, i)
	+
	+ # Output image name (without extension)
	+ out = out_folder + "/{0}.output{1}".format(project, i)
	+
	+ # Call raytracer, also converts to format
	+ render(fn, out, graphics_format, resolution, workhorse, method, max_val, r…
	+

	def visualize(project, method = 'energy', savefig = False, outformat = 'png'):
	""" Visualize output from the target project,
	diff --git a/src/constants.cuh b/src/constants.cuh
	t@@ -9,7 +9,7 @@ __constant__ Float devC_dt; // Time step leng…
	__constant__ int devC_global; // Parameter properties, 1: g…
	__constant__ Float devC_g[ND]; // Gravitational acceleration …
	__constant__ unsigned int devC_np; // Number of particles
	-__constant__ char devC_nc; // Max. number of contacts a par…
	+__constant__ int devC_nc; // Max. number of contacts a part…
	__constant__ unsigned int devC_shearmodel; // Shear force model: 1: viscous, f…
	__constant__ Float devC_k_n; // Material normal stiffness
	__constant__ Float devC_k_t; // Material tangential stiffness
	diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
	t@@ -437,7 +437,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…

	// Loop over all possible contacts, and remove contacts
	// that no longer are valid (delta_n > 0.0)
	- for (int i = 0; i<(int)devC_nc; ++i) {
	+ for (int i = 0; i<devC_nc; ++i) {
	mempos = (unsigned int)(idx_a_orig * devC_nc + i);
	__syncthreads();
	idx_b_orig = dev_contacts[mempos];
	t@@ -484,11 +484,11 @@ __global__ void interact(unsigned int* dev_gridParticleI…
	dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	- }/* else { // if dev_contacts[mempos] == devC_np
	+ } else { // if dev_contacts[mempos] == devC_np
	__syncthreads();
	// Zero sum of shear displacement in this position
	dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	- } */
	+ }
	} // Contact loop end


	diff --git a/src/datatypes.h b/src/datatypes.h
	t@@ -10,10 +10,15 @@


	// Enable profiling of kernel runtimes?
	-// 0: No
	+// 0: No (default)
	// 1: Yes
	#define PROFILING 0

	+// Output information about contacts to stdout?
	+// 0: No (default)
	+// 1: Yes
	+#define CONTACTINFO 0
	+

	//////////////////////
	// TYPE DEFINITIONS //
	t@@ -60,8 +65,8 @@ const unsigned int ND = 3;
	const Float VERS = 0.25;

	// Max. number of contacts per particle
	-//const char NC = 16;
	-const char NC = 32;
	+//const int NC = 16;
	+const int NC = 32;


	///////////////////////////
	diff --git a/src/device.cu b/src/device.cu
	t@@ -95,7 +95,7 @@ __host__ void transferToConstantMemory(Particles* p,
	cout << "\n Transfering data to constant device memory: ";

	cudaMemcpyToSymbol("devC_np", &p->np, sizeof(p->np));
	- cudaMemcpyToSymbol("devC_nc", &NC, sizeof(char));
	+ cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int));
	cudaMemcpyToSymbol("devC_origo", grid->origo, sizeof(Float)*ND);
	cudaMemcpyToSymbol("devC_L", grid->L, sizeof(Float)*ND);
	cudaMemcpyToSymbol("devC_num", grid->num, sizeof(unsigned int)*ND);
	t@@ -217,11 +217,14 @@ __host__ void gpuMain(Float4* host_x,

	// Particle contact bookkeeping
	unsigned int* dev_contacts;
	+ unsigned int* host_contacts = new unsigned int[p->np*NC];
	// Particle pair distance correction across periodic boundaries
	Float4* dev_distmod;
	+ Float4* host_distmod = new Float4[p->np*NC];
	// x,y,z contains the interparticle vector, corrected if contact
	// is across a periodic boundary.
	Float4* dev_delta_t; // Accumulated shear distance of contact
	+ Float4* host_delta_t = new Float4[p->np*NC];

	// Particle memory size
	unsigned int memSizeF = sizeof(Float) * p->np;
	t@@ -616,6 +619,14 @@ __host__ void gpuMain(Float4* host_x,
	cudaMemcpy(host_w_mvfd, dev_w_mvfd,
	sizeof(Float)params->nw4, cudaMemcpyDeviceToHost);

	+
	+ // Contact information
	+ if (CONTACTINFO == 1) {
	+ cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)p->npNC,…
	+ cudaMemcpy(host_delta_t, dev_delta_t, memSizeF4*NC, cudaMemcpyDeviceTo…
	+ cudaMemcpy(host_distmod, dev_distmod, memSizeF4*NC, cudaMemcpyDeviceTo…
	+ }
	+
	// Pause the CPU thread until all CUDA calls previously issued are compl…
	cudaThreadSynchronize();

	t@@ -633,6 +644,36 @@ __host__ void gpuMain(Float4* host_x,
	exit(EXIT_FAILURE);
	}

	+ if (CONTACTINFO == 1) {
	+ // Write contact information to stdout
	+ cout << "\n\n---------------------------\n"
	+ << "t = " << time->current << " s.\n"
	+ << "---------------------------\n";
	+
	+ for (int n = 0; n < p->np; ++n) {
	+ cout << "\n## Particle " << n << " ##\n";
	+
	+ cout << "- contacts:\n";
	+ for (int nc = 0; nc < NC; ++nc)
	+ cout << "[" << nc << "]=" << host_contacts[nc+NC*n] << '\n';
	+
	+ cout << "\n- delta_t:\n";
	+ for (int nc = 0; nc < NC; ++nc)
	+ cout << host_delta_t[nc+NC*n].x << '\t'
	+ << host_delta_t[nc+NC*n].y << '\t'
	+ << host_delta_t[nc+NC*n].z << '\t'
	+ << host_delta_t[nc+NC*n].w << '\n';
	+
	+ cout << "\n- distmod:\n";
	+ for (int nc = 0; nc < NC; ++nc)
	+ cout << host_distmod[nc+NC*n].x << '\t'
	+ << host_distmod[nc+NC*n].y << '\t'
	+ << host_distmod[nc+NC*n].z << '\t'
	+ << host_distmod[nc+NC*n].w << '\n';
	+ }
	+ cout << '\n';
	+ }
	+
	// Update status.dat at the interval of filetime
	sprintf(file,"%s/output/%s.status.dat", cwd, inputbin);
	fp = fopen(file, "w");
	t@@ -726,5 +767,10 @@ __host__ void gpuMain(Float4* host_x,
	cudaFree(dev_w_force);
	cudaFree(dev_w_force_partial);

	+ // Contact info arrays
	+ delete [] host_contacts;
	+ delete [] host_distmod;
	+ delete [] host_delta_t;
	+
	printf("Done\n");
	} /* EOF */
	diff --git a/src/integration.cuh b/src/integration.cuh
	t@@ -181,9 +181,12 @@ __global__ void integrateWalls(Float4* dev_w_nx,
	// write-after-read, or write-after-write hazards.
	Float4 w_nx = dev_w_nx[idx];
	Float4 w_mvfd = dev_w_mvfd[idx];
	- int wmode = devC_wmode[idx]; // Wall BC, 0: devs, 1: vel
	+ int wmode = devC_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
	Float acc;

	+ if (wmode == 0) // Wall fixed: do nothing
	+ return;
	+
	// Find the final sum of forces on wall
	w_mvfd.z = 0.0f;
	for (int i=0; i<blocksPerGrid; ++i) {
	t@@ -201,7 +204,7 @@ __global__ void integrateWalls(Float4* dev_w_nx,
	acc = (w_mvfd.z + N)/w_mvfd.x;

	// If Wall BC is controlled by velocity, it should not change
	- if (wmode == 1) {
	+ if (wmode == 2) {
	acc = 0.0f;
	}

	diff --git a/src/main.cpp b/src/main.cpp
	t@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
	p.es = new Float[p.np]; // Total shear energy dissipation
	p.ev = new Float[p.np]; // Total viscous energy dissipation
	p.p = new Float[p.np]; // Pressure excerted onto particle
	- params.wmode = new int[MAXWALLS]; // Wall BC's, 0: devs, 1: vel
	+ params.wmode = new int[MAXWALLS]; // Wall BC's, 0: fixed, 1: devs, 2: vel

	// Allocate Float4 host arrays
	Float4 *host_x = new Float4[p.np]; // Center coordinates for each part…
	t@@ -300,6 +300,7 @@ int main(int argc, char *argv[])
	if (fread(&params.g[i], sizeof(params.g[i]), 1, fp) != 1)
	exit(1);
	}
	+
	if (fread(&params.kappa, sizeof(params.kappa), 1, fp) != 1)
	exit(1);
	if (fread(&params.db, sizeof(params.db), 1, fp) != 1)
	t@@ -328,7 +329,7 @@ int main(int argc, char *argv[])

	// Read wall data
	for (j=0; j<params.nw; ++j) {
	- // Wall condition mode: 0: devs, 1: vel
	+ // Wall condition mode: 0: fixed, 1: devs, 2: vel
	if (fread(&params.wmode[j], sizeof(params.wmode[j]), 1, fp) != 1)
	exit(1);

	t@@ -409,8 +410,10 @@ int main(int argc, char *argv[])

	cout << " - Top BC: ";
	if (params.wmode[0] == 0)
	- cout << "Deviatoric stress\n";
	+ cout << "Fixed\n";
	else if (params.wmode[0] == 1)
	+ cout << "Deviatoric stress\n";
	+ else if (params.wmode[0] == 2)
	cout << "Velocity\n";
	else {
	cerr << "Top boundary condition not recognized!\n";