GopherProxy

	tCurrently experimenting with integration schemes - sphere - GPU-based 3D discr…
	git clone git://src.adamsgaard.dk/sphere
	Log
	Files
	Refs
	LICENSE
	---
	commit 68987a0bcf02ed1b43919109acc000099694f787
	parent b6d92242b72333dd94520921dc43198ef420e907
	Author: Anders Damsgaard <[email protected]>
	Date: Tue, 13 Nov 2012 16:04:12 +0100

	Currently experimenting with integration schemes

	Diffstat:
	M python/sphere.py \| 63 +++++++++++++++++++++++------…
	M python/tests.py \| 56 ++++++++++++++++-------------…
	M src/contactmodels.cuh \| 2 +-
	M src/device.cu \| 10 +++++++++-
	M src/integration.cuh \| 108 +++++++++++++++++------------…
	M src/main.cpp \| 10 ++++++++--
	M src/raytracer.cuh \| 6 ++++--
	M src/sphere.cpp \| 9 +++++++--
	M src/sphere.h \| 3 ++-

	9 files changed, 166 insertions(+), 101 deletions(-)
	---
	diff --git a/python/sphere.py b/python/sphere.py
	t@@ -2,7 +2,7 @@
	import math
	import numpy
	import matplotlib as mpl
	-mpl.use('Agg')
	+#mpl.use('Agg')
	import matplotlib.pyplot as plt
	from matplotlib.font_manager import FontProperties
	import subprocess
	t@@ -17,9 +17,10 @@ class Spherebin:
	"""

	# Constructor - Initialize arrays
	- def __init__(self, np=1, nd=3, nw=1):
	+ def __init__(self, np = 1, nd = 3, nw = 1, sid = 'unnamed'):
	self.nd = numpy.ones(1, dtype=numpy.int32) * nd
	self.np = numpy.ones(1, dtype=numpy.uint32) * np
	+ self.sid = sid

	# Time parameters
	self.time_dt = numpy.zeros(1, dtype=numpy.float64)
	t@@ -66,7 +67,7 @@ class Spherebin:
	self.mu_ws = numpy.ones(1, dtype=numpy.float64)
	self.mu_wd = numpy.ones(1, dtype=numpy.float64)
	self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0
	- self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLi…
	+ self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLine…
	self.kappa = numpy.zeros(1, dtype=numpy.float64)
	self.db = numpy.zeros(1, dtype=numpy.float64)
	self.V_b = numpy.zeros(1, dtype=numpy.float64)
	t@@ -262,11 +263,12 @@ class Spherebin:
	fh.close()

	# Write binary data
	- def writebin(self, targetbin, verbose = True):
	+ def writebin(self, folder = "../input/", verbose = True):
	""" Reads a target SPHERE binary file and returns data.
	"""
	fh = None
	try:
	+ targetbin = folder + "/" + self.sid + ".bin"
	if (verbose == True):
	print("Output file: {0}".format(targetbin))

	t@@ -313,8 +315,6 @@ class Spherebin:
	fh.write(self.ev.astype(numpy.float64))
	fh.write(self.p.astype(numpy.float64))

	-
	-
	fh.write(self.g.astype(numpy.float64))
	fh.write(self.k_n.astype(numpy.float64))
	fh.write(self.k_t.astype(numpy.float64))
	t@@ -452,7 +452,8 @@ class Spherebin:
	print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], self.nu…

	# Put upper wall at top boundary
	- self.w_x[0] = self.L[0]
	+ if (self.nw > 0):
	+ self.w_x[0] = self.L[0]


	# Generate grid based on particle positions
	t@@ -760,8 +761,8 @@ class Spherebin:
	gamma_n = 0,
	gamma_t = 0,
	gamma_r = 0,
	- gamma_wn = 1e3,
	- gamma_wt = 1e3,
	+ gamma_wn = 1e4,
	+ gamma_wt = 1e4,
	capillaryCohesion = 0):
	""" Initialize particle parameters to default values.
	Radii must be set prior to calling this function.
	t@@ -950,12 +951,44 @@ class Spherebin:

	return porosity_grid

	+ def run(self, verbose=True, hideinputfile=False):
	+ """ Execute sphere with target project
	+ """
	+ quiet = ""
	+ stdout = ""
	+ if (verbose == False):
	+ quiet = "-q"
	+ if (hideinputfile == True):
	+ stdout = " > /dev/null"
	+ subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + self.sid + ".bi…
	+
	+ def render(self,
	+ method = "pres",
	+ max_val = 1e3,
	+ graphicsformat = "png",
	+ verbose=True):
	+ """ Render all output files that belong to the simulation, determined by s…
	+ """
	+
	+ quiet = ""
	+ if (verbose == False):
	+ quiet = "-q"
	+
	+ # Render images using sphere raytracer
	+ subprocess.call("cd ..; ./sphere_* " + quiet + " --method " + method + " {…
	+
	+ # Convert images to compressed format
	+ convert()
	+
	+
	def convert(graphicsformat = "png",
	folder = "../img_out"):
	""" Converts all PPM images in img_out to graphicsformat,
	using ImageMagick """
	+ #quiet = " > /dev/null"
	+ quiet = ""
	# Convert images
	- subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F`; conv…
	+ subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F .ppm`;…

	# Remove PPM files
	subprocess.call("rm " + folder + "/*.ppm", shell=True)
	t@@ -1031,7 +1064,7 @@ def visualize(project, method = 'energy', savefig = True…
	# Read energy values from project binaries
	sb = Spherebin()
	for i in range(lastfile+1):
	- fn = "../output/{0}.output{1}.bin".format(project, i)
	+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
	sb.readbin(fn, verbose = False)

	Epot[i] = sb.energy("pot")
	t@@ -1109,7 +1142,7 @@ def visualize(project, method = 'energy', savefig = True…
	# Read energy values from project binaries
	sb = Spherebin()
	for i in range(lastfile+1):
	- fn = "../output/{0}.output{1}.bin".format(project, i)
	+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
	sb.readbin(fn, verbose = False)

	# Allocate arrays on first run
	t@@ -1155,7 +1188,7 @@ def visualize(project, method = 'energy', savefig = True…
	# Read stress values from project binaries
	for i in range(lastfile+1):

	- fn = "../output/{0}.output{1}.bin".format(project, i)
	+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
	sb.readbin(fn, verbose = False)

	# First iteration: Allocate arrays and find constant values
	t@@ -1227,7 +1260,7 @@ def visualize(project, method = 'energy', savefig = True…
	else:
	plt.show()

	-def run(project, verbose=True, hideinputfile=False):
	+def run(binary, verbose=True, hideinputfile=False):
	""" Execute sphere with target project
	"""
	quiet = ""
	t@@ -1236,7 +1269,7 @@ def run(project, verbose=True, hideinputfile=False):
	quiet = "-q"
	if (hideinputfile == True):
	stdout = " > /dev/null"
	- subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + project + ".bin" …
	+ subprocess.call("cd ..; ./sphere_* " + quiet + " " + binary + " " + stdout, …

	def status(project):
	""" Check the status.dat file for the target project,
	diff --git a/python/tests.py b/python/tests.py
	t@@ -13,33 +13,35 @@ def compare(first, second, string):
	print("### Input/output tests ###")

	# Generate data in python
	-orig = Spherebin(np = 100, nw = 0)
	-orig.generateRadii(histogram = False)
	-orig.defaultParams()
	-orig.initRandomGridPos(g = numpy.zeros(orig.nd))
	-orig.initTemporal(current = 0.0, total = 0.0)
	-orig.time_total = 2.0*orig.time_dt;
	-orig.time_file_dt = orig.time_dt;
	-orig.writebin("orig.bin", verbose=False)
	-
	-# Test Python IO routines
	-py = Spherebin()
	-py.readbin("orig.bin", verbose=False)
	-compare(orig, py, "Python IO:")
	-
	-# Test C++ IO routines
	-run("python/orig.bin", verbose=False, hideinputfile=True)
	-cpp = Spherebin()
	-cpp.readbin("../output/orig.output0.bin", verbose=False)
	-compare(orig, cpp, "C++ IO: ")
	-
	-# Test CUDA IO routines
	-cuda = Spherebin()
	-cuda.readbin("../output/orig.output1.bin", verbose=False)
	-cuda.time_current = orig.time_current
	-cuda.time_step_count = orig.time_step_count
	-compare(orig, cuda, "CUDA IO: ")
	+for N in [1, 2, 10, 1e2, 1e3, 1e4]:
	+ print("{} particle(s)".format(int(N)))
	+ orig = Spherebin(np = int(N), nw = 0, sid = "test")
	+ orig.generateRadii(histogram = False)
	+ orig.defaultParams()
	+ orig.initRandomGridPos(g = numpy.zeros(orig.nd), gridnum=numpy.array([N*N+3,…
	+ orig.initTemporal(current = 0.0, total = 0.0)
	+ orig.time_total = 2.0*orig.time_dt;
	+ orig.time_file_dt = orig.time_dt;
	+ orig.writebin(verbose=False)
	+
	+ # Test Python IO routines
	+ py = Spherebin()
	+ py.readbin("../input/test.bin", verbose=False)
	+ compare(orig, py, "Python IO:")
	+
	+ # Test C++ IO routines
	+ orig.run(verbose=False, hideinputfile=True)
	+ cpp = Spherebin()
	+ cpp.readbin("../output/test.output0.bin", verbose=False)
	+ compare(orig, cpp, "C++ IO: ")
	+
	+ # Test CUDA IO routines
	+ cuda = Spherebin()
	+ cuda.readbin("../output/test.output1.bin", verbose=False)
	+ cuda.time_current = orig.time_current
	+ cuda.time_step_count = orig.time_step_count
	+ compare(orig, cuda, "CUDA IO: ")

	# Remove temporary files
	-subprocess.call("rm orig.bin; rm ../output/orig*.bin", shell=True)
	+subprocess.call("rm ../{input,output}/test*bin", shell=True)

	diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
	t@@ -183,7 +183,7 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
	} // f_n is OK! */

	// Normal force component: Elastic
	- //f_n = -k_n_ab * delta_ab * n_ab;
	+ //f_n = -devC_params.k_n * delta_ab * n_ab;

	// Normal force component: Elastic - viscous damping
	f_n = (-devC_params.k_n * delta_ab - devC_params.gamma_n * vel_n_ab) * n_ab;
	diff --git a/src/device.cu b/src/device.cu
	t@@ -227,6 +227,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)

	k.acc = new Float4[np];
	k.angacc = new Float4[np];
	+#pragma omp parallel for if(np>100)
	+ for (unsigned int i = 0; i<np; ++i) {
	+ k.acc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
	+ k.angacc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
	+ }

	// Kinematics arrays
	cudaMalloc((void**)&dev_x, memSizeF4);
	t@@ -262,9 +267,10 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
	cudaMalloc((void*)&dev_cellStart, sizeof(unsigned int)grid.num[0]*grid.num…
	cudaMalloc((void*)&dev_cellEnd, sizeof(unsigned int)grid.num[0]*grid.num[1…

	- // Host contact bookkeeping arrays
	+ // Host contact bookkeeping arrays
	k.contacts = new unsigned int[np*NC];
	// Initialize contacts lists to np
	+#pragma omp parallel for if(np>100)
	for (unsigned int i=0; i<(np*NC); ++i)
	k.contacts[i] = np;
	k.distmod = new Float4[np*NC];
	t@@ -687,6 +693,8 @@ __host__ void DEM::startTime()
	dev_force,
	dev_torque,
	dev_angpos,
	+ dev_acc,
	+ dev_angacc,
	dev_xysum,
	dev_gridParticleIndex);
	cudaThreadSynchronize();
	diff --git a/src/integration.cuh b/src/integration.cuh
	t@@ -4,13 +4,13 @@
	// integration.cuh
	// Functions responsible for temporal integration

	-
	// Second order integration scheme based on Taylor expansion of particle kinem…
	// Kernel executed on device, and callable from host only.
	__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Inp…
	Float4* dev_angvel_sorted,
	Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, …
	Float4* dev_force, Float4* dev_torque, Float4* dev_a…
	+ Float4* dev_acc, Float4* dev_angacc,
	Float2* dev_xysum,
	unsigned int* dev_gridParticleIndex) // Input: Sorte…
	{
	t@@ -24,19 +24,15 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
	Float4 force = dev_force[orig_idx];
	Float4 torque = dev_torque[orig_idx];
	Float4 angpos = dev_angpos[orig_idx];
	-
	- Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
	-
	- // Initialize acceleration vectors to zero
	- Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	- Float4 angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	-
	- // Fetch particle position and velocity values from global read
	+ Float4 acc = dev_acc[orig_idx];
	+ Float4 angacc = dev_angacc[orig_idx];
	Float4 x = dev_x_sorted[idx];
	Float4 vel = dev_vel_sorted[idx];
	Float4 angvel = dev_angvel_sorted[idx];
	Float radius = x.w;

	+ Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
	+
	// Coherent read from constant memory to registers
	Float dt = devC_dt;
	Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_gr…
	t@@ -44,7 +40,48 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev…
	Float rho = devC_params.rho;

	// Particle mass
	- Float m = 4.0f/3.0f * PI * radiusradiusradius * rho;
	+ Float m = 4.0/3.0 * PI * radiusradiusradius * rho;
	+
	+#if 0
	+ //// First-order Euler integration scheme ///
	+ // Update angular position
	+ angpos.x += angvel.x * dt;
	+ angpos.y += angvel.y * dt;
	+ angpos.z += angvel.z * dt;
	+
	+ // Update position
	+ x.x += vel.x * dt;
	+ x.y += vel.y * dt;
	+ x.z += vel.z * dt;
	+#else
	+
	+ /// Second-order scheme based on Taylor expansion ///
	+ // Update angular position
	+ angpos.x += angvel.x * dt + angacc.x * dtdt 0.5;
	+ angpos.y += angvel.y * dt + angacc.y * dtdt 0.5;
	+ angpos.z += angvel.z * dt + angacc.z * dtdt 0.5;
	+
	+ // Update position
	+ x.x += vel.x * dt + acc.x * dtdt 0.5;
	+ x.y += vel.y * dt + acc.y * dtdt 0.5;
	+ x.z += vel.z * dt + acc.z * dtdt 0.5;
	+#endif
	+
	+ // Update angular velocity
	+ angvel.x += angacc.x * dt;
	+ angvel.y += angacc.y * dt;
	+ angvel.z += angacc.z * dt;
	+
	+ // Update linear velocity
	+ vel.x += acc.x * dt;
	+ vel.y += acc.y * dt;
	+ vel.z += acc.z * dt;
	+
	+ // Add x-displacement for this time step to
	+ // sum of x-displacements
	+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
	+ xysum.x += vel.x * dt;
	+ xysum.y += vel.y * dt;// + (acc.y * dtdt 0.5f;

	// Update linear acceleration of particle
	acc.x = force.x / m;
	t@@ -53,9 +90,9 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_…

	// Update angular acceleration of particle
	// (angacc = (total moment)/Intertia, intertia = 2/5mr^2)
	- angacc.x = torque.x * 1.0f / (2.0f/5.0f * m * radius*radius);
	- angacc.y = torque.y * 1.0f / (2.0f/5.0f * m * radius*radius);
	- angacc.z = torque.z * 1.0f / (2.0f/5.0f * m * radius*radius);
	+ angacc.x = torque.x * 1.0 / (2.0/5.0 * m * radius*radius);
	+ angacc.y = torque.y * 1.0 / (2.0/5.0 * m * radius*radius);
	+ angacc.z = torque.z * 1.0 / (2.0/5.0 * m * radius*radius);

	// Add gravity
	acc.x += devC_params.g[0];
	t@@ -69,47 +106,14 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* d…
	// gravity to counteract segregation.
	// Particles may move in the z-dimension,
	// to allow for dilation.
	- acc.x = 0.0f;
	- acc.y = 0.0f;
	+ acc.x = 0.0;
	+ acc.y = 0.0;
	acc.z -= devC_params.g[2];

	// Zero the angular acceleration
	angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	- // Update angular velocity
	- angvel.x += angacc.x * dt;
	- angvel.y += angacc.y * dt;
	- angvel.z += angacc.z * dt;
	-
	- // Update linear velocity
	- vel.x += acc.x * dt;
	- vel.y += acc.y * dt;
	- vel.z += acc.z * dt;
	-
	- // Update position. First-order Euler's scheme:
	- //x.x += vel.x * dt;
	- //x.y += vel.y * dt;
	- //x.z += vel.z * dt;
	-
	- // Update angular position. Second-order scheme based on Taylor expansion
	- // (greater accuracy than the first-order Euler's scheme)
	- angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f;
	- angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f;
	- angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f;
	-
	- // Update position. Second-order scheme based on Taylor expansion
	- // (greater accuracy than the first-order Euler's scheme)
	- x.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
	- x.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
	- x.z += vel.z * dt + (acc.z * dt*dt)/2.0f;
	-
	- // Add x-displacement for this time step to
	- // sum of x-displacements
	- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
	- xysum.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
	- xysum.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
	-

	// Move particle across boundary if it is periodic
	if (devC_grid.periodic == 1) {
	t@@ -133,6 +137,8 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…

	// Store data in global memory at original, pre-sort positions
	dev_xysum[orig_idx] += xysum;
	+ dev_acc[orig_idx] = acc;
	+ dev_angacc[orig_idx] = angacc;
	dev_angvel[orig_idx] = angvel;
	dev_vel[orig_idx] = vel;
	dev_angpos[orig_idx] = angpos;
	t@@ -220,8 +226,10 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
	w_mvfd.y += acc * dt;

	// Update position. Second-order scheme based on Taylor expansion
	- // (greater accuracy than the first-order Euler's scheme)
	- w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
	+ //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
	+
	+ // Update position. First-order Euler integration scheme
	+ w_nx.w += w_mvfd.y * dt;

	// Store data in global memory
	dev_walls_nx[idx] = w_nx;
	diff --git a/src/main.cpp b/src/main.cpp
	t@@ -30,6 +30,7 @@ int main(const int argc, const char *argv[])
	// Default values
	int verbose = 1;
	int checkVals = 1;
	+ int dry = 0;
	int render = 0; // whether to render an image
	int method = 0; // visualization method
	int nfiles = 0; // number of input files
	t@@ -48,6 +49,7 @@ int main(const int argc, const char *argv[])
	<< "-h, --help\t\tprint help\n"
	<< "-V, --version\t\tprint version information and exit\n"
	<< "-q, --quiet\t\tsuppress status messages to stdout\n"
	+ << "-n, --dry\t\tshow key experiment parameters and quit\n"
	<< "-r, --render\t\trender input files instead of simulating temporal …
	<< "-dcv, --dpnt-check-values\t\tdon't check values before running\n"
	<< "Raytracer (-r) specific options:\n"
	t@@ -75,6 +77,9 @@ int main(const int argc, const char *argv[])
	else if (argvi == "-q" \|\| argvi == "--quiet")
	verbose = 0;

	+ else if (argvi == "-n" \|\| argvi == "--dry")
	+ dry = 1;
	+
	else if (argvi == "-r" \|\| argvi == "--render")
	render = 1;

	t@@ -110,10 +115,11 @@ int main(const int argc, const char *argv[])
	else {
	nfiles++;

	- std::cout << argv[0] << ": processing input file: " << argvi << std::end…
	+ if (verbose == 1)
	+ std::cout << argv[0] << ": processing input file: " << argvi << std::e…

	// Create DEM class, read data from input binary, check values
	- DEM dem(argvi, verbose, checkVals);
	+ DEM dem(argvi, verbose, checkVals, dry);

	// Render image if requested
	if (render == 1)
	diff --git a/src/raytracer.cuh b/src/raytracer.cuh
	t@@ -485,8 +485,10 @@ __host__ void DEM::render(


	// Report color visualization method and color map range
	- cout << " " << desc << " color map range: [0, "
	- << maxval << "] " << unit << endl;
	+ if (verbose == 1) {
	+ cout << " " << desc << " color map range: [0, "
	+ << maxval << "] " << unit << endl;
	+ }

	// Copy linarr to dev_linarr if required
	if (transfer == 1) {
	diff --git a/src/sphere.cpp b/src/sphere.cpp
	t@@ -13,7 +13,8 @@
	// and reports the values
	DEM::DEM(const std::string inputbin,
	const int verbosity,
	- const int checkVals)
	+ const int checkVals,
	+ const int dry)
	: verbose(verbosity)
	{
	using std::cout;
	t@@ -36,8 +37,12 @@ DEM::DEM(const std::string inputbin,
	checkValues();

	// Report data values
	- if (verbose == 1)
	+ if (dry == 1)
	reportValues();
	+
	+ // If this is a dry run, exit
	+ if (dry == 1)
	+ exit(1);

	// Initialize CUDA
	initializeGPU();
	diff --git a/src/sphere.h b/src/sphere.h
	t@@ -126,7 +126,8 @@ class DEM {
	// Constructor, some parameters with default values
	DEM(std::string inputbin,
	const int verbosity = 1,
	- const int checkVals = 1);
	+ const int checkVals = 1,
	+ const int dry = 0);

	// Destructor
	~DEM(void);