Introduction
Introduction Statistics Contact Development Disclaimer Help
tCurrently experimenting with integration schemes - sphere - GPU-based 3D discr…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit 68987a0bcf02ed1b43919109acc000099694f787
parent b6d92242b72333dd94520921dc43198ef420e907
Author: Anders Damsgaard <[email protected]>
Date: Tue, 13 Nov 2012 16:04:12 +0100
Currently experimenting with integration schemes
Diffstat:
M python/sphere.py | 63 +++++++++++++++++++++++------…
M python/tests.py | 56 ++++++++++++++++-------------…
M src/contactmodels.cuh | 2 +-
M src/device.cu | 10 +++++++++-
M src/integration.cuh | 108 +++++++++++++++++------------…
M src/main.cpp | 10 ++++++++--
M src/raytracer.cuh | 6 ++++--
M src/sphere.cpp | 9 +++++++--
M src/sphere.h | 3 ++-
9 files changed, 166 insertions(+), 101 deletions(-)
---
diff --git a/python/sphere.py b/python/sphere.py
t@@ -2,7 +2,7 @@
import math
import numpy
import matplotlib as mpl
-mpl.use('Agg')
+#mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
import subprocess
t@@ -17,9 +17,10 @@ class Spherebin:
"""
# Constructor - Initialize arrays
- def __init__(self, np=1, nd=3, nw=1):
+ def __init__(self, np = 1, nd = 3, nw = 1, sid = 'unnamed'):
self.nd = numpy.ones(1, dtype=numpy.int32) * nd
self.np = numpy.ones(1, dtype=numpy.uint32) * np
+ self.sid = sid
# Time parameters
self.time_dt = numpy.zeros(1, dtype=numpy.float64)
t@@ -66,7 +67,7 @@ class Spherebin:
self.mu_ws = numpy.ones(1, dtype=numpy.float64)
self.mu_wd = numpy.ones(1, dtype=numpy.float64)
self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0
- self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLi…
+ self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLine…
self.kappa = numpy.zeros(1, dtype=numpy.float64)
self.db = numpy.zeros(1, dtype=numpy.float64)
self.V_b = numpy.zeros(1, dtype=numpy.float64)
t@@ -262,11 +263,12 @@ class Spherebin:
fh.close()
# Write binary data
- def writebin(self, targetbin, verbose = True):
+ def writebin(self, folder = "../input/", verbose = True):
""" Reads a target SPHERE binary file and returns data.
"""
fh = None
try:
+ targetbin = folder + "/" + self.sid + ".bin"
if (verbose == True):
print("Output file: {0}".format(targetbin))
t@@ -313,8 +315,6 @@ class Spherebin:
fh.write(self.ev.astype(numpy.float64))
fh.write(self.p.astype(numpy.float64))
-
-
fh.write(self.g.astype(numpy.float64))
fh.write(self.k_n.astype(numpy.float64))
fh.write(self.k_t.astype(numpy.float64))
t@@ -452,7 +452,8 @@ class Spherebin:
print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], self.nu…
# Put upper wall at top boundary
- self.w_x[0] = self.L[0]
+ if (self.nw > 0):
+ self.w_x[0] = self.L[0]
# Generate grid based on particle positions
t@@ -760,8 +761,8 @@ class Spherebin:
gamma_n = 0,
gamma_t = 0,
gamma_r = 0,
- gamma_wn = 1e3,
- gamma_wt = 1e3,
+ gamma_wn = 1e4,
+ gamma_wt = 1e4,
capillaryCohesion = 0):
""" Initialize particle parameters to default values.
Radii must be set prior to calling this function.
t@@ -950,12 +951,44 @@ class Spherebin:
return porosity_grid
+ def run(self, verbose=True, hideinputfile=False):
+ """ Execute sphere with target project
+ """
+ quiet = ""
+ stdout = ""
+ if (verbose == False):
+ quiet = "-q"
+ if (hideinputfile == True):
+ stdout = " > /dev/null"
+ subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + self.sid + ".bi…
+
+ def render(self,
+ method = "pres",
+ max_val = 1e3,
+ graphicsformat = "png",
+ verbose=True):
+ """ Render all output files that belong to the simulation, determined by s…
+ """
+
+ quiet = ""
+ if (verbose == False):
+ quiet = "-q"
+
+ # Render images using sphere raytracer
+ subprocess.call("cd ..; ./sphere_* " + quiet + " --method " + method + " {…
+
+ # Convert images to compressed format
+ convert()
+
+
def convert(graphicsformat = "png",
folder = "../img_out"):
""" Converts all PPM images in img_out to graphicsformat,
using ImageMagick """
+ #quiet = " > /dev/null"
+ quiet = ""
# Convert images
- subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F`; conv…
+ subprocess.call("for F in " + folder + "/*.ppm ; do BASE=`basename $F .ppm`;…
# Remove PPM files
subprocess.call("rm " + folder + "/*.ppm", shell=True)
t@@ -1031,7 +1064,7 @@ def visualize(project, method = 'energy', savefig = True…
# Read energy values from project binaries
sb = Spherebin()
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
Epot[i] = sb.energy("pot")
t@@ -1109,7 +1142,7 @@ def visualize(project, method = 'energy', savefig = True…
# Read energy values from project binaries
sb = Spherebin()
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
# Allocate arrays on first run
t@@ -1155,7 +1188,7 @@ def visualize(project, method = 'energy', savefig = True…
# Read stress values from project binaries
for i in range(lastfile+1):
- fn = "../output/{0}.output{1}.bin".format(project, i)
+ fn = "../output/{0}.output{1:0=5}.bin".format(project, i)
sb.readbin(fn, verbose = False)
# First iteration: Allocate arrays and find constant values
t@@ -1227,7 +1260,7 @@ def visualize(project, method = 'energy', savefig = True…
else:
plt.show()
-def run(project, verbose=True, hideinputfile=False):
+def run(binary, verbose=True, hideinputfile=False):
""" Execute sphere with target project
"""
quiet = ""
t@@ -1236,7 +1269,7 @@ def run(project, verbose=True, hideinputfile=False):
quiet = "-q"
if (hideinputfile == True):
stdout = " > /dev/null"
- subprocess.call("cd ..; ./sphere_* " + quiet + " input/" + project + ".bin" …
+ subprocess.call("cd ..; ./sphere_* " + quiet + " " + binary + " " + stdout, …
def status(project):
""" Check the status.dat file for the target project,
diff --git a/python/tests.py b/python/tests.py
t@@ -13,33 +13,35 @@ def compare(first, second, string):
print("### Input/output tests ###")
# Generate data in python
-orig = Spherebin(np = 100, nw = 0)
-orig.generateRadii(histogram = False)
-orig.defaultParams()
-orig.initRandomGridPos(g = numpy.zeros(orig.nd))
-orig.initTemporal(current = 0.0, total = 0.0)
-orig.time_total = 2.0*orig.time_dt;
-orig.time_file_dt = orig.time_dt;
-orig.writebin("orig.bin", verbose=False)
-
-# Test Python IO routines
-py = Spherebin()
-py.readbin("orig.bin", verbose=False)
-compare(orig, py, "Python IO:")
-
-# Test C++ IO routines
-run("python/orig.bin", verbose=False, hideinputfile=True)
-cpp = Spherebin()
-cpp.readbin("../output/orig.output0.bin", verbose=False)
-compare(orig, cpp, "C++ IO: ")
-
-# Test CUDA IO routines
-cuda = Spherebin()
-cuda.readbin("../output/orig.output1.bin", verbose=False)
-cuda.time_current = orig.time_current
-cuda.time_step_count = orig.time_step_count
-compare(orig, cuda, "CUDA IO: ")
+for N in [1, 2, 10, 1e2, 1e3, 1e4]:
+ print("{} particle(s)".format(int(N)))
+ orig = Spherebin(np = int(N), nw = 0, sid = "test")
+ orig.generateRadii(histogram = False)
+ orig.defaultParams()
+ orig.initRandomGridPos(g = numpy.zeros(orig.nd), gridnum=numpy.array([N*N+3,…
+ orig.initTemporal(current = 0.0, total = 0.0)
+ orig.time_total = 2.0*orig.time_dt;
+ orig.time_file_dt = orig.time_dt;
+ orig.writebin(verbose=False)
+
+ # Test Python IO routines
+ py = Spherebin()
+ py.readbin("../input/test.bin", verbose=False)
+ compare(orig, py, "Python IO:")
+
+ # Test C++ IO routines
+ orig.run(verbose=False, hideinputfile=True)
+ cpp = Spherebin()
+ cpp.readbin("../output/test.output0.bin", verbose=False)
+ compare(orig, cpp, "C++ IO: ")
+
+ # Test CUDA IO routines
+ cuda = Spherebin()
+ cuda.readbin("../output/test.output1.bin", verbose=False)
+ cuda.time_current = orig.time_current
+ cuda.time_step_count = orig.time_step_count
+ compare(orig, cuda, "CUDA IO: ")
# Remove temporary files
-subprocess.call("rm orig.bin; rm ../output/orig*.bin", shell=True)
+subprocess.call("rm ../{input,output}/test*bin", shell=True)
diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh
t@@ -183,7 +183,7 @@ __device__ void contactLinearViscous(Float3* F, Float3* T,
} // f_n is OK! */
// Normal force component: Elastic
- //f_n = -k_n_ab * delta_ab * n_ab;
+ //f_n = -devC_params.k_n * delta_ab * n_ab;
// Normal force component: Elastic - viscous damping
f_n = (-devC_params.k_n * delta_ab - devC_params.gamma_n * vel_n_ab) * n_ab;
diff --git a/src/device.cu b/src/device.cu
t@@ -227,6 +227,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
k.acc = new Float4[np];
k.angacc = new Float4[np];
+#pragma omp parallel for if(np>100)
+ for (unsigned int i = 0; i<np; ++i) {
+ k.acc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
+ k.angacc[i] = MAKE_FLOAT4(0.0, 0.0, 0.0, 0.0);
+ }
// Kinematics arrays
cudaMalloc((void**)&dev_x, memSizeF4);
t@@ -262,9 +267,10 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num…
cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1…
- // Host contact bookkeeping arrays
+ // Host contact bookkeeping arrays
k.contacts = new unsigned int[np*NC];
// Initialize contacts lists to np
+#pragma omp parallel for if(np>100)
for (unsigned int i=0; i<(np*NC); ++i)
k.contacts[i] = np;
k.distmod = new Float4[np*NC];
t@@ -687,6 +693,8 @@ __host__ void DEM::startTime()
dev_force,
dev_torque,
dev_angpos,
+ dev_acc,
+ dev_angacc,
dev_xysum,
dev_gridParticleIndex);
cudaThreadSynchronize();
diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -4,13 +4,13 @@
// integration.cuh
// Functions responsible for temporal integration
-
// Second order integration scheme based on Taylor expansion of particle kinem…
// Kernel executed on device, and callable from host only.
__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Inp…
Float4* dev_angvel_sorted,
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, …
Float4* dev_force, Float4* dev_torque, Float4* dev_a…
+ Float4* dev_acc, Float4* dev_angacc,
Float2* dev_xysum,
unsigned int* dev_gridParticleIndex) // Input: Sorte…
{
t@@ -24,19 +24,15 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
Float4 force = dev_force[orig_idx];
Float4 torque = dev_torque[orig_idx];
Float4 angpos = dev_angpos[orig_idx];
-
- Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
-
- // Initialize acceleration vectors to zero
- Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
- Float4 angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
-
- // Fetch particle position and velocity values from global read
+ Float4 acc = dev_acc[orig_idx];
+ Float4 angacc = dev_angacc[orig_idx];
Float4 x = dev_x_sorted[idx];
Float4 vel = dev_vel_sorted[idx];
Float4 angvel = dev_angvel_sorted[idx];
Float radius = x.w;
+ Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
+
// Coherent read from constant memory to registers
Float dt = devC_dt;
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_gr…
t@@ -44,7 +40,48 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev…
Float rho = devC_params.rho;
// Particle mass
- Float m = 4.0f/3.0f * PI * radius*radius*radius * rho;
+ Float m = 4.0/3.0 * PI * radius*radius*radius * rho;
+
+#if 0
+ //// First-order Euler integration scheme ///
+ // Update angular position
+ angpos.x += angvel.x * dt;
+ angpos.y += angvel.y * dt;
+ angpos.z += angvel.z * dt;
+
+ // Update position
+ x.x += vel.x * dt;
+ x.y += vel.y * dt;
+ x.z += vel.z * dt;
+#else
+
+ /// Second-order scheme based on Taylor expansion ///
+ // Update angular position
+ angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
+ angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
+ angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
+
+ // Update position
+ x.x += vel.x * dt + acc.x * dt*dt * 0.5;
+ x.y += vel.y * dt + acc.y * dt*dt * 0.5;
+ x.z += vel.z * dt + acc.z * dt*dt * 0.5;
+#endif
+
+ // Update angular velocity
+ angvel.x += angacc.x * dt;
+ angvel.y += angacc.y * dt;
+ angvel.z += angacc.z * dt;
+
+ // Update linear velocity
+ vel.x += acc.x * dt;
+ vel.y += acc.y * dt;
+ vel.z += acc.z * dt;
+
+ // Add x-displacement for this time step to
+ // sum of x-displacements
+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
+ xysum.x += vel.x * dt;
+ xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
// Update linear acceleration of particle
acc.x = force.x / m;
t@@ -53,9 +90,9 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_…
// Update angular acceleration of particle
// (angacc = (total moment)/Intertia, intertia = 2/5*m*r^2)
- angacc.x = torque.x * 1.0f / (2.0f/5.0f * m * radius*radius);
- angacc.y = torque.y * 1.0f / (2.0f/5.0f * m * radius*radius);
- angacc.z = torque.z * 1.0f / (2.0f/5.0f * m * radius*radius);
+ angacc.x = torque.x * 1.0 / (2.0/5.0 * m * radius*radius);
+ angacc.y = torque.y * 1.0 / (2.0/5.0 * m * radius*radius);
+ angacc.z = torque.z * 1.0 / (2.0/5.0 * m * radius*radius);
// Add gravity
acc.x += devC_params.g[0];
t@@ -69,47 +106,14 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* d…
// gravity to counteract segregation.
// Particles may move in the z-dimension,
// to allow for dilation.
- acc.x = 0.0f;
- acc.y = 0.0f;
+ acc.x = 0.0;
+ acc.y = 0.0;
acc.z -= devC_params.g[2];
// Zero the angular acceleration
angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
}
- // Update angular velocity
- angvel.x += angacc.x * dt;
- angvel.y += angacc.y * dt;
- angvel.z += angacc.z * dt;
-
- // Update linear velocity
- vel.x += acc.x * dt;
- vel.y += acc.y * dt;
- vel.z += acc.z * dt;
-
- // Update position. First-order Euler's scheme:
- //x.x += vel.x * dt;
- //x.y += vel.y * dt;
- //x.z += vel.z * dt;
-
- // Update angular position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f;
- angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f;
- angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f;
-
- // Update position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- x.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
- x.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
- x.z += vel.z * dt + (acc.z * dt*dt)/2.0f;
-
- // Add x-displacement for this time step to
- // sum of x-displacements
- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.x += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.y += vel.y * dt + (acc.y * dt*dt)/2.0f;
-
// Move particle across boundary if it is periodic
if (devC_grid.periodic == 1) {
t@@ -133,6 +137,8 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
// Store data in global memory at original, pre-sort positions
dev_xysum[orig_idx] += xysum;
+ dev_acc[orig_idx] = acc;
+ dev_angacc[orig_idx] = angacc;
dev_angvel[orig_idx] = angvel;
dev_vel[orig_idx] = vel;
dev_angpos[orig_idx] = angpos;
t@@ -220,8 +226,10 @@ __global__ void integrateWalls(Float4* dev_walls_nx,
w_mvfd.y += acc * dt;
// Update position. Second-order scheme based on Taylor expansion
- // (greater accuracy than the first-order Euler's scheme)
- w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
+ //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
+
+ // Update position. First-order Euler integration scheme
+ w_nx.w += w_mvfd.y * dt;
// Store data in global memory
dev_walls_nx[idx] = w_nx;
diff --git a/src/main.cpp b/src/main.cpp
t@@ -30,6 +30,7 @@ int main(const int argc, const char *argv[])
// Default values
int verbose = 1;
int checkVals = 1;
+ int dry = 0;
int render = 0; // whether to render an image
int method = 0; // visualization method
int nfiles = 0; // number of input files
t@@ -48,6 +49,7 @@ int main(const int argc, const char *argv[])
<< "-h, --help\t\tprint help\n"
<< "-V, --version\t\tprint version information and exit\n"
<< "-q, --quiet\t\tsuppress status messages to stdout\n"
+ << "-n, --dry\t\tshow key experiment parameters and quit\n"
<< "-r, --render\t\trender input files instead of simulating temporal …
<< "-dcv, --dpnt-check-values\t\tdon't check values before running\n"
<< "Raytracer (-r) specific options:\n"
t@@ -75,6 +77,9 @@ int main(const int argc, const char *argv[])
else if (argvi == "-q" || argvi == "--quiet")
verbose = 0;
+ else if (argvi == "-n" || argvi == "--dry")
+ dry = 1;
+
else if (argvi == "-r" || argvi == "--render")
render = 1;
t@@ -110,10 +115,11 @@ int main(const int argc, const char *argv[])
else {
nfiles++;
- std::cout << argv[0] << ": processing input file: " << argvi << std::end…
+ if (verbose == 1)
+ std::cout << argv[0] << ": processing input file: " << argvi << std::e…
// Create DEM class, read data from input binary, check values
- DEM dem(argvi, verbose, checkVals);
+ DEM dem(argvi, verbose, checkVals, dry);
// Render image if requested
if (render == 1)
diff --git a/src/raytracer.cuh b/src/raytracer.cuh
t@@ -485,8 +485,10 @@ __host__ void DEM::render(
// Report color visualization method and color map range
- cout << " " << desc << " color map range: [0, "
- << maxval << "] " << unit << endl;
+ if (verbose == 1) {
+ cout << " " << desc << " color map range: [0, "
+ << maxval << "] " << unit << endl;
+ }
// Copy linarr to dev_linarr if required
if (transfer == 1) {
diff --git a/src/sphere.cpp b/src/sphere.cpp
t@@ -13,7 +13,8 @@
// and reports the values
DEM::DEM(const std::string inputbin,
const int verbosity,
- const int checkVals)
+ const int checkVals,
+ const int dry)
: verbose(verbosity)
{
using std::cout;
t@@ -36,8 +37,12 @@ DEM::DEM(const std::string inputbin,
checkValues();
// Report data values
- if (verbose == 1)
+ if (dry == 1)
reportValues();
+
+ // If this is a dry run, exit
+ if (dry == 1)
+ exit(1);
// Initialize CUDA
initializeGPU();
diff --git a/src/sphere.h b/src/sphere.h
t@@ -126,7 +126,8 @@ class DEM {
// Constructor, some parameters with default values
DEM(std::string inputbin,
const int verbosity = 1,
- const int checkVals = 1);
+ const int checkVals = 1,
+ const int dry = 0);
// Destructor
~DEM(void);
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.