Introduction
Introduction Statistics Contact Development Disclaimer Help
IO now works in Python, C++ and CUDA, see python/tests.py. Now working on impl…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit 472d81e85b29f2ad92df2031ff56a9ac2cdf8699
parent c494a43f245cf27c2c498ce27636347fbbb1a909
Author: Anders Damsgaard <[email protected]>
Date: Fri, 2 Nov 2012 09:29:51 +0100
IO now works in Python, C++ and CUDA, see python/tests.py. Now working on imple…
Diffstat:
M python/sphere.py | 77 +++++++++++++++--------------…
M python/tests.py | 20 +++++++++++---------
M src/Makefile | 4 ++--
M src/contactsearch.cuh | 29 +++++++++++++++++++----------
M src/datatypes.h | 1 -
M src/device.cu | 346 ++++++++++++++++-------------…
M src/file_io.cpp | 1 -
M src/integration.cuh | 18 ++++++++++--------
M src/sphere.cpp | 22 ++++++++++++----------
M src/sphere.h | 51 +++++++++++++++++++++++++----…
M src/utility.cu | 4 ++--
11 files changed, 315 insertions(+), 258 deletions(-)
---
diff --git a/python/sphere.py b/python/sphere.py
@@ -51,17 +51,21 @@ class Spherebin:
self.ev = numpy.zeros(self.np, dtype=numpy.float64)
self.p = numpy.zeros(self.np, dtype=numpy.float64)
- self.g = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64)
- self.k_n = numpy.ones(1, dtype=numpy.float64) * 1.16e9
- self.k_t = numpy.ones(1, dtype=numpy.float64) * 1.16e9
- self.k_r = numpy.zeros(1, dtype=numpy.float64)
- self.gamma_n = numpy.zeros(1, dtype=numpy.float64)
- self.gamma_t = numpy.zeros(1, dtype=numpy.float64)
- self.gamma_r = numpy.zeros(1, dtype=numpy.float64)
- self.mu_s = numpy.ones(1, dtype=numpy.float64)
- self.mu_d = numpy.ones(1, dtype=numpy.float64)
- self.mu_r = numpy.zeros(1, dtype=numpy.float64)
- self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0
+ self.g = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64)
+ self.k_n = numpy.ones(1, dtype=numpy.float64) * 1.16e9
+ self.k_t = numpy.ones(1, dtype=numpy.float64) * 1.16e9
+ self.k_r = numpy.zeros(1, dtype=numpy.float64)
+ self.gamma_n = numpy.zeros(1, dtype=numpy.float64)
+ self.gamma_t = numpy.zeros(1, dtype=numpy.float64)
+ self.gamma_r = numpy.zeros(1, dtype=numpy.float64)
+ self.mu_s = numpy.ones(1, dtype=numpy.float64)
+ self.mu_d = numpy.ones(1, dtype=numpy.float64)
+ self.mu_r = numpy.zeros(1, dtype=numpy.float64)
+ self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3
+ self.gamma_wt = numpy.ones(1, dtype=numpy.float64) * 1.0e3
+ self.mu_ws = numpy.ones(1, dtype=numpy.float64)
+ self.mu_wd = numpy.ones(1, dtype=numpy.float64)
+ self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0
self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLi…
self.kappa = numpy.zeros(1, dtype=numpy.float64)
self.db = numpy.zeros(1, dtype=numpy.float64)
@@ -70,19 +74,16 @@ class Spherebin:
# Wall data
self.nw = numpy.ones(1, dtype=numpy.uint32) * nw
self.wmode = numpy.zeros(self.nw, dtype=numpy.int32)
+
self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float64).reshape(s…
- self.w_n[nw-1,nd-1] = -1.0
+ if (self.nw > 0):
+ self.w_n[0,2] = -1.0
self.w_x = numpy.ones(self.nw, dtype=numpy.float64)
self.w_m = numpy.zeros(self.nw, dtype=numpy.float64)
self.w_vel = numpy.zeros(self.nw, dtype=numpy.float64)
self.w_force = numpy.zeros(self.nw, dtype=numpy.float64)
self.w_devs = numpy.zeros(self.nw, dtype=numpy.float64)
- # x- and y-boundary behavior
- self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3
- self.gamma_wt = numpy.ones(1, dtype=numpy.float64) * 1.0e3
- self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1.0e3
-
# Compare the values of two Spherebin objects, and check
# whether the values are identical
def __cmp__(self, other):
@@ -136,8 +137,7 @@ class Spherebin:
(self.w_force == other.w_force).all() and\
(self.w_devs == other.w_devs).all() and\
self.gamma_wn == other.gamma_wn and\
- self.gamma_wt == other.gamma_wt and\
- self.gamma_wr == other.gamma_wr\
+ self.gamma_wt == other.gamma_wt\
).all() == True):
return 0 # All equal
else:
@@ -224,6 +224,10 @@ class Spherebin:
self.mu_s = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.mu_d = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.mu_r = numpy.fromfile(fh, dtype=numpy.float64, count=1)
+ self.gamma_wn = numpy.fromfile(fh, dtype=numpy.float64, count=1)
+ self.gamma_wt = numpy.fromfile(fh, dtype=numpy.float64, count=1)
+ self.mu_ws = numpy.fromfile(fh, dtype=numpy.float64, count=1)
+ self.mu_wd = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.rho = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.contactmodel = numpy.fromfile(fh, dtype=numpy.uint32, count=1)
self.kappa = numpy.fromfile(fh, dtype=numpy.float64, count=1)
@@ -250,10 +254,6 @@ class Spherebin:
self.w_force = numpy.fromfile(fh, dtype=numpy.float64, count=1)
self.w_devs = numpy.fromfile(fh, dtype=numpy.float64, count=1)
- # x- and y-boundary behavior
- self.gamma_wn = numpy.fromfile(fh, dtype=numpy.float64, count=1)
- self.gamma_wt = numpy.fromfile(fh, dtype=numpy.float64, count=1)
- self.gamma_wr = numpy.fromfile(fh, dtype=numpy.float64, count=1)
fh.close()
@@ -325,6 +325,10 @@ class Spherebin:
fh.write(self.mu_s.astype(numpy.float64))
fh.write(self.mu_d.astype(numpy.float64))
fh.write(self.mu_r.astype(numpy.float64))
+ fh.write(self.gamma_wn.astype(numpy.float64))
+ fh.write(self.gamma_wt.astype(numpy.float64))
+ fh.write(self.mu_ws.astype(numpy.float64))
+ fh.write(self.mu_wd.astype(numpy.float64))
fh.write(self.rho.astype(numpy.float64))
fh.write(self.contactmodel.astype(numpy.uint32))
fh.write(self.kappa.astype(numpy.float64))
@@ -343,11 +347,6 @@ class Spherebin:
fh.write(self.w_force[i].astype(numpy.float64))
fh.write(self.w_devs[i].astype(numpy.float64))
- # x- and y-boundary behavior
- fh.write(self.gamma_wn.astype(numpy.float64))
- fh.write(self.gamma_wt.astype(numpy.float64))
- fh.write(self.gamma_wr.astype(numpy.float64))
-
fh.close()
finally:
@@ -633,14 +632,15 @@ class Spherebin:
self.L = self.num * cellsize
# Initialize upper wall
- self.wmode[0] = 0
- self.w_n[0,2] = -1.0
- self.w_x[0] = self.L[2]
- self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
- self.w_vel[0] = 0.0
- self.w_force[0] = 0.0
- self.w_devs[0] = 0.0
- self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
+ if (self.nw > 0):
+ self.wmode[0] = 0
+ self.w_n[0,2] = -1.0
+ self.w_x[0] = self.L[2]
+ self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3
+ self.w_vel[0] = 0.0
+ self.w_force[0] = 0.0
+ self.w_devs[0] = 0.0
+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1
# Adjust grid and upper wall for consolidation under deviatoric stress
def consolidate(self, deviatoric_stress = 10e3,
@@ -755,8 +755,6 @@ class Spherebin:
# Set wall viscosities to zero
self.gamma_wn[0] = 0.0
self.gamma_wt[0] = 0.0
- self.gamma_wr[0] = 0.0
-
def initTemporal(self, total,
@@ -792,7 +790,6 @@ class Spherebin:
gamma_r = 0,
gamma_wn = 1e3,
gamma_wt = 1e3,
- gamma_wr = 2e3,
capillaryCohesion = 0):
""" Initialize particle parameters to default values.
Radii must be set prior to calling this function.
@@ -837,8 +834,6 @@ class Spherebin:
# Wall viscosities
self.gamma_wn[0] = gamma_wn # normal
self.gamma_wt[0] = gamma_wt # sliding
- self.gamma_wr[0] = gamma_wr # rolling
-
### Parameters related to capillary bonds
diff --git a/python/tests.py b/python/tests.py
@@ -12,17 +12,13 @@ def compare(first, second, string):
print("### Input/output tests ###")
# Generate data in python
-orig = Spherebin(100)
+orig = Spherebin(np = 100, nw = 0)
orig.generateRadii()
orig.defaultParams()
-orig.initRandomGridPos()
-orig.initTemporal(total = 0.0, current = 0.0)
-orig.xysum = numpy.ones(orig.np*2, dtype=numpy.float64).reshape(orig.np, 2) * 1
-orig.vel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, o…
-orig.force = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np,…
-orig.angpos = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np…
-orig.angvel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np…
-orig.torque = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np…
+orig.initRandomGridPos(g = numpy.zeros(orig.nd))
+orig.initTemporal(current = 0.0, total = 0.0)
+orig.time_total = 2.0*orig.time_dt;
+orig.time_file_dt = orig.time_dt;
orig.writebin("orig.bin", verbose=False)
# Test Python IO routines
@@ -36,4 +32,10 @@ cpp = Spherebin()
cpp.readbin("../output/orig.output0.bin", verbose=False)
compare(orig, cpp, "C++ IO: ")
+# Test CUDA IO routines
+cuda = Spherebin()
+cuda.readbin("../output/orig.output1.bin", verbose=False)
+cuda.time_current = orig.time_current
+cuda.time_step_count = orig.time_step_count
+compare(orig, cuda, "CUDA IO: ")
diff --git a/src/Makefile b/src/Makefile
@@ -27,8 +27,8 @@ NVCCFLAGS=--use_fast_math -O3 -m64 -gencode=arch=compute_20,…
# Debugable code? Beware that enabling this option will
# considerably slow down the execution.
-CCFLAGS=-g -O0 -Wall
-NVCCFLAGS=-g -G -O0 -m64 -gencode=arch=compute_20,code=\"sm_20,compute_20\" -X…
+#CCFLAGS=-g -O0 -Wall
+#NVCCFLAGS=-g -G -O0 -m64 -gencode=arch=compute_20,code=\"sm_20,compute_20\" -…
DATE=`date +'%Y.%m.%d-%H:%M:%S'`
BACKUPNAME=sphere.$(DATE).tar.gz
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh
@@ -91,8 +91,8 @@ __device__ void findAndProcessContactsInCell(int3 targetCell,
Float4* dev_angvel_sorted,
unsigned int* dev_cellStart,
unsigned int* dev_cellEnd,
- Float4* dev_w_nx,
- Float4* dev_w_mvfd)
+ Float4* dev_walls_nx,
+ Float4* dev_walls_mvfd)
//uint4 bonds)
{
@@ -382,9 +382,9 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
Float* dev_es,
Float* dev_ev,
Float* dev_p,
- Float4* dev_w_nx,
- Float4* dev_w_mvfd,
- Float* dev_w_force, //uint4* dev_bonds_sorted,
+ Float4* dev_walls_nx,
+ Float4* dev_walls_mvfd,
+ Float* dev_walls_force_pp, //uint4* dev_bonds_sorted,
unsigned int* dev_contacts,
Float4* dev_distmod,
Float4* dev_delta_t)
@@ -399,9 +399,6 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
Float4 x_a = dev_x_sorted[idx_a];
Float radius_a = x_a.w;
- // Fetch wall data in global read
- Float4 w_up_nx = dev_w_nx[0];
- Float4 w_up_mvfd = dev_w_mvfd[0];
// Fetch world dimensions in constant memory read
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0],
@@ -411,6 +408,17 @@ __global__ void interact(unsigned int* dev_gridParticleIn…
devC_grid.L[1],
devC_grid.L[2]);
+ // Fetch wall data in global read
+ Float4 w_up_nx;
+ Float4 w_up_mvfd;
+ if (devC_nw > 0) {
+ w_up_nx = dev_walls_nx[0];
+ w_up_mvfd = dev_walls_mvfd[0];
+ } else {
+ w_up_nx = MAKE_FLOAT4(0.0f, 0.0f, -1.0f, L.z);
+ w_up_mvfd = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f);
+ }
+
// Index of particle which is bonded to particle A.
// The index is equal to the particle no (p.np)
// if particle A is bond-less.
@@ -526,7 +534,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
dev_x_sorted,
dev_vel_sorted, dev_angvel_sorted,
dev_cellStart, dev_cellEnd,
- dev_w_nx, dev_w_mvfd);
+ dev_walls_nx, dev_walls_mvfd);
}
}
}
@@ -629,7 +637,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd…
dev_es[orig_idx] += es_dot * devC_dt;
dev_ev[orig_idx] += ev_dot * devC_dt;
dev_p[orig_idx] = p;
- dev_w_force[orig_idx] = w_force;
+ if (devC_nw > 0)
+ dev_walls_force_pp[orig_idx] = w_force;
}
} // End of interact(...)
diff --git a/src/datatypes.h b/src/datatypes.h
@@ -95,7 +95,6 @@ struct Walls {
int wmode[MAXWALLS]; // Wall modes
Float4* nx; // Wall normal and position
Float4* mvfd; // Wall mass, velocity, force and dev. stress
- Float* force; // Resulting forces on walls per particle
};
#endif
diff --git a/src/device.cu b/src/device.cu
@@ -220,38 +220,44 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
if (verbose == 1)
std::cout << " Allocating global device memory: ";
- // Particle arrays
- cudaMalloc((void**)&dev_k->x, memSizeF4);
- cudaMalloc((void**)&dev_sort->x_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k->vel, memSizeF4);
- cudaMalloc((void**)&dev_sort->vel_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k->angvel, memSizeF4);
- cudaMalloc((void**)&dev_sort->angvel_sorted, memSizeF4);
- cudaMalloc((void**)&dev_k->acc, memSizeF4);
k.acc = new Float4[np];
- cudaMalloc((void**)&dev_k->angacc, memSizeF4);
k.angacc = new Float4[np];
- cudaMalloc((void**)&dev_k->force, memSizeF4);
- cudaMalloc((void**)&dev_k->torque, memSizeF4);
- cudaMalloc((void**)&dev_k->angpos, memSizeF4);
- cudaMalloc((void**)&dev_e->es_dot, memSizeF);
- cudaMalloc((void**)&dev_e->ev_dot, memSizeF);
- cudaMalloc((void**)&dev_e->es, memSizeF);
- cudaMalloc((void**)&dev_e->ev, memSizeF);
- cudaMalloc((void**)&dev_e->p, memSizeF);
- // Cell-related arrays
- cudaMalloc((void**)&dev_sort->gridParticleCellID, sizeof(unsigned int)*np);
- cudaMalloc((void**)&dev_sort->gridParticleIndex, sizeof(unsigned int)*np);
- cudaMalloc((void**)&dev_sort->cellStart, sizeof(unsigned int)*grid.num[0]*gr…
- cudaMalloc((void**)&dev_sort->cellEnd, sizeof(unsigned int)*grid.num[0]*grid…
+ // Kinematics arrays
+ cudaMalloc((void**)&dev_x, memSizeF4);
+ cudaMalloc((void**)&dev_xysum, memSizeF4);
+ cudaMalloc((void**)&dev_vel, memSizeF4);
+ cudaMalloc((void**)&dev_acc, memSizeF4);
+ cudaMalloc((void**)&dev_force, memSizeF4);
+ cudaMalloc((void**)&dev_angpos, memSizeF4);
+ cudaMalloc((void**)&dev_angvel, memSizeF4);
+ cudaMalloc((void**)&dev_angacc, memSizeF4);
+ cudaMalloc((void**)&dev_torque, memSizeF4);
// Particle contact bookkeeping arrays
- cudaMalloc((void**)&dev_k->contacts, sizeof(unsigned int)*np*NC); // Max NC …
- cudaMalloc((void**)&dev_k->distmod, sizeof(Float4)*np*NC);
- cudaMalloc((void**)&dev_k->delta_t, sizeof(Float4)*np*NC);
+ cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*np*NC); // Max NC con…
+ cudaMalloc((void**)&dev_distmod, memSizeF4*NC);
+ cudaMalloc((void**)&dev_delta_t, memSizeF4*NC);
+
+ // Sorted arrays
+ cudaMalloc((void**)&dev_x_sorted, memSizeF4);
+ cudaMalloc((void**)&dev_vel_sorted, memSizeF4);
+ cudaMalloc((void**)&dev_angvel_sorted, memSizeF4);
+
+ // Energy arrays
+ cudaMalloc((void**)&dev_es_dot, memSizeF);
+ cudaMalloc((void**)&dev_ev_dot, memSizeF);
+ cudaMalloc((void**)&dev_es, memSizeF);
+ cudaMalloc((void**)&dev_ev, memSizeF);
+ cudaMalloc((void**)&dev_p, memSizeF);
+
+ // Cell-related arrays
+ cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*np);
+ cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*np);
+ cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num…
+ cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1…
- // Host contact bookkeeping arrays
+ // Host contact bookkeeping arrays
k.contacts = new unsigned int[np*NC];
// Initialize contacts lists to np
for (unsigned int i=0; i<(np*NC); ++i)
@@ -260,10 +266,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
k.delta_t = new Float4[np*NC];
// Wall arrays
- cudaMalloc((void**)&dev_walls->nx, sizeof(Float4)*walls.nw);
- cudaMalloc((void**)&dev_walls->mvfd, sizeof(Float4)*walls.nw);
- cudaMalloc((void**)&dev_walls->force, sizeof(Float)*walls.nw*np);
- // dev_w_force_partial allocated later
+ cudaMalloc((void**)&dev_walls_wmode, sizeof(int)*walls.nw);
+ cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw);
+ cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw);
+ cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np);
+ // dev_walls_force_partial allocated later
checkForCudaErrors("End of allocateGlobalDeviceMemory");
if (verbose == 1)
@@ -275,36 +282,40 @@ __host__ void DEM::freeGlobalDeviceMemory()
if (verbose == 1)
printf("\nLiberating device memory: ");
// Particle arrays
- cudaFree(dev_k->x);
- cudaFree(dev_sort->x_sorted);
- cudaFree(dev_k->vel);
- cudaFree(dev_sort->vel_sorted);
- cudaFree(dev_k->angvel);
- cudaFree(dev_sort->angvel_sorted);
- cudaFree(dev_k->acc);
- cudaFree(dev_k->angacc);
- cudaFree(dev_k->force);
- cudaFree(dev_k->torque);
- cudaFree(dev_k->angpos);
- cudaFree(dev_e->es_dot);
- cudaFree(dev_e->ev_dot);
- cudaFree(dev_e->es);
- cudaFree(dev_e->ev);
- cudaFree(dev_e->p);
- cudaFree(dev_k->contacts);
- cudaFree(dev_k->distmod);
- cudaFree(dev_k->delta_t);
+ cudaFree(dev_x);
+ cudaFree(dev_xysum);
+ cudaFree(dev_vel);
+ cudaFree(dev_acc);
+ cudaFree(dev_force);
+ cudaFree(dev_angpos);
+ cudaFree(dev_angvel);
+ cudaFree(dev_angacc);
+ cudaFree(dev_torque);
+
+ cudaFree(dev_contacts);
+ cudaFree(dev_distmod);
+ cudaFree(dev_delta_t);
+
+ cudaFree(dev_es_dot);
+ cudaFree(dev_es);
+ cudaFree(dev_ev_dot);
+ cudaFree(dev_ev);
+ cudaFree(dev_p);
+
+ cudaFree(dev_x_sorted);
+ cudaFree(dev_vel_sorted);
+ cudaFree(dev_angvel_sorted);
// Cell-related arrays
- cudaFree(dev_sort->gridParticleIndex);
- cudaFree(dev_sort->cellStart);
- cudaFree(dev_sort->cellEnd);
+ cudaFree(dev_gridParticleIndex);
+ cudaFree(dev_cellStart);
+ cudaFree(dev_cellEnd);
// Wall arrays
- cudaFree(dev_walls->nx);
- cudaFree(dev_walls->mvfd);
- cudaFree(dev_walls->force);
- //cudaFree(dev_w_force_partial);
+ cudaFree(dev_walls_nx);
+ cudaFree(dev_walls_mvfd);
+ cudaFree(dev_walls_force_partial);
+ cudaFree(dev_walls_force_pp);
if (verbose == 1)
printf("Done\n");
@@ -321,55 +332,53 @@ __host__ void DEM::transferToGlobalDeviceMemory()
unsigned int memSizeF4 = sizeof(Float4) * np;
// Copy static-size structure data from host to global device memory
- cudaMemcpy(dev_time, &time, sizeof(Time), cudaMemcpyHostToDevice);
+ //cudaMemcpy(dev_time, &time, sizeof(Time), cudaMemcpyHostToDevice);
// Kinematic particle values
- cudaMemcpy( dev_k->x, k.x,
+ cudaMemcpy( dev_x, k.x,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->xysum, k.xysum,
+ cudaMemcpy( dev_xysum, k.xysum,
sizeof(Float2)*np, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->vel, k.vel,
+ cudaMemcpy( dev_vel, k.vel,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->acc, k.acc,
+ cudaMemcpy( dev_acc, k.acc,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->force, k.force,
+ cudaMemcpy( dev_force, k.force,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->angpos, k.angpos,
+ cudaMemcpy( dev_angpos, k.angpos,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->angvel, k.angvel,
+ cudaMemcpy( dev_angvel, k.angvel,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->angacc, k.angacc,
+ cudaMemcpy( dev_angacc, k.angacc,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->torque, k.torque,
+ cudaMemcpy( dev_torque, k.torque,
memSizeF4, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->contacts, k.contacts,
+ cudaMemcpy( dev_contacts, k.contacts,
sizeof(unsigned int)*np*NC, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->distmod, k.distmod,
+ cudaMemcpy( dev_distmod, k.distmod,
memSizeF4*NC, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_k->delta_t, k.delta_t,
+ cudaMemcpy( dev_delta_t, k.delta_t,
memSizeF4*NC, cudaMemcpyHostToDevice);
// Individual particle energy values
- cudaMemcpy( dev_e->es_dot, e.es_dot,
+ cudaMemcpy( dev_es_dot, e.es_dot,
memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_e->es, e.es,
+ cudaMemcpy( dev_es, e.es,
memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_e->ev_dot, e.ev_dot,
+ cudaMemcpy( dev_ev_dot, e.ev_dot,
memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_e->ev, e.ev,
+ cudaMemcpy( dev_ev, e.ev,
memSizeF, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_e->p, e.p,
+ cudaMemcpy( dev_p, e.p,
memSizeF, cudaMemcpyHostToDevice);
// Wall parameters
- cudaMemcpy( dev_walls->wmode, walls.wmode,
+ cudaMemcpy( dev_walls_wmode, walls.wmode,
sizeof(int)*walls.nw, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_walls->nx, walls.nx,
+ cudaMemcpy( dev_walls_nx, walls.nx,
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_walls->mvfd, walls.mvfd,
+ cudaMemcpy( dev_walls_mvfd, walls.mvfd,
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice);
- cudaMemcpy( dev_walls->force, walls.force,
- memSizeF*walls.nw, cudaMemcpyHostToDevice);
checkForCudaErrors("End of transferToGlobalDeviceMemory");
if (verbose == 1)
@@ -385,55 +394,53 @@ __host__ void DEM::transferFromGlobalDeviceMemory()
unsigned int memSizeF4 = sizeof(Float4) * np;
// Copy static-size structure data from host to global device memory
- cudaMemcpy(&time, dev_time, sizeof(Time), cudaMemcpyDeviceToHost);
+ //cudaMemcpy(&time, dev_time, sizeof(Time), cudaMemcpyDeviceToHost);
// Kinematic particle values
- cudaMemcpy( k.x, dev_k->x,
+ cudaMemcpy( k.x, dev_x,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.xysum, dev_k->xysum,
+ cudaMemcpy( k.xysum, dev_xysum,
sizeof(Float2)*np, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.vel, dev_k->vel,
+ cudaMemcpy( k.vel, dev_vel,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.acc, dev_k->acc,
+ cudaMemcpy( k.acc, dev_acc,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.force, dev_k->force,
+ cudaMemcpy( k.force, dev_force,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.angpos, dev_k->angpos,
+ cudaMemcpy( k.angpos, dev_angpos,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.angvel, dev_k->angvel,
+ cudaMemcpy( k.angvel, dev_angvel,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.angacc, dev_k->angacc,
+ cudaMemcpy( k.angacc, dev_angacc,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.torque, dev_k->torque,
+ cudaMemcpy( k.torque, dev_torque,
memSizeF4, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.contacts, dev_k->contacts,
+ cudaMemcpy( k.contacts, dev_contacts,
sizeof(unsigned int)*np*NC, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.distmod, dev_k->distmod,
+ cudaMemcpy( k.distmod, dev_distmod,
memSizeF4*NC, cudaMemcpyDeviceToHost);
- cudaMemcpy( k.delta_t, dev_k->delta_t,
+ cudaMemcpy( k.delta_t, dev_delta_t,
memSizeF4*NC, cudaMemcpyDeviceToHost);
// Individual particle energy values
- cudaMemcpy( e.es_dot, dev_e->es_dot,
+ cudaMemcpy( e.es_dot, dev_es_dot,
memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy( e.es, dev_e->es,
+ cudaMemcpy( e.es, dev_es,
memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy( e.ev_dot, dev_e->ev_dot,
+ cudaMemcpy( e.ev_dot, dev_ev_dot,
memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy( e.ev, dev_e->ev,
+ cudaMemcpy( e.ev, dev_ev,
memSizeF, cudaMemcpyDeviceToHost);
- cudaMemcpy( e.p, dev_e->p,
+ cudaMemcpy( e.p, dev_p,
memSizeF, cudaMemcpyDeviceToHost);
// Wall parameters
- cudaMemcpy( walls.wmode, dev_walls->wmode,
+ cudaMemcpy( walls.wmode, dev_walls_wmode,
sizeof(int)*walls.nw, cudaMemcpyDeviceToHost);
- cudaMemcpy( walls.nx, dev_walls->nx,
+ cudaMemcpy( walls.nx, dev_walls_nx,
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);
- cudaMemcpy( walls.mvfd, dev_walls->mvfd,
+ cudaMemcpy( walls.mvfd, dev_walls_mvfd,
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost);
- cudaMemcpy( walls.force, dev_walls->force,
- memSizeF*walls.nw, cudaMemcpyDeviceToHost);
checkForCudaErrors("End of transferFromGlobalDeviceMemory");
}
@@ -476,8 +483,8 @@ __host__ void DEM::startTime()
// Shared memory per block
unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1);
- Float* dev_w_force_partial;
- cudaMalloc((void**)&dev_w_force_partial, sizeof(Float)*dimGrid.x);
+ // Pre-sum of force per wall
+ cudaMalloc((void**)&dev_walls_force_partial, sizeof(Float)*dimGrid.x);
// Report to stdout
if (verbose == 1) {
@@ -562,9 +569,9 @@ __host__ void DEM::startTime()
// in the fine, uniform and homogenous grid.
if (PROFILING == 1)
startTimer(&kernel_tic);
- calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort->gridParticleCellID,
- dev_sort->gridParticleIndex,
- dev_k->x);
+ calcParticleCellID<<<dimGrid, dimBlock>>>(dev_gridParticleCellID,
+ dev_gridParticleIndex,
+ dev_x);
// Synchronization point
cudaThreadSynchronize();
@@ -576,9 +583,9 @@ __host__ void DEM::startTime()
// Sort particle (key, particle ID) pairs by hash key with Thrust radix so…
if (PROFILING == 1)
startTimer(&kernel_tic);
- thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort->gridParticleCellID),
- thrust::device_ptr<uint>(dev_sort->gridParticleCellID …
- thrust::device_ptr<uint>(dev_sort->gridParticleIndex));
+ thrust::sort_by_key(thrust::device_ptr<uint>(dev_gridParticleCellID),
+ thrust::device_ptr<uint>(dev_gridParticleCellID + np),
+ thrust::device_ptr<uint>(dev_gridParticleIndex));
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implic…
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_thrustsort);
@@ -588,7 +595,7 @@ __host__ void DEM::startTime()
// Zero cell array values by setting cellStart to its highest possible val…
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int
// is 4294967295.
- cudaMemset(dev_sort->cellStart, 0xffffffff,
+ cudaMemset(dev_cellStart, 0xffffffff,
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
cudaThreadSynchronize();
checkForCudaErrors("Post cudaMemset");
@@ -597,15 +604,15 @@ __host__ void DEM::startTime()
// coherent memory access. Save ordered configurations in new arrays (*_so…
if (PROFILING == 1)
startTimer(&kernel_tic);
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort->cellStart,
- dev_sort->cellEnd,
- dev_sort->gridParticleCellI…
- dev_sort->gridParticleIndex,
- dev_k->x, dev_k->vel,
- dev_k->angvel,
- dev_sort->x_sorted,
- dev_sort->vel_sorted,
- dev_sort->angvel_sorted);
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart,
+ dev_cellEnd,
+ dev_gridParticleCellID,
+ dev_gridParticleIndex,
+ dev_x, dev_vel,
+ dev_angvel,
+ dev_x_sorted,
+ dev_vel_sorted,
+ dev_angvel_sorted);
// Synchronization point
cudaThreadSynchronize();
@@ -620,12 +627,12 @@ __host__ void DEM::startTime()
// For each particle: Search contacts in neighbor cells
if (PROFILING == 1)
startTimer(&kernel_tic);
- topology<<<dimGrid, dimBlock>>>(dev_sort->cellStart,
- dev_sort->cellEnd,
- dev_sort->gridParticleIndex,
- dev_sort->x_sorted,
- dev_k->contacts,
- dev_k->distmod);
+ topology<<<dimGrid, dimBlock>>>(dev_cellStart,
+ dev_cellEnd,
+ dev_gridParticleIndex,
+ dev_x_sorted,
+ dev_contacts,
+ dev_distmod);
// Synchronization point
@@ -639,28 +646,28 @@ __host__ void DEM::startTime()
// For each particle: Process collisions and compute resulting forces.
if (PROFILING == 1)
startTimer(&kernel_tic);
- interact<<<dimGrid, dimBlock>>>(dev_sort->gridParticleIndex,
- dev_sort->cellStart,
- dev_sort->cellEnd,
- dev_k->x,
- dev_sort->x_sorted,
- dev_sort->vel_sorted,
- dev_sort->angvel_sorted,
- dev_k->vel,
- dev_k->angvel,
- dev_k->force,
- dev_k->torque,
- dev_e->es_dot,
- dev_e->ev_dot,
- dev_e->es,
- dev_e->ev,
- dev_e->p,
- dev_walls->nx,
- dev_walls->mvfd,
- dev_walls->force,
- dev_k->contacts,
- dev_k->distmod,
- dev_k->delta_t);
+ interact<<<dimGrid, dimBlock>>>(dev_gridParticleIndex,
+ dev_cellStart,
+ dev_cellEnd,
+ dev_x,
+ dev_x_sorted,
+ dev_vel_sorted,
+ dev_angvel_sorted,
+ dev_vel,
+ dev_angvel,
+ dev_force,
+ dev_torque,
+ dev_es_dot,
+ dev_ev_dot,
+ dev_es,
+ dev_ev,
+ dev_p,
+ dev_walls_nx,
+ dev_walls_mvfd,
+ dev_walls_force_pp,
+ dev_contacts,
+ dev_distmod,
+ dev_delta_t);
// Synchronization point
@@ -672,17 +679,20 @@ __host__ void DEM::startTime()
// Update particle kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrate<<<dimGrid, dimBlock>>>(dev_sort->x_sorted,
- dev_sort->vel_sorted,
- dev_sort->angvel_sorted,
- dev_k->x,
- dev_k->vel,
- dev_k->angvel,
- dev_k->force,
- dev_k->torque,
- dev_k->angpos,
- dev_k->xysum,
- dev_sort->gridParticleIndex);
+ integrate<<<dimGrid, dimBlock>>>(dev_x_sorted,
+ dev_vel_sorted,
+ dev_angvel_sorted,
+ dev_x,
+ dev_vel,
+ dev_angvel,
+ dev_force,
+ dev_torque,
+ dev_angpos,
+ dev_xysum,
+ dev_gridParticleIndex);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post integrate");
+
cudaThreadSynchronize();
if (PROFILING == 1)
@@ -691,20 +701,26 @@ __host__ void DEM::startTime()
// Summation of forces on wall
if (PROFILING == 1)
startTimer(&kernel_tic);
- summation<<<dimGrid, dimBlock>>>(dev_walls->force, dev_w_force_partial);
-
+ if (walls.nw > 0) {
+ summation<<<dimGrid, dimBlock>>>(dev_walls_force_pp,
+ dev_walls_force_partial);
+ }
// Synchronization point
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_summation);
- checkForCudaErrors("Post integrate & wall force summation");
+ checkForCudaErrors("Post wall force summation");
// Update wall kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrateWalls<<< 1, walls.nw>>>(dev_walls,
- dev_w_force_partial,
- blocksPerGrid);
+ if (walls.nw > 0) {
+ integrateWalls<<< 1, walls.nw>>>(dev_walls_nx,
+ dev_walls_mvfd,
+ dev_walls_wmode,
+ dev_walls_force_partial,
+ blocksPerGrid);
+ }
// Synchronization point
cudaThreadSynchronize();
diff --git a/src/file_io.cpp b/src/file_io.cpp
@@ -189,7 +189,6 @@ void DEM::readbin(const char *target)
// Wall mass (x), velocity (y), force (z), and deviatoric stress (w)
walls.nx = new Float4[walls.nw];
walls.mvfd = new Float4[walls.nw];
- walls.force = new Float[walls.nw*np];
ifs.read(as_bytes(walls.wmode), sizeof(walls.wmode));
for (i = 0; i<walls.nw; ++i) {
diff --git a/src/integration.cuh b/src/integration.cuh
@@ -175,8 +175,10 @@ __global__ void summation(Float* in, Float *out)
}
// Update wall positions
-__global__ void integrateWalls(Walls* dev_walls,
- Float* dev_w_force_partial,
+__global__ void integrateWalls(Float4* dev_walls_nx,
+ Float4* dev_walls_mvfd,
+ int* dev_walls_wmode,
+ Float* dev_walls_force_partial,
unsigned int blocksPerGrid)
{
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id
@@ -185,9 +187,9 @@ __global__ void integrateWalls(Walls* dev_walls,
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
- Float4 w_nx = dev_walls->nx[idx];
- Float4 w_mvfd = dev_walls->mvfd[idx];
- int wmode = dev_walls->wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
+ Float4 w_nx = dev_walls_nx[idx];
+ Float4 w_mvfd = dev_walls_mvfd[idx];
+ int wmode = dev_walls_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel
Float acc;
if (wmode == 0) // Wall fixed: do nothing
@@ -196,7 +198,7 @@ __global__ void integrateWalls(Walls* dev_walls,
// Find the final sum of forces on wall
w_mvfd.z = 0.0f;
for (int i=0; i<blocksPerGrid; ++i) {
- w_mvfd.z += dev_w_force_partial[i];
+ w_mvfd.z += dev_walls_force_partial[i];
}
Float dt = devC_dt;
@@ -222,8 +224,8 @@ __global__ void integrateWalls(Walls* dev_walls,
w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f;
// Store data in global memory
- dev_walls->nx[idx] = w_nx;
- dev_walls->mvfd[idx] = w_mvfd;
+ dev_walls_nx[idx] = w_nx;
+ dev_walls_mvfd[idx] = w_mvfd;
}
} // End of integrateWalls(...)
diff --git a/src/sphere.cpp b/src/sphere.cpp
@@ -207,16 +207,18 @@ void DEM::reportValues()
else
cout << " - 1st and 2nd dim. boundaries: Visco-frictional walls\n";
- cout << " - Top BC: ";
- if (walls.wmode[0] == 0)
- cout << "Fixed\n";
- else if (walls.wmode[0] == 1)
- cout << "Deviatoric stress\n";
- else if (walls.wmode[0] == 2)
- cout << "Velocity\n";
- else {
- cerr << "Top BC not recognized!\n";
- exit(1);
+ if (walls.nw > 0) {
+ cout << " - Top BC: ";
+ if (walls.wmode[0] == 0)
+ cout << "Fixed\n";
+ else if (walls.wmode[0] == 1)
+ cout << "Deviatoric stress\n";
+ else if (walls.wmode[0] == 2)
+ cout << "Velocity\n";
+ else {
+ cerr << "Top BC not recognized!\n";
+ exit(1);
+ }
}
cout << " - Grid: ";
diff --git a/src/sphere.h b/src/sphere.h
@@ -25,13 +25,14 @@ class DEM {
// Number of particles
unsigned int np;
+ // HOST STRUCTURES
// Structure containing individual particle kinematics
Kinematics k; // host
- Kinematics *dev_k; // device
+ //Kinematics *dev_k; // device
// Structure containing energy values
Energies e; // host
- Energies *dev_e; // device
+ //Energies *dev_e; // device
// Structure of global parameters
Params params; // host
@@ -40,15 +41,46 @@ class DEM {
Grid grid; // host
// Structure containing sorting arrays
- Sorting *dev_sort; // device
+ //Sorting *dev_sort; // device
// Structure of temporal parameters
Time time; // host
- Time *dev_time; // device
+ //Time *dev_time; // device
// Structure of wall parameters
Walls walls; // host
- Walls *dev_walls; // device
+ //Walls *dev_walls; // device
+
+ // DEVICE ARRAYS
+ Float4 *dev_x;
+ Float2 *dev_xysum;
+ Float4 *dev_vel;
+ Float4 *dev_acc;
+ Float4 *dev_force;
+ Float4 *dev_angpos;
+ Float4 *dev_angvel;
+ Float4 *dev_angacc;
+ Float4 *dev_torque;
+ unsigned int *dev_contacts;
+ Float4 *dev_distmod;
+ Float4 *dev_delta_t;
+ Float *dev_es_dot;
+ Float *dev_es;
+ Float *dev_ev_dot;
+ Float *dev_ev;
+ Float *dev_p;
+ Float4 *dev_x_sorted;
+ Float4 *dev_vel_sorted;
+ Float4 *dev_angvel_sorted;
+ unsigned int *dev_gridParticleCellID;
+ unsigned int *dev_gridParticleIndex;
+ unsigned int *dev_cellStart;
+ unsigned int *dev_cellEnd;
+ int *dev_walls_wmode;
+ Float4 *dev_walls_nx; // normal, pos.
+ Float4 *dev_walls_mvfd; // Mass, velocity, force, dev. stress
+ Float *dev_walls_force_partial; // Pre-sum per wall
+ Float *dev_walls_force_pp; // Force per particle per wall
// GPU initialization, must be called before startTime()
void initializeGPU(void);
@@ -101,10 +133,11 @@ class DEM {
void startTime(void);
// Render particles using raytracing
- // render(const char *target,
- // Float3 lookat,
- // Float3 eye,
- // Float focalLength);
+ void render(const char *target,
+ const Float3 lookat,
+ const Float3 eye,
+ const Float focalLength = 1.0,
+ const int method = 1);
};
diff --git a/src/utility.cu b/src/utility.cu
@@ -11,7 +11,7 @@ void checkForCudaErrors(const char* checkpoint_description)
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
std::cerr << "\nCuda error detected, checkpoint: " << checkpoint_descripti…
- << "\nError string: " << cudaGetErrorString(err) << "\n";
+ << "\nError string: " << cudaGetErrorString(err) << std::endl;
exit(EXIT_FAILURE);
}
}
@@ -22,7 +22,7 @@ void checkForCudaErrors(const char* checkpoint_description, …
if (err != cudaSuccess) {
std::cerr << "\nCuda error detected, checkpoint: " << checkpoint_descripti…
<< "\nduring iteration " << iteration
- << "\nError string: " << cudaGetErrorString(err) << "\n";
+ << "\nError string: " << cudaGetErrorString(err) << std::endl;
exit(EXIT_FAILURE);
}
}
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.