IO now works in Python, C++ and CUDA, see python/tests.py. Now working on impl… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 472d81e85b29f2ad92df2031ff56a9ac2cdf8699 | |
parent c494a43f245cf27c2c498ce27636347fbbb1a909 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Fri, 2 Nov 2012 09:29:51 +0100 | |
IO now works in Python, C++ and CUDA, see python/tests.py. Now working on imple… | |
Diffstat: | |
M python/sphere.py | 77 +++++++++++++++--------------… | |
M python/tests.py | 20 +++++++++++--------- | |
M src/Makefile | 4 ++-- | |
M src/contactsearch.cuh | 29 +++++++++++++++++++---------- | |
M src/datatypes.h | 1 - | |
M src/device.cu | 346 ++++++++++++++++-------------… | |
M src/file_io.cpp | 1 - | |
M src/integration.cuh | 18 ++++++++++-------- | |
M src/sphere.cpp | 22 ++++++++++++---------- | |
M src/sphere.h | 51 +++++++++++++++++++++++++----… | |
M src/utility.cu | 4 ++-- | |
11 files changed, 315 insertions(+), 258 deletions(-) | |
--- | |
diff --git a/python/sphere.py b/python/sphere.py | |
@@ -51,17 +51,21 @@ class Spherebin: | |
self.ev = numpy.zeros(self.np, dtype=numpy.float64) | |
self.p = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.g = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64) | |
- self.k_n = numpy.ones(1, dtype=numpy.float64) * 1.16e9 | |
- self.k_t = numpy.ones(1, dtype=numpy.float64) * 1.16e9 | |
- self.k_r = numpy.zeros(1, dtype=numpy.float64) | |
- self.gamma_n = numpy.zeros(1, dtype=numpy.float64) | |
- self.gamma_t = numpy.zeros(1, dtype=numpy.float64) | |
- self.gamma_r = numpy.zeros(1, dtype=numpy.float64) | |
- self.mu_s = numpy.ones(1, dtype=numpy.float64) | |
- self.mu_d = numpy.ones(1, dtype=numpy.float64) | |
- self.mu_r = numpy.zeros(1, dtype=numpy.float64) | |
- self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0 | |
+ self.g = numpy.array([0.0, 0.0, 0.0], dtype=numpy.float64) | |
+ self.k_n = numpy.ones(1, dtype=numpy.float64) * 1.16e9 | |
+ self.k_t = numpy.ones(1, dtype=numpy.float64) * 1.16e9 | |
+ self.k_r = numpy.zeros(1, dtype=numpy.float64) | |
+ self.gamma_n = numpy.zeros(1, dtype=numpy.float64) | |
+ self.gamma_t = numpy.zeros(1, dtype=numpy.float64) | |
+ self.gamma_r = numpy.zeros(1, dtype=numpy.float64) | |
+ self.mu_s = numpy.ones(1, dtype=numpy.float64) | |
+ self.mu_d = numpy.ones(1, dtype=numpy.float64) | |
+ self.mu_r = numpy.zeros(1, dtype=numpy.float64) | |
+ self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3 | |
+ self.gamma_wt = numpy.ones(1, dtype=numpy.float64) * 1.0e3 | |
+ self.mu_ws = numpy.ones(1, dtype=numpy.float64) | |
+ self.mu_wd = numpy.ones(1, dtype=numpy.float64) | |
+ self.rho = numpy.ones(1, dtype=numpy.float64) * 2600.0 | |
self.contactmodel = numpy.ones(1, dtype=numpy.uint32) * 2 # contactLi… | |
self.kappa = numpy.zeros(1, dtype=numpy.float64) | |
self.db = numpy.zeros(1, dtype=numpy.float64) | |
@@ -70,19 +74,16 @@ class Spherebin: | |
# Wall data | |
self.nw = numpy.ones(1, dtype=numpy.uint32) * nw | |
self.wmode = numpy.zeros(self.nw, dtype=numpy.int32) | |
+ | |
self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float64).reshape(s… | |
- self.w_n[nw-1,nd-1] = -1.0 | |
+ if (self.nw > 0): | |
+ self.w_n[0,2] = -1.0 | |
self.w_x = numpy.ones(self.nw, dtype=numpy.float64) | |
self.w_m = numpy.zeros(self.nw, dtype=numpy.float64) | |
self.w_vel = numpy.zeros(self.nw, dtype=numpy.float64) | |
self.w_force = numpy.zeros(self.nw, dtype=numpy.float64) | |
self.w_devs = numpy.zeros(self.nw, dtype=numpy.float64) | |
- # x- and y-boundary behavior | |
- self.gamma_wn = numpy.ones(1, dtype=numpy.float64) * 1.0e3 | |
- self.gamma_wt = numpy.ones(1, dtype=numpy.float64) * 1.0e3 | |
- self.gamma_wr = numpy.ones(1, dtype=numpy.float64) * 1.0e3 | |
- | |
# Compare the values of two Spherebin objects, and check | |
# whether the values are identical | |
def __cmp__(self, other): | |
@@ -136,8 +137,7 @@ class Spherebin: | |
(self.w_force == other.w_force).all() and\ | |
(self.w_devs == other.w_devs).all() and\ | |
self.gamma_wn == other.gamma_wn and\ | |
- self.gamma_wt == other.gamma_wt and\ | |
- self.gamma_wr == other.gamma_wr\ | |
+ self.gamma_wt == other.gamma_wt\ | |
).all() == True): | |
return 0 # All equal | |
else: | |
@@ -224,6 +224,10 @@ class Spherebin: | |
self.mu_s = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.mu_d = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.mu_r = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.gamma_wn = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.gamma_wt = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.mu_ws = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.mu_wd = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.rho = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.contactmodel = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
self.kappa = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
@@ -250,10 +254,6 @@ class Spherebin: | |
self.w_force = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.w_devs = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
- # x- and y-boundary behavior | |
- self.gamma_wn = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
- self.gamma_wt = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
- self.gamma_wr = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
fh.close() | |
@@ -325,6 +325,10 @@ class Spherebin: | |
fh.write(self.mu_s.astype(numpy.float64)) | |
fh.write(self.mu_d.astype(numpy.float64)) | |
fh.write(self.mu_r.astype(numpy.float64)) | |
+ fh.write(self.gamma_wn.astype(numpy.float64)) | |
+ fh.write(self.gamma_wt.astype(numpy.float64)) | |
+ fh.write(self.mu_ws.astype(numpy.float64)) | |
+ fh.write(self.mu_wd.astype(numpy.float64)) | |
fh.write(self.rho.astype(numpy.float64)) | |
fh.write(self.contactmodel.astype(numpy.uint32)) | |
fh.write(self.kappa.astype(numpy.float64)) | |
@@ -343,11 +347,6 @@ class Spherebin: | |
fh.write(self.w_force[i].astype(numpy.float64)) | |
fh.write(self.w_devs[i].astype(numpy.float64)) | |
- # x- and y-boundary behavior | |
- fh.write(self.gamma_wn.astype(numpy.float64)) | |
- fh.write(self.gamma_wt.astype(numpy.float64)) | |
- fh.write(self.gamma_wr.astype(numpy.float64)) | |
- | |
fh.close() | |
finally: | |
@@ -633,14 +632,15 @@ class Spherebin: | |
self.L = self.num * cellsize | |
# Initialize upper wall | |
- self.wmode[0] = 0 | |
- self.w_n[0,2] = -1.0 | |
- self.w_x[0] = self.L[2] | |
- self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3 | |
- self.w_vel[0] = 0.0 | |
- self.w_force[0] = 0.0 | |
- self.w_devs[0] = 0.0 | |
- self.nw = numpy.ones(1, dtype=numpy.uint32) * 1 | |
+ if (self.nw > 0): | |
+ self.wmode[0] = 0 | |
+ self.w_n[0,2] = -1.0 | |
+ self.w_x[0] = self.L[2] | |
+ self.w_m[0] = self.rho[0] * self.np * math.pi * r_max**3 | |
+ self.w_vel[0] = 0.0 | |
+ self.w_force[0] = 0.0 | |
+ self.w_devs[0] = 0.0 | |
+ self.nw = numpy.ones(1, dtype=numpy.uint32) * 1 | |
# Adjust grid and upper wall for consolidation under deviatoric stress | |
def consolidate(self, deviatoric_stress = 10e3, | |
@@ -755,8 +755,6 @@ class Spherebin: | |
# Set wall viscosities to zero | |
self.gamma_wn[0] = 0.0 | |
self.gamma_wt[0] = 0.0 | |
- self.gamma_wr[0] = 0.0 | |
- | |
def initTemporal(self, total, | |
@@ -792,7 +790,6 @@ class Spherebin: | |
gamma_r = 0, | |
gamma_wn = 1e3, | |
gamma_wt = 1e3, | |
- gamma_wr = 2e3, | |
capillaryCohesion = 0): | |
""" Initialize particle parameters to default values. | |
Radii must be set prior to calling this function. | |
@@ -837,8 +834,6 @@ class Spherebin: | |
# Wall viscosities | |
self.gamma_wn[0] = gamma_wn # normal | |
self.gamma_wt[0] = gamma_wt # sliding | |
- self.gamma_wr[0] = gamma_wr # rolling | |
- | |
### Parameters related to capillary bonds | |
diff --git a/python/tests.py b/python/tests.py | |
@@ -12,17 +12,13 @@ def compare(first, second, string): | |
print("### Input/output tests ###") | |
# Generate data in python | |
-orig = Spherebin(100) | |
+orig = Spherebin(np = 100, nw = 0) | |
orig.generateRadii() | |
orig.defaultParams() | |
-orig.initRandomGridPos() | |
-orig.initTemporal(total = 0.0, current = 0.0) | |
-orig.xysum = numpy.ones(orig.np*2, dtype=numpy.float64).reshape(orig.np, 2) * 1 | |
-orig.vel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np, o… | |
-orig.force = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np,… | |
-orig.angpos = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np… | |
-orig.angvel = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np… | |
-orig.torque = numpy.ones(orig.np*orig.nd, dtype=numpy.float64).reshape(orig.np… | |
+orig.initRandomGridPos(g = numpy.zeros(orig.nd)) | |
+orig.initTemporal(current = 0.0, total = 0.0) | |
+orig.time_total = 2.0*orig.time_dt; | |
+orig.time_file_dt = orig.time_dt; | |
orig.writebin("orig.bin", verbose=False) | |
# Test Python IO routines | |
@@ -36,4 +32,10 @@ cpp = Spherebin() | |
cpp.readbin("../output/orig.output0.bin", verbose=False) | |
compare(orig, cpp, "C++ IO: ") | |
+# Test CUDA IO routines | |
+cuda = Spherebin() | |
+cuda.readbin("../output/orig.output1.bin", verbose=False) | |
+cuda.time_current = orig.time_current | |
+cuda.time_step_count = orig.time_step_count | |
+compare(orig, cuda, "CUDA IO: ") | |
diff --git a/src/Makefile b/src/Makefile | |
@@ -27,8 +27,8 @@ NVCCFLAGS=--use_fast_math -O3 -m64 -gencode=arch=compute_20,… | |
# Debugable code? Beware that enabling this option will | |
# considerably slow down the execution. | |
-CCFLAGS=-g -O0 -Wall | |
-NVCCFLAGS=-g -G -O0 -m64 -gencode=arch=compute_20,code=\"sm_20,compute_20\" -X… | |
+#CCFLAGS=-g -O0 -Wall | |
+#NVCCFLAGS=-g -G -O0 -m64 -gencode=arch=compute_20,code=\"sm_20,compute_20\" -… | |
DATE=`date +'%Y.%m.%d-%H:%M:%S'` | |
BACKUPNAME=sphere.$(DATE).tar.gz | |
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh | |
@@ -91,8 +91,8 @@ __device__ void findAndProcessContactsInCell(int3 targetCell, | |
Float4* dev_angvel_sorted, | |
unsigned int* dev_cellStart, | |
unsigned int* dev_cellEnd, | |
- Float4* dev_w_nx, | |
- Float4* dev_w_mvfd) | |
+ Float4* dev_walls_nx, | |
+ Float4* dev_walls_mvfd) | |
//uint4 bonds) | |
{ | |
@@ -382,9 +382,9 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
Float* dev_es, | |
Float* dev_ev, | |
Float* dev_p, | |
- Float4* dev_w_nx, | |
- Float4* dev_w_mvfd, | |
- Float* dev_w_force, //uint4* dev_bonds_sorted, | |
+ Float4* dev_walls_nx, | |
+ Float4* dev_walls_mvfd, | |
+ Float* dev_walls_force_pp, //uint4* dev_bonds_sorted, | |
unsigned int* dev_contacts, | |
Float4* dev_distmod, | |
Float4* dev_delta_t) | |
@@ -399,9 +399,6 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
Float4 x_a = dev_x_sorted[idx_a]; | |
Float radius_a = x_a.w; | |
- // Fetch wall data in global read | |
- Float4 w_up_nx = dev_w_nx[0]; | |
- Float4 w_up_mvfd = dev_w_mvfd[0]; | |
// Fetch world dimensions in constant memory read | |
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], | |
@@ -411,6 +408,17 @@ __global__ void interact(unsigned int* dev_gridParticleIn… | |
devC_grid.L[1], | |
devC_grid.L[2]); | |
+ // Fetch wall data in global read | |
+ Float4 w_up_nx; | |
+ Float4 w_up_mvfd; | |
+ if (devC_nw > 0) { | |
+ w_up_nx = dev_walls_nx[0]; | |
+ w_up_mvfd = dev_walls_mvfd[0]; | |
+ } else { | |
+ w_up_nx = MAKE_FLOAT4(0.0f, 0.0f, -1.0f, L.z); | |
+ w_up_mvfd = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ } | |
+ | |
// Index of particle which is bonded to particle A. | |
// The index is equal to the particle no (p.np) | |
// if particle A is bond-less. | |
@@ -526,7 +534,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
dev_x_sorted, | |
dev_vel_sorted, dev_angvel_sorted, | |
dev_cellStart, dev_cellEnd, | |
- dev_w_nx, dev_w_mvfd); | |
+ dev_walls_nx, dev_walls_mvfd); | |
} | |
} | |
} | |
@@ -629,7 +637,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
dev_es[orig_idx] += es_dot * devC_dt; | |
dev_ev[orig_idx] += ev_dot * devC_dt; | |
dev_p[orig_idx] = p; | |
- dev_w_force[orig_idx] = w_force; | |
+ if (devC_nw > 0) | |
+ dev_walls_force_pp[orig_idx] = w_force; | |
} | |
} // End of interact(...) | |
diff --git a/src/datatypes.h b/src/datatypes.h | |
@@ -95,7 +95,6 @@ struct Walls { | |
int wmode[MAXWALLS]; // Wall modes | |
Float4* nx; // Wall normal and position | |
Float4* mvfd; // Wall mass, velocity, force and dev. stress | |
- Float* force; // Resulting forces on walls per particle | |
}; | |
#endif | |
diff --git a/src/device.cu b/src/device.cu | |
@@ -220,38 +220,44 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
if (verbose == 1) | |
std::cout << " Allocating global device memory: "; | |
- // Particle arrays | |
- cudaMalloc((void**)&dev_k->x, memSizeF4); | |
- cudaMalloc((void**)&dev_sort->x_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k->vel, memSizeF4); | |
- cudaMalloc((void**)&dev_sort->vel_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k->angvel, memSizeF4); | |
- cudaMalloc((void**)&dev_sort->angvel_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k->acc, memSizeF4); | |
k.acc = new Float4[np]; | |
- cudaMalloc((void**)&dev_k->angacc, memSizeF4); | |
k.angacc = new Float4[np]; | |
- cudaMalloc((void**)&dev_k->force, memSizeF4); | |
- cudaMalloc((void**)&dev_k->torque, memSizeF4); | |
- cudaMalloc((void**)&dev_k->angpos, memSizeF4); | |
- cudaMalloc((void**)&dev_e->es_dot, memSizeF); | |
- cudaMalloc((void**)&dev_e->ev_dot, memSizeF); | |
- cudaMalloc((void**)&dev_e->es, memSizeF); | |
- cudaMalloc((void**)&dev_e->ev, memSizeF); | |
- cudaMalloc((void**)&dev_e->p, memSizeF); | |
- // Cell-related arrays | |
- cudaMalloc((void**)&dev_sort->gridParticleCellID, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_sort->gridParticleIndex, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_sort->cellStart, sizeof(unsigned int)*grid.num[0]*gr… | |
- cudaMalloc((void**)&dev_sort->cellEnd, sizeof(unsigned int)*grid.num[0]*grid… | |
+ // Kinematics arrays | |
+ cudaMalloc((void**)&dev_x, memSizeF4); | |
+ cudaMalloc((void**)&dev_xysum, memSizeF4); | |
+ cudaMalloc((void**)&dev_vel, memSizeF4); | |
+ cudaMalloc((void**)&dev_acc, memSizeF4); | |
+ cudaMalloc((void**)&dev_force, memSizeF4); | |
+ cudaMalloc((void**)&dev_angpos, memSizeF4); | |
+ cudaMalloc((void**)&dev_angvel, memSizeF4); | |
+ cudaMalloc((void**)&dev_angacc, memSizeF4); | |
+ cudaMalloc((void**)&dev_torque, memSizeF4); | |
// Particle contact bookkeeping arrays | |
- cudaMalloc((void**)&dev_k->contacts, sizeof(unsigned int)*np*NC); // Max NC … | |
- cudaMalloc((void**)&dev_k->distmod, sizeof(Float4)*np*NC); | |
- cudaMalloc((void**)&dev_k->delta_t, sizeof(Float4)*np*NC); | |
+ cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*np*NC); // Max NC con… | |
+ cudaMalloc((void**)&dev_distmod, memSizeF4*NC); | |
+ cudaMalloc((void**)&dev_delta_t, memSizeF4*NC); | |
+ | |
+ // Sorted arrays | |
+ cudaMalloc((void**)&dev_x_sorted, memSizeF4); | |
+ cudaMalloc((void**)&dev_vel_sorted, memSizeF4); | |
+ cudaMalloc((void**)&dev_angvel_sorted, memSizeF4); | |
+ | |
+ // Energy arrays | |
+ cudaMalloc((void**)&dev_es_dot, memSizeF); | |
+ cudaMalloc((void**)&dev_ev_dot, memSizeF); | |
+ cudaMalloc((void**)&dev_es, memSizeF); | |
+ cudaMalloc((void**)&dev_ev, memSizeF); | |
+ cudaMalloc((void**)&dev_p, memSizeF); | |
+ | |
+ // Cell-related arrays | |
+ cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*np); | |
+ cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*np); | |
+ cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num… | |
+ cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1… | |
- // Host contact bookkeeping arrays | |
+ // Host contact bookkeeping arrays | |
k.contacts = new unsigned int[np*NC]; | |
// Initialize contacts lists to np | |
for (unsigned int i=0; i<(np*NC); ++i) | |
@@ -260,10 +266,11 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
k.delta_t = new Float4[np*NC]; | |
// Wall arrays | |
- cudaMalloc((void**)&dev_walls->nx, sizeof(Float4)*walls.nw); | |
- cudaMalloc((void**)&dev_walls->mvfd, sizeof(Float4)*walls.nw); | |
- cudaMalloc((void**)&dev_walls->force, sizeof(Float)*walls.nw*np); | |
- // dev_w_force_partial allocated later | |
+ cudaMalloc((void**)&dev_walls_wmode, sizeof(int)*walls.nw); | |
+ cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw); | |
+ cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw); | |
+ cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np); | |
+ // dev_walls_force_partial allocated later | |
checkForCudaErrors("End of allocateGlobalDeviceMemory"); | |
if (verbose == 1) | |
@@ -275,36 +282,40 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
if (verbose == 1) | |
printf("\nLiberating device memory: "); | |
// Particle arrays | |
- cudaFree(dev_k->x); | |
- cudaFree(dev_sort->x_sorted); | |
- cudaFree(dev_k->vel); | |
- cudaFree(dev_sort->vel_sorted); | |
- cudaFree(dev_k->angvel); | |
- cudaFree(dev_sort->angvel_sorted); | |
- cudaFree(dev_k->acc); | |
- cudaFree(dev_k->angacc); | |
- cudaFree(dev_k->force); | |
- cudaFree(dev_k->torque); | |
- cudaFree(dev_k->angpos); | |
- cudaFree(dev_e->es_dot); | |
- cudaFree(dev_e->ev_dot); | |
- cudaFree(dev_e->es); | |
- cudaFree(dev_e->ev); | |
- cudaFree(dev_e->p); | |
- cudaFree(dev_k->contacts); | |
- cudaFree(dev_k->distmod); | |
- cudaFree(dev_k->delta_t); | |
+ cudaFree(dev_x); | |
+ cudaFree(dev_xysum); | |
+ cudaFree(dev_vel); | |
+ cudaFree(dev_acc); | |
+ cudaFree(dev_force); | |
+ cudaFree(dev_angpos); | |
+ cudaFree(dev_angvel); | |
+ cudaFree(dev_angacc); | |
+ cudaFree(dev_torque); | |
+ | |
+ cudaFree(dev_contacts); | |
+ cudaFree(dev_distmod); | |
+ cudaFree(dev_delta_t); | |
+ | |
+ cudaFree(dev_es_dot); | |
+ cudaFree(dev_es); | |
+ cudaFree(dev_ev_dot); | |
+ cudaFree(dev_ev); | |
+ cudaFree(dev_p); | |
+ | |
+ cudaFree(dev_x_sorted); | |
+ cudaFree(dev_vel_sorted); | |
+ cudaFree(dev_angvel_sorted); | |
// Cell-related arrays | |
- cudaFree(dev_sort->gridParticleIndex); | |
- cudaFree(dev_sort->cellStart); | |
- cudaFree(dev_sort->cellEnd); | |
+ cudaFree(dev_gridParticleIndex); | |
+ cudaFree(dev_cellStart); | |
+ cudaFree(dev_cellEnd); | |
// Wall arrays | |
- cudaFree(dev_walls->nx); | |
- cudaFree(dev_walls->mvfd); | |
- cudaFree(dev_walls->force); | |
- //cudaFree(dev_w_force_partial); | |
+ cudaFree(dev_walls_nx); | |
+ cudaFree(dev_walls_mvfd); | |
+ cudaFree(dev_walls_force_partial); | |
+ cudaFree(dev_walls_force_pp); | |
if (verbose == 1) | |
printf("Done\n"); | |
@@ -321,55 +332,53 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
unsigned int memSizeF4 = sizeof(Float4) * np; | |
// Copy static-size structure data from host to global device memory | |
- cudaMemcpy(dev_time, &time, sizeof(Time), cudaMemcpyHostToDevice); | |
+ //cudaMemcpy(dev_time, &time, sizeof(Time), cudaMemcpyHostToDevice); | |
// Kinematic particle values | |
- cudaMemcpy( dev_k->x, k.x, | |
+ cudaMemcpy( dev_x, k.x, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->xysum, k.xysum, | |
+ cudaMemcpy( dev_xysum, k.xysum, | |
sizeof(Float2)*np, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->vel, k.vel, | |
+ cudaMemcpy( dev_vel, k.vel, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->acc, k.acc, | |
+ cudaMemcpy( dev_acc, k.acc, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->force, k.force, | |
+ cudaMemcpy( dev_force, k.force, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->angpos, k.angpos, | |
+ cudaMemcpy( dev_angpos, k.angpos, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->angvel, k.angvel, | |
+ cudaMemcpy( dev_angvel, k.angvel, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->angacc, k.angacc, | |
+ cudaMemcpy( dev_angacc, k.angacc, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->torque, k.torque, | |
+ cudaMemcpy( dev_torque, k.torque, | |
memSizeF4, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->contacts, k.contacts, | |
+ cudaMemcpy( dev_contacts, k.contacts, | |
sizeof(unsigned int)*np*NC, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->distmod, k.distmod, | |
+ cudaMemcpy( dev_distmod, k.distmod, | |
memSizeF4*NC, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_k->delta_t, k.delta_t, | |
+ cudaMemcpy( dev_delta_t, k.delta_t, | |
memSizeF4*NC, cudaMemcpyHostToDevice); | |
// Individual particle energy values | |
- cudaMemcpy( dev_e->es_dot, e.es_dot, | |
+ cudaMemcpy( dev_es_dot, e.es_dot, | |
memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_e->es, e.es, | |
+ cudaMemcpy( dev_es, e.es, | |
memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_e->ev_dot, e.ev_dot, | |
+ cudaMemcpy( dev_ev_dot, e.ev_dot, | |
memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_e->ev, e.ev, | |
+ cudaMemcpy( dev_ev, e.ev, | |
memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_e->p, e.p, | |
+ cudaMemcpy( dev_p, e.p, | |
memSizeF, cudaMemcpyHostToDevice); | |
// Wall parameters | |
- cudaMemcpy( dev_walls->wmode, walls.wmode, | |
+ cudaMemcpy( dev_walls_wmode, walls.wmode, | |
sizeof(int)*walls.nw, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_walls->nx, walls.nx, | |
+ cudaMemcpy( dev_walls_nx, walls.nx, | |
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_walls->mvfd, walls.mvfd, | |
+ cudaMemcpy( dev_walls_mvfd, walls.mvfd, | |
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
- cudaMemcpy( dev_walls->force, walls.force, | |
- memSizeF*walls.nw, cudaMemcpyHostToDevice); | |
checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
if (verbose == 1) | |
@@ -385,55 +394,53 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
unsigned int memSizeF4 = sizeof(Float4) * np; | |
// Copy static-size structure data from host to global device memory | |
- cudaMemcpy(&time, dev_time, sizeof(Time), cudaMemcpyDeviceToHost); | |
+ //cudaMemcpy(&time, dev_time, sizeof(Time), cudaMemcpyDeviceToHost); | |
// Kinematic particle values | |
- cudaMemcpy( k.x, dev_k->x, | |
+ cudaMemcpy( k.x, dev_x, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.xysum, dev_k->xysum, | |
+ cudaMemcpy( k.xysum, dev_xysum, | |
sizeof(Float2)*np, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.vel, dev_k->vel, | |
+ cudaMemcpy( k.vel, dev_vel, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.acc, dev_k->acc, | |
+ cudaMemcpy( k.acc, dev_acc, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.force, dev_k->force, | |
+ cudaMemcpy( k.force, dev_force, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.angpos, dev_k->angpos, | |
+ cudaMemcpy( k.angpos, dev_angpos, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.angvel, dev_k->angvel, | |
+ cudaMemcpy( k.angvel, dev_angvel, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.angacc, dev_k->angacc, | |
+ cudaMemcpy( k.angacc, dev_angacc, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.torque, dev_k->torque, | |
+ cudaMemcpy( k.torque, dev_torque, | |
memSizeF4, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.contacts, dev_k->contacts, | |
+ cudaMemcpy( k.contacts, dev_contacts, | |
sizeof(unsigned int)*np*NC, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.distmod, dev_k->distmod, | |
+ cudaMemcpy( k.distmod, dev_distmod, | |
memSizeF4*NC, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( k.delta_t, dev_k->delta_t, | |
+ cudaMemcpy( k.delta_t, dev_delta_t, | |
memSizeF4*NC, cudaMemcpyDeviceToHost); | |
// Individual particle energy values | |
- cudaMemcpy( e.es_dot, dev_e->es_dot, | |
+ cudaMemcpy( e.es_dot, dev_es_dot, | |
memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( e.es, dev_e->es, | |
+ cudaMemcpy( e.es, dev_es, | |
memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( e.ev_dot, dev_e->ev_dot, | |
+ cudaMemcpy( e.ev_dot, dev_ev_dot, | |
memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( e.ev, dev_e->ev, | |
+ cudaMemcpy( e.ev, dev_ev, | |
memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( e.p, dev_e->p, | |
+ cudaMemcpy( e.p, dev_p, | |
memSizeF, cudaMemcpyDeviceToHost); | |
// Wall parameters | |
- cudaMemcpy( walls.wmode, dev_walls->wmode, | |
+ cudaMemcpy( walls.wmode, dev_walls_wmode, | |
sizeof(int)*walls.nw, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( walls.nx, dev_walls->nx, | |
+ cudaMemcpy( walls.nx, dev_walls_nx, | |
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( walls.mvfd, dev_walls->mvfd, | |
+ cudaMemcpy( walls.mvfd, dev_walls_mvfd, | |
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost); | |
- cudaMemcpy( walls.force, dev_walls->force, | |
- memSizeF*walls.nw, cudaMemcpyDeviceToHost); | |
checkForCudaErrors("End of transferFromGlobalDeviceMemory"); | |
} | |
@@ -476,8 +483,8 @@ __host__ void DEM::startTime() | |
// Shared memory per block | |
unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1); | |
- Float* dev_w_force_partial; | |
- cudaMalloc((void**)&dev_w_force_partial, sizeof(Float)*dimGrid.x); | |
+ // Pre-sum of force per wall | |
+ cudaMalloc((void**)&dev_walls_force_partial, sizeof(Float)*dimGrid.x); | |
// Report to stdout | |
if (verbose == 1) { | |
@@ -562,9 +569,9 @@ __host__ void DEM::startTime() | |
// in the fine, uniform and homogenous grid. | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort->gridParticleCellID, | |
- dev_sort->gridParticleIndex, | |
- dev_k->x); | |
+ calcParticleCellID<<<dimGrid, dimBlock>>>(dev_gridParticleCellID, | |
+ dev_gridParticleIndex, | |
+ dev_x); | |
// Synchronization point | |
cudaThreadSynchronize(); | |
@@ -576,9 +583,9 @@ __host__ void DEM::startTime() | |
// Sort particle (key, particle ID) pairs by hash key with Thrust radix so… | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort->gridParticleCellID), | |
- thrust::device_ptr<uint>(dev_sort->gridParticleCellID … | |
- thrust::device_ptr<uint>(dev_sort->gridParticleIndex)); | |
+ thrust::sort_by_key(thrust::device_ptr<uint>(dev_gridParticleCellID), | |
+ thrust::device_ptr<uint>(dev_gridParticleCellID + np), | |
+ thrust::device_ptr<uint>(dev_gridParticleIndex)); | |
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implic… | |
if (PROFILING == 1) | |
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_thrustsort); | |
@@ -588,7 +595,7 @@ __host__ void DEM::startTime() | |
// Zero cell array values by setting cellStart to its highest possible val… | |
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int | |
// is 4294967295. | |
- cudaMemset(dev_sort->cellStart, 0xffffffff, | |
+ cudaMemset(dev_cellStart, 0xffffffff, | |
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int)); | |
cudaThreadSynchronize(); | |
checkForCudaErrors("Post cudaMemset"); | |
@@ -597,15 +604,15 @@ __host__ void DEM::startTime() | |
// coherent memory access. Save ordered configurations in new arrays (*_so… | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort->cellStart, | |
- dev_sort->cellEnd, | |
- dev_sort->gridParticleCellI… | |
- dev_sort->gridParticleIndex, | |
- dev_k->x, dev_k->vel, | |
- dev_k->angvel, | |
- dev_sort->x_sorted, | |
- dev_sort->vel_sorted, | |
- dev_sort->angvel_sorted); | |
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart, | |
+ dev_cellEnd, | |
+ dev_gridParticleCellID, | |
+ dev_gridParticleIndex, | |
+ dev_x, dev_vel, | |
+ dev_angvel, | |
+ dev_x_sorted, | |
+ dev_vel_sorted, | |
+ dev_angvel_sorted); | |
// Synchronization point | |
cudaThreadSynchronize(); | |
@@ -620,12 +627,12 @@ __host__ void DEM::startTime() | |
// For each particle: Search contacts in neighbor cells | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- topology<<<dimGrid, dimBlock>>>(dev_sort->cellStart, | |
- dev_sort->cellEnd, | |
- dev_sort->gridParticleIndex, | |
- dev_sort->x_sorted, | |
- dev_k->contacts, | |
- dev_k->distmod); | |
+ topology<<<dimGrid, dimBlock>>>(dev_cellStart, | |
+ dev_cellEnd, | |
+ dev_gridParticleIndex, | |
+ dev_x_sorted, | |
+ dev_contacts, | |
+ dev_distmod); | |
// Synchronization point | |
@@ -639,28 +646,28 @@ __host__ void DEM::startTime() | |
// For each particle: Process collisions and compute resulting forces. | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- interact<<<dimGrid, dimBlock>>>(dev_sort->gridParticleIndex, | |
- dev_sort->cellStart, | |
- dev_sort->cellEnd, | |
- dev_k->x, | |
- dev_sort->x_sorted, | |
- dev_sort->vel_sorted, | |
- dev_sort->angvel_sorted, | |
- dev_k->vel, | |
- dev_k->angvel, | |
- dev_k->force, | |
- dev_k->torque, | |
- dev_e->es_dot, | |
- dev_e->ev_dot, | |
- dev_e->es, | |
- dev_e->ev, | |
- dev_e->p, | |
- dev_walls->nx, | |
- dev_walls->mvfd, | |
- dev_walls->force, | |
- dev_k->contacts, | |
- dev_k->distmod, | |
- dev_k->delta_t); | |
+ interact<<<dimGrid, dimBlock>>>(dev_gridParticleIndex, | |
+ dev_cellStart, | |
+ dev_cellEnd, | |
+ dev_x, | |
+ dev_x_sorted, | |
+ dev_vel_sorted, | |
+ dev_angvel_sorted, | |
+ dev_vel, | |
+ dev_angvel, | |
+ dev_force, | |
+ dev_torque, | |
+ dev_es_dot, | |
+ dev_ev_dot, | |
+ dev_es, | |
+ dev_ev, | |
+ dev_p, | |
+ dev_walls_nx, | |
+ dev_walls_mvfd, | |
+ dev_walls_force_pp, | |
+ dev_contacts, | |
+ dev_distmod, | |
+ dev_delta_t); | |
// Synchronization point | |
@@ -672,17 +679,20 @@ __host__ void DEM::startTime() | |
// Update particle kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrate<<<dimGrid, dimBlock>>>(dev_sort->x_sorted, | |
- dev_sort->vel_sorted, | |
- dev_sort->angvel_sorted, | |
- dev_k->x, | |
- dev_k->vel, | |
- dev_k->angvel, | |
- dev_k->force, | |
- dev_k->torque, | |
- dev_k->angpos, | |
- dev_k->xysum, | |
- dev_sort->gridParticleIndex); | |
+ integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, | |
+ dev_vel_sorted, | |
+ dev_angvel_sorted, | |
+ dev_x, | |
+ dev_vel, | |
+ dev_angvel, | |
+ dev_force, | |
+ dev_torque, | |
+ dev_angpos, | |
+ dev_xysum, | |
+ dev_gridParticleIndex); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrors("Post integrate"); | |
+ | |
cudaThreadSynchronize(); | |
if (PROFILING == 1) | |
@@ -691,20 +701,26 @@ __host__ void DEM::startTime() | |
// Summation of forces on wall | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- summation<<<dimGrid, dimBlock>>>(dev_walls->force, dev_w_force_partial); | |
- | |
+ if (walls.nw > 0) { | |
+ summation<<<dimGrid, dimBlock>>>(dev_walls_force_pp, | |
+ dev_walls_force_partial); | |
+ } | |
// Synchronization point | |
cudaThreadSynchronize(); | |
if (PROFILING == 1) | |
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_summation); | |
- checkForCudaErrors("Post integrate & wall force summation"); | |
+ checkForCudaErrors("Post wall force summation"); | |
// Update wall kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrateWalls<<< 1, walls.nw>>>(dev_walls, | |
- dev_w_force_partial, | |
- blocksPerGrid); | |
+ if (walls.nw > 0) { | |
+ integrateWalls<<< 1, walls.nw>>>(dev_walls_nx, | |
+ dev_walls_mvfd, | |
+ dev_walls_wmode, | |
+ dev_walls_force_partial, | |
+ blocksPerGrid); | |
+ } | |
// Synchronization point | |
cudaThreadSynchronize(); | |
diff --git a/src/file_io.cpp b/src/file_io.cpp | |
@@ -189,7 +189,6 @@ void DEM::readbin(const char *target) | |
// Wall mass (x), velocity (y), force (z), and deviatoric stress (w) | |
walls.nx = new Float4[walls.nw]; | |
walls.mvfd = new Float4[walls.nw]; | |
- walls.force = new Float[walls.nw*np]; | |
ifs.read(as_bytes(walls.wmode), sizeof(walls.wmode)); | |
for (i = 0; i<walls.nw; ++i) { | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
@@ -175,8 +175,10 @@ __global__ void summation(Float* in, Float *out) | |
} | |
// Update wall positions | |
-__global__ void integrateWalls(Walls* dev_walls, | |
- Float* dev_w_force_partial, | |
+__global__ void integrateWalls(Float4* dev_walls_nx, | |
+ Float4* dev_walls_mvfd, | |
+ int* dev_walls_wmode, | |
+ Float* dev_walls_force_partial, | |
unsigned int blocksPerGrid) | |
{ | |
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
t@@ -185,9 +187,9 @@ __global__ void integrateWalls(Walls* dev_walls, | |
// Copy data to temporary arrays to avoid any potential read-after-write, | |
// write-after-read, or write-after-write hazards. | |
- Float4 w_nx = dev_walls->nx[idx]; | |
- Float4 w_mvfd = dev_walls->mvfd[idx]; | |
- int wmode = dev_walls->wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel | |
+ Float4 w_nx = dev_walls_nx[idx]; | |
+ Float4 w_mvfd = dev_walls_mvfd[idx]; | |
+ int wmode = dev_walls_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel | |
Float acc; | |
if (wmode == 0) // Wall fixed: do nothing | |
t@@ -196,7 +198,7 @@ __global__ void integrateWalls(Walls* dev_walls, | |
// Find the final sum of forces on wall | |
w_mvfd.z = 0.0f; | |
for (int i=0; i<blocksPerGrid; ++i) { | |
- w_mvfd.z += dev_w_force_partial[i]; | |
+ w_mvfd.z += dev_walls_force_partial[i]; | |
} | |
Float dt = devC_dt; | |
t@@ -222,8 +224,8 @@ __global__ void integrateWalls(Walls* dev_walls, | |
w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f; | |
// Store data in global memory | |
- dev_walls->nx[idx] = w_nx; | |
- dev_walls->mvfd[idx] = w_mvfd; | |
+ dev_walls_nx[idx] = w_nx; | |
+ dev_walls_mvfd[idx] = w_mvfd; | |
} | |
} // End of integrateWalls(...) | |
diff --git a/src/sphere.cpp b/src/sphere.cpp | |
t@@ -207,16 +207,18 @@ void DEM::reportValues() | |
else | |
cout << " - 1st and 2nd dim. boundaries: Visco-frictional walls\n"; | |
- cout << " - Top BC: "; | |
- if (walls.wmode[0] == 0) | |
- cout << "Fixed\n"; | |
- else if (walls.wmode[0] == 1) | |
- cout << "Deviatoric stress\n"; | |
- else if (walls.wmode[0] == 2) | |
- cout << "Velocity\n"; | |
- else { | |
- cerr << "Top BC not recognized!\n"; | |
- exit(1); | |
+ if (walls.nw > 0) { | |
+ cout << " - Top BC: "; | |
+ if (walls.wmode[0] == 0) | |
+ cout << "Fixed\n"; | |
+ else if (walls.wmode[0] == 1) | |
+ cout << "Deviatoric stress\n"; | |
+ else if (walls.wmode[0] == 2) | |
+ cout << "Velocity\n"; | |
+ else { | |
+ cerr << "Top BC not recognized!\n"; | |
+ exit(1); | |
+ } | |
} | |
cout << " - Grid: "; | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -25,13 +25,14 @@ class DEM { | |
// Number of particles | |
unsigned int np; | |
+ // HOST STRUCTURES | |
// Structure containing individual particle kinematics | |
Kinematics k; // host | |
- Kinematics *dev_k; // device | |
+ //Kinematics *dev_k; // device | |
// Structure containing energy values | |
Energies e; // host | |
- Energies *dev_e; // device | |
+ //Energies *dev_e; // device | |
// Structure of global parameters | |
Params params; // host | |
t@@ -40,15 +41,46 @@ class DEM { | |
Grid grid; // host | |
// Structure containing sorting arrays | |
- Sorting *dev_sort; // device | |
+ //Sorting *dev_sort; // device | |
// Structure of temporal parameters | |
Time time; // host | |
- Time *dev_time; // device | |
+ //Time *dev_time; // device | |
// Structure of wall parameters | |
Walls walls; // host | |
- Walls *dev_walls; // device | |
+ //Walls *dev_walls; // device | |
+ | |
+ // DEVICE ARRAYS | |
+ Float4 *dev_x; | |
+ Float2 *dev_xysum; | |
+ Float4 *dev_vel; | |
+ Float4 *dev_acc; | |
+ Float4 *dev_force; | |
+ Float4 *dev_angpos; | |
+ Float4 *dev_angvel; | |
+ Float4 *dev_angacc; | |
+ Float4 *dev_torque; | |
+ unsigned int *dev_contacts; | |
+ Float4 *dev_distmod; | |
+ Float4 *dev_delta_t; | |
+ Float *dev_es_dot; | |
+ Float *dev_es; | |
+ Float *dev_ev_dot; | |
+ Float *dev_ev; | |
+ Float *dev_p; | |
+ Float4 *dev_x_sorted; | |
+ Float4 *dev_vel_sorted; | |
+ Float4 *dev_angvel_sorted; | |
+ unsigned int *dev_gridParticleCellID; | |
+ unsigned int *dev_gridParticleIndex; | |
+ unsigned int *dev_cellStart; | |
+ unsigned int *dev_cellEnd; | |
+ int *dev_walls_wmode; | |
+ Float4 *dev_walls_nx; // normal, pos. | |
+ Float4 *dev_walls_mvfd; // Mass, velocity, force, dev. stress | |
+ Float *dev_walls_force_partial; // Pre-sum per wall | |
+ Float *dev_walls_force_pp; // Force per particle per wall | |
// GPU initialization, must be called before startTime() | |
void initializeGPU(void); | |
t@@ -101,10 +133,11 @@ class DEM { | |
void startTime(void); | |
// Render particles using raytracing | |
- // render(const char *target, | |
- // Float3 lookat, | |
- // Float3 eye, | |
- // Float focalLength); | |
+ void render(const char *target, | |
+ const Float3 lookat, | |
+ const Float3 eye, | |
+ const Float focalLength = 1.0, | |
+ const int method = 1); | |
}; | |
diff --git a/src/utility.cu b/src/utility.cu | |
t@@ -11,7 +11,7 @@ void checkForCudaErrors(const char* checkpoint_description) | |
cudaError_t err = cudaGetLastError(); | |
if (err != cudaSuccess) { | |
std::cerr << "\nCuda error detected, checkpoint: " << checkpoint_descripti… | |
- << "\nError string: " << cudaGetErrorString(err) << "\n"; | |
+ << "\nError string: " << cudaGetErrorString(err) << std::endl; | |
exit(EXIT_FAILURE); | |
} | |
} | |
t@@ -22,7 +22,7 @@ void checkForCudaErrors(const char* checkpoint_description, … | |
if (err != cudaSuccess) { | |
std::cerr << "\nCuda error detected, checkpoint: " << checkpoint_descripti… | |
<< "\nduring iteration " << iteration | |
- << "\nError string: " << cudaGetErrorString(err) << "\n"; | |
+ << "\nError string: " << cudaGetErrorString(err) << std::endl; | |
exit(EXIT_FAILURE); | |
} | |
} |