tIt compiles without errors. Still need to update sphere.py to new file format … | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit c7e1dd22dc1892e430ab40a74fcb8df9de98c9d7 | |
parent ad86704c71d456826b62ba4d8b1e23dc9f420467 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Sat, 27 Oct 2012 00:37:45 +0200 | |
It compiles without errors. Still need to update sphere.py to new file format | |
Diffstat: | |
M src/constants.cuh | 1 + | |
M src/contactmodels.cuh | 16 ++++++++-------- | |
M src/contactsearch.cuh | 54 +++++++++++++++++------------… | |
D src/datatypes.cuh | 18 ------------------ | |
M src/device.cu | 220 +++++++++++++++--------------… | |
M src/integration.cuh | 27 +++++++++++++-------------- | |
M src/sorting.cuh | 8 ++++---- | |
M src/sphere.cpp | 16 +++++++++++++++- | |
M src/sphere.h | 5 ++++- | |
9 files changed, 181 insertions(+), 184 deletions(-) | |
--- | |
diff --git a/src/constants.cuh b/src/constants.cuh | |
t@@ -9,6 +9,7 @@ | |
// Constant memory size: 64 kb | |
__constant__ unsigned int devC_nd; // Number of dimensions | |
__constant__ unsigned int devC_np; // Number of particles | |
+__constant__ unsigned int devC_nw; // Number of dynamic walls | |
__constant__ int devC_nc; // Max. number of contacts a particle can… | |
__constant__ Float devC_dt; // Computational time step length | |
diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh | |
t@@ -44,7 +44,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Fl… | |
//Float3 f_n = -devC_params.k_n * delta * n; | |
// Normal force component: Elastic - viscous damping | |
- Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_wn * vel_n) * n; | |
+ Float3 f_n = (-devC_params.k_n * delta - devC_params.gamma_n * vel_n) * n; | |
// Make sure the viscous damping doesn't exceed the elastic component, | |
// i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n) | |
t@@ -61,7 +61,7 @@ __device__ Float contactLinear_wall(Float3* F, Float3* T, Fl… | |
// divide by zero (producing a NaN) | |
if (vel_t_length > 0.f) { | |
- Float f_t_visc = devC_params.gamma_wt * vel_t_length; // Tangential force… | |
+ Float f_t_visc = devC_params.gamma_t * vel_t_length; // Tangential force … | |
Float f_t_limit = devC_params.mu_s * f_n_length; // Max. friction | |
// If the shear force component exceeds the friction, | |
t@@ -382,15 +382,15 @@ __device__ void contactLinear(Float3* F, Float3* T, | |
// Shear friction heat production rate: | |
// The energy lost from the tangential spring is dissipated as heat | |
//*es_dot += -dot(vel_t_ab, f_t); | |
- *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt… | |
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt; | |
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_dt; // Se… | |
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt; | |
} else { // Static case | |
// No correction of f_t is required | |
// Add tangential displacement to total tangential displacement | |
- delta_t = delta_t0 + vel_t_ab * devC_params.dt; | |
+ delta_t = delta_t0 + vel_t_ab * devC_dt; | |
} | |
} | |
t@@ -555,15 +555,15 @@ __device__ void contactHertz(Float3* F, Float3* T, | |
// Shear friction heat production rate: | |
// The energy lost from the tangential spring is dissipated as heat | |
//*es_dot += -dot(vel_t_ab, f_t); | |
- *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_params.dt… | |
- //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_params.dt; | |
+ *es_dot += length(delta_t0 - delta_t) * devC_params.k_t / devC_dt; // Se… | |
+ //*es_dot += fabs(dot(delta_t0 - delta_t, f_t)) / devC_dt; | |
} else { // Static case | |
// No correction of f_t is required | |
// Add tangential displacement to total tangential displacement | |
- delta_t = delta_t0 + vel_t_ab * devC_params.dt; | |
+ delta_t = delta_t0 + vel_t_ab * devC_dt; | |
} | |
} | |
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh | |
t@@ -14,7 +14,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod) | |
// Check whether x- and y boundaries are to be treated as periodic | |
// 1: x- and y boundaries periodic | |
// 2: x boundaries periodic | |
- if (devC_params.periodic == 1) { | |
+ if (devC_grid.periodic == 1) { | |
// Periodic x-boundary | |
if (targetCell->x < 0) { | |
t@@ -38,7 +38,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod) | |
// Only x-boundaries are periodic | |
- } else if (devC_params.periodic == 2) { | |
+ } else if (devC_grid.periodic == 2) { | |
// Periodic x-boundary | |
if (targetCell->x < 0) { | |
t@@ -77,7 +77,7 @@ __device__ int findDistMod(int3* targetCell, Float3* distmod) | |
// Find overlaps between particle no. 'idx' and particles in cell 'gridpos'. | |
// Contacts are processed as soon as they are identified. | |
-// Used for shearmodel=1, where contact history is not needed. | |
+// Used for contactmodel=1, where contact history is not needed. | |
// Kernel executed on device, and callable from device only. | |
// Function is called from interact(). | |
__device__ void findAndProcessContactsInCell(int3 targetCell, | |
t@@ -175,7 +175,7 @@ __device__ void findAndProcessContactsInCell(int3 targetCe… | |
// Find overlaps between particle no. 'idx' and particles in cell 'gridpos' | |
// Write the indexes of the overlaps in array contacts[]. | |
-// Used for shearmodel=2, where bookkeeping of contact history is necessary. | |
+// Used for contactmodel=2, where bookkeeping of contact history is necessary. | |
// Kernel executed on device, and callable from device only. | |
// Function is called from topology(). | |
__device__ void findContactsInCell(int3 targetCell, | |
t@@ -257,7 +257,7 @@ __device__ void findContactsInCell(int3 targetCell, | |
for (int i=0; i < devC_nc; ++i) { | |
__syncthreads(); | |
cidx = dev_contacts[(unsigned int)(idx_a_orig*devC_nc+i)]; | |
- if (cidx == devC_params.np) // Write to position of now-deleted co… | |
+ if (cidx == devC_np) // Write to position of now-deleted contact | |
cpos = i; | |
else if (cidx == idx_b_orig) { // Write to position of same contact | |
cpos = i; | |
t@@ -320,7 +320,7 @@ __global__ void topology(unsigned int* dev_cellStart, | |
{ | |
// Thread index equals index of particle A | |
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x; | |
- if (idx_a < devC_params.np) { | |
+ if (idx_a < devC_np) { | |
// Fetch particle data in global read | |
Float4 x_a = dev_x_sorted[idx_a]; | |
Float radius_a = x_a.w; | |
t@@ -356,13 +356,13 @@ __global__ void topology(unsigned int* dev_cellStart, | |
// For a single particle: | |
-// If shearmodel=1: | |
+// If contactmodel=1: | |
// Search for neighbors to particle 'idx' inside the 27 closest cells, | |
// and compute the resulting force and torque on it. | |
-// If shearmodel=2: | |
+// If contactmodel=2: | |
// Process contacts saved in dev_contacts by topology(), and compute | |
// the resulting force and torque on it. | |
-// For all shearmodels: | |
+// For all contactmodels: | |
// Collide with top- and bottom walls, save resulting force on upper wall. | |
// Kernel is executed on device, and is callable from host only. | |
// Function is called from mainGPU loop. | |
t@@ -371,15 +371,19 @@ __global__ void interact(unsigned int* dev_gridParticleI… | |
unsigned int* dev_cellEnd, | |
Float4* dev_x, | |
Float4* dev_x_sorted, | |
- Float4* dev_vel_sorted, Float4* dev_angvel_sorted, | |
- Float4* dev_vel, Float4* dev_angvel, | |
- Float4* dev_force, Float4* dev_torque, | |
+ Float4* dev_vel_sorted, | |
+ Float4* dev_angvel_sorted, | |
+ Float4* dev_vel, | |
+ Float4* dev_angvel, | |
+ Float4* dev_force, | |
+ Float4* dev_torque, | |
Float* dev_es_dot, | |
Float* dev_ev_dot, | |
Float* dev_es, | |
Float* dev_ev, | |
Float* dev_p, | |
- Float4* dev_w_nx, Float4* dev_w_mvfd, | |
+ Float4* dev_w_nx, | |
+ Float4* dev_w_mvfd, | |
Float* dev_w_force, //uint4* dev_bonds_sorted, | |
unsigned int* dev_contacts, | |
Float4* dev_distmod, | |
t@@ -388,7 +392,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
// Thread index equals index of particle A | |
unsigned int idx_a = blockIdx.x * blockDim.x + threadIdx.x; | |
- if (idx_a < devC_params.np) { | |
+ if (idx_a < devC_np) { | |
// Fetch particle data in global read | |
unsigned int idx_a_orig = dev_gridParticleIndex[idx_a]; | |
t@@ -424,7 +428,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
Float3 T = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
// Apply linear elastic, frictional contact model to registered contacts | |
- if (devC_params.shearmodel == 2 || devC_params.shearmodel == 3) { | |
+ if (devC_params.contactmodel == 2 || devC_params.contactmodel == 3) { | |
unsigned int idx_b_orig, mempos; | |
Float delta_n, x_ab_length, radius_b; | |
Float3 x_ab; | |
t@@ -452,11 +456,11 @@ __global__ void interact(unsigned int* dev_gridParticleI… | |
delta_n = x_ab_length - (radius_a + radius_b); | |
- if (idx_b_orig != (unsigned int)devC_params.np) { | |
+ if (idx_b_orig != (unsigned int)devC_np) { | |
// Process collision if the particles are overlapping | |
if (delta_n < 0.0f) { | |
- if (devC_params.shearmodel == 2) { | |
+ if (devC_params.contactmodel == 2) { | |
contactLinear(&F, &T, &es_dot, &ev_dot, &p, | |
idx_a_orig, | |
idx_b_orig, | |
t@@ -468,7 +472,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
x_ab, x_ab_length, | |
delta_n, dev_delta_t, | |
mempos); | |
- } else if (devC_params.shearmodel == 3) { | |
+ } else if (devC_params.contactmodel == 3) { | |
contactHertz(&F, &T, &es_dot, &ev_dot, &p, | |
idx_a_orig, | |
idx_b_orig, | |
t@@ -484,7 +488,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
} else { | |
__syncthreads(); | |
// Remove this contact (there is no particle with index=np) | |
- dev_contacts[mempos] = devC_params.np; | |
+ dev_contacts[mempos] = devC_np; | |
// Zero sum of shear displacement in this position | |
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
} | |
t@@ -498,8 +502,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
// Find contacts and process collisions immidiately for | |
- // shearmodel 1 (visco-frictional). | |
- } else if (devC_params.shearmodel == 1) { | |
+ // contactmodel 1 (visco-frictional). | |
+ } else if (devC_params.contactmodel == 1) { | |
int3 gridPos; | |
int3 targetPos; | |
t@@ -553,7 +557,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
} | |
- if (devC_params.periodic == 0) { | |
+ if (devC_grid.periodic == 0) { | |
// Left wall | |
delta_w = x_a.x - radius_a - origo.x; | |
t@@ -591,7 +595,7 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
w_n, delta_w, 0.0f); | |
} | |
- } else if (devC_params.periodic == 2) { | |
+ } else if (devC_grid.periodic == 2) { | |
// Front wall | |
delta_w = x_a.y - radius_a - origo.y; | |
t@@ -622,8 +626,8 @@ __global__ void interact(unsigned int* dev_gridParticleInd… | |
dev_torque[orig_idx] = MAKE_FLOAT4(T.x, T.y, T.z, 0.0f); | |
dev_es_dot[orig_idx] = es_dot; | |
dev_ev_dot[orig_idx] = ev_dot; | |
- dev_es[orig_idx] += es_dot * devC_params.dt; | |
- dev_ev[orig_idx] += ev_dot * devC_params.dt; | |
+ dev_es[orig_idx] += es_dot * devC_dt; | |
+ dev_ev[orig_idx] += ev_dot * devC_dt; | |
dev_p[orig_idx] = p; | |
dev_w_force[orig_idx] = w_force; | |
} | |
diff --git a/src/datatypes.cuh b/src/datatypes.cuh | |
t@@ -1,18 +0,0 @@ | |
-// datatypes.cuh -- Device structure templates and function prototypes | |
- | |
-// Avoiding multiple inclusions of header file | |
-#ifndef DATATYPES_CUH_ | |
-#define DATATYPES_CUH_ | |
- | |
-#include "datatypes.h" | |
-#include "vector_functions.h" | |
- | |
-unsigned int iDivUp(unsigned int a, unsigned int b); | |
-void checkForCudaErrors(const char* checkpoint_description); | |
-void checkForCudaErrors(const char* checkpoint_description, const unsigned int… | |
- | |
-// Device constant memory structures | |
-__constant__ Params devC_params; | |
-__constant__ Grid devC_grid; | |
- | |
-#endif | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -97,8 +97,7 @@ __global__ void checkConstantValues(int* dev_equal, | |
// Compare values between global- and constant | |
// memory structures | |
- if (dev_grid->nd != devC_grid.nd || | |
- dev_grid->origo[0] != devC_grid.origo[0] || | |
+ if (dev_grid->origo[0] != devC_grid.origo[0] || | |
dev_grid->origo[1] != devC_grid.origo[1] || | |
dev_grid->origo[2] != devC_grid.origo[2] || | |
dev_grid->L[0] != devC_grid.L[0] || | |
t@@ -106,35 +105,28 @@ __global__ void checkConstantValues(int* dev_equal, | |
dev_grid->L[2] != devC_grid.L[2] || | |
dev_grid->num[0] != devC_grid.num[0] || | |
dev_grid->num[1] != devC_grid.num[1] || | |
- dev_grid->num[2] != devC_grid.num[2]) | |
+ dev_grid->num[2] != devC_grid.num[2] || | |
+ dev_grid->periodic != devC_grid.periodic) | |
*dev_equal = 1; // Not ok | |
- else if (dev_params->global != devC_params.global || | |
- dev_params->g[0] != devC_params.g[0] || | |
+ | |
+ else if (dev_params->g[0] != devC_params.g[0] || | |
dev_params->g[1] != devC_params.g[1] || | |
dev_params->g[2] != devC_params.g[2] || | |
- dev_params->dt != devC_params.dt || | |
- dev_params->np != devC_params.np || | |
- dev_params->nw != devC_params.nw || | |
- dev_params->wmode[0] != devC_params.wmode[0] || | |
dev_params->k_n != devC_params.k_n || | |
dev_params->k_t != devC_params.k_t || | |
dev_params->k_r != devC_params.k_r || | |
dev_params->gamma_n != devC_params.gamma_n || | |
dev_params->gamma_t != devC_params.gamma_t || | |
dev_params->gamma_r != devC_params.gamma_r || | |
- dev_params->gamma_wn != devC_params.gamma_wn || | |
- dev_params->gamma_wt != devC_params.gamma_wt || | |
- dev_params->gamma_wr != devC_params.gamma_wr || | |
dev_params->mu_s != devC_params.mu_s || | |
dev_params->mu_d != devC_params.mu_d || | |
dev_params->mu_r != devC_params.mu_r || | |
dev_params->rho != devC_params.rho || | |
+ dev_params->contactmodel != devC_params.contactmodel || | |
dev_params->kappa != devC_params.kappa || | |
dev_params->db != devC_params.db || | |
- dev_params->V_b != devC_params.V_b || | |
- dev_params->periodic != devC_params.periodic || | |
- dev_params->shearmodel != devC_params.shearmodel) | |
+ dev_params->V_b != devC_params.V_b) | |
*dev_equal = 2; // Not ok | |
} | |
t@@ -155,8 +147,8 @@ __host__ void DEM::checkConstantMemory() | |
cudaMalloc((void**)&dev_params, sizeof(Params)); | |
// Copy structure data from host to global device memory | |
- cudaMemcpy(dev_grid, grid, sizeof(Grid), cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_params, params, sizeof(Params), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_grid, &grid, sizeof(Grid), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_params, ¶ms, sizeof(Params), cudaMemcpyHostToDevice); | |
// Compare values between global and constant memory | |
// structures on the device. | |
t@@ -197,17 +189,18 @@ __host__ void DEM::transferToConstantDeviceMemory() | |
cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd)); | |
cudaMemcpyToSymbol("devC_np", &np, sizeof(np)); | |
+ cudaMemcpyToSymbol("devC_nw", &walls.nw, sizeof(unsigned int)); | |
cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int)); | |
cudaMemcpyToSymbol("devC_dt", &time.dt, sizeof(Float)); | |
- cudaMemcpyToSymbol(devC_grid, grid, sizeof(Grid)); | |
- cudaMemcpyToSymbol(devC_params, params, sizeof(Params)); | |
+ cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid)); | |
+ cudaMemcpyToSymbol(devC_params, ¶ms, sizeof(Params)); | |
checkForCudaErrors("After transferring to device constant memory"); | |
if (verbose == 1) | |
cout << "Done\n"; | |
- //checkConstantMemory(); | |
+ checkConstantMemory(); | |
} | |
t@@ -223,35 +216,35 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
std::cout << " Allocating device memory: "; | |
// Particle arrays | |
- cudaMalloc((void**)&dev_k.x, memSizeF4); | |
- cudaMalloc((void**)&dev_sort.x_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k.vel, memSizeF4); | |
- cudaMalloc((void**)&dev_sort.vel_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k.angvel, memSizeF4); | |
- cudaMalloc((void**)&dev_sort.angvel_sorted, memSizeF4); | |
- cudaMalloc((void**)&dev_k.acc, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->x, memSizeF4); | |
+ cudaMalloc((void**)&dev_sort->x_sorted, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->vel, memSizeF4); | |
+ cudaMalloc((void**)&dev_sort->vel_sorted, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->angvel, memSizeF4); | |
+ cudaMalloc((void**)&dev_sort->angvel_sorted, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->acc, memSizeF4); | |
k.acc = new Float4[np]; | |
- cudaMalloc((void**)&dev_k.angacc, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->angacc, memSizeF4); | |
k.angacc = new Float4[np]; | |
- cudaMalloc((void**)&dev_k.force, memSizeF4); | |
- cudaMalloc((void**)&dev_k.torque, memSizeF4); | |
- cudaMalloc((void**)&dev_k.angpos, memSizeF4); | |
- cudaMalloc((void**)&dev_e.es_dot, memSizeF); | |
- cudaMalloc((void**)&dev_e.ev_dot, memSizeF); | |
- cudaMalloc((void**)&dev_e.es, memSizeF); | |
- cudaMalloc((void**)&dev_e.ev, memSizeF); | |
- cudaMalloc((void**)&dev_e.p, memSizeF); | |
+ cudaMalloc((void**)&dev_k->force, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->torque, memSizeF4); | |
+ cudaMalloc((void**)&dev_k->angpos, memSizeF4); | |
+ cudaMalloc((void**)&dev_e->es_dot, memSizeF); | |
+ cudaMalloc((void**)&dev_e->ev_dot, memSizeF); | |
+ cudaMalloc((void**)&dev_e->es, memSizeF); | |
+ cudaMalloc((void**)&dev_e->ev, memSizeF); | |
+ cudaMalloc((void**)&dev_e->p, memSizeF); | |
// Cell-related arrays | |
- cudaMalloc((void**)&dev_sort.gridParticleCellID, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_sort.gridParticleIndex, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_sort.cellStart, sizeof(unsigned int)*grid.num[0]*gri… | |
- cudaMalloc((void**)&dev_sort.cellEnd, sizeof(unsigned int)*grid.num[0]*grid.… | |
+ cudaMalloc((void**)&dev_sort->gridParticleCellID, sizeof(unsigned int)*np); | |
+ cudaMalloc((void**)&dev_sort->gridParticleIndex, sizeof(unsigned int)*np); | |
+ cudaMalloc((void**)&dev_sort->cellStart, sizeof(unsigned int)*grid.num[0]*gr… | |
+ cudaMalloc((void**)&dev_sort->cellEnd, sizeof(unsigned int)*grid.num[0]*grid… | |
// Particle contact bookkeeping arrays | |
- cudaMalloc((void**)&dev_k.contacts, sizeof(unsigned int)*np*NC); // Max NC c… | |
- cudaMalloc((void**)&dev_k.distmod, sizeof(Float4)*np*NC); | |
- cudaMalloc((void**)&dev_k.delta_t, sizeof(Float4)*np*NC); | |
+ cudaMalloc((void**)&dev_k->contacts, sizeof(unsigned int)*np*NC); // Max NC … | |
+ cudaMalloc((void**)&dev_k->distmod, sizeof(Float4)*np*NC); | |
+ cudaMalloc((void**)&dev_k->delta_t, sizeof(Float4)*np*NC); | |
// Host contact bookkeeping arrays | |
k.contacts = new unsigned int[np*NC]; | |
t@@ -262,9 +255,9 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
k.delta_t = new Float4[np*NC]; | |
// Wall arrays | |
- cudaMalloc((void**)&dev_walls.nx, sizeof(Float4)*walls.nw); | |
- cudaMalloc((void**)&dev_walls.mvfd, sizeof(Float4)*walls.nw); | |
- cudaMalloc((void**)&dev_walls.force, sizeof(Float)*walls.nw*np); | |
+ cudaMalloc((void**)&dev_walls->nx, sizeof(Float4)*walls.nw); | |
+ cudaMalloc((void**)&dev_walls->mvfd, sizeof(Float4)*walls.nw); | |
+ cudaMalloc((void**)&dev_walls->force, sizeof(Float)*walls.nw*np); | |
// dev_w_force_partial allocated later | |
checkForCudaErrors("End of allocateGlobalDeviceMemory"); | |
t@@ -306,7 +299,7 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
cudaFree(dev_walls->nx); | |
cudaFree(dev_walls->mvfd); | |
cudaFree(dev_walls->force); | |
- cudaFree(dev_w_force_partial); | |
+ //cudaFree(dev_w_force_partial); | |
if (verbose == 1) | |
printf("Done\n"); | |
t@@ -323,19 +316,19 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
cudaMemcpy(dev_e, e, sizeof(Energies), cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_time, time, sizeof(Time), cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_walls, walls, sizeof(Walls), cudaMemcpyHostToDevice);*/ | |
- cudaMemcpy(dev_k, k, sizeof(k), cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_e, e, sizeof(e), cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_time, time, sizeof(time), cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_walls, walls, sizeof(walls), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_k, &k, sizeof(k), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_e, &e, sizeof(e), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_time, &time, sizeof(time), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_walls, &walls, sizeof(walls), cudaMemcpyHostToDevice); | |
checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
if (verbose == 1) | |
std::cout << "Done\n"; | |
} | |
-__host__ void DEM::transferToGlobalDeviceMemory() | |
+__host__ void DEM::transferFromGlobalDeviceMemory() | |
{ | |
- cout << " Transfering data to the device: "; | |
+ std::cout << " Transfering data to the device: "; | |
// Copy structure data from host to global device memory | |
cudaMemcpy(&k, dev_k, sizeof(k), cudaMemcpyDeviceToHost); | |
t@@ -409,7 +402,7 @@ __host__ void DEM::startTime() | |
long iter = 0; | |
// Create first status.dat | |
- sprintf(file,"output/%s.status.dat", inputbin); | |
+ sprintf(file,"output/%s.status.dat", sid); | |
fp = fopen(file, "w"); | |
fprintf(fp,"%2.4e %2.4e %d\n", | |
time.current, | |
t@@ -418,7 +411,7 @@ __host__ void DEM::startTime() | |
fclose(fp); | |
// Write first output data file: output0.bin, thus testing writing of bin fi… | |
- sprintf(file,"output/%s.output0.bin", inputbin); | |
+ sprintf(file,"output/%s.output0.bin", sid); | |
writebin(file); | |
if (verbose == 1) { | |
t@@ -426,7 +419,7 @@ __host__ void DEM::startTime() | |
<< " IMPORTANT: Do not close this terminal, doing so will \n" | |
<< " terminate this SPHERE process. Follow the \n" | |
<< " progress by executing:\n" | |
- << " $ ./sphere_status " << inputbin << "\n\n"; | |
+ << " $ ./sphere_status " << sid << "\n\n"; | |
} | |
t@@ -475,9 +468,9 @@ __host__ void DEM::startTime() | |
// in the fine, uniform and homogenous grid. | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort.gridParticleCellID, | |
- dev_sort.gridParticleIndex, | |
- dev_k.x); | |
+ calcParticleCellID<<<dimGrid, dimBlock>>>(dev_sort->gridParticleCellID, | |
+ dev_sort->gridParticleIndex, | |
+ dev_k->x); | |
// Synchronization point | |
cudaThreadSynchronize(); | |
t@@ -489,9 +482,9 @@ __host__ void DEM::startTime() | |
// Sort particle (key, particle ID) pairs by hash key with Thrust radix so… | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort.gridParticleCellID), | |
- thrust::device_ptr<uint>(dev_sort.gridParticleCellID +… | |
- thrust::device_ptr<uint>(dev_sort.gridParticleIndex)); | |
+ thrust::sort_by_key(thrust::device_ptr<uint>(dev_sort->gridParticleCellID), | |
+ thrust::device_ptr<uint>(dev_sort->gridParticleCellID … | |
+ thrust::device_ptr<uint>(dev_sort->gridParticleIndex)); | |
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implic… | |
if (PROFILING == 1) | |
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, &t_thrustsort); | |
t@@ -501,7 +494,7 @@ __host__ void DEM::startTime() | |
// Zero cell array values by setting cellStart to its highest possible val… | |
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int | |
// is 4294967295. | |
- cudaMemset(dev_sort.cellStart, 0xffffffff, | |
+ cudaMemset(dev_sort->cellStart, 0xffffffff, | |
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int)); | |
cudaThreadSynchronize(); | |
checkForCudaErrors("Post cudaMemset"); | |
t@@ -510,15 +503,15 @@ __host__ void DEM::startTime() | |
// coherent memory access. Save ordered configurations in new arrays (*_so… | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort.cellStart, | |
- dev_sort.cellEnd, | |
- dev_sort.gridParticleCellID… | |
- dev_sort.gridParticleIndex, | |
- dev_k.x, dev_k.vel, | |
- dev_k.angvel, | |
- dev_sort.x_sorted, | |
- dev_sort.vel_sorted, | |
- dev_sort.angvel_sorted); | |
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_sort->cellStart, | |
+ dev_sort->cellEnd, | |
+ dev_sort->gridParticleCellI… | |
+ dev_sort->gridParticleIndex, | |
+ dev_k->x, dev_k->vel, | |
+ dev_k->angvel, | |
+ dev_sort->x_sorted, | |
+ dev_sort->vel_sorted, | |
+ dev_sort->angvel_sorted); | |
// Synchronization point | |
cudaThreadSynchronize(); | |
t@@ -533,12 +526,12 @@ __host__ void DEM::startTime() | |
// For each particle: Search contacts in neighbor cells | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- topology<<<dimGrid, dimBlock>>>(dev_sort.cellStart, | |
- dev_sort.cellEnd, | |
- dev_sort.gridParticleIndex, | |
- dev_sort.x_sorted, | |
- dev_k.contacts, | |
- dev_k.distmod); | |
+ topology<<<dimGrid, dimBlock>>>(dev_sort->cellStart, | |
+ dev_sort->cellEnd, | |
+ dev_sort->gridParticleIndex, | |
+ dev_sort->x_sorted, | |
+ dev_k->contacts, | |
+ dev_k->distmod); | |
// Synchronization point | |
t@@ -552,26 +545,28 @@ __host__ void DEM::startTime() | |
// For each particle: Process collisions and compute resulting forces. | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- interact<<<dimGrid, dimBlock>>>(dev_sort.gridParticleIndex, | |
- dev_sort.cellStart, | |
- dev_sort.cellEnd, | |
- dev_k.x, | |
- dev_sort.x_sorted, | |
- dev_sort.vel_sorted, | |
- dev_sort.angvel_sorted, | |
- dev_k.vel, | |
- dev_k.angvel, | |
- dev_k.force, | |
- dev_k.torque, | |
- dev_e.es_dot, | |
- dev_e.ev_dot, | |
- dev_e.es, dev_e.ev, dev_e.p, | |
- dev_walls.nx, | |
- dev_walls.mvfd, | |
- dev_walls.force, | |
- dev_k.contacts, | |
- dev_k.distmod, | |
- dev_k.delta_t); | |
+ interact<<<dimGrid, dimBlock>>>(dev_sort->gridParticleIndex, | |
+ dev_sort->cellStart, | |
+ dev_sort->cellEnd, | |
+ dev_k->x, | |
+ dev_sort->x_sorted, | |
+ dev_sort->vel_sorted, | |
+ dev_sort->angvel_sorted, | |
+ dev_k->vel, | |
+ dev_k->angvel, | |
+ dev_k->force, | |
+ dev_k->torque, | |
+ dev_e->es_dot, | |
+ dev_e->ev_dot, | |
+ dev_e->es, | |
+ dev_e->ev, | |
+ dev_e->p, | |
+ dev_walls->nx, | |
+ dev_walls->mvfd, | |
+ dev_walls->force, | |
+ dev_k->contacts, | |
+ dev_k->distmod, | |
+ dev_k->delta_t); | |
// Synchronization point | |
t@@ -583,16 +578,16 @@ __host__ void DEM::startTime() | |
// Update particle kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrate<<<dimGrid, dimBlock>>>(dev_sort.x_sorted, | |
- dev_sort.vel_sorted, | |
- dev_sort.angvel_sorted, | |
- dev_k.x, | |
- dev_k.vel, | |
- dev_k.angvel, | |
- dev_k.force, | |
- dev_k.torque, | |
- dev_k.angpos, | |
- dev_sort.gridParticleIndex); | |
+ integrate<<<dimGrid, dimBlock>>>(dev_sort->x_sorted, | |
+ dev_sort->vel_sorted, | |
+ dev_sort->angvel_sorted, | |
+ dev_k->x, | |
+ dev_k->vel, | |
+ dev_k->angvel, | |
+ dev_k->force, | |
+ dev_k->torque, | |
+ dev_k->angpos, | |
+ dev_sort->gridParticleIndex); | |
cudaThreadSynchronize(); | |
if (PROFILING == 1) | |
t@@ -601,7 +596,7 @@ __host__ void DEM::startTime() | |
// Summation of forces on wall | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- summation<<<dimGrid, dimBlock>>>(dev_walls.force, dev_w_force_partial); | |
+ summation<<<dimGrid, dimBlock>>>(dev_walls->force, dev_w_force_partial); | |
// Synchronization point | |
cudaThreadSynchronize(); | |
t@@ -612,8 +607,7 @@ __host__ void DEM::startTime() | |
// Update wall kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrateWalls<<< 1, walls.nw>>>(dev_walls.nx, | |
- dev_walls.mvfd, | |
+ integrateWalls<<< 1, walls.nw>>>(dev_walls, | |
dev_w_force_partial, | |
blocksPerGrid); | |
t@@ -651,7 +645,7 @@ __host__ void DEM::startTime() | |
// Write binary output file | |
time.step_count += 1; | |
- sprintf(file,"output/%s.output%d.bin", inputbin, time.step_count); | |
+ sprintf(file,"output/%s.output%d.bin", sid, time.step_count); | |
writebin(file); | |
t@@ -686,7 +680,7 @@ __host__ void DEM::startTime() | |
} | |
// Update status.dat at the interval of filetime | |
- sprintf(file,"output/%s.status.dat", inputbin); | |
+ sprintf(file,"output/%s.status.dat", sid); | |
fp = fopen(file, "w"); | |
fprintf(fp,"%2.4e %2.4e %d\n", | |
time.current, | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
t@@ -15,7 +15,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_… | |
{ | |
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
- if (idx < devC_params.np) { // Condition prevents block size error | |
+ if (idx < devC_np) { // Condition prevents block size error | |
// Copy data to temporary arrays to avoid any potential read-after-write, | |
// write-after-read, or write-after-write hazards. | |
t@@ -35,7 +35,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_… | |
Float radius = x.w; | |
// Coherent read from constant memory to registers | |
- Float dt = devC_params.dt; | |
+ Float dt = devC_dt; | |
Float3 origo = MAKE_FLOAT3(devC_grid.origo[0], devC_grid.origo[1], devC_gr… | |
Float3 L = MAKE_FLOAT3(devC_grid.L[0], devC_grid.L[1], devC_grid.L[2]); | |
Float rho = devC_params.rho; | |
t@@ -106,7 +106,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de… | |
x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
// Move particle across boundary if it is periodic | |
- if (devC_params.periodic == 1) { | |
+ if (devC_grid.periodic == 1) { | |
if (x.x < origo.x) | |
x.x += L.x; | |
if (x.x > L.x) | |
t@@ -115,7 +115,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de… | |
x.y += L.y; | |
if (x.y > L.y) | |
x.y -= L.y; | |
- } else if (devC_params.periodic == 2) { | |
+ } else if (devC_grid.periodic == 2) { | |
if (x.x < origo.x) | |
x.x += L.x; | |
if (x.x > L.x) | |
t@@ -142,7 +142,7 @@ __global__ void summation(Float* in, Float *out) | |
unsigned int cacheIdx = threadIdx.x; | |
Float temp = 0.0f; | |
- while (idx < devC_params.np) { | |
+ while (idx < devC_np) { | |
temp += in[idx]; | |
idx += blockDim.x * gridDim.x; | |
} | |
t@@ -168,20 +168,19 @@ __global__ void summation(Float* in, Float *out) | |
} | |
// Update wall positions | |
-__global__ void integrateWalls(Float4* dev_w_nx, | |
- Float4* dev_w_mvfd, | |
+__global__ void integrateWalls(Walls* dev_walls, | |
Float* dev_w_force_partial, | |
unsigned int blocksPerGrid) | |
{ | |
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
- if (idx < devC_params.nw) { // Condition prevents block size error | |
+ if (idx < devC_nw) { // Condition prevents block size error | |
// Copy data to temporary arrays to avoid any potential read-after-write, | |
// write-after-read, or write-after-write hazards. | |
- Float4 w_nx = dev_w_nx[idx]; | |
- Float4 w_mvfd = dev_w_mvfd[idx]; | |
- int wmode = devC_params.wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel | |
+ Float4 w_nx = dev_walls->nx[idx]; | |
+ Float4 w_mvfd = dev_walls->mvfd[idx]; | |
+ int wmode = dev_walls->wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: vel | |
Float acc; | |
if (wmode == 0) // Wall fixed: do nothing | |
t@@ -193,7 +192,7 @@ __global__ void integrateWalls(Float4* dev_w_nx, | |
w_mvfd.z += dev_w_force_partial[i]; | |
} | |
- Float dt = devC_params.dt; | |
+ Float dt = devC_dt; | |
// Normal load = Deviatoric stress times wall surface area, | |
// directed downwards. | |
t@@ -216,8 +215,8 @@ __global__ void integrateWalls(Float4* dev_w_nx, | |
w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f; | |
// Store data in global memory | |
- dev_w_nx[idx] = w_nx; | |
- dev_w_mvfd[idx] = w_mvfd; | |
+ dev_walls->nx[idx] = w_nx; | |
+ dev_walls->mvfd[idx] = w_mvfd; | |
} | |
} // End of integrateWalls(...) | |
diff --git a/src/sorting.cuh b/src/sorting.cuh | |
t@@ -25,7 +25,7 @@ __global__ void calcParticleCellID(unsigned int* dev_gridPar… | |
//unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x; | |
- if (idx < devC_params.np) { // Condition prevents block size error | |
+ if (idx < devC_np) { // Condition prevents block size error | |
//volatile Float4 x = dev_x[idx]; // Ensure coalesced read | |
Float4 x = dev_x[idx]; // Ensure coalesced read | |
t@@ -69,7 +69,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, | |
unsigned int cellID; | |
// Read cellID data and store it in shared memory (shared_data) | |
- if (idx < devC_params.np) { // Condition prevents block size error | |
+ if (idx < devC_np) { // Condition prevents block size error | |
cellID = dev_gridParticleCellID[idx]; | |
// Load hash data into shared memory, allowing access to neighbor particle… | |
t@@ -86,7 +86,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, | |
__syncthreads(); | |
// Find lowest and highest particle index in each cell | |
- if (idx < devC_params.np) { // Condition prevents block size error | |
+ if (idx < devC_np) { // Condition prevents block size error | |
// If this particle has a different cell index to the previous particle, i… | |
// particle in the cell -> Store the index of this particle in the cell. | |
// The previous particle must be the last particle in the previous cell. | |
t@@ -97,7 +97,7 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, | |
} | |
// Check wether the thread is the last one | |
- if (idx == (devC_params.np - 1)) | |
+ if (idx == (devC_np - 1)) | |
dev_cellEnd[cellID] = idx + 1; | |
diff --git a/src/sphere.cpp b/src/sphere.cpp | |
t@@ -1,6 +1,7 @@ | |
#include <iostream> | |
#include <cstdio> | |
#include <cstdlib> | |
+#include <cstring> | |
#include "typedefs.h" | |
#include "datatypes.h" | |
t@@ -9,7 +10,7 @@ | |
// Constructor: Reads an input binary, and optionally checks | |
// and reports the values | |
-DEM::DEM(const char *inputbin, | |
+DEM::DEM(char *inputbin, | |
const int verbosity, | |
const int checkVals) | |
: verbose(verbosity) | |
t@@ -17,6 +18,19 @@ DEM::DEM(const char *inputbin, | |
using std::cout; | |
using std::cerr; | |
+ // Get basename from input file | |
+ char *name = inputbin; | |
+ char *sid = name; | |
+ while (*name) { | |
+ if (*name++ == '/') { | |
+ sid = name; | |
+ } | |
+ } // sid is now everything after the last '/' char | |
+ char *lastdot = strrchr(sid, '.'); | |
+ if (lastdot != NULL) | |
+ *lastdot = '\0'; // Put in string-end char. at last dot | |
+ | |
+ | |
// Initialize CUDA | |
initializeGPU(); | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -10,6 +10,9 @@ class DEM { | |
// Values and functions only accessible from the class internally | |
private: | |
+ // Simulation ID | |
+ char *sid; | |
+ | |
// Output level | |
int verbose; | |
t@@ -70,7 +73,7 @@ class DEM { | |
public: | |
// Constructor, some parameters with default values | |
- DEM(const char *inputbin, | |
+ DEM(char *inputbin, | |
const int verbosity = 1, | |
const int checkVals = 1); | |