tNow with correct sizeof() parameter - sphere - GPU-based 3D discrete element m… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 80378c0db50ee07b6f13d09b0d1fcd3eb1b9e796 | |
parent 3ec084a03251a13fe4cc09d8f58426174f88e96b | |
Author: Anders Damsgaard <[email protected]> | |
Date: Mon, 22 Oct 2012 20:41:13 +0200 | |
Now with correct sizeof() parameter | |
Diffstat: | |
M src/Makefile | 2 +- | |
M src/datatypes.h | 4 ++-- | |
M src/device.cu | 278 ++++++++++++++++++++++-------… | |
M src/main.cpp | 4 ++-- | |
4 files changed, 207 insertions(+), 81 deletions(-) | |
--- | |
diff --git a/src/Makefile b/src/Makefile | |
t@@ -1,5 +1,5 @@ | |
# Cuda paths | |
-CUDA_INSTALL_PATH=/usr/local/bin | |
+CUDA_INSTALL_PATH=/usr/local/cuda | |
#CUDA_BIN=$(CUDA_INSTALL_PATH)/bin | |
# Define the editor and optional arguments | |
diff --git a/src/datatypes.h b/src/datatypes.h | |
t@@ -167,8 +167,8 @@ void gpuMain(Float4* host_x, | |
Float4* host_torque, | |
Float4* host_angpos, | |
uint4* host_bonds, | |
- Particles* p, Grid* grid, | |
- Time* time, Params* params, | |
+ Particles p, Grid grid, | |
+ Time time, Params params, | |
Float4* host_w_nx, | |
Float4* host_w_mvfd, | |
const char* cwd, | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -17,6 +17,7 @@ | |
#include "contactsearch.cuh" | |
#include "integration.cuh" | |
+#include "cuPrintf.cu" | |
// Wrapper function for initializing the CUDA components. | |
// Called from main.cpp | |
t@@ -83,6 +84,129 @@ __host__ void stopTimer(cudaEvent_t *kernel_tic, | |
*sum += *kernel_elapsed; | |
} | |
+// Check values of parameters in constant memory | |
+__global__ void checkConstantValues(int* dev_equal, | |
+ Grid* dev_grid, | |
+ Params* dev_params) | |
+{ | |
+ | |
+ // Values ok (0) | |
+ *dev_equal = 0; | |
+ | |
+ cuPrintf("dev_grid.nd = %u\n", dev_grid->nd); | |
+ cuPrintf("devC_grid.nd = %u\n", devC_grid.nd); | |
+ cuPrintf("dev_grid.num.x = %u\n", dev_grid->num[0]); | |
+ cuPrintf("devC_grid.num.x = %u\n", devC_grid.num[1]); | |
+ cuPrintf("dev_params.np = %u\n", dev_params->np); | |
+ cuPrintf("devC_params.np = %u\n", devC_params.np); | |
+ cuPrintf("dev_params.mu_s = %f\n", dev_params->mu_s); | |
+ cuPrintf("devC_params.mu_s = %f\n", devC_params.mu_s); | |
+ | |
+ if (dev_grid->nd != 3) { | |
+ *dev_equal = 3; | |
+ return; | |
+ } else if (devC_grid.nd != 3) { | |
+ *dev_equal = 4; | |
+ return; | |
+ } else if (devC_grid.num[0] != 4) { | |
+ *dev_equal = 6; | |
+ return; | |
+ } else if (dev_grid->num[0] != 4) { | |
+ *dev_equal = 5; | |
+ return; | |
+ } | |
+ | |
+ | |
+ // Compare values between global- and constant | |
+ // memory structures | |
+ if (dev_grid->nd != devC_grid.nd || | |
+ dev_grid->origo[0] != devC_grid.origo[0] || | |
+ dev_grid->origo[1] != devC_grid.origo[1] || | |
+ dev_grid->origo[2] != devC_grid.origo[2] || | |
+ dev_grid->L[0] != devC_grid.L[0] || | |
+ dev_grid->L[1] != devC_grid.L[1] || | |
+ dev_grid->L[2] != devC_grid.L[2] || | |
+ dev_grid->num[0] != devC_grid.num[0] || | |
+ dev_grid->num[1] != devC_grid.num[1] || | |
+ dev_grid->num[2] != devC_grid.num[2]) | |
+ *dev_equal = 1; // Not ok | |
+ | |
+ else if (dev_params->global != devC_params.global || | |
+ dev_params->g[0] != devC_params.g[0] || | |
+ dev_params->g[1] != devC_params.g[1] || | |
+ dev_params->g[2] != devC_params.g[2] || | |
+ dev_params->dt != devC_params.dt || | |
+ dev_params->np != devC_params.np || | |
+ dev_params->nw != devC_params.nw || | |
+ dev_params->wmode[0] != devC_params.wmode[0] || | |
+ dev_params->k_n != devC_params.k_n || | |
+ dev_params->k_t != devC_params.k_t || | |
+ dev_params->k_r != devC_params.k_r || | |
+ dev_params->gamma_n != devC_params.gamma_n || | |
+ dev_params->gamma_t != devC_params.gamma_t || | |
+ dev_params->gamma_r != devC_params.gamma_r || | |
+ dev_params->gamma_wn != devC_params.gamma_wn || | |
+ dev_params->gamma_wt != devC_params.gamma_wt || | |
+ dev_params->gamma_wr != devC_params.gamma_wr || | |
+ dev_params->mu_s != devC_params.mu_s || | |
+ dev_params->mu_d != devC_params.mu_d || | |
+ dev_params->mu_r != devC_params.mu_r || | |
+ dev_params->rho != devC_params.rho || | |
+ dev_params->kappa != devC_params.kappa || | |
+ dev_params->db != devC_params.db || | |
+ dev_params->V_b != devC_params.V_b || | |
+ dev_params->periodic != devC_params.periodic || | |
+ dev_params->shearmodel != devC_params.shearmodel) | |
+ *dev_equal = 2; // Not ok | |
+ | |
+} | |
+ | |
+ | |
+// Copy the constant data components to device memory, | |
+// and check whether the values correspond to the | |
+// values in constant memory. | |
+__host__ void checkConstantMemory(Grid* grid, Params* params) | |
+{ | |
+ cudaPrintfInit(); | |
+ | |
+ // Allocate space in global device memory | |
+ Grid* dev_grid; | |
+ Params* dev_params; | |
+ cudaMalloc((void**)&dev_grid, sizeof(Grid)); | |
+ cudaMalloc((void**)&dev_params, sizeof(Params)); | |
+ | |
+ // Copy structure data from host to global device memory | |
+ cudaMemcpy(dev_grid, grid, sizeof(Grid), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_params, params, sizeof(Params), cudaMemcpyHostToDevice); | |
+ | |
+ // Compare values between global and constant memory | |
+ // structures on the device. | |
+ int* equal = new int; // The values are equal = 0, if not = 1 | |
+ *equal = 0; | |
+ int* dev_equal; | |
+ cudaMalloc((void**)&dev_equal, sizeof(int)); | |
+ checkConstantValues<<<1,1>>>(dev_equal, dev_grid, dev_params); | |
+ checkForCudaErrors("After constant memory check"); | |
+ | |
+ // Copy result to host | |
+ cudaMemcpy(equal, dev_equal, sizeof(int), cudaMemcpyDeviceToHost); | |
+ | |
+ // Free global device memory | |
+ cudaFree(dev_grid); | |
+ cudaFree(dev_params); | |
+ cudaFree(dev_equal); | |
+ | |
+ cudaPrintfDisplay(stdout, true); | |
+ | |
+ // Are the values equal? | |
+ if (*equal != 0) { | |
+ std::cerr << "Error! The values in constant memory do not " | |
+ << "seem to be correct (" << *equal << ").\n"; | |
+ exit(1); | |
+ } else { | |
+ std::cout << " Constant values ok (" << *equal << ").\n"; | |
+ } | |
+} | |
// Copy selected constant components to constant device memory. | |
//extern "C" | |
__host__ void transferToConstantMemory(Particles* p, | |
t@@ -105,7 +229,7 @@ __host__ void transferToConstantMemory(Particles* p, | |
//cudaMemcpyToSymbol("devC_nw", ¶ms->nw, sizeof(unsigned int)); | |
//cudaMemcpyToSymbol("devC_periodic", ¶ms->periodic, sizeof(int)); | |
- cudaMemcpyToSymbol(devC_grid, grid, sizeof(grid)); | |
+ cudaMemcpyToSymbol(devC_grid, grid, sizeof(Grid)); | |
if (params->global == 1) { | |
// If the physical properties of the particles are global (params.global =… | |
t@@ -122,7 +246,7 @@ __host__ void transferToConstantMemory(Particles* p, | |
params->mu_d = p->mu_d[0]; | |
params->mu_r = p->mu_r[0]; | |
params->rho = p->rho[0]; | |
- cudaMemcpyToSymbol(devC_params, params, sizeof(params)); | |
+ cudaMemcpyToSymbol(devC_params, params, sizeof(Params)); | |
//cudaMemcpyToSymbol("devC_k_n", ¶ms->k_n, sizeof(Float)); | |
//cudaMemcpyToSymbol("devC_k_t", ¶ms->k_t, sizeof(Float)); | |
//cudaMemcpyToSymbol("devC_k_r", ¶ms->k_r, sizeof(Float)); | |
t@@ -153,6 +277,8 @@ __host__ void transferToConstantMemory(Particles* p, | |
} | |
checkForCudaErrors("After transferring to device constant memory"); | |
+ checkConstantMemory(grid, params); | |
+ | |
cout << "Done\n"; | |
} | |
t@@ -167,10 +293,10 @@ __host__ void gpuMain(Float4* host_x, | |
Float4* host_torque, | |
Float4* host_angpos, | |
uint4* host_bonds, | |
- Particles* p, | |
- Grid* grid, | |
- Time* time, | |
- Params* params, | |
+ Particles p, | |
+ Grid grid, | |
+ Time time, | |
+ Params params, | |
Float4* host_w_nx, | |
Float4* host_w_mvfd, | |
const char* cwd, | |
t@@ -180,7 +306,7 @@ __host__ void gpuMain(Float4* host_x, | |
using std::cout; // Namespace directive | |
// Copy data to constant global device memory | |
- transferToConstantMemory(p, grid, time, params); | |
+ transferToConstantMemory(&p, &grid, &time, ¶ms); | |
// Declare pointers for particle variables on the device | |
Float4* dev_x; // Particle position | |
t@@ -221,18 +347,18 @@ __host__ void gpuMain(Float4* host_x, | |
// Particle contact bookkeeping | |
unsigned int* dev_contacts; | |
- unsigned int* host_contacts = new unsigned int[p->np*NC]; | |
+ unsigned int* host_contacts = new unsigned int[p.np*NC]; | |
// Particle pair distance correction across periodic boundaries | |
Float4* dev_distmod; | |
- Float4* host_distmod = new Float4[p->np*NC]; | |
+ Float4* host_distmod = new Float4[p.np*NC]; | |
// x,y,z contains the interparticle vector, corrected if contact | |
// is across a periodic boundary. | |
Float4* dev_delta_t; // Accumulated shear distance of contact | |
- Float4* host_delta_t = new Float4[p->np*NC]; | |
+ Float4* host_delta_t = new Float4[p.np*NC]; | |
// Particle memory size | |
- unsigned int memSizeF = sizeof(Float) * p->np; | |
- unsigned int memSizeF4 = sizeof(Float4) * p->np; | |
+ unsigned int memSizeF = sizeof(Float) * p.np; | |
+ unsigned int memSizeF4 = sizeof(Float4) * p.np; | |
// Allocate device memory for particle variables, | |
// tie to previously declared pointers | |
t@@ -257,24 +383,24 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMalloc((void**)&dev_es, memSizeF); | |
cudaMalloc((void**)&dev_ev, memSizeF); | |
cudaMalloc((void**)&dev_p, memSizeF); | |
- //cudaMalloc((void**)&dev_bonds, sizeof(uint4) * p->np); | |
- //cudaMalloc((void**)&dev_bonds_sorted, sizeof(uint4) * p->np); | |
+ //cudaMalloc((void**)&dev_bonds, sizeof(uint4) * p.np); | |
+ //cudaMalloc((void**)&dev_bonds_sorted, sizeof(uint4) * p.np); | |
// Cell-related arrays | |
- cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*p->np); | |
- cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*p->np); | |
- cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid->num[0]*grid->n… | |
- cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid->num[0]*grid->num… | |
+ cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*p.np); | |
+ cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*p.np); | |
+ cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.num… | |
+ cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num[1… | |
// Particle contact bookkeeping arrays | |
- cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p->np*NC); // Max NC … | |
- cudaMalloc((void**)&dev_distmod, sizeof(Float4)*p->np*NC); | |
- cudaMalloc((void**)&dev_delta_t, sizeof(Float4)*p->np*NC); | |
+ cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*p.np*NC); // Max NC c… | |
+ cudaMalloc((void**)&dev_distmod, sizeof(Float4)*p.np*NC); | |
+ cudaMalloc((void**)&dev_delta_t, sizeof(Float4)*p.np*NC); | |
// Wall arrays | |
- cudaMalloc((void**)&dev_w_nx, sizeof(Float)*params->nw*4); | |
- cudaMalloc((void**)&dev_w_mvfd, sizeof(Float)*params->nw*4); | |
- cudaMalloc((void**)&dev_w_force, sizeof(Float)*params->nw*p->np); | |
+ cudaMalloc((void**)&dev_w_nx, sizeof(Float)*params.nw*4); | |
+ cudaMalloc((void**)&dev_w_mvfd, sizeof(Float)*params.nw*4); | |
+ cudaMalloc((void**)&dev_w_force, sizeof(Float)*params.nw*p.np); | |
checkForCudaErrors("Post device memory allocation"); | |
cout << "Done\n"; | |
t@@ -291,32 +417,32 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMemcpy(dev_force, host_force, memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_torque, host_torque, memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_angpos, host_angpos, memSizeF4, cudaMemcpyHostToDevice); | |
- //cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p->np, cudaMemcpyHostToD… | |
- cudaMemcpy(dev_radius, p->radius, memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_es_dot, p->es_dot, memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_ev_dot, p->ev_dot, memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_es, p->es, memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_ev, p->ev, memSizeF, cudaMemcpyHostToDevice); | |
- cudaMemcpy(dev_p, p->p, memSizeF, cudaMemcpyHostToDevice); | |
+ //cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p.np, cudaMemcpyHostToDe… | |
+ cudaMemcpy(dev_radius, p.radius, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_es_dot, p.es_dot, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_ev_dot, p.ev_dot, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_es, p.es, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_ev, p.ev, memSizeF, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_p, p.p, memSizeF, cudaMemcpyHostToDevice); | |
// Wall data (wall mass and number in constant memory) | |
- cudaMemcpy(dev_w_nx, host_w_nx, sizeof(Float)*params->nw*4, cudaMemcpyHostTo… | |
- cudaMemcpy(dev_w_mvfd, host_w_mvfd, sizeof(Float)*params->nw*4, cudaMemcpyHo… | |
+ cudaMemcpy(dev_w_nx, host_w_nx, sizeof(Float)*params.nw*4, cudaMemcpyHostToD… | |
+ cudaMemcpy(dev_w_mvfd, host_w_mvfd, sizeof(Float)*params.nw*4, cudaMemcpyHos… | |
// Initialize contacts lists to p.np | |
- unsigned int* npu = new unsigned int[p->np*NC]; | |
- for (unsigned int i=0; i<(p->np*NC); ++i) | |
- npu[i] = p->np; | |
- cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)*p->np*NC, cudaMemcpyHostT… | |
+ unsigned int* npu = new unsigned int[p.np*NC]; | |
+ for (unsigned int i=0; i<(p.np*NC); ++i) | |
+ npu[i] = p.np; | |
+ cudaMemcpy(dev_contacts, npu, sizeof(unsigned int)*p.np*NC, cudaMemcpyHostTo… | |
delete[] npu; | |
// Create array of 0.0 values on the host and transfer these to the distance | |
// modifier and shear displacement arrays | |
- Float4* zerosF4 = new Float4[p->np*NC]; | |
- for (unsigned int i=0; i<(p->np*NC); ++i) | |
+ Float4* zerosF4 = new Float4[p.np*NC]; | |
+ for (unsigned int i=0; i<(p.np*NC); ++i) | |
zerosF4[i] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
- cudaMemcpy(dev_distmod, zerosF4, sizeof(Float4)*p->np*NC, cudaMemcpyHostToDe… | |
- cudaMemcpy(dev_delta_t, zerosF4, sizeof(Float4)*p->np*NC, cudaMemcpyHostToDe… | |
+ cudaMemcpy(dev_distmod, zerosF4, sizeof(Float4)*p.np*NC, cudaMemcpyHostToDev… | |
+ cudaMemcpy(dev_delta_t, zerosF4, sizeof(Float4)*p.np*NC, cudaMemcpyHostToDev… | |
delete[] zerosF4; | |
checkForCudaErrors("Post memcopy"); | |
t@@ -339,7 +465,7 @@ __host__ void gpuMain(Float4* host_x, | |
// GPU workload configuration | |
unsigned int threadsPerBlock = 256; | |
// Create enough blocks to accomodate the particles | |
- unsigned int blocksPerGrid = iDivUp(p->np, threadsPerBlock); | |
+ unsigned int blocksPerGrid = iDivUp(p.np, threadsPerBlock); | |
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid | |
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block | |
// Shared memory per block | |
t@@ -363,18 +489,18 @@ __host__ void gpuMain(Float4* host_x, | |
sprintf(file,"%s/output/%s.status.dat", cwd, inputbin); | |
fp = fopen(file, "w"); | |
fprintf(fp,"%2.4e %2.4e %d\n", | |
- time->current, | |
- 100.0*time->current/time->total, | |
- time->step_count); | |
+ time.current, | |
+ 100.0*time.current/time.total, | |
+ time.step_count); | |
fclose(fp); | |
// Write first output data file: output0.bin, thus testing writing of bin fi… | |
sprintf(file,"%s/output/%s.output0.bin", cwd, inputbin); | |
- if (fwritebin(file, p, host_x, host_vel, | |
+ if (fwritebin(file, &p, host_x, host_vel, | |
host_angvel, host_force, | |
host_torque, host_angpos, | |
host_bonds, | |
- grid, time, params, | |
+ &grid, &time, ¶ms, | |
host_w_nx, host_w_mvfd) != 0) { | |
std::cerr << "\n Problem during fwritebin \n"; | |
exit(EXIT_FAILURE); | |
t@@ -412,11 +538,11 @@ __host__ void gpuMain(Float4* host_x, | |
cudaEventCreate(&kernel_toc); | |
} | |
- cout << " Current simulation time: " << time->current << " s."; | |
+ cout << " Current simulation time: " << time.current << " s."; | |
// MAIN CALCULATION TIME LOOP | |
- while (time->current <= time->total) { | |
+ while (time.current <= time.total) { | |
// Increment iteration counter | |
++iter; | |
t@@ -449,7 +575,7 @@ __host__ void gpuMain(Float4* host_x, | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
thrust::sort_by_key(thrust::device_ptr<uint>(dev_gridParticleCellID), | |
- thrust::device_ptr<uint>(dev_gridParticleCellID + p->n… | |
+ thrust::device_ptr<uint>(dev_gridParticleCellID + p.np… | |
thrust::device_ptr<uint>(dev_gridParticleIndex)); | |
cudaThreadSynchronize(); // Needed? Does thrust synchronize threads implic… | |
if (PROFILING == 1) | |
t@@ -461,7 +587,7 @@ __host__ void gpuMain(Float4* host_x, | |
// specified with pointer value 0xffffffff, which for a 32 bit unsigned int | |
// is 4294967295. | |
cudaMemset(dev_cellStart, 0xffffffff, | |
- grid->num[0]*grid->num[1]*grid->num[2]*sizeof(unsigned int)); | |
+ grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int)); | |
cudaThreadSynchronize(); | |
checkForCudaErrors("Post cudaMemset"); | |
t@@ -491,7 +617,7 @@ __host__ void gpuMain(Float4* host_x, | |
// The contact search in topology() is only necessary for determining | |
// the accumulated shear distance needed in the linear elastic | |
// and nonlinear contact force model | |
- if (params->shearmodel == 2 || params->shearmodel == 3) { | |
+ if (params.shearmodel == 2 || params.shearmodel == 3) { | |
// For each particle: Search contacts in neighbor cells | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
t@@ -571,7 +697,7 @@ __host__ void gpuMain(Float4* host_x, | |
// Update wall kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrateWalls<<< 1, params->nw>>>(dev_w_nx, | |
+ integrateWalls<<< 1, params.nw>>>(dev_w_nx, | |
dev_w_mvfd, | |
dev_w_force_partial, | |
blocksPerGrid); | |
t@@ -584,17 +710,17 @@ __host__ void gpuMain(Float4* host_x, | |
// Update timers and counters | |
- time->current += time->dt; | |
- filetimeclock += time->dt; | |
+ time.current += time.dt; | |
+ filetimeclock += time.dt; | |
// Report time to console | |
cout << "\r Current simulation time: " | |
- << time->current << " s. ";// << std::flush; | |
+ << time.current << " s. ";// << std::flush; | |
// Produce output binary if the time interval | |
// between output files has been reached | |
- if (filetimeclock > time->file_dt) { | |
+ if (filetimeclock > time.file_dt) { | |
// Pause the CPU thread until all CUDA calls previously issued are compl… | |
cudaThreadSynchronize(); | |
t@@ -610,23 +736,23 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMemcpy(host_force, dev_force, memSizeF4, cudaMemcpyDeviceToHost); | |
cudaMemcpy(host_torque, dev_torque, memSizeF4, cudaMemcpyDeviceToHost); | |
cudaMemcpy(host_angpos, dev_angpos, memSizeF4, cudaMemcpyDeviceToHost); | |
- //cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p->np, cudaMemcpyDev… | |
- cudaMemcpy(p->es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy(p->ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy(p->es, dev_es, memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy(p->ev, dev_ev, memSizeF, cudaMemcpyDeviceToHost); | |
- cudaMemcpy(p->p, dev_p, memSizeF, cudaMemcpyDeviceToHost); | |
+ //cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p.np, cudaMemcpyDevi… | |
+ cudaMemcpy(p.es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(p.ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(p.es, dev_es, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(p.ev, dev_ev, memSizeF, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(p.p, dev_p, memSizeF, cudaMemcpyDeviceToHost); | |
// Wall data | |
cudaMemcpy(host_w_nx, dev_w_nx, | |
- sizeof(Float)*params->nw*4, cudaMemcpyDeviceToHost); | |
+ sizeof(Float)*params.nw*4, cudaMemcpyDeviceToHost); | |
cudaMemcpy(host_w_mvfd, dev_w_mvfd, | |
- sizeof(Float)*params->nw*4, cudaMemcpyDeviceToHost); | |
+ sizeof(Float)*params.nw*4, cudaMemcpyDeviceToHost); | |
// Contact information | |
if (CONTACTINFO == 1) { | |
- cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)*p->np*NC,… | |
+ cudaMemcpy(host_contacts, dev_contacts, sizeof(unsigned int)*p.np*NC, … | |
cudaMemcpy(host_delta_t, dev_delta_t, memSizeF4*NC, cudaMemcpyDeviceTo… | |
cudaMemcpy(host_distmod, dev_distmod, memSizeF4*NC, cudaMemcpyDeviceTo… | |
} | |
t@@ -635,14 +761,14 @@ __host__ void gpuMain(Float4* host_x, | |
cudaThreadSynchronize(); | |
// Write binary output file | |
- time->step_count += 1; | |
- sprintf(file,"%s/output/%s.output%d.bin", cwd, inputbin, time->step_coun… | |
+ time.step_count += 1; | |
+ sprintf(file,"%s/output/%s.output%d.bin", cwd, inputbin, time.step_count… | |
- if (fwritebin(file, p, host_x, host_vel, | |
+ if (fwritebin(file, &p, host_x, host_vel, | |
host_angvel, host_force, | |
host_torque, host_angpos, | |
host_bonds, | |
- grid, time, params, | |
+ &grid, &time, ¶ms, | |
host_w_nx, host_w_mvfd) != 0) { | |
cout << "\n Error during fwritebin() in main loop\n"; | |
exit(EXIT_FAILURE); | |
t@@ -651,10 +777,10 @@ __host__ void gpuMain(Float4* host_x, | |
if (CONTACTINFO == 1) { | |
// Write contact information to stdout | |
cout << "\n\n---------------------------\n" | |
- << "t = " << time->current << " s.\n" | |
+ << "t = " << time.current << " s.\n" | |
<< "---------------------------\n"; | |
- for (int n = 0; n < p->np; ++n) { | |
+ for (int n = 0; n < p.np; ++n) { | |
cout << "\n## Particle " << n << " ##\n"; | |
cout << "- contacts:\n"; | |
t@@ -682,9 +808,9 @@ __host__ void gpuMain(Float4* host_x, | |
sprintf(file,"%s/output/%s.status.dat", cwd, inputbin); | |
fp = fopen(file, "w"); | |
fprintf(fp,"%2.4e %2.4e %d\n", | |
- time->current, | |
- 100.0*time->current/time->total, | |
- time->step_count); | |
+ time.current, | |
+ 100.0*time.current/time.total, | |
+ time.step_count); | |
fclose(fp); | |
filetimeclock = 0.0; | |
t@@ -701,7 +827,7 @@ __host__ void gpuMain(Float4* host_x, | |
cout << "\nSimulation ended. Statistics:\n" | |
<< " - Last output file number: " | |
- << time->step_count << "\n" | |
+ << time.step_count << "\n" | |
<< " - GPU time spent: " | |
<< dev_time_spent/1000.0f << " s\n" | |
<< " - CPU time spent: " | |
diff --git a/src/main.cpp b/src/main.cpp | |
t@@ -456,8 +456,8 @@ int main(int argc, char *argv[]) | |
host_torque, | |
host_angpos, | |
host_bonds, | |
- &p, &grid, | |
- &time, ¶ms, | |
+ p, grid, | |
+ time, params, | |
host_w_nx, | |
host_w_mvfd, | |
cwd, inputbin); |