tcopy constant memory to all devices - sphere - GPU-based 3D discrete element m… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit b620d26c6032c277e46b2e68fc566449bcdc015b | |
parent 6de1181f3e695a80fe56a139e3384cba82034404 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Mon, 30 Jun 2014 10:53:33 +0200 | |
copy constant memory to all devices | |
Diffstat: | |
M src/contactsearch.cuh | 2 -- | |
M src/device.cu | 95 +++++++++++++++--------------… | |
M src/sphere.h | 3 +++ | |
3 files changed, 49 insertions(+), 51 deletions(-) | |
--- | |
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh | |
t@@ -485,8 +485,6 @@ __global__ void interact( | |
mempos = (unsigned int)(idx_a_orig * devC_nc + i); | |
__syncthreads(); | |
idx_b_orig = dev_contacts[mempos]; | |
- //radius_b = distmod.w; | |
- | |
if (idx_b_orig != (unsigned int)devC_np) { | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -67,6 +67,7 @@ __host__ void DEM::initializeGPU(void) | |
// Register number of devices | |
cudaGetDeviceCount(&deviceCount); | |
+ ndevices = deviceCount; // store in DEM class | |
if (deviceCount == 0) { | |
std::cerr << "\nERROR: No CUDA-enabled devices availible. Bye." | |
t@@ -84,7 +85,7 @@ __host__ void DEM::initializeGPU(void) | |
// Loop through GPU's and choose the one with the most CUDA cores | |
int ncudacores; | |
int max_ncudacores = 0; | |
- for (int d=0; d<deviceCount; d++) { | |
+ for (int d=0; d<ndevices; d++) { | |
cudaGetDeviceProperties(&prop, d); | |
cudaDriverGetVersion(&cudaDriverVersion); | |
cudaRuntimeGetVersion(&cudaRuntimeVersion); | |
t@@ -107,6 +108,7 @@ __host__ void DEM::initializeGPU(void) | |
} | |
} | |
+ device = cudadevice; // store in DEM class | |
cout << " Using CUDA device ID " << cudadevice << " with " | |
<< max_ncudacores << " cores." << std::endl; | |
t@@ -140,7 +142,6 @@ __global__ void checkConstantValues(int* dev_equal, | |
Grid* dev_grid, | |
Params* dev_params) | |
{ | |
- | |
// Values ok (0) | |
*dev_equal = 0; | |
t@@ -158,7 +159,6 @@ __global__ void checkConstantValues(int* dev_equal, | |
dev_grid->periodic != devC_grid.periodic) | |
*dev_equal = 1; // Not ok | |
- | |
else if (dev_params->g[0] != devC_params.g[0] || | |
dev_params->g[1] != devC_params.g[1] || | |
dev_params->g[2] != devC_params.g[2] || | |
t@@ -188,8 +188,6 @@ __global__ void checkConstantValues(int* dev_equal, | |
// values in constant memory. | |
__host__ void DEM::checkConstantMemory() | |
{ | |
- | |
- | |
// Allocate space in global device memory | |
Grid* dev_grid; | |
Params* dev_params; | |
t@@ -217,7 +215,6 @@ __host__ void DEM::checkConstantMemory() | |
cudaFree(dev_params); | |
cudaFree(dev_equal); | |
- | |
// Are the values equal? | |
if (*equal != 0) { | |
std::cerr << "Error! The values in constant memory do not " | |
t@@ -238,25 +235,24 @@ __host__ void DEM::transferToConstantDeviceMemory() | |
if (verbose == 1) | |
cout << " Transfering data to constant device memory: "; | |
- /*// Reference by string deprecated in cuda 5.0 | |
- cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd)); | |
- cudaMemcpyToSymbol("devC_np", &np, sizeof(np)); | |
- cudaMemcpyToSymbol("devC_nw", &walls.nw, sizeof(unsigned int)); | |
- cudaMemcpyToSymbol("devC_nc", &NC, sizeof(int)); | |
- cudaMemcpyToSymbol("devC_dt", &time.dt, sizeof(Float));*/ | |
- cudaMemcpyToSymbol(devC_nd, &nd, sizeof(nd)); | |
- cudaMemcpyToSymbol(devC_np, &np, sizeof(np)); | |
- cudaMemcpyToSymbol(devC_nw, &walls.nw, sizeof(unsigned int)); | |
- cudaMemcpyToSymbol(devC_nc, &NC, sizeof(int)); | |
- cudaMemcpyToSymbol(devC_dt, &time.dt, sizeof(Float)); | |
- cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid)); | |
- cudaMemcpyToSymbol(devC_params, ¶ms, sizeof(Params)); | |
+ for (int d=0; d<ndevices; d++) { | |
+ cudaSetDevice(d); | |
+ cudaMemcpyToSymbol(devC_nd, &nd, sizeof(nd)); | |
+ cudaMemcpyToSymbol(devC_np, &np, sizeof(np)); | |
+ cudaMemcpyToSymbol(devC_nw, &walls.nw, sizeof(unsigned int)); | |
+ cudaMemcpyToSymbol(devC_nc, &NC, sizeof(int)); | |
+ cudaMemcpyToSymbol(devC_dt, &time.dt, sizeof(Float)); | |
+ cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid)); | |
+ cudaMemcpyToSymbol(devC_params, ¶ms, sizeof(Params)); | |
+ } | |
+ cudaSetDevice(device); | |
checkForCudaErrors("After transferring to device constant memory"); | |
if (verbose == 1) | |
cout << "Done\n"; | |
+ // only for device with most CUDA cores | |
checkConstantMemory(); | |
} | |
t@@ -295,7 +291,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
// Particle contact bookkeeping arrays | |
cudaMalloc((void**)&dev_contacts, | |
- sizeof(unsigned int)*np*NC); // Max NC contacts per particle | |
+ sizeof(unsigned int)*np*NC); | |
cudaMalloc((void**)&dev_distmod, memSizeF4*NC); | |
cudaMalloc((void**)&dev_delta_t, memSizeF4*NC); | |
cudaMalloc((void**)&dev_bonds, sizeof(uint2)*params.nb0); | |
t@@ -317,10 +313,10 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
// Cell-related arrays | |
cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*np); | |
cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_cellStart, | |
- sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]); | |
- cudaMalloc((void**)&dev_cellEnd, | |
- sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]); | |
+ cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int) | |
+ *grid.num[0]*grid.num[1]*grid.num[2]); | |
+ cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int) | |
+ *grid.num[0]*grid.num[1]*grid.num[2]); | |
// Host contact bookkeeping arrays | |
k.contacts = new unsigned int[np*NC]; | |
t@@ -348,6 +344,7 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
{ | |
if (verbose == 1) | |
printf("\nFreeing device memory: "); | |
+ | |
// Particle arrays | |
cudaFree(dev_x); | |
cudaFree(dev_xyzsum); | |
t@@ -416,59 +413,59 @@ __host__ void DEM::transferToGlobalDeviceMemory(int stat… | |
// Kinematic particle values | |
cudaMemcpy( dev_x, k.x, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_xyzsum, k.xyzsum, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_vel, k.vel, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_vel0, k.vel, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_acc, k.acc, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_force, k.force, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angpos, k.angpos, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angvel, k.angvel, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angvel0, k.angvel, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angacc, k.angacc, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_torque, k.torque, | |
- memSizeF4, cudaMemcpyHostToDevice); | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_contacts, k.contacts, | |
- sizeof(unsigned int)*np*NC, cudaMemcpyHostToDevice); | |
+ sizeof(unsigned int)*np*NC, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_distmod, k.distmod, | |
- memSizeF4*NC, cudaMemcpyHostToDevice); | |
+ memSizeF4*NC, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_delta_t, k.delta_t, | |
- memSizeF4*NC, cudaMemcpyHostToDevice); | |
+ memSizeF4*NC, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_bonds, k.bonds, | |
- sizeof(uint2)*params.nb0, cudaMemcpyHostToDevice); | |
+ sizeof(uint2)*params.nb0, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_bonds_delta, k.bonds_delta, | |
- sizeof(Float4)*params.nb0, cudaMemcpyHostToDevice); | |
+ sizeof(Float4)*params.nb0, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_bonds_omega, k.bonds_omega, | |
- sizeof(Float4)*params.nb0, cudaMemcpyHostToDevice); | |
+ sizeof(Float4)*params.nb0, cudaMemcpyHostToDevice); | |
// Individual particle energy values | |
cudaMemcpy( dev_es_dot, e.es_dot, | |
- memSizeF, cudaMemcpyHostToDevice); | |
+ memSizeF, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_es, e.es, | |
- memSizeF, cudaMemcpyHostToDevice); | |
+ memSizeF, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_ev_dot, e.ev_dot, | |
- memSizeF, cudaMemcpyHostToDevice); | |
+ memSizeF, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_ev, e.ev, | |
- memSizeF, cudaMemcpyHostToDevice); | |
+ memSizeF, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_p, e.p, | |
- memSizeF, cudaMemcpyHostToDevice); | |
+ memSizeF, cudaMemcpyHostToDevice); | |
// Wall parameters | |
cudaMemcpy( dev_walls_wmode, walls.wmode, | |
- sizeof(int)*walls.nw, cudaMemcpyHostToDevice); | |
+ sizeof(int)*walls.nw, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_walls_nx, walls.nx, | |
- sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
+ sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_walls_mvfd, walls.mvfd, | |
- sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
+ sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
// Fluid arrays | |
if (navierstokes == 1) { | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -53,6 +53,9 @@ class DEM { | |
unsigned int width; | |
unsigned int height; | |
+ int ndevices; // number of CUDA GPUs | |
+ int device; // primary GPU | |
+ | |
// DEVICE ARRAYS | |