device.cu: estimate number of cudaCoresPerSM for CC version 8 - sphere - GPU-b… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 04f7aa6ff1ec5eb78167e8b40fb399ed3f63a093 | |
parent 401f418676345ca4b80e2725106c40fc37f28e31 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 9 Feb 2023 12:43:46 +0100 | |
device.cu: estimate number of cudaCoresPerSM for CC version 8 | |
Diffstat: | |
M src/device.cu | 104 ++++++++++++++++-------------… | |
1 file changed, 53 insertions(+), 51 deletions(-) | |
--- | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -54,6 +54,8 @@ int cudaCoresPerSM(int major, int minor) | |
return 128; | |
else if (major == 7) | |
return 32; | |
+ else if (major == 8) | |
+ return 64; | |
else | |
printf("Error in cudaCoresPerSM Device compute capability value " | |
"(%d.%d) not recognized.", major, minor); | |
t@@ -112,11 +114,11 @@ void DEM::initializeGPU(void) | |
if (verbose == 1) { | |
cout << " CUDA device ID: " << d << "\n"; | |
- cout << " - Name: " << prop.name << ", compute capability: " | |
+ cout << " - Name: " << prop.name << ", compute capability: " | |
<< prop.major << "." << prop.minor << ".\n"; | |
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000 | |
- << "." << cudaDriverVersion%100 | |
- << ", runtime version " << cudaRuntimeVersion/1000 << "." | |
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000 | |
+ << "." << cudaDriverVersion%100 | |
+ << ", runtime version " << cudaRuntimeVersion/1000 << "." | |
<< cudaRuntimeVersion%100 << std::endl; | |
} | |
} | |
t@@ -138,11 +140,11 @@ void DEM::initializeGPU(void) | |
if (verbose == 1) { | |
cout << " CUDA device ID: " << device << "\n"; | |
- cout << " - Name: " << prop.name << ", compute capability: " | |
+ cout << " - Name: " << prop.name << ", compute capability: " | |
<< prop.major << "." << prop.minor << ".\n"; | |
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000 | |
- << "." << cudaDriverVersion%100 | |
- << ", runtime version " << cudaRuntimeVersion/1000 << "." | |
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000 | |
+ << "." << cudaDriverVersion%100 | |
+ << ", runtime version " << cudaRuntimeVersion/1000 << "." | |
<< cudaRuntimeVersion%100 | |
<< "\n - " << ncudacores << " CUDA cores" << std::endl; | |
} | |
t@@ -284,7 +286,7 @@ __global__ void checkParticlePositions( | |
// Copy the constant data components to device memory, | |
-// and check whether the values correspond to the | |
+// and check whether the values correspond to the | |
// values in constant memory. | |
void DEM::checkConstantMemory() | |
{ | |
t@@ -386,7 +388,7 @@ void DEM::updateGridSize() | |
grid.L[2] = *Lz; | |
// Write value to devC_grid.L[2] | |
- //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float)); | |
+ //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float)); | |
cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid)); | |
checkForCudaErrors("DEM::updateGridSize(): write to devC_grid.L[2]"); | |
t@@ -857,15 +859,15 @@ void DEM::startTime() | |
tic = clock(); | |
//// GPU workload configuration | |
- unsigned int threadsPerBlock = 256; | |
- //unsigned int threadsPerBlock = 512; | |
+ unsigned int threadsPerBlock = 256; | |
+ //unsigned int threadsPerBlock = 512; | |
// Create enough blocks to accomodate the particles | |
- unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock); | |
+ unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock); | |
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid | |
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block | |
- unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock); | |
+ unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock); | |
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid | |
// Use 3D block and grid layout for cell-centered fluid calculations | |
t@@ -930,9 +932,9 @@ void DEM::startTime() | |
//sprintf(file,"output/%s.status.dat", sid); | |
outfile = "output/" + sid + ".status.dat"; | |
fp = fopen(outfile.c_str(), "w"); | |
- fprintf(fp,"%2.4e %2.4e %d\n", | |
- time.current, | |
- 100.0*time.current/time.total, | |
+ fprintf(fp,"%2.4e %2.4e %d\n", | |
+ time.current, | |
+ 100.0*time.current/time.total, | |
time.step_count); | |
fclose(fp); | |
t@@ -1052,19 +1054,19 @@ void DEM::startTime() | |
checkForCudaErrorsIter("Post checkParticlePositions", iter); | |
#endif | |
- // If the grid is adaptive, readjust the grid height to equal the | |
+ // If the grid is adaptive, readjust the grid height to equal the | |
// positions of the dynamic walls | |
if (grid.adaptive == 1 && walls.nw > 0) { | |
updateGridSize(); | |
} | |
- // For each particle: | |
- // Compute hash key (cell index) from position | |
+ // For each particle: | |
+ // Compute hash key (cell index) from position | |
// in the fine, uniform and homogenous grid. | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
calcParticleCellID<<<dimGrid, dimBlock>>>(dev_gridParticleCellID, | |
- dev_gridParticleIndex, | |
+ dev_gridParticleIndex, | |
dev_x); | |
// Synchronization point | |
t@@ -1093,7 +1095,7 @@ void DEM::startTime() | |
// Zero cell array values by setting cellStart to its highest | |
// possible value, specified with pointer value 0xffffffff, which | |
// for a 32 bit unsigned int is 4294967295. | |
- cudaMemset(dev_cellStart, 0xffffffff, | |
+ cudaMemset(dev_cellStart, 0xffffffff, | |
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int)); | |
cudaThreadSynchronize(); | |
checkForCudaErrorsIter("Post cudaMemset", iter); | |
t@@ -1103,14 +1105,14 @@ void DEM::startTime() | |
// configurations in new arrays (*_sorted). | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart, | |
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart, | |
dev_cellEnd, | |
- dev_gridParticleCellID, | |
+ dev_gridParticleCellID, | |
dev_gridParticleIndex, | |
- dev_x, dev_vel, | |
+ dev_x, dev_vel, | |
dev_angvel, | |
- dev_x_sorted, | |
- dev_vel_sorted, | |
+ dev_x_sorted, | |
+ dev_vel_sorted, | |
dev_angvel_sorted); | |
// Synchronization point | |
t@@ -1127,10 +1129,10 @@ void DEM::startTime() | |
// For each particle: Search contacts in neighbor cells | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- topology<<<dimGrid, dimBlock>>>(dev_cellStart, | |
+ topology<<<dimGrid, dimBlock>>>(dev_cellStart, | |
dev_cellEnd, | |
dev_gridParticleIndex, | |
- dev_x_sorted, | |
+ dev_x_sorted, | |
dev_contacts, | |
dev_distmod); | |
t@@ -1159,10 +1161,10 @@ void DEM::startTime() | |
dev_angvel_sorted, | |
dev_vel, | |
dev_angvel, | |
- dev_force, | |
- dev_torque, | |
+ dev_force, | |
+ dev_torque, | |
dev_es_dot, | |
- dev_ev_dot, | |
+ dev_ev_dot, | |
dev_es, | |
dev_ev, | |
dev_p, | |
t@@ -1828,7 +1830,7 @@ void DEM::startTime() | |
if (write_conv_log == 1) | |
convlog << iter+1 << '\t' << nijac << std::end… | |
- std::cerr << "\nIteration " << iter << ", time " | |
+ std::cerr << "\nIteration " << iter << ", time " | |
<< iter*time.dt << " s: " | |
"Error, the epsilon solution in the fluid " | |
"calculations did not converge. Try increasing… | |
t@@ -1898,7 +1900,7 @@ void DEM::startTime() | |
} // end cfd_solver == 0 | |
// Darcy solution | |
- else if (cfd_solver == 1) { | |
+ else if (cfd_solver == 1) { | |
#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS) | |
std::cout << "\n\n@@@@@@ TIME STEP " << iter << " @@@" | |
t@@ -2027,7 +2029,7 @@ void DEM::startTime() | |
// copy porosities to the frictionless Y boundaries | |
if (grid.periodic == 2) { | |
- copyDarcyPorositiesToEdges<<<dimGridFluid, | |
+ copyDarcyPorositiesToEdges<<<dimGridFluid, | |
dimBlockFluid>>>( | |
dev_darcy_phi, | |
dev_darcy_dphi, | |
t@@ -2038,7 +2040,7 @@ void DEM::startTime() | |
// copy porosities to the frictionless lower Z boundary | |
if (grid.periodic == 2) { | |
- copyDarcyPorositiesToBottom<<<dimGridFluid, | |
+ copyDarcyPorositiesToBottom<<<dimGridFluid, | |
dimBlockFluid>>>( | |
dev_darcy_phi, | |
dev_darcy_dphi, | |
t@@ -2369,7 +2371,7 @@ void DEM::startTime() | |
if (write_conv_log == 1) | |
convlog << iter+1 << '\t' << nijac << std::end… | |
- std::cerr << "\nIteration " << iter << ", time " | |
+ std::cerr << "\nIteration " << iter << ", time " | |
<< iter*time.dt << " s: " | |
"Error, the pressure solution in the fluid " | |
"calculations did not converge. Try increasing… | |
t@@ -2471,14 +2473,14 @@ void DEM::startTime() | |
// Update particle kinematics | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
- integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, | |
- dev_vel_sorted, | |
+ integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, | |
+ dev_vel_sorted, | |
dev_angvel_sorted, | |
- dev_x, | |
- dev_vel, | |
+ dev_x, | |
+ dev_vel, | |
dev_angvel, | |
dev_force, | |
- dev_torque, | |
+ dev_torque, | |
dev_angpos, | |
dev_acc, | |
dev_angacc, | |
t@@ -2571,7 +2573,7 @@ void DEM::startTime() | |
} | |
- // Produce output binary if the time interval | |
+ // Produce output binary if the time interval | |
// between output files has been reached | |
if (filetimeclock >= time.file_dt) { | |
t@@ -2646,19 +2648,19 @@ void DEM::startTime() | |
cout << "\n## Particle " << n << " ##\n"; | |
cout << "- contacts:\n"; | |
- for (int nc = 0; nc < NC; ++nc) | |
+ for (int nc = 0; nc < NC; ++nc) | |
cout << "[" << nc << "]=" << k.contacts[nc+NC*n] << | |
'\n'; | |
cout << "\n- delta_t:\n"; | |
- for (int nc = 0; nc < NC; ++nc) | |
+ for (int nc = 0; nc < NC; ++nc) | |
cout << k.delta_t[nc+NC*n].x << '\t' | |
<< k.delta_t[nc+NC*n].y << '\t' | |
<< k.delta_t[nc+NC*n].z << '\t' | |
<< k.delta_t[nc+NC*n].w << '\n'; | |
cout << "\n- distmod:\n"; | |
- for (int nc = 0; nc < NC; ++nc) | |
+ for (int nc = 0; nc < NC; ++nc) | |
cout << k.distmod[nc+NC*n].x << '\t' | |
<< k.distmod[nc+NC*n].y << '\t' | |
<< k.distmod[nc+NC*n].z << '\t' | |
t@@ -2667,11 +2669,11 @@ void DEM::startTime() | |
cout << '\n'; | |
} | |
- // Update status.dat at the interval of filetime | |
+ // Update status.dat at the interval of filetime | |
outfile = "output/" + sid + ".status.dat"; | |
fp = fopen(outfile.c_str(), "w"); | |
- fprintf(fp,"%2.4e %2.4e %d\n", | |
- time.current, | |
+ fprintf(fp,"%2.4e %2.4e %d\n", | |
+ time.current, | |
100.0*time.current/time.total, | |
time.step_count); | |
fclose(fp); | |
t@@ -2697,7 +2699,7 @@ void DEM::startTime() | |
if (verbose == 1) { | |
cout << "\nSimulation ended. Statistics:\n" | |
- << " - Last output file number: " | |
+ << " - Last output file number: " | |
<< time.step_count << "\n" | |
<< " - GPU time spent: " | |
<< dev_time_spent/1000.0f << " s\n" | |
t@@ -2705,7 +2707,7 @@ void DEM::startTime() | |
<< time_spent << " s\n" | |
<< " - Mean duration of iteration:\n" | |
<< " " << dev_time_spent/((double)iter*1000.0f) << " s" | |
- << std::endl; | |
+ << std::endl; | |
} | |
cudaEventDestroy(dev_tic); |