Introduction
Introduction Statistics Contact Development Disclaimer Help
device.cu: estimate number of cudaCoresPerSM for CC version 8 - sphere - GPU-b…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit 04f7aa6ff1ec5eb78167e8b40fb399ed3f63a093
parent 401f418676345ca4b80e2725106c40fc37f28e31
Author: Anders Damsgaard <[email protected]>
Date: Thu, 9 Feb 2023 12:43:46 +0100
device.cu: estimate number of cudaCoresPerSM for CC version 8
Diffstat:
M src/device.cu | 104 ++++++++++++++++-------------…
1 file changed, 53 insertions(+), 51 deletions(-)
---
diff --git a/src/device.cu b/src/device.cu
@@ -54,6 +54,8 @@ int cudaCoresPerSM(int major, int minor)
return 128;
else if (major == 7)
return 32;
+ else if (major == 8)
+ return 64;
else
printf("Error in cudaCoresPerSM Device compute capability value "
"(%d.%d) not recognized.", major, minor);
@@ -112,11 +114,11 @@ void DEM::initializeGPU(void)
if (verbose == 1) {
cout << " CUDA device ID: " << d << "\n";
- cout << " - Name: " << prop.name << ", compute capability: "
+ cout << " - Name: " << prop.name << ", compute capability: "
<< prop.major << "." << prop.minor << ".\n";
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000
- << "." << cudaDriverVersion%100
- << ", runtime version " << cudaRuntimeVersion/1000 << "."
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000
+ << "." << cudaDriverVersion%100
+ << ", runtime version " << cudaRuntimeVersion/1000 << "."
<< cudaRuntimeVersion%100 << std::endl;
}
}
@@ -138,11 +140,11 @@ void DEM::initializeGPU(void)
if (verbose == 1) {
cout << " CUDA device ID: " << device << "\n";
- cout << " - Name: " << prop.name << ", compute capability: "
+ cout << " - Name: " << prop.name << ", compute capability: "
<< prop.major << "." << prop.minor << ".\n";
- cout << " - CUDA Driver version: " << cudaDriverVersion/1000
- << "." << cudaDriverVersion%100
- << ", runtime version " << cudaRuntimeVersion/1000 << "."
+ cout << " - CUDA Driver version: " << cudaDriverVersion/1000
+ << "." << cudaDriverVersion%100
+ << ", runtime version " << cudaRuntimeVersion/1000 << "."
<< cudaRuntimeVersion%100
<< "\n - " << ncudacores << " CUDA cores" << std::endl;
}
@@ -284,7 +286,7 @@ __global__ void checkParticlePositions(
// Copy the constant data components to device memory,
-// and check whether the values correspond to the
+// and check whether the values correspond to the
// values in constant memory.
void DEM::checkConstantMemory()
{
@@ -386,7 +388,7 @@ void DEM::updateGridSize()
grid.L[2] = *Lz;
// Write value to devC_grid.L[2]
- //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float));
+ //cudaMemcpyToSymbol(devC_grid.L[2], &Lz, sizeof(Float));
cudaMemcpyToSymbol(devC_grid, &grid, sizeof(Grid));
checkForCudaErrors("DEM::updateGridSize(): write to devC_grid.L[2]");
@@ -857,15 +859,15 @@ void DEM::startTime()
tic = clock();
//// GPU workload configuration
- unsigned int threadsPerBlock = 256;
- //unsigned int threadsPerBlock = 512;
+ unsigned int threadsPerBlock = 256;
+ //unsigned int threadsPerBlock = 512;
// Create enough blocks to accomodate the particles
- unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
+ unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock);
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block
- unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
+ unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock);
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid
// Use 3D block and grid layout for cell-centered fluid calculations
@@ -930,9 +932,9 @@ void DEM::startTime()
//sprintf(file,"output/%s.status.dat", sid);
outfile = "output/" + sid + ".status.dat";
fp = fopen(outfile.c_str(), "w");
- fprintf(fp,"%2.4e %2.4e %d\n",
- time.current,
- 100.0*time.current/time.total,
+ fprintf(fp,"%2.4e %2.4e %d\n",
+ time.current,
+ 100.0*time.current/time.total,
time.step_count);
fclose(fp);
@@ -1052,19 +1054,19 @@ void DEM::startTime()
checkForCudaErrorsIter("Post checkParticlePositions", iter);
#endif
- // If the grid is adaptive, readjust the grid height to equal the
+ // If the grid is adaptive, readjust the grid height to equal the
// positions of the dynamic walls
if (grid.adaptive == 1 && walls.nw > 0) {
updateGridSize();
}
- // For each particle:
- // Compute hash key (cell index) from position
+ // For each particle:
+ // Compute hash key (cell index) from position
// in the fine, uniform and homogenous grid.
if (PROFILING == 1)
startTimer(&kernel_tic);
calcParticleCellID<<<dimGrid, dimBlock>>>(dev_gridParticleCellID,
- dev_gridParticleIndex,
+ dev_gridParticleIndex,
dev_x);
// Synchronization point
@@ -1093,7 +1095,7 @@ void DEM::startTime()
// Zero cell array values by setting cellStart to its highest
// possible value, specified with pointer value 0xffffffff, which
// for a 32 bit unsigned int is 4294967295.
- cudaMemset(dev_cellStart, 0xffffffff,
+ cudaMemset(dev_cellStart, 0xffffffff,
grid.num[0]*grid.num[1]*grid.num[2]*sizeof(unsigned int));
cudaThreadSynchronize();
checkForCudaErrorsIter("Post cudaMemset", iter);
@@ -1103,14 +1105,14 @@ void DEM::startTime()
// configurations in new arrays (*_sorted).
if (PROFILING == 1)
startTimer(&kernel_tic);
- reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart,
+ reorderArrays<<<dimGrid, dimBlock, smemSize>>>(dev_cellStart,
dev_cellEnd,
- dev_gridParticleCellID,
+ dev_gridParticleCellID,
dev_gridParticleIndex,
- dev_x, dev_vel,
+ dev_x, dev_vel,
dev_angvel,
- dev_x_sorted,
- dev_vel_sorted,
+ dev_x_sorted,
+ dev_vel_sorted,
dev_angvel_sorted);
// Synchronization point
@@ -1127,10 +1129,10 @@ void DEM::startTime()
// For each particle: Search contacts in neighbor cells
if (PROFILING == 1)
startTimer(&kernel_tic);
- topology<<<dimGrid, dimBlock>>>(dev_cellStart,
+ topology<<<dimGrid, dimBlock>>>(dev_cellStart,
dev_cellEnd,
dev_gridParticleIndex,
- dev_x_sorted,
+ dev_x_sorted,
dev_contacts,
dev_distmod);
@@ -1159,10 +1161,10 @@ void DEM::startTime()
dev_angvel_sorted,
dev_vel,
dev_angvel,
- dev_force,
- dev_torque,
+ dev_force,
+ dev_torque,
dev_es_dot,
- dev_ev_dot,
+ dev_ev_dot,
dev_es,
dev_ev,
dev_p,
@@ -1828,7 +1830,7 @@ void DEM::startTime()
if (write_conv_log == 1)
convlog << iter+1 << '\t' << nijac << std::end…
- std::cerr << "\nIteration " << iter << ", time "
+ std::cerr << "\nIteration " << iter << ", time "
<< iter*time.dt << " s: "
"Error, the epsilon solution in the fluid "
"calculations did not converge. Try increasing…
@@ -1898,7 +1900,7 @@ void DEM::startTime()
} // end cfd_solver == 0
// Darcy solution
- else if (cfd_solver == 1) {
+ else if (cfd_solver == 1) {
#if defined(REPORT_EPSILON) || defined(REPORT_FORCING_TERMS)
std::cout << "\n\n@@@@@@ TIME STEP " << iter << " @@@"
@@ -2027,7 +2029,7 @@ void DEM::startTime()
// copy porosities to the frictionless Y boundaries
if (grid.periodic == 2) {
- copyDarcyPorositiesToEdges<<<dimGridFluid,
+ copyDarcyPorositiesToEdges<<<dimGridFluid,
dimBlockFluid>>>(
dev_darcy_phi,
dev_darcy_dphi,
@@ -2038,7 +2040,7 @@ void DEM::startTime()
// copy porosities to the frictionless lower Z boundary
if (grid.periodic == 2) {
- copyDarcyPorositiesToBottom<<<dimGridFluid,
+ copyDarcyPorositiesToBottom<<<dimGridFluid,
dimBlockFluid>>>(
dev_darcy_phi,
dev_darcy_dphi,
@@ -2369,7 +2371,7 @@ void DEM::startTime()
if (write_conv_log == 1)
convlog << iter+1 << '\t' << nijac << std::end…
- std::cerr << "\nIteration " << iter << ", time "
+ std::cerr << "\nIteration " << iter << ", time "
<< iter*time.dt << " s: "
"Error, the pressure solution in the fluid "
"calculations did not converge. Try increasing…
@@ -2471,14 +2473,14 @@ void DEM::startTime()
// Update particle kinematics
if (PROFILING == 1)
startTimer(&kernel_tic);
- integrate<<<dimGrid, dimBlock>>>(dev_x_sorted,
- dev_vel_sorted,
+ integrate<<<dimGrid, dimBlock>>>(dev_x_sorted,
+ dev_vel_sorted,
dev_angvel_sorted,
- dev_x,
- dev_vel,
+ dev_x,
+ dev_vel,
dev_angvel,
dev_force,
- dev_torque,
+ dev_torque,
dev_angpos,
dev_acc,
dev_angacc,
@@ -2571,7 +2573,7 @@ void DEM::startTime()
}
- // Produce output binary if the time interval
+ // Produce output binary if the time interval
// between output files has been reached
if (filetimeclock >= time.file_dt) {
@@ -2646,19 +2648,19 @@ void DEM::startTime()
cout << "\n## Particle " << n << " ##\n";
cout << "- contacts:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << "[" << nc << "]=" << k.contacts[nc+NC*n] <<
'\n';
cout << "\n- delta_t:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << k.delta_t[nc+NC*n].x << '\t'
<< k.delta_t[nc+NC*n].y << '\t'
<< k.delta_t[nc+NC*n].z << '\t'
<< k.delta_t[nc+NC*n].w << '\n';
cout << "\n- distmod:\n";
- for (int nc = 0; nc < NC; ++nc)
+ for (int nc = 0; nc < NC; ++nc)
cout << k.distmod[nc+NC*n].x << '\t'
<< k.distmod[nc+NC*n].y << '\t'
<< k.distmod[nc+NC*n].z << '\t'
@@ -2667,11 +2669,11 @@ void DEM::startTime()
cout << '\n';
}
- // Update status.dat at the interval of filetime
+ // Update status.dat at the interval of filetime
outfile = "output/" + sid + ".status.dat";
fp = fopen(outfile.c_str(), "w");
- fprintf(fp,"%2.4e %2.4e %d\n",
- time.current,
+ fprintf(fp,"%2.4e %2.4e %d\n",
+ time.current,
100.0*time.current/time.total,
time.step_count);
fclose(fp);
@@ -2697,7 +2699,7 @@ void DEM::startTime()
if (verbose == 1) {
cout << "\nSimulation ended. Statistics:\n"
- << " - Last output file number: "
+ << " - Last output file number: "
<< time.step_count << "\n"
<< " - GPU time spent: "
<< dev_time_spent/1000.0f << " s\n"
@@ -2705,7 +2707,7 @@ void DEM::startTime()
<< time_spent << " s\n"
<< " - Mean duration of iteration:\n"
<< " " << dev_time_spent/((double)iter*1000.0f) << " s"
- << std::endl;
+ << std::endl;
}
cudaEventDestroy(dev_tic);
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.