tAdded LBM fluid simulation, values off - sphere - GPU-based 3D discrete elemen… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 18da38f2435da18e108b208f2054eb4c11107207 | |
parent d456b79cf318820c11ea7fbf63ea807591a44cc4 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Wed, 1 May 2013 17:07:48 +0200 | |
Added LBM fluid simulation, values off | |
Diffstat: | |
M CMakeLists.txt | 7 ++++++- | |
M python/sphere.py | 110 ++++++++++++++++++++++-------… | |
M src/datatypes.h | 1 + | |
M src/device.cu | 139 ++++++++++++++++++++++++++---… | |
M src/file_io.cpp | 24 ++++++++++++++++++++++++ | |
M src/latticeboltzmann.cuh | 5 ++--- | |
M src/sorting.cuh | 5 ++++- | |
M src/sphere.h | 11 ++++++++++- | |
M tests/bond_tests.py | 5 +++-- | |
M tests/io_tests.py | 1 - | |
10 files changed, 247 insertions(+), 61 deletions(-) | |
--- | |
diff --git a/CMakeLists.txt b/CMakeLists.txt | |
t@@ -23,8 +23,13 @@ find_package(OpenMP) | |
# Uncomment to enable testing | |
enable_testing() | |
-# Set build type | |
+# Set build type = Debug | |
#set(CMAKE_BUILD_TYPE Debug) | |
+#if (CUDA_FOUND) | |
+# set(CUDA_NVCC_FLAGS -g;-G) | |
+#endif() | |
+ | |
+# Set build type = Release | |
set(CMAKE_BUILD_TYPE Release) | |
# Add source directory to project. | |
diff --git a/python/sphere.py b/python/sphere.py | |
t@@ -108,9 +108,18 @@ class Spherebin: | |
self.tau_b = numpy.ones(1, dtype=numpy.uint32) * numpy.infty | |
self.bonds = numpy.zeros((self.nb0, 2), dtype=numpy.uint32) | |
self.bonds_delta_n = numpy.zeros(self.nb0, dtype=numpy.float64) | |
- self.bonds_delta_t = numpy.zeros((self.nb0, self.nd), dtype=numpy.floa… | |
+ self.bonds_delta_t = numpy.zeros((self.nb0, self.nd), | |
+ dtype=numpy.float64) | |
self.bonds_omega_n = numpy.zeros(self.nb0, dtype=numpy.float64) | |
- self.bonds_omega_t = numpy.zeros((self.nb0, self.nd), dtype=numpy.floa… | |
+ self.bonds_omega_t = numpy.zeros((self.nb0, self.nd), | |
+ dtype=numpy.float64) | |
+ | |
+ self.nu = numpy.zeros(1, dtype=numpy.float64) | |
+ self.f_v = numpy.zeros( | |
+ (self.num[0] * self.num[1] * self.num[2], self.nd), | |
+ dtype=numpy.float64) | |
+ self.f_rho = numpy.zeros(self.num[0] * self.num[1] * self.num[2], | |
+ dtype=numpy.float64) | |
def __cmp__(self, other): | |
""" Called when to Spherebin objects are compared. | |
t@@ -176,13 +185,16 @@ class Spherebin: | |
self.bonds_delta_n == other.bonds_delta_n and\ | |
self.bonds_delta_t == other.bonds_delta_t and\ | |
self.bonds_omega_n == other.bonds_omega_n and\ | |
- self.bonds_omega_t == other.bonds_omega_t\ | |
+ self.bonds_omega_t == other.bonds_omega_t and\ | |
+ self.nu == other.nu and\ | |
+ (self.f_v == other.f_v).all() and\ | |
+ (self.f_rho == other.f_rho).all()\ | |
).all() == True): | |
return 0 # All equal | |
- else : | |
+ else: | |
return 1 | |
- def readbin(self, targetbin, verbose = True, bonds = True, devsmod = True): | |
+ def readbin(self, targetbin, verbose = True, bonds = True, devsmod = True,… | |
'Reads a target SPHERE binary file' | |
fh = None | |
t@@ -203,20 +215,16 @@ class Spherebin: | |
self.time_step_count = numpy.fromfile(fh, dtype=numpy.uint32, coun… | |
# Allocate array memory for particles | |
- self.x = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
- self.radius = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.xysum = numpy.zeros((self.np, 2), dtype=numpy.float64) | |
- self.vel = numpy.zeros((self.np, self.nd), dtype=numpy.float64) | |
- self.fixvel = numpy.zeros(self.np, dtype=numpy.float64) | |
- #self.force = numpy.zeros((self.np, self.nd), dtype=numpy.float6… | |
- #self.angpos = numpy.zeros((self.np, self.nd), dtype=numpy.float6… | |
- #self.angvel = numpy.zeros((self.np, self.nd), dtype=numpy.float6… | |
- #self.torque = numpy.zeros((self.np, self.nd), dtype=numpy.float6… | |
- self.es_dot = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.es = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.ev_dot = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.ev = numpy.zeros(self.np, dtype=numpy.float64) | |
- self.p = numpy.zeros(self.np, dtype=numpy.float64) | |
+ self.x = numpy.empty((self.np, self.nd), dtype=numpy.float64) | |
+ self.radius = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.xysum = numpy.empty((self.np, 2), dtype=numpy.float64) | |
+ self.vel = numpy.empty((self.np, self.nd), dtype=numpy.float64) | |
+ self.fixvel = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.es_dot = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.es = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.ev_dot = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.ev = numpy.empty(self.np, dtype=numpy.float64) | |
+ self.p = numpy.empty(self.np, dtype=numpy.float64) | |
# Read remaining data from binary | |
self.origo = numpy.fromfile(fh, dtype=numpy.float64, count=self… | |
t@@ -271,13 +279,13 @@ class Spherebin: | |
# Wall data | |
self.nw = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
- self.wmode = numpy.zeros(self.nw, dtype=numpy.int32) | |
- self.w_n = numpy.zeros(self.nw*self.nd, dtype=numpy.float64).r… | |
- self.w_x = numpy.zeros(self.nw, dtype=numpy.float64) | |
- self.w_m = numpy.zeros(self.nw, dtype=numpy.float64) | |
- self.w_vel = numpy.zeros(self.nw, dtype=numpy.float64) | |
- self.w_force = numpy.zeros(self.nw, dtype=numpy.float64) | |
- self.w_devs = numpy.zeros(self.nw, dtype=numpy.float64) | |
+ self.wmode = numpy.empty(self.nw, dtype=numpy.int32) | |
+ self.w_n = numpy.empty(self.nw*self.nd, dtype=numpy.float64).r… | |
+ self.w_x = numpy.empty(self.nw, dtype=numpy.float64) | |
+ self.w_m = numpy.empty(self.nw, dtype=numpy.float64) | |
+ self.w_vel = numpy.empty(self.nw, dtype=numpy.float64) | |
+ self.w_force = numpy.empty(self.nw, dtype=numpy.float64) | |
+ self.w_devs = numpy.empty(self.nw, dtype=numpy.float64) | |
self.wmode = numpy.fromfile(fh, dtype=numpy.int32, count=self.nw) | |
for i in range(self.nw): | |
t@@ -298,17 +306,25 @@ class Spherebin: | |
self.nb0 = numpy.fromfile(fh, dtype=numpy.uint32, count=1) | |
self.sigma_b = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.tau_b = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
- self.bonds = numpy.zeros((self.nb0, 2), dtype=numpy.uint32) | |
+ self.bonds = numpy.empty((self.nb0, 2), dtype=numpy.uint32) | |
for i in range(self.nb0): | |
self.bonds[i,0] = numpy.fromfile(fh, dtype=numpy.uint32, c… | |
self.bonds[i,1] = numpy.fromfile(fh, dtype=numpy.uint32, c… | |
self.bonds_delta_n = numpy.fromfile(fh, dtype=numpy.float64, c… | |
- self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
+ self.bonds_delta_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
self.bonds_omega_n = numpy.fromfile(fh, dtype=numpy.float64, c… | |
- self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
+ self.bonds_omega_t = numpy.fromfile(fh, dtype=numpy.float64, c… | |
else: | |
self.nb0 = numpy.zeros(1, dtype=numpy.uint32) | |
+ if (fluid == True): | |
+ ncells = self.num[0]*self.num[1]*self.num[2] | |
+ self.nu = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.f_v = numpy.empty(ncells*self.nd, dtype=numpy.float64) | |
+ self.f_rho = numpy.empty(ncells, dtype=numpy.float64) | |
+ self.f_v = numpy.fromfile(fh, dtype=numpy.float64, count=ncell… | |
+ self.f_rho = numpy.fromfile(fh, dtype=numpy.float64, count=nce… | |
+ | |
finally: | |
if fh is not None: | |
fh.close() | |
t@@ -412,11 +428,18 @@ class Spherebin: | |
fh.write(self.bonds_omega_n.astype(numpy.float64)) | |
fh.write(self.bonds_omega_t.astype(numpy.float64)) | |
+ fh.write(self.nu.astype(numpy.float64)) | |
+ fh.write(self.f_v.astype(numpy.float64)) | |
+ fh.write(self.f_rho.astype(numpy.float64)) | |
finally: | |
if fh is not None: | |
fh.close() | |
+ def readfirst(self, verbose=True): | |
+ fn = "../output/{0}.output00000.bin".format(self.sid) | |
+ self.readbin(fn, verbose) | |
+ | |
def readlast(self, verbose=True): | |
lastfile = status(self.sid) | |
fn = "../output/{0}.output{1:0=5}.bin".format(self.sid, lastfile) | |
t@@ -508,6 +531,11 @@ class Spherebin: | |
cellsize = 2 * r_max | |
self.L = self.num * cellsize | |
+ # Init fluid arrays | |
+ self.f_v = numpy.zeros((self.num[0]*self.num[1]*self.num[2],self.nd), … | |
+ self.f_rho = numpy.ones(self.num[0]*self.num[1]*self.num[2], dtype=num… | |
+ | |
+ | |
# Particle positions randomly distributed without overlap | |
for i in range(self.np): | |
overlaps = True | |
t@@ -551,6 +579,11 @@ class Spherebin: | |
print("Error: The grid must be at least 3 cells in each direction") | |
print(" Grid: x={}, y={}, z={}".format(self.num[0], self.num[1], s… | |
+ # Init fluid arrays | |
+ self.f_v = numpy.zeros((self.num[0]*self.num[1]*self.num[2],self.nd), … | |
+ self.f_rho = numpy.ones(self.num[0]*self.num[1]*self.num[2], dtype=num… | |
+ | |
+ | |
# Put upper wall at top boundary | |
if (self.nw > 0): | |
self.w_x[0] = self.L[0] | |
t@@ -591,6 +624,11 @@ class Spherebin: | |
print("Error: The grid must be at least 3 cells in each direction") | |
print(self.num) | |
+ # Init fluid arrays | |
+ self.f_v = numpy.zeros((self.num[0]*self.num[1]*self.num[2],self.nd), … | |
+ self.f_rho = numpy.ones(self.num[0]*self.num[1]*self.num[2], dtype=num… | |
+ | |
+ | |
self.contactmodel[0] = contactmodel | |
# Put upper wall at top boundary | |
t@@ -656,6 +694,10 @@ class Spherebin: | |
self.x[i,0] += 0.5*cellsize | |
self.x[i,1] += 0.5*cellsize | |
+ # Init fluid arrays | |
+ self.f_v = numpy.zeros((self.num[0]*self.num[1]*self.num[2],self.nd), … | |
+ self.f_rho = numpy.ones(self.num[0]*self.num[1]*self.num[2], dtype=num… | |
+ | |
self.contactmodel[0] = contactmodel | |
# Readjust grid to correct size | |
t@@ -717,6 +759,10 @@ class Spherebin: | |
self.num[2] = numpy.ceil(z_max/cellsize) | |
self.L = self.num * cellsize | |
+ # Init fluid arrays | |
+ self.f_v = numpy.zeros((self.num[0]*self.num[1]*self.num[2],self.nd), … | |
+ self.f_rho = numpy.ones(self.num[0]*self.num[1]*self.num[2], dtype=num… | |
+ | |
def createBondPair(self, i, j, spacing=-0.1): | |
""" Bond particles i and j. Particle j is moved adjacent to particle i, | |
and oriented randomly. | |
t@@ -940,7 +986,8 @@ class Spherebin: | |
gamma_r = 0, | |
gamma_wn = 1e4, | |
gamma_wt = 1e4, | |
- capillaryCohesion = 0): | |
+ capillaryCohesion = 0, | |
+ nu = 0.0): | |
""" Initialize particle parameters to default values. | |
Radii must be set prior to calling this function. | |
""" | |
t@@ -1007,6 +1054,9 @@ class Spherebin: | |
# Debonding distance | |
self.db[0] = (1.0 + theta/2.0) * self.V_b**(1.0/3.0) | |
+ # Fluid dynamic viscosity | |
+ self.nu[0] = nu | |
+ | |
def bond(self, i, j): | |
""" Create a bond between particles i and j """ | |
diff --git a/src/datatypes.h b/src/datatypes.h | |
t@@ -96,6 +96,7 @@ struct Params { | |
Float tau_b; // Bond shear strength | |
Float devs_A; // Amplitude of modulations in deviatoric normal str… | |
Float devs_f; // Frequency of modulations in deviatoric normal str… | |
+ Float nu; // Fluid dynamic viscosity | |
}; | |
// Structure containing wall parameters | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -25,6 +25,7 @@ | |
#include "contactsearch.cuh" | |
#include "integration.cuh" | |
#include "raytracer.cuh" | |
+#include "latticeboltzmann.cuh" | |
// Wrapper function for initializing the CUDA components. | |
t@@ -140,7 +141,8 @@ __global__ void checkConstantValues(int* dev_equal, | |
dev_params->db != devC_params.db || | |
dev_params->V_b != devC_params.V_b || | |
dev_params->lambda_bar != devC_params.lambda_bar || | |
- dev_params->nb0 != devC_params.nb0) | |
+ dev_params->nb0 != devC_params.nb0 || | |
+ dev_params->nu != devC_params.nu) | |
*dev_equal = 2; // Not ok | |
} | |
t@@ -184,11 +186,12 @@ __host__ void DEM::checkConstantMemory() | |
// Are the values equal? | |
if (*equal != 0) { | |
std::cerr << "Error! The values in constant memory do not " | |
- << "seem to be correct (" << *equal << ").\n"; | |
+ << "seem to be correct (" << *equal << ")." << std::endl; | |
exit(1); | |
} else { | |
if (verbose == 1) | |
- std::cout << " Constant values ok (" << *equal << ").\n"; | |
+ std::cout << " Constant values ok (" << *equal << ")." | |
+ << std::endl; | |
} | |
} | |
t@@ -256,7 +259,8 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
cudaMalloc((void**)&dev_torque, memSizeF4); | |
// Particle contact bookkeeping arrays | |
- cudaMalloc((void**)&dev_contacts, sizeof(unsigned int)*np*NC); // Max NC c… | |
+ cudaMalloc((void**)&dev_contacts, | |
+ sizeof(unsigned int)*np*NC); // Max NC contacts per particle | |
cudaMalloc((void**)&dev_distmod, memSizeF4*NC); | |
cudaMalloc((void**)&dev_delta_t, memSizeF4*NC); | |
cudaMalloc((void**)&dev_bonds, sizeof(uint2)*params.nb0); | |
t@@ -278,8 +282,10 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
// Cell-related arrays | |
cudaMalloc((void**)&dev_gridParticleCellID, sizeof(unsigned int)*np); | |
cudaMalloc((void**)&dev_gridParticleIndex, sizeof(unsigned int)*np); | |
- cudaMalloc((void**)&dev_cellStart, sizeof(unsigned int)*grid.num[0]*grid.n… | |
- cudaMalloc((void**)&dev_cellEnd, sizeof(unsigned int)*grid.num[0]*grid.num… | |
+ cudaMalloc((void**)&dev_cellStart, | |
+ sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]); | |
+ cudaMalloc((void**)&dev_cellEnd, | |
+ sizeof(unsigned int)*grid.num[0]*grid.num[1]*grid.num[2]); | |
// Host contact bookkeeping arrays | |
k.contacts = new unsigned int[np*NC]; | |
t@@ -298,9 +304,17 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
cudaMalloc((void**)&dev_walls_vel0, sizeof(Float)*walls.nw); | |
// dev_walls_force_partial allocated later | |
+ // Fluid arrays | |
+ cudaMalloc((void**)&dev_f, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19); | |
+ cudaMalloc((void**)&dev_f_new, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19); | |
+ cudaMalloc((void**)&dev_v_rho, | |
+ sizeof(Float4)*grid.num[0]*grid.num[1]*grid.num[2]); | |
+ | |
checkForCudaErrors("End of allocateGlobalDeviceMemory"); | |
if (verbose == 1) | |
- std::cout << "Done\n"; | |
+ std::cout << "Done" << std::endl; | |
} | |
__host__ void DEM::freeGlobalDeviceMemory() | |
t@@ -348,10 +362,16 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
cudaFree(dev_walls_force_partial); | |
cudaFree(dev_walls_force_pp); | |
cudaFree(dev_walls_vel0); | |
+ | |
+ // Fluid arrays | |
+ cudaFree(dev_f); | |
+ cudaFree(dev_f_new); | |
+ cudaFree(dev_v_rho); | |
+ | |
checkForCudaErrors("During cudaFree calls"); | |
if (verbose == 1) | |
- printf("Done\n"); | |
+ std::cout << "Done" << std::endl; | |
} | |
t@@ -427,9 +447,18 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
sizeof(Float), cudaMemcpyHostToDevice); | |
} | |
+ // Fluid arrays | |
+ if (params.nu > 0.0) { | |
+ cudaMemcpy( dev_f, f, sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2… | |
+ cudaMemcpyHostToDevice); | |
+ cudaMemcpy( dev_v_rho, v_rho, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2], | |
+ cudaMemcpyHostToDevice); | |
+ } | |
+ | |
checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
if (verbose == 1) | |
- std::cout << "Done\n"; | |
+ std::cout << "Done" << std::endl; | |
} | |
__host__ void DEM::transferFromGlobalDeviceMemory() | |
t@@ -495,8 +524,15 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
cudaMemcpy( walls.mvfd, dev_walls_mvfd, | |
sizeof(Float4)*walls.nw, cudaMemcpyDeviceToHost); | |
- // Bond parameters | |
- | |
+ // Fluid arrays | |
+ if (params.nu > 0.0) { | |
+ cudaMemcpy( f, dev_f, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2]*19, | |
+ cudaMemcpyDeviceToHost); | |
+ cudaMemcpy( v_rho, dev_v_rho, | |
+ sizeof(Float)*grid.num[0]*grid.num[1]*grid.num[2], | |
+ cudaMemcpyDeviceToHost); | |
+ } | |
checkForCudaErrors("End of transferFromGlobalDeviceMemory"); | |
} | |
t@@ -506,6 +542,8 @@ __host__ void DEM::transferFromGlobalDeviceMemory() | |
__host__ void DEM::startTime() | |
{ | |
using std::cout; // Namespace directive | |
+ using std::cerr; // Namespace directive | |
+ using std::endl; // Namespace directive | |
std::string outfile; | |
char file[200]; | |
FILE *fp; | |
t@@ -523,14 +561,34 @@ __host__ void DEM::startTime() | |
// Start CPU clock | |
tic = clock(); | |
- // GPU workload configuration | |
- unsigned int threadsPerBlock = 256; | |
+ //// GPU workload configuration | |
+ //unsigned int threadsPerBlock = 256; | |
+ unsigned int threadsPerBlock = 512; | |
+ | |
// Create enough blocks to accomodate the particles | |
unsigned int blocksPerGrid = iDivUp(np, threadsPerBlock); | |
dim3 dimGrid(blocksPerGrid, 1, 1); // Blocks arranged in 1D grid | |
dim3 dimBlock(threadsPerBlock, 1, 1); // Threads arranged in 1D block | |
+ | |
unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock); | |
dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid | |
+ | |
+ // Use 3D block and grid layout for Lattice-Boltzmann fluid calculations | |
+ dim3 dimBlockFluid(8, 8, 8); // 512 threads per block | |
+ //dim3 dimGridFluid( | |
+ //(grid.num[2]+8-1)/8, // Use x as the z | |
+ //(grid.num[1]+8-1)/8, | |
+ //(grid.num[0]+8-1)/8); | |
+ dim3 dimGridFluid( | |
+ iDivUp(grid.num[2],dimBlockFluid.x), // Use z as x | |
+ iDivUp(grid.num[1],dimBlockFluid.y), | |
+ iDivUp(grid.num[0],dimBlockFluid.z)); // Use x as z | |
+ if (dimGridFluid.z > 64) { | |
+ cerr << "Error: dimGridFluid.z > 64" << endl; | |
+ exit(1); | |
+ } | |
+ | |
+ | |
// Shared memory per block | |
unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1); | |
t@@ -544,7 +602,16 @@ __host__ void DEM::startTime() | |
<< dimGrid.x << "*" << dimGrid.y << "*" << dimGrid.z << "\n" | |
<< " - Threads per block: " | |
<< dimBlock.x << "*" << dimBlock.y << "*" << dimBlock.z << "\n" | |
- << " - Shared memory required per block: " << smemSize << " bytes… | |
+ << " - Shared memory required per block: " << smemSize << " bytes" | |
+ << endl; | |
+ if (params.nu > 0.0) { | |
+ cout << " - Blocks per fluid grid: " | |
+ << dimGridFluid.x << "*" << dimGridFluid.y << "*" << | |
+ dimGridFluid.z << "\n" | |
+ << " - Threads per fluid block: " | |
+ << dimBlockFluid.x << "*" << dimBlockFluid.y << "*" << | |
+ dimBlockFluid.z << endl; | |
+ } | |
} | |
// Initialize counter variable values | |
t@@ -561,17 +628,17 @@ __host__ void DEM::startTime() | |
time.step_count); | |
fclose(fp); | |
- // Write first output data file: output0.bin, thus testing writing of bin … | |
- //outfile = "output/" + sid + ".output0.bin"; | |
- //sprintf(file,"output/%s.output0.bin", sid); | |
- //writebin(outfile.c_str()); | |
+ // Initialize fluid distribution array | |
+ initfluid<<< dimGridFluid, dimBlockFluid >>>(dev_v_rho, dev_f); | |
+ cudaThreadSynchronize(); | |
+ checkForCudaErrors("Post initfluid"); | |
if (verbose == 1) { | |
cout << "\n Entering the main calculation time loop...\n\n" | |
<< " IMPORTANT: Do not close this terminal, doing so will \n" | |
<< " terminate this SPHERE process. Follow the \n" | |
<< " progress by executing:\n" | |
- << " $ ./sphere_status " << sid << "\n\n"; | |
+ << " $ ./sphere_status " << sid << endl << endl; | |
} | |
t@@ -590,6 +657,7 @@ __host__ void DEM::startTime() | |
double t_topology = 0.0; | |
double t_interact = 0.0; | |
double t_bondsLinear = 0.0; | |
+ double t_latticeBoltzmannD3Q19 = 0.0; | |
double t_integrate = 0.0; | |
double t_summation = 0.0; | |
double t_integrateWalls = 0.0; | |
t@@ -606,8 +674,6 @@ __host__ void DEM::startTime() | |
// MAIN CALCULATION TIME LOOP | |
while (time.current <= time.total) { | |
- // Increment iteration counter | |
- ++iter; | |
// Print current step number to terminal | |
//printf("Step: %d\n", time.step_count); | |
t@@ -733,7 +799,8 @@ __host__ void DEM::startTime() | |
// Process particle pairs | |
if (params.nb0 > 0) { | |
- //bondsLinear<<< 1, params.nb0 >>>( | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
bondsLinear<<<dimGridBonds, dimBlock>>>( | |
dev_bonds, | |
dev_bonds_delta, | |
t@@ -751,6 +818,31 @@ __host__ void DEM::startTime() | |
checkForCudaErrors("Post bondsLinear", iter); | |
} | |
+ // Process fluid and particle interaction in each cell | |
+ if (params.nu > 0.0) { | |
+ if (PROFILING == 1) | |
+ startTimer(&kernel_tic); | |
+ latticeBoltzmannD3Q19<<< dimGridFluid, dimBlockFluid>>> ( | |
+ dev_f, | |
+ dev_f_new, | |
+ dev_v_rho, | |
+ dev_cellStart, | |
+ dev_cellEnd, | |
+ dev_x_sorted, | |
+ dev_vel_sorted, | |
+ dev_force, | |
+ dev_gridParticleIndex); | |
+ cudaThreadSynchronize(); | |
+ if (PROFILING == 1) | |
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
+ &t_latticeBoltzmannD3Q19); | |
+ checkForCudaErrors("Post latticeBoltzmannD3Q19", iter); | |
+ | |
+ // Flip flop | |
+ swapFloatArrays(dev_f, dev_f_new); | |
+ } | |
+ | |
+ | |
// Update particle kinematics | |
if (PROFILING == 1) | |
t@@ -822,6 +914,7 @@ __host__ void DEM::startTime() | |
// Update timers and counters | |
time.current += time.dt; | |
filetimeclock += time.dt; | |
+ ++iter; | |
// Report time to console | |
if (verbose == 1) { | |
t@@ -938,6 +1031,8 @@ __host__ void DEM::startTime() | |
<< "\t(" << 100.0*t_interact/t_sum << " %)\n" | |
<< " - bondsLinear:\t" << t_bondsLinear/1000.0 << " s" | |
<< "\t(" << 100.0*t_bondsLinear/t_sum << " %)\n" | |
+ << " - latticeBoltzmann:\t" << t_latticeBoltzmannD3Q19/1000.0 << … | |
+ << "\t(" << 100.0*t_latticeBoltzmannD3Q19/t_sum << " %)\n" | |
<< " - integrate:\t\t" << t_integrate/1000.0 << " s" | |
<< "\t(" << 100.0*t_integrate/t_sum << " %)\n" | |
<< " - summation:\t\t" << t_summation/1000.0 << " s" | |
diff --git a/src/file_io.cpp b/src/file_io.cpp | |
t@@ -236,6 +236,21 @@ void DEM::readbin(const char *target) | |
ifs.read(as_bytes(k.bonds_omega[i].z), sizeof(Float)); | |
} | |
+ // Read fluid parameters | |
+ ifs.read(as_bytes(params.nu), sizeof(params.nu)); | |
+ v_rho = new Float4[grid.num[0]*grid.num[1]*grid.num[2]]; | |
+ f = new Float[grid.num[0]*grid.num[1]*grid.num[2]*19]; | |
+ if (params.nu > 0.0) { | |
+ for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) { | |
+ ifs.read(as_bytes(v_rho[i].x), sizeof(Float)); | |
+ ifs.read(as_bytes(v_rho[i].y), sizeof(Float)); | |
+ ifs.read(as_bytes(v_rho[i].z), sizeof(Float)); | |
+ } | |
+ for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) | |
+ ifs.read(as_bytes(v_rho[i].w), sizeof(Float)); | |
+ } | |
+ | |
+ | |
// Close file if it is still open | |
if (ifs.is_open()) | |
ifs.close(); | |
t@@ -394,6 +409,15 @@ void DEM::writebin(const char *target) | |
ofs.write(as_bytes(k.bonds_omega[i].z), sizeof(Float)); | |
} | |
+ ofs.write(as_bytes(params.nu), sizeof(params.nu)); | |
+ for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) { | |
+ ofs.write(as_bytes(v_rho[i].x), sizeof(Float)); | |
+ ofs.write(as_bytes(v_rho[i].y), sizeof(Float)); | |
+ ofs.write(as_bytes(v_rho[i].z), sizeof(Float)); | |
+ } | |
+ for (i = 0; i<grid.num[0]*grid.num[1]*grid.num[2]; ++i) | |
+ ofs.write(as_bytes(v_rho[i].w), sizeof(Float)); | |
+ | |
// Close file if it is still open | |
if (ofs.is_open()) | |
ofs.close(); | |
diff --git a/src/latticeboltzmann.cuh b/src/latticeboltzmann.cuh | |
t@@ -360,7 +360,7 @@ __global__ void latticeBoltzmannD3Q19( | |
dev_f_new[grid2index(nx-1, 0, z, 9)] = fmax(0.0, f_9); | |
// Edge 10 (+x,-y): Periodic | |
- /*if (x < nx-1 && y > 0) // not at boundary | |
+ if (x < nx-1 && y > 0) // not at boundary | |
dev_f_new[grid2index( x+1, y-1, z, 10)] = fmax(0.0, f_10); | |
else if (x < nx-1) // at -y boundary | |
dev_f_new[grid2index( x+1,ny-1, z, 10)] = fmax(0.0, f_10); | |
t@@ -445,11 +445,10 @@ __global__ void latticeBoltzmannD3Q19( | |
else if (y < ny-1) // at -z boundary | |
dev_f_new[grid2index( x, y+1, 0, 17)] = fmax(0.0, f_18); | |
else if (z > 0) // at +y boundary | |
- dev_f_new[grid2index( x, 0, z+1, 18)] = fmax(0.0, f_18); | |
+ dev_f_new[grid2index( x, 0, z-1, 18)] = fmax(0.0, f_18); | |
else // at +y-z boundary | |
dev_f_new[grid2index( x, 0, 0, 17)] = fmax(0.0, f_18); | |
- // */ | |
} | |
} | |
diff --git a/src/sorting.cuh b/src/sorting.cuh | |
t@@ -1,3 +1,6 @@ | |
+#ifndef SORTING_CUH_ | |
+#define SORTING_CUH_ | |
+ | |
// Returns the cellID containing the particle, based cubic grid | |
// See Bayraktar et al. 2009 | |
// Kernel is executed on the device, and is callable from the device only | |
t@@ -118,5 +121,5 @@ __global__ void reorderArrays(unsigned int* dev_cellStart, | |
} // End of reorderArrays(...) | |
- | |
+#endif | |
// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -134,9 +134,16 @@ class DEM { | |
const Float *z_pos, | |
const Float *porosity); | |
+ // Lattice-Boltzmann data arrays (D3Q19) | |
+ Float *f; // Fluid distribution (f0..f18) | |
+ Float *dev_f; // Device equivalent | |
+ Float *dev_f_new; // Device equivalent | |
+ Float4 *v_rho; // Fluid velocity v (xyz), and pressure rho (w) | |
+ Float4 *dev_v_rho; // Device equivalent | |
+ | |
- // Values and functions accessible from the outside | |
public: | |
+ // Values and functions accessible from the outside | |
// Constructor, some parameters with default values | |
DEM(std::string inputbin, | |
t@@ -201,5 +208,7 @@ class DEM { | |
}; | |
+ | |
+ | |
#endif | |
// vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4 | |
diff --git a/tests/bond_tests.py b/tests/bond_tests.py | |
t@@ -45,10 +45,10 @@ for d in distances: | |
sb.bond(0, 1) | |
sb.defaultParams(gamma_n = 0.0, gamma_t = 0.0) | |
#sb.initTemporal(total=0.5, file_dt=0.01) | |
- | |
#sb.render(verbose=False) | |
#visualize(sb.sid, "energy") | |
+ | |
print("# Stability test") | |
sb.x[0,:] = numpy.array((2.0, 2.0, 2.0)) | |
sb.x[1,:] = numpy.array((2.0+2.0*radii+d, 2.0, 2.0)) | |
t@@ -85,8 +85,9 @@ for d in distances: | |
print(passed()) | |
compareNumpyArrays(sb.vel[:,1:2], z2_2, "vel[:,1:2]") | |
compareNumpyArrays(sb.angvel, z2_3, "angvel\t") | |
- #printKinematics(sb) | |
+ printKinematics(sb) | |
#visualize(sb.sid, "energy") | |
+ continue | |
print("# Normal contraction") | |
sb.zeroKinematics() | |
diff --git a/tests/io_tests.py b/tests/io_tests.py | |
t@@ -21,7 +21,6 @@ compare(orig, py, "Python IO:") | |
# Test C++ IO routines | |
orig.run(verbose=False, hideinputfile=True) | |
-#orig.run() | |
cpp = Spherebin() | |
cpp.readbin("../output/" + orig.sid + ".output00000.bin", verbose=False) | |
compare(orig, cpp, "C++ IO: ") |