| timplemented staggered grid in several functions - sphere - GPU-based 3D discre… | |
| git clone git://src.adamsgaard.dk/sphere | |
| Log | |
| Files | |
| Refs | |
| LICENSE | |
| --- | |
| commit bd373edea562b18de93fe231cc1583108a4d1b05 | |
| parent 0548db26d1afead334b2d7be927411f8e6dfd0dc | |
| Author: Anders Damsgaard <[email protected]> | |
| Date: Tue, 3 Jun 2014 10:51:51 +0200 | |
| implemented staggered grid in several functions | |
| Diffstat: | |
| M python/fluidshear.py | 4 ++-- | |
| M src/device.cu | 180 +++++++++++++++++------------… | |
| M src/navierstokes.cuh | 431 ++++++++++++++++++++++++++---… | |
| M src/sphere.h | 11 +++++++---- | |
| M tests/stokes_law.py | 2 +- | |
| 5 files changed, 469 insertions(+), 159 deletions(-) | |
| --- | |
| diff --git a/python/fluidshear.py b/python/fluidshear.py | |
| t@@ -9,8 +9,8 @@ sid = 'fluidshear' | |
| sim = sphere.sim(sid + '-init', np = 24000, fluid = False) | |
| #sim.cleanup() | |
| sim.radius[:] = 0.05 | |
| -sim.initRandomGridPos(gridnum = [16, 16, 9000]) | |
| -sim.initTemporal(total = 5.0, file_dt = 0.05) | |
| +sim.initRandomGridPos(gridnum = [20, 20, 9000]) | |
| +sim.initTemporal(total = 10.0, file_dt = 0.05) | |
| sim.g[2] = -9.81 | |
| sim.run(dry=True) | |
| sim.run() | |
| diff --git a/src/device.cu b/src/device.cu | |
| t@@ -557,7 +557,7 @@ __host__ void DEM::startTime() | |
| unsigned int blocksPerGridBonds = iDivUp(params.nb0, threadsPerBlock); | |
| dim3 dimGridBonds(blocksPerGridBonds, 1, 1); // Blocks arranged in 1D grid | |
| - // Use 3D block and grid layout for fluid calculations | |
| + // Use 3D block and grid layout for cell-centered fluid calculations | |
| dim3 dimBlockFluid(8, 8, 8); // 512 threads per block | |
| dim3 dimGridFluid( | |
| iDivUp(grid.num[0], dimBlockFluid.x), | |
| t@@ -568,6 +568,17 @@ __host__ void DEM::startTime() | |
| exit(1); | |
| } | |
| + // Use 3D block and grid layout for cell-face fluid calculations | |
| + dim3 dimBlockFluidFace(8, 8, 8); // 512 threads per block | |
| + dim3 dimGridFluidFace( | |
| + iDivUp(grid.num[0]+1, dimBlockFluid.x), | |
| + iDivUp(grid.num[1]+1, dimBlockFluid.y), | |
| + iDivUp(grid.num[2]+1, dimBlockFluid.z)); | |
| + if (dimGridFluid.z > 64 && navierstokes == 1) { | |
| + cerr << "Error: dimGridFluid.z > 64" << endl; | |
| + exit(1); | |
| + } | |
| + | |
| // Shared memory per block | |
| unsigned int smemSize = sizeof(unsigned int)*(threadsPerBlock+1); | |
| t@@ -867,29 +878,42 @@ __host__ void DEM::startTime() | |
| }*/ | |
| if (np > 0) { | |
| - // Determine the interaction force | |
| - /*findInteractionForce<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_phi, | |
| - dev_ns_d_avg, | |
| - dev_ns_vp_avg, | |
| - dev_ns_v, | |
| - dev_ns_fi); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post findInteractionForce", iter); | |
| - // Apply interaction force to the particles | |
| - applyParticleInteractionForce<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_fi, | |
| - dev_ns_phi, | |
| - dev_ns_p, | |
| - dev_gridParticleIndex, | |
| - dev_cellStart, | |
| - dev_cellEnd, | |
| - dev_x_sorted, | |
| - dev_force); | |
| + if (iter == 0) { | |
| + // set cell center ghost nodes | |
| + setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + | |
| + // find cell face velocities | |
| + interpolateCenterToFace | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrors("Post interpolateCenterToFace"); | |
| + } | |
| + | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + ns.bc_bot, | |
| + ns.bc_top); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post applyParticleInteractionForce", | |
| - iter);*/ | |
| + checkForCudaErrorsIter("Post setNSghostNodesFace", iter); | |
| + | |
| + findFaceDivTau<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post findFaceDivTau", iter); | |
| // Per particle, find the fluid-particle interaction force f_pf | |
| // and apply it to the particle | |
| t@@ -898,9 +922,12 @@ __host__ void DEM::startTime() | |
| dev_vel, | |
| dev_ns_phi, | |
| dev_ns_p, | |
| - dev_ns_v, | |
| - dev_ns_tau, | |
| - iter, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z, | |
| dev_ns_f_pf, | |
| dev_force); | |
| cudaThreadSynchronize(); | |
| t@@ -961,21 +988,19 @@ __host__ void DEM::startTime() | |
| // Set the values of the ghost nodes in the grid | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_p, | |
| - dev_ns_v, | |
| - dev_ns_v_p, | |
| - dev_ns_phi, | |
| - dev_ns_dphi, | |
| - dev_ns_epsilon);*/ | |
| + | |
| setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| dev_ns_p, ns.bc_bot, ns.bc_top); | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v, ns.bc_bot, ns.bc_top); | |
| + //setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| + //dev_ns_v, ns.bc_bot, ns.bc_top); | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v_p, ns.bc_bot, ns.bc_top); | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + ns.bc_bot, ns.bc_top); | |
| setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>( | |
| dev_ns_phi, ns.bc_bot, ns.bc_top); | |
| t@@ -993,26 +1018,15 @@ __host__ void DEM::startTime() | |
| transferNSepsilonFromGlobalDeviceMemory(); | |
| printNSarray(stdout, ns.epsilon, "epsilon");*/ | |
| - // Find the fluid stress tensor, needed for predicting the flu… | |
| - // velocities | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| - findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v, | |
| - dev_ns_tau); | |
| + // interpolate velocities to cell centers which makes velocity | |
| + // prediction easier | |
| + interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_v); | |
| cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findNSstressTensor); | |
| - checkForCudaErrorsIter("Post findNSstressTensor", iter); | |
| - | |
| - // Set stress tensor values in the ghost nodes | |
| - setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_tau, | |
| - ns.bc_bot, | |
| - ns.bc_top); | |
| - cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodes_tau", iter); | |
| + checkForCudaErrorsIter("Post interpolateFaceToCenter", iter); | |
| // Find the divergence of phi*vi*v, needed for predicting the | |
| // fluid velocities | |
| t@@ -1028,53 +1042,44 @@ __host__ void DEM::startTime() | |
| &t_findNSdivphiviv); | |
| checkForCudaErrorsIter("Post findNSdivphiviv", iter); | |
| - // Find the divergence of phi*tau, needed for predicting the | |
| - // fluid velocities | |
| - if (PROFILING == 1) | |
| - startTimer(&kernel_tic); | |
| - /*findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_phi, | |
| - dev_ns_tau, | |
| - dev_ns_div_phi_tau);*/ | |
| - findNSdivtau<<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_tau, | |
| - dev_ns_div_tau); | |
| - cudaThreadSynchronize(); | |
| - if (PROFILING == 1) | |
| - stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| - &t_findNSdivtau); | |
| - //checkForCudaErrorsIter("Post findNSdivtau", iter); | |
| - checkForCudaErrorsIter("Post findNSdivtau", iter); | |
| - | |
| // Predict the fluid velocities on the base of the old pressure | |
| // field and ignoring the incompressibility constraint | |
| if (PROFILING == 1) | |
| startTimer(&kernel_tic); | |
| - findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>( | |
| + findPredNSvelocities<<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| dev_ns_p, | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| dev_ns_v, | |
| dev_ns_phi, | |
| dev_ns_dphi, | |
| - dev_ns_div_phi_vi_v, | |
| - //dev_ns_div_phi_tau, | |
| - dev_ns_div_tau, | |
| + dev_ns_div_tau_x, | |
| + dev_ns_div_tau_y, | |
| + dev_ns_div_tau_z, | |
| ns.bc_bot, | |
| ns.bc_top, | |
| ns.beta, | |
| dev_ns_fi, | |
| ns.ndem, | |
| - dev_ns_v_p); | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z); | |
| cudaThreadSynchronize(); | |
| if (PROFILING == 1) | |
| stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed, | |
| &t_findPredNSvelocities); | |
| checkForCudaErrorsIter("Post findPredNSvelocities", iter); | |
| - setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>( | |
| - dev_ns_v_p); | |
| + setNSghostNodesFace<Float> | |
| + <<<dimGridFluidFace, dimBlockFluidFace>>>( | |
| + dev_ns_v_p_x, | |
| + dev_ns_v_p_y, | |
| + dev_ns_v_p_z, | |
| + ns.bc_bot, ns.bc_top); | |
| cudaThreadSynchronize(); | |
| - checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)… | |
| - iter); | |
| + checkForCudaErrorsIter( | |
| + "Post setNSghostNodesFaceFloat3(dev_ns_v_p)", iter); | |
| // In the first iteration of the sphere program, we'll need to | |
| // manually estimate the values of epsilon. In the subsequent | |
| t@@ -1507,6 +1512,17 @@ __host__ void DEM::startTime() | |
| cudaThreadSynchronize(); | |
| checkForCudaErrorsIter("Beginning of file output section", iter); | |
| + // v_x, v_y, v_z -> v | |
| + if (navierstokes == 1) { | |
| + interpolateFaceToCenter<<<dimGridFluid, dimBlockFluid>>>( | |
| + dev_ns_v_x, | |
| + dev_ns_v_y, | |
| + dev_ns_v_z, | |
| + dev_ns_v); | |
| + cudaThreadSynchronize(); | |
| + checkForCudaErrorsIter("Post interpolateFaceToCenter", iter); | |
| + } | |
| + | |
| //// Copy device data to host memory | |
| transferFromGlobalDeviceMemory(); | |
| checkForCudaErrorsIter("After transferFromGlobalDeviceMemory()", | |
| diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh | |
| t@@ -69,7 +69,7 @@ void DEM::initNSmemDev(void) | |
| cudaMalloc((void**)&dev_ns_v_x, memSizeFvel);// velocity in stag. grid | |
| cudaMalloc((void**)&dev_ns_v_y, memSizeFvel);// velocity in stag. grid | |
| cudaMalloc((void**)&dev_ns_v_z, memSizeFvel);// velocity in stag. grid | |
| - cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity | |
| + //cudaMalloc((void**)&dev_ns_v_p, memSizeF*3); // predicted cell velocity | |
| cudaMalloc((void**)&dev_ns_v_p_x, memSizeFvel); // pred. vel. in stag. grid | |
| cudaMalloc((void**)&dev_ns_v_p_y, memSizeFvel); // pred. vel. in stag. grid | |
| cudaMalloc((void**)&dev_ns_v_p_z, memSizeFvel); // pred. vel. in stag. grid | |
| t@@ -78,7 +78,7 @@ void DEM::initNSmemDev(void) | |
| cudaMalloc((void**)&dev_ns_fi, memSizeF*3); // interaction force | |
| cudaMalloc((void**)&dev_ns_phi, memSizeF); // cell porosity | |
| cudaMalloc((void**)&dev_ns_dphi, memSizeF); // cell porosity change | |
| - cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v) | |
| + //cudaMalloc((void**)&dev_ns_div_phi_v_v, memSizeF*3); // div(phi v v) | |
| cudaMalloc((void**)&dev_ns_epsilon, memSizeF); // pressure difference | |
| cudaMalloc((void**)&dev_ns_epsilon_new, memSizeF); // new pressure diff. | |
| cudaMalloc((void**)&dev_ns_epsilon_old, memSizeF); // old pressure diff. | |
| t@@ -87,7 +87,10 @@ void DEM::initNSmemDev(void) | |
| cudaMalloc((void**)&dev_ns_f1, memSizeF); // constant addition in forci… | |
| cudaMalloc((void**)&dev_ns_f2, memSizeF*3); // constant slope in forcing | |
| cudaMalloc((void**)&dev_ns_tau, memSizeF*6); // stress tensor (symmetrical) | |
| - cudaMalloc((void**)&dev_ns_div_tau, memSizeF*6); // div(tau) | |
| + cudaMalloc((void**)&dev_ns_div_tau, memSizeF3); // div(tau), cell center | |
| + cudaMalloc((void**)&dev_ns_div_tau_x, memSizeFvel); // div(tau), cell face | |
| + cudaMalloc((void**)&dev_ns_div_tau_y, memSizeFvel); // div(tau), cell face | |
| + cudaMalloc((void**)&dev_ns_div_tau_z, memSizeFvel); // div(tau), cell face | |
| cudaMalloc((void**)&dev_ns_div_phi_vi_v, memSizeF*3); // div(phi*vi*v) | |
| //cudaMalloc((void**)&dev_ns_div_phi_tau, memSizeF*3); // div(phi*tau) | |
| cudaMalloc((void**)&dev_ns_f_pf, sizeof(Float3)*np); // particle fluid for… | |
| t@@ -103,7 +106,7 @@ void DEM::freeNSmemDev() | |
| cudaFree(dev_ns_v_x); | |
| cudaFree(dev_ns_v_y); | |
| cudaFree(dev_ns_v_z); | |
| - cudaFree(dev_ns_v_p); | |
| + //cudaFree(dev_ns_v_p); | |
| cudaFree(dev_ns_v_p_x); | |
| cudaFree(dev_ns_v_p_y); | |
| cudaFree(dev_ns_v_p_z); | |
| t@@ -112,7 +115,7 @@ void DEM::freeNSmemDev() | |
| cudaFree(dev_ns_fi); | |
| cudaFree(dev_ns_phi); | |
| cudaFree(dev_ns_dphi); | |
| - cudaFree(dev_ns_div_phi_v_v); | |
| + //cudaFree(dev_ns_div_phi_v_v); | |
| cudaFree(dev_ns_epsilon); | |
| cudaFree(dev_ns_epsilon_new); | |
| cudaFree(dev_ns_epsilon_old); | |
| t@@ -123,7 +126,10 @@ void DEM::freeNSmemDev() | |
| cudaFree(dev_ns_tau); | |
| cudaFree(dev_ns_div_phi_vi_v); | |
| //cudaFree(dev_ns_div_phi_tau); | |
| - cudaFree(dev_ns_div_tau); | |
| + //cudaFree(dev_ns_div_tau); | |
| + cudaFree(dev_ns_div_tau_x); | |
| + cudaFree(dev_ns_div_tau_y); | |
| + cudaFree(dev_ns_div_tau_z); | |
| cudaFree(dev_ns_f_pf); | |
| } | |
| t@@ -144,7 +150,7 @@ void DEM::transferNStoGlobalDeviceMemory(int statusmsg) | |
| cudaMemcpy(dev_ns_p, ns.p, memSizeF, cudaMemcpyHostToDevice); | |
| checkForCudaErrors("transferNStoGlobalDeviceMemory after first cudaMemcpy"… | |
| cudaMemcpy(dev_ns_v, ns.v, memSizeF*3, cudaMemcpyHostToDevice); | |
| - cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice); | |
| + //cudaMemcpy(dev_ns_v_p, ns.v_p, memSizeF*3, cudaMemcpyHostToDevice); | |
| cudaMemcpy(dev_ns_phi, ns.phi, memSizeF, cudaMemcpyHostToDevice); | |
| cudaMemcpy(dev_ns_dphi, ns.dphi, memSizeF, cudaMemcpyHostToDevice); | |
| t@@ -165,7 +171,7 @@ void DEM::transferNSfromGlobalDeviceMemory(int statusmsg) | |
| cudaMemcpy(ns.p, dev_ns_p, memSizeF, cudaMemcpyDeviceToHost); | |
| checkForCudaErrors("In transferNSfromGlobalDeviceMemory, dev_ns_p", 0); | |
| cudaMemcpy(ns.v, dev_ns_v, memSizeF*3, cudaMemcpyDeviceToHost); | |
| - cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost); | |
| + //cudaMemcpy(ns.v_p, dev_ns_v_p, memSizeF*3, cudaMemcpyDeviceToHost); | |
| cudaMemcpy(ns.phi, dev_ns_phi, memSizeF, cudaMemcpyDeviceToHost); | |
| cudaMemcpy(ns.dphi, dev_ns_dphi, memSizeF, cudaMemcpyDeviceToHost); | |
| cudaMemcpy(ns.norm, dev_ns_norm, memSizeF, cudaMemcpyDeviceToHost); | |
| t@@ -624,6 +630,63 @@ __global__ void setNSghostNodes( | |
| } | |
| } | |
| +// Update a field in the ghost nodes from their parent cell values. The edge | |
| +// (diagonal) cells are not written since they are not read. | |
| +// Launch per face. | |
| +template<typename T> | |
| +__global__ void setNSghostNodesFace( | |
| + T* dev_scalarfield_x, | |
| + T* dev_scalarfield_y, | |
| + T* dev_scalarfield_z, | |
| + int bc_bot, | |
| + int bc_top) | |
| +{ | |
| + // 3D thread index | |
| + const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| + const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
| + const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| + | |
| + // Grid dimensions | |
| + const unsigned int nx = devC_grid.num[0]; | |
| + const unsigned int ny = devC_grid.num[1]; | |
| + const unsigned int nz = devC_grid.num[2]; | |
| + | |
| + // check that we are not outside the fluid grid | |
| + if (x <= nx && y <= ny && z <= nz) { | |
| + | |
| + const T val_x = dev_scalarfield_x[vidx(x,y,z)]; | |
| + const T val_y = dev_scalarfield_y[vidx(x,y,z)]; | |
| + const T val_z = dev_scalarfield_z[vidx(x,y,z)]; | |
| + | |
| + // x | |
| + if (x == 0) | |
| + dev_scalarfield_x[vidx(nx+1,y,z)] = val_x; | |
| + if (x == nx) | |
| + dev_scalarfield_x[vidx(-1,y,z)] = val_x; | |
| + | |
| + // y | |
| + if (y == 0) | |
| + dev_scalarfield_y[vidx(x,ny+1,z)] = val_y; | |
| + if (y == ny) | |
| + dev_scalarfield_y[vidx(x,-1,z)] = val_y; | |
| + | |
| + // z | |
| + if (z == 0 && bc_bot == 0) | |
| + dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Dirichlet | |
| + if (z == 1 && bc_bot == 1) | |
| + dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Neumann | |
| + if (z == 0 && bc_bot == 2) | |
| + dev_scalarfield_z[vidx(x,y,nz+1)] = val_z; // Periodic -z | |
| + | |
| + if (z == nz && bc_top == 0) | |
| + dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Dirichlet | |
| + if (z == nz-1 && bc_top == 1) | |
| + dev_scalarfield_z[vidx(x,y,nz)] = val_z; // Neumann | |
| + if (z == nz && bc_top == 2) | |
| + dev_scalarfield_z[vidx(x,y,-1)] = val_z; // Periodic +z | |
| + } | |
| +} | |
| + | |
| // Update the tensor field for the ghost nodes from their parent cell values. | |
| // The edge (diagonal) cells are not written since they are not read. Launch | |
| // this kernel for all cells in the grid. | |
| t@@ -1688,7 +1751,7 @@ __global__ void findNSdivphiviv( | |
| // Read porosity and velocity in the 6 neighbor cells | |
| __syncthreads(); | |
| const Float phi_xn = dev_ns_phi[idx(x-1,y,z)]; | |
| - //const Float phi = dev_ns_phi[idx(x,y,z)]; | |
| + const Float phi = dev_ns_phi[idx(x,y,z)]; | |
| const Float phi_xp = dev_ns_phi[idx(x+1,y,z)]; | |
| const Float phi_yn = dev_ns_phi[idx(x,y-1,z)]; | |
| const Float phi_yp = dev_ns_phi[idx(x,y+1,z)]; | |
| t@@ -1696,7 +1759,7 @@ __global__ void findNSdivphiviv( | |
| const Float phi_zp = dev_ns_phi[idx(x,y,z+1)]; | |
| const Float3 v_xn = dev_ns_v[idx(x-1,y,z)]; | |
| - //const Float3 v = dev_ns_v[idx(x,y,z)]; | |
| + const Float3 v = dev_ns_v[idx(x,y,z)]; | |
| const Float3 v_xp = dev_ns_v[idx(x+1,y,z)]; | |
| const Float3 v_yn = dev_ns_v[idx(x,y-1,z)]; | |
| const Float3 v_yp = dev_ns_v[idx(x,y+1,z)]; | |
| t@@ -1704,7 +1767,8 @@ __global__ void findNSdivphiviv( | |
| const Float3 v_zp = dev_ns_v[idx(x,y,z+1)]; | |
| // Calculate upwind coefficients | |
| - /*const Float3 a = MAKE_FLOAT3( | |
| + //* | |
| + const Float3 a = MAKE_FLOAT3( | |
| copysign(1.0, v.x), | |
| copysign(1.0, v.y), | |
| copysign(1.0, v.z)); | |
| t@@ -1763,8 +1827,9 @@ __global__ void findNSdivphiviv( | |
| // Determine the weighted average of both discretizations | |
| const Float3 div_phi_vi_v = tau*div_uw + (1.0 - tau)*div_cd; | |
| - */ | |
| + //*/ | |
| + /* | |
| // Calculate the divergence: div(phi*v_i*v) | |
| const Float3 div_phi_vi_v = MAKE_FLOAT3( | |
| // x | |
| t@@ -1779,6 +1844,7 @@ __global__ void findNSdivphiviv( | |
| (phi_xp*v_xp.z*v_xp.x - phi_xn*v_xn.z*v_xn.x)/(2.0*dx) + | |
| (phi_yp*v_yp.z*v_yp.y - phi_yn*v_yn.z*v_yn.y)/(2.0*dy) + | |
| (phi_zp*v_zp.z*v_zp.z - phi_zn*v_zn.z*v_zn.z)/(2.0*dz)); | |
| + // */ | |
| // Write divergence | |
| __syncthreads(); | |
| t@@ -1929,6 +1995,7 @@ __global__ void findNSdivphitau( | |
| } | |
| // Find the divergence of phi v v | |
| +// Unused | |
| __global__ void findNSdivphivv( | |
| Float* dev_ns_v_prod, // in | |
| Float* dev_ns_phi, // in | |
| t@@ -2017,20 +2084,27 @@ __global__ void findNSdivphivv( | |
| // Find predicted fluid velocity | |
| +// Launch per face. | |
| __global__ void findPredNSvelocities( | |
| Float* dev_ns_p, // in | |
| + Float* dev_ns_v_x, // in | |
| + Float* dev_ns_v_y, // in | |
| + Float* dev_ns_v_z, // in | |
| Float3* dev_ns_v, // in | |
| Float* dev_ns_phi, // in | |
| Float* dev_ns_dphi, // in | |
| + Float* dev_ns_div_tau_x, // in | |
| + Float* dev_ns_div_tau_y, // in | |
| + Float* dev_ns_div_tau_z, // in | |
| Float3* dev_ns_div_phi_vi_v, // in | |
| - //Float3* dev_ns_div_phi_tau, // in | |
| - Float3* dev_ns_div_tau, // in | |
| int bc_bot, // in | |
| int bc_top, // in | |
| Float beta, // in | |
| Float3* dev_ns_fi, // in | |
| unsigned int ndem, // in | |
| - Float3* dev_ns_v_p) // out | |
| + Float* dev_ns_v_p_x, // out | |
| + Float* dev_ns_v_p_y, // out | |
| + Float* dev_ns_v_p_z) // out | |
| { | |
| // 3D thread index | |
| const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| t@@ -2048,76 +2122,123 @@ __global__ void findPredNSvelocities( | |
| const Float dz = devC_grid.L[2]/nz; | |
| // 1D thread index | |
| + const unsigned int vidx = vidx(x,y,z); | |
| const unsigned int cellidx = idx(x,y,z); | |
| // Check that we are not outside the fluid grid | |
| - if (x < nx && y < ny && z < nz) { | |
| + if (x <= nx && y <= ny && z <= nz) { | |
| // Values that are needed for calculating the predicted velocity | |
| __syncthreads(); | |
| - const Float3 v = dev_ns_v[cellidx]; | |
| - const Float phi = dev_ns_phi[cellidx]; | |
| - const Float dphi = dev_ns_dphi[cellidx]; | |
| - const Float3 div_phi_vi_v = dev_ns_div_phi_vi_v[cellidx]; | |
| - //const Float3 div_phi_tau = dev_ns_div_phi_tau[cellidx]; | |
| - const Float3 div_tau = dev_ns_div_tau[cellidx]; | |
| + const Float3 v_face = MAKE_FLOAT3( | |
| + dev_ns_v_x[vidx], | |
| + dev_ns_v_y[vidx], | |
| + dev_ns_v_z[vidx]); | |
| + const Float3 div_tau_face = MAKE_FLOAT3( | |
| + dev_ns_div_tau_x[vidx], | |
| + dev_ns_div_tau_y[vidx], | |
| + dev_ns_div_tau_z[vidx]); | |
| + | |
| + // cell center values | |
| + const Float phi_xn = dev_ns_phi[idx(x-1,y,z)]; | |
| + const Float phi_c = dev_ns_phi[cellidx]; | |
| + const Float phi_yn = dev_ns_phi[idx(x,y-1,z)]; | |
| + const Float phi_zn = dev_ns_phi[idx(x,y,z-1)]; | |
| + | |
| + const Float dphi_xn = dev_ns_dphi[idx(x-1,y,z)]; | |
| + const Float dphi_c = dev_ns_dphi[cellidx]; | |
| + const Float dphi_yn = dev_ns_dphi[idx(x,y-1,z)]; | |
| + const Float dphi_zn = dev_ns_dphi[idx(x,y,z-1)]; | |
| + | |
| + const Float3 v_xn = dev_ns_v[idx(x-1,y,z)]; | |
| + const Float3 v_c = dev_ns_v[cellidx]; | |
| + const Float3 v_yn = dev_ns_v[idx(x,y-1,z)]; | |
| + const Float3 v_zn = dev_ns_v[idx(x,y,z-1)]; | |
| + | |
| + const Float3 div_phi_vi_v_xn = dev_ns_div_phi_vi_v[idx(x-1,y,z)]; | |
| + const Float3 div_phi_vi_v_c = dev_ns_div_phi_vi_v[cellidx]; | |
| + const Float3 div_phi_vi_v_yn = dev_ns_div_phi_vi_v[idx(x,y-1,z)]; | |
| + const Float3 div_phi_vi_v_zn = dev_ns_div_phi_vi_v[idx(x,y,z-1)]; | |
| + | |
| + // component-wise average values | |
| + const Float3 phi = MAKE_FLOAT3( | |
| + (phi_c + phi_xn)/2.0, | |
| + (phi_c + phi_yn)/2.0, | |
| + (phi_c + phi_zn)/2.0); | |
| + const Float3 dphi = MAKE_FLOAT3( | |
| + (dphi_c + dphi_xn)/2.0, | |
| + (dphi_c + dphi_yn)/2.0, | |
| + (dphi_c + dphi_zn)/2.0); | |
| // The particle-fluid interaction force should only be incoorporated if | |
| // there is a fluid viscosity | |
| - Float3 f_i; | |
| - if (devC_params.mu > 0.0) | |
| - f_i = dev_ns_fi[cellidx]; | |
| - else | |
| - f_i = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| + Float3 f_i_c, f_i_xn, f_i_yn, f_i_zn; | |
| + if (devC_params.mu > 0.0) { | |
| + f_i_c = dev_ns_fi[cellidx]; | |
| + f_i_xn = dev_ns_fi[idx(x-1,y,z)]; | |
| + f_i_yn = dev_ns_fi[idx(x,y-1,z)]; | |
| + f_i_zn = dev_ns_fi[idx(x,y,z-1)]; | |
| + } else { | |
| + f_i_c = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| + f_i_xn = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| + f_i_yn = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| + f_i_zn = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| + } | |
| + const Float3 f_i = MAKE_FLOAT3( | |
| + (f_i_c.x + f_i_xn.x)/2.0, | |
| + (f_i_c.y + f_i_yn.y)/2.0, | |
| + (f_i_c.z + f_i_zn.z)/2.0); | |
| const Float dt = ndem*devC_dt; | |
| const Float rho = devC_params.rho_f; | |
| - // Find pressure gradient | |
| - Float3 grad_p = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| - | |
| // The pressure gradient is not needed in Chorin's projection method | |
| // (ns.beta=0), so only has to be looked up in pressure-dependant | |
| // projection methods | |
| Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| if (beta > 0.0) { | |
| - grad_p = gradient(dev_ns_p, x, y, z, dx, dy, dz); | |
| + __syncthreads(); | |
| + const Float p = dev_ns_p[cellidx]; | |
| + const Float p_xn = dev_ns_p[idx(x-1,y,z)]; | |
| + const Float p_yn = dev_ns_p[idx(x,y-1,z)]; | |
| + const Float p_zn = dev_ns_p[idx(x,y,z-1)]; | |
| + const Float3 grad_p = MAKE_FLOAT3( | |
| + (p - p_xn)/dx, | |
| + (p - p_yn)/dy, | |
| + (p - p_zn)/dz); | |
| #ifdef SET_1 | |
| - pressure_term = -beta*dt/(rho*phi)*grad_p; | |
| + pressure_term = -beta*dt/(rho*phi)*grad_p_x; | |
| #endif | |
| #ifdef SET_2 | |
| pressure_term = -beta*dt/rho*grad_p; | |
| #endif | |
| } | |
| - // Calculate the predicted velocity | |
| - /* | |
| - Float3 v_p = v | |
| - + pressure_term | |
| - + 1.0/devC_params.rho_f*div_phi_tau*ndem*devC_dt/phi // diffusion | |
| - + MAKE_FLOAT3(devC_params.g[0], devC_params.g[1], devC_params.g[2]) | |
| - *ndem*devC_dt // gravity term | |
| - - ndem*devC_dt/(devC_params.rho_f*phi)*f_i | |
| - //- ndem*devC_dt/(devC_params.rho_f*phi*dx*dy*dz)*f_i | |
| - - v*dphi/phi | |
| - - div_phi_vi_v*ndem*devC_dt/phi // advection term | |
| - ;*/ | |
| + // calculate the advection term using a first-order upwind scheme | |
| + // div_phi_vf_vf | |
| + | |
| + const float3 div_phi_vf_vx = make_float3( | |
| + div_phi_vx_vf, | |
| + div_phi_vy_vf, | |
| + div_phi_vz_vf)k | |
| + | |
| + // Determine the predicted velocity | |
| #ifdef SET_1 | |
| - const Float3 interaction_term = -dt/(rho*phi)*f_i; | |
| - const Float3 diffusion_term = dt/(rho*phi)*div_tau; | |
| - const Float3 gravity_term = MAKE_FLOAT3( | |
| + const Float3 interaction_term = -dt/(rho*phi)*f_i; | |
| + const Float3 diffusion_term = dt/(rho*phi)*div_tau; | |
| + const Float3 gravity_term = MAKE_FLOAT3( | |
| devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt; | |
| - const Float3 porosity_term = -1.0*v*dphi/phi; | |
| - const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi; | |
| + const Float3 porosity_term = -1.0*v*dphi/phi; | |
| + const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi; | |
| #endif | |
| #ifdef SET_2 | |
| - const Float3 interaction_term = -dt/(rho*phi)*f_i; | |
| - const Float3 diffusion_term = dt/rho*div_tau; | |
| - const Float3 gravity_term = MAKE_FLOAT3( | |
| - devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt; | |
| - const Float3 porosity_term = -1.0*v*dphi/phi; | |
| - const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi; | |
| + const Float3 interaction_term = -dt/(rho*phi)*f_i; | |
| + const Float3 diffusion_term = dt/rho*div_tau; | |
| + const Float3 gravity_term = MAKE_FLOAT3( | |
| + devC_params.g[0], devC_params.g[1], devC_params.g[2])*dt; | |
| + const Float3 porosity_term = -1.0*v*dphi/phi; | |
| + const Float3 advection_term = -1.0*div_phi_vi_v*dt/phi; | |
| #endif | |
| Float3 v_p = v | |
| t@@ -2155,8 +2276,9 @@ __global__ void findPredNSvelocities( | |
| // Save the predicted velocity | |
| __syncthreads(); | |
| - //v_p = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| - dev_ns_v_p[cellidx] = v_p; | |
| + dev_ns_v_p_x[vidx] = v_p.x; | |
| + dev_ns_v_p_y[vidx] = v_p.y; | |
| + dev_ns_v_p_z[vidx] = v_p.z; | |
| #ifdef CHECK_NS_FINITE | |
| (void)checkFiniteFloat3("v_p", x, y, z, v_p); | |
| t@@ -2176,7 +2298,7 @@ __global__ void findNSforcing( | |
| Float* dev_ns_f, | |
| Float* dev_ns_phi, | |
| Float* dev_ns_dphi, | |
| - Float3* dev_ns_v_p, | |
| + //Float3* dev_ns_v_p, | |
| unsigned int nijac, | |
| unsigned int ndem) | |
| { | |
| t@@ -2786,9 +2908,12 @@ __global__ void findInteractionForce( | |
| Float4* dev_vel, // in | |
| Float* dev_ns_phi, // in | |
| Float* dev_ns_p, // in | |
| - Float3* dev_ns_v, // in | |
| - Float* dev_ns_tau, // in | |
| - const unsigned int iter, // in | |
| + Float* dev_ns_v_x, // in | |
| + Float* dev_ns_v_y, // in | |
| + Float* dev_ns_v_z, // in | |
| + Float* dev_ns_div_tau_x,// in | |
| + Float* dev_ns_div_tau_y,// in | |
| + Float* dev_ns_div_tau_z,// in | |
| Float3* dev_ns_f_pf, // out | |
| Float4* dev_force) // out | |
| { | |
| t@@ -2819,8 +2944,30 @@ __global__ void findInteractionForce( | |
| // Fluid information | |
| __syncthreads(); | |
| - const Float phi = dev_ns_phi[cellidx]; | |
| - const Float3 v_f = dev_ns_v[cellidx]; | |
| + const Float phi = dev_ns_phi[cellidx]; | |
| + const Float v_x = dev_ns_v_x[vidx(x,y,z)]; | |
| + const Float v_x_p = dev_ns_v_x[vidx(x+1,y,z)]; | |
| + const Float v_y = dev_ns_v_x[vidx(x,y,z)]; | |
| + const Float v_y_p = dev_ns_v_x[vidx(x,y+1,z)]; | |
| + const Float v_z = dev_ns_v_x[vidx(x,y,z)]; | |
| + const Float v_z_p = dev_ns_v_x[vidx(x,y,z+1)]; | |
| + | |
| + const Float div_tau_x = dev_ns_div_tau_x[vidx(x,y,z)]; | |
| + const Float div_tau_x_p = dev_ns_div_tau_x[vidx(x+1,y,z)]; | |
| + const Float div_tau_y = dev_ns_div_tau_x[vidx(x,y,z)]; | |
| + const Float div_tau_y_p = dev_ns_div_tau_x[vidx(x,y+1,z)]; | |
| + const Float div_tau_z = dev_ns_div_tau_x[vidx(x,y,z)]; | |
| + const Float div_tau_z_p = dev_ns_div_tau_x[vidx(x,y,z+1)]; | |
| + | |
| + const Float3 v_f = MAKE_FLOAT3( | |
| + (v_x + v_x_p)/2.0, | |
| + (v_y + v_y_p)/2.0, | |
| + (v_z + v_z_p)/2.0); | |
| + | |
| + const Float3 div_tau = MAKE_FLOAT3( | |
| + (div_tau_x + div_tau_x_p)/2.0, | |
| + (div_tau_y + div_tau_y_p)/2.0, | |
| + (div_tau_z + div_tau_z_p)/2.0); | |
| const Float3 v_rel = v_f - v_p; | |
| const Float v_rel_length = length(v_rel); | |
| t@@ -2844,13 +2991,7 @@ __global__ void findInteractionForce( | |
| -1.0*gradient(dev_ns_p, i_x, i_y, i_z, dx, dy, dz)*V_p; | |
| // Viscous force | |
| - Float3 f_v; | |
| - if (iter == 0) | |
| - f_v = MAKE_FLOAT3(0.0, 0.0, 0.0); | |
| - else | |
| - f_v = | |
| - -1.0*divergence_tensor(dev_ns_tau, i_x, i_y, i_z, dx, dy, dz) | |
| - *V_p; | |
| + const Float3 f_v = -1.0*div_tau*V_p; | |
| // Interaction force on particle (force) and fluid (f_pf) | |
| __syncthreads(); | |
| t@@ -3020,6 +3161,156 @@ __global__ void applyInteractionForceToFluid( | |
| } | |
| }*/ | |
| +// Launch per cell face node. | |
| +// Cell center ghost nodes must be set prior to call. | |
| +__global__ void interpolateCenterToFace( | |
| + Float3* dev_in, | |
| + Float* dev_out_x, | |
| + Float* dev_out_y, | |
| + Float* dev_out_z) | |
| +{ | |
| + // 3D thread index | |
| + const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| + const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
| + const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| + | |
| + // Check that we are not outside the fluid grid | |
| + if (x <= devC_grid.num[0] | |
| + && y <= devC_grid.num[1] | |
| + && z <= devC_grid.num[2]) { | |
| + | |
| + const unsigned int faceidx = vidx(x,y,z); | |
| + | |
| + __syncthreads(); | |
| + const Float3 xn = dev_in[idx(x-1,y,z)]; | |
| + const Float3 yn = dev_in[idx(x,y-1,z)]; | |
| + const Float3 zn = dev_in[idx(x,y,z-1)]; | |
| + const Float3 center = dev_in[idx(x,y,z)]; | |
| + | |
| + const Float3 x_val = (center.x - xn.x)/2.0; | |
| + const Float3 y_val = (center.y - yn.y)/2.0; | |
| + const Float3 z_val = (center.z - zn.z)/2.0; | |
| + | |
| + __syncthreads(); | |
| + dev_out_x[faceidx] = x_val; | |
| + dev_out_y[faceidx] = y_val; | |
| + dev_out_z[faceidx] = z_val; | |
| + } | |
| +} | |
| + | |
| +// Launch per cell center node | |
| +__global__ void interpolateFaceToCenter( | |
| + Float* dev_in_x, | |
| + Float* dev_in_y, | |
| + Float* dev_in_z, | |
| + Float3* dev_out) | |
| +{ | |
| + // 3D thread index | |
| + const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| + const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
| + const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| + | |
| + // Check that we are not outside the fluid grid | |
| + if (x < devC_grid.num[0] && y < devC_grid.num[1] && z < devC_grid.num[2]) { | |
| + | |
| + const unsigned int cellidx = idx(x,y,z); | |
| + | |
| + __syncthreads(); | |
| + const Float x_n = dev_in_x[idx(x,y,z)]; | |
| + const Float x_p = dev_in_x[idx(x+1,y,z)]; | |
| + const Float y_n = dev_in_x[idx(x,y,z)]; | |
| + const Float y_p = dev_in_x[idx(x,y+1,z)]; | |
| + const Float z_n = dev_in_x[idx(x,y,z)]; | |
| + const Float z_p = dev_in_x[idx(x,y,z+1)]; | |
| + | |
| + const val = MAKE_FLOAT3( | |
| + (x_n + x_p)/2.0, | |
| + (y_n + y_p)/2.0, | |
| + (z_n + z_p)/2.0); | |
| + | |
| + __syncthreads(); | |
| + dev_out[cellidx] = val; | |
| + } | |
| +} | |
| + | |
| +// Launch per cell face node. Set velocity ghost nodes beforehand. | |
| +// Find div(tau) at all cell faces. | |
| +__global__ findFaceDivTau( | |
| + Float* dev_ns_v_x, | |
| + Float* dev_ns_v_y, | |
| + Float* dev_ns_v_z, | |
| + Float* dev_ns_div_tau_x, | |
| + Float* dev_ns_div_tau_y, | |
| + Float* dev_ns_div_tau_z) | |
| +{ | |
| + // 3D thread index | |
| + const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x; | |
| + const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y; | |
| + const unsigned int z = blockDim.z * blockIdx.z + threadIdx.z; | |
| + | |
| + // Grid dimensions | |
| + const unsigned int nx = devC_grid.num[0]; | |
| + const unsigned int ny = devC_grid.num[1]; | |
| + const unsigned int nz = devC_grid.num[2]; | |
| + | |
| + // Cell sizes | |
| + const Float dx = devC_grid.L[0]/nx; | |
| + const Float dy = devC_grid.L[1]/ny; | |
| + const Float dz = devC_grid.L[2]/nz; | |
| + | |
| + // Check that we are not outside the fluid grid | |
| + if (x <= nx && y <= ny && z <= nz) { | |
| + | |
| + const unsigned int faceidx = vidx(x,y,z); | |
| + | |
| + __syncthreads(); | |
| + const Float v_x_xn = dev_ns_v_x[vidx(x-1,y,z)]; | |
| + const Float v_x = dev_ns_v_x[faceidx]; | |
| + const Float v_x_xp = dev_ns_v_x[vidx(x+1,y,z)]; | |
| + const Float v_x_yn = dev_ns_v_x[vidx(x,y-1,z)]; | |
| + const Float v_x_yp = dev_ns_v_x[vidx(x,y+1,z)]; | |
| + const Float v_x_zn = dev_ns_v_x[vidx(x,y,z-1)]; | |
| + const Float v_x_zp = dev_ns_v_x[vidx(x,y,z+1)]; | |
| + | |
| + const Float x_y_xn = dev_nx_v_y[vidx(x-1,y,z)]; | |
| + const Float x_y_xp = dev_nx_v_y[vidx(x+1,y,z)]; | |
| + const Float v_y_yn = dev_ns_v_y[vidx(x,y-1,z)]; | |
| + const Float v_y = dev_ns_v_y[faceidx]; | |
| + const Float v_y_yp = dev_ns_v_y[vidx(x,y+1,z)]; | |
| + const Float x_y_zn = dev_nx_v_y[vidx(x,y,z-1)]; | |
| + const Float x_y_zp = dev_nx_v_y[vidx(x,y,z+1)]; | |
| + | |
| + const Float v_z_xn = dev_ns_v_z[vidx(x-1,y,z)]; | |
| + const Float v_z_xp = dev_ns_v_z[vidx(x+1,y,z)]; | |
| + const Float v_z_yn = dev_ns_v_z[vidx(x,y-1,z)]; | |
| + const Float v_z_yp = dev_ns_v_z[vidx(x,y+1,z)]; | |
| + const Float v_z_zn = dev_ns_v_z[vidx(x,y,z-1)]; | |
| + const Float v_z = dev_ns_v_z[faceidx]; | |
| + const Float v_z_zp = dev_ns_v_z[vidx(x,y,z+1)]; | |
| + | |
| + const Float div_tau_x = | |
| + devC_params.mu*( | |
| + (v_x_xp - 2.0*v_x + v_x_xn)/(dx*dx) + | |
| + (v_x_yp - 2.0*v_x + v_x_yn)/(dy*dy) + | |
| + (v_x_zp - 2.0*v_x + v_x_zn)/(dz*dz)); | |
| + const Float div_tau_y = | |
| + devC_params.mu*( | |
| + (v_y_xp - 2.0*v_y + v_y_xn)/(dx*dx) + | |
| + (v_y_yp - 2.0*v_y + v_y_yn)/(dy*dy) + | |
| + (v_y_zp - 2.0*v_y + v_y_zn)/(dz*dz)); | |
| + const Float div_tau_z = | |
| + devC_params.mu*( | |
| + (v_z_xp - 2.0*v_z + v_z_xn)/(dx*dx) + | |
| + (v_z_yp - 2.0*v_z + v_z_yn)/(dy*dy) + | |
| + (v_z_zp - 2.0*v_z + v_z_zn)/(dz*dz)); | |
| + | |
| + __syncthreads(); | |
| + dev_ns_div_tau_x[faceidx] = div_tau_x; | |
| + dev_ns_div_tau_y[faceidx] = div_tau_y; | |
| + dev_ns_div_tau_z[faceidx] = div_tau_z; | |
| + } | |
| +} | |
| + | |
| // Print final heads and free memory | |
| void DEM::endNSdev() | |
| { | |
| diff --git a/src/sphere.h b/src/sphere.h | |
| t@@ -175,7 +175,7 @@ class DEM { | |
| Float* dev_ns_v_x; // Cell fluid velocity in staggered grid | |
| Float* dev_ns_v_y; // Cell fluid velocity in staggered grid | |
| Float* dev_ns_v_z; // Cell fluid velocity in staggered grid | |
| - Float3* dev_ns_v_p; // Predicted cell fluid velocity | |
| + //Float3* dev_ns_v_p; // Predicted cell fluid velocity | |
| Float* dev_ns_v_p_x; // Predicted cell fluid velocity in st. g… | |
| Float* dev_ns_v_p_y; // Predicted cell fluid velocity in st. g… | |
| Float* dev_ns_v_p_z; // Predicted cell fluid velocity in st. g… | |
| t@@ -184,7 +184,7 @@ class DEM { | |
| Float3* dev_ns_fi; // Particle-fluid interaction force | |
| Float* dev_ns_phi; // Cell porosity | |
| Float* dev_ns_dphi; // Cell porosity change | |
| - Float3* dev_ns_div_phi_v_v; // Divegence used in velocity prediction | |
| + //Float3* dev_ns_div_phi_v_v; // Divegence used in velocity prediction | |
| Float* dev_ns_epsilon; // Pressure difference | |
| Float* dev_ns_epsilon_new; // Pressure diff. after Jacobi iteration | |
| Float* dev_ns_epsilon_old; // Pressure diff. before Jacobi iteration | |
| t@@ -193,10 +193,13 @@ class DEM { | |
| Float* dev_ns_f1; // Constant terms in forcing function | |
| Float3* dev_ns_f2; // Constant slopes in forcing function | |
| Float* dev_ns_v_prod; // Outer product of fluid velocities | |
| - Float* dev_ns_tau; // Fluid stress tensor | |
| - Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v) | |
| + //Float* dev_ns_tau; // Fluid stress tensor | |
| + //Float3* dev_ns_div_phi_vi_v; // div(phi*vi*v) | |
| //Float3* dev_ns_div_phi_tau; // div(phi*tau) | |
| Float3* dev_ns_div_tau; // div(tau) | |
| + Float* dev_ns_div_tau_x; // div(tau) on x-face | |
| + Float* dev_ns_div_tau_y; // div(tau) on y-face | |
| + Float* dev_ns_div_tau_z; // div(tau) on z-face | |
| Float3* dev_ns_f_pf; // Particle-fluid interaction force | |
| //// Navier Stokes functions | |
| diff --git a/tests/stokes_law.py b/tests/stokes_law.py | |
| t@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt | |
| print("### Stokes test - single sphere sedimentation ###") | |
| ## Stokes drag | |
| -orig = sphere.sim(sid = "stokes_law_gradP", fluid = True) | |
| +orig = sphere.sim(sid = "stokes_law_set2", fluid = True) | |
| cleanup(orig) | |
| orig.defaultParams() | |
| orig.addParticle([0.5,0.5,1.46], 0.05) |