| tHalf-step leapfrog Verlet integration scheme added - sphere - GPU-based 3D dis… | |
| git clone git://src.adamsgaard.dk/sphere | |
| Log | |
| Files | |
| Refs | |
| LICENSE | |
| --- | |
| commit a8f9bb4ed81e686acb8e1a51b4ba60fedcf03dae | |
| parent 68987a0bcf02ed1b43919109acc000099694f787 | |
| Author: Anders Damsgaard Christensen <[email protected]> | |
| Date: Tue, 13 Nov 2012 20:36:03 +0100 | |
| Half-step leapfrog Verlet integration scheme added | |
| Diffstat: | |
| M src/device.cu | 10 ++++++++++ | |
| M src/integration.cuh | 154 +++++++++++++++++++----------… | |
| M src/sphere.h | 2 ++ | |
| 3 files changed, 109 insertions(+), 57 deletions(-) | |
| --- | |
| diff --git a/src/device.cu b/src/device.cu | |
| t@@ -237,10 +237,12 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
| cudaMalloc((void**)&dev_x, memSizeF4); | |
| cudaMalloc((void**)&dev_xysum, memSizeF4); | |
| cudaMalloc((void**)&dev_vel, memSizeF4); | |
| + cudaMalloc((void**)&dev_vel0, memSizeF4); | |
| cudaMalloc((void**)&dev_acc, memSizeF4); | |
| cudaMalloc((void**)&dev_force, memSizeF4); | |
| cudaMalloc((void**)&dev_angpos, memSizeF4); | |
| cudaMalloc((void**)&dev_angvel, memSizeF4); | |
| + cudaMalloc((void**)&dev_angvel0, memSizeF4); | |
| cudaMalloc((void**)&dev_angacc, memSizeF4); | |
| cudaMalloc((void**)&dev_torque, memSizeF4); | |
| t@@ -296,10 +298,12 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
| cudaFree(dev_x); | |
| cudaFree(dev_xysum); | |
| cudaFree(dev_vel); | |
| + cudaFree(dev_vel0); | |
| cudaFree(dev_acc); | |
| cudaFree(dev_force); | |
| cudaFree(dev_angpos); | |
| cudaFree(dev_angvel); | |
| + cudaFree(dev_angvel0); | |
| cudaFree(dev_angacc); | |
| cudaFree(dev_torque); | |
| t@@ -352,6 +356,8 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
| sizeof(Float2)*np, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_vel, k.vel, | |
| memSizeF4, cudaMemcpyHostToDevice); | |
| + cudaMemcpy( dev_vel0, k.vel, | |
| + memSizeF4, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_acc, k.acc, | |
| memSizeF4, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_force, k.force, | |
| t@@ -360,6 +366,8 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
| memSizeF4, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_angvel, k.angvel, | |
| memSizeF4, cudaMemcpyHostToDevice); | |
| + cudaMemcpy( dev_angvel0, k.angvel, | |
| + memSizeF4, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_angacc, k.angacc, | |
| memSizeF4, cudaMemcpyHostToDevice); | |
| cudaMemcpy( dev_torque, k.torque, | |
| t@@ -695,6 +703,8 @@ __host__ void DEM::startTime() | |
| dev_angpos, | |
| dev_acc, | |
| dev_angacc, | |
| + dev_vel0, | |
| + dev_angvel0, | |
| dev_xysum, | |
| dev_gridParticleIndex); | |
| cudaThreadSynchronize(); | |
| diff --git a/src/integration.cuh b/src/integration.cuh | |
| t@@ -11,6 +11,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_… | |
| Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, … | |
| Float4* dev_force, Float4* dev_torque, Float4* dev_a… | |
| Float4* dev_acc, Float4* dev_angacc, | |
| + Float4* dev_vel0, Float4* dev_angvel0, | |
| Float2* dev_xysum, | |
| unsigned int* dev_gridParticleIndex) // Input: Sorte… | |
| { | |
| t@@ -21,15 +22,17 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de… | |
| // Copy data to temporary arrays to avoid any potential read-after-write, | |
| // write-after-read, or write-after-write hazards. | |
| unsigned int orig_idx = dev_gridParticleIndex[idx]; | |
| - Float4 force = dev_force[orig_idx]; | |
| - Float4 torque = dev_torque[orig_idx]; | |
| - Float4 angpos = dev_angpos[orig_idx]; | |
| - Float4 acc = dev_acc[orig_idx]; | |
| - Float4 angacc = dev_angacc[orig_idx]; | |
| - Float4 x = dev_x_sorted[idx]; | |
| - Float4 vel = dev_vel_sorted[idx]; | |
| - Float4 angvel = dev_angvel_sorted[idx]; | |
| - Float radius = x.w; | |
| + Float4 force = dev_force[orig_idx]; | |
| + Float4 torque = dev_torque[orig_idx]; | |
| + Float4 angpos = dev_angpos[orig_idx]; | |
| + Float4 acc = dev_acc[orig_idx]; | |
| + Float4 angacc = dev_angacc[orig_idx]; | |
| + Float4 vel0 = dev_vel0[orig_idx]; | |
| + Float4 angvel0 = dev_angvel0[orig_idx]; | |
| + Float4 x = dev_x_sorted[idx]; | |
| + Float4 vel = dev_vel_sorted[idx]; | |
| + Float4 angvel = dev_angvel_sorted[idx]; | |
| + Float radius = x.w; | |
| Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f); | |
| t@@ -42,47 +45,6 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev… | |
| // Particle mass | |
| Float m = 4.0/3.0 * PI * radius*radius*radius * rho; | |
| -#if 0 | |
| - //// First-order Euler integration scheme /// | |
| - // Update angular position | |
| - angpos.x += angvel.x * dt; | |
| - angpos.y += angvel.y * dt; | |
| - angpos.z += angvel.z * dt; | |
| - | |
| - // Update position | |
| - x.x += vel.x * dt; | |
| - x.y += vel.y * dt; | |
| - x.z += vel.z * dt; | |
| -#else | |
| - | |
| - /// Second-order scheme based on Taylor expansion /// | |
| - // Update angular position | |
| - angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5; | |
| - angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5; | |
| - angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5; | |
| - | |
| - // Update position | |
| - x.x += vel.x * dt + acc.x * dt*dt * 0.5; | |
| - x.y += vel.y * dt + acc.y * dt*dt * 0.5; | |
| - x.z += vel.z * dt + acc.z * dt*dt * 0.5; | |
| -#endif | |
| - | |
| - // Update angular velocity | |
| - angvel.x += angacc.x * dt; | |
| - angvel.y += angacc.y * dt; | |
| - angvel.z += angacc.z * dt; | |
| - | |
| - // Update linear velocity | |
| - vel.x += acc.x * dt; | |
| - vel.y += acc.y * dt; | |
| - vel.z += acc.z * dt; | |
| - | |
| - // Add x-displacement for this time step to | |
| - // sum of x-displacements | |
| - //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
| - xysum.x += vel.x * dt; | |
| - xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f; | |
| - | |
| // Update linear acceleration of particle | |
| acc.x = force.x / m; | |
| acc.y = force.y / m; | |
| t@@ -132,17 +94,95 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* d… | |
| x.x -= L.x; | |
| } | |
| + | |
| + //// Half-step leapfrog Verlet integration scheme //// | |
| + // Update half-step linear velocities | |
| + vel0.x += acc.x * dt; | |
| + vel0.y += acc.y * dt; | |
| + vel0.z += acc.z * dt; | |
| + | |
| + // Update half-step angular velocities | |
| + angvel0.x += angacc.x * dt; | |
| + angvel0.y += angacc.y * dt; | |
| + angvel0.z += angacc.z * dt; | |
| + | |
| + // Update positions | |
| + x.x += vel0.x * dt; | |
| + x.y += vel0.y * dt; | |
| + x.z += vel0.z * dt; | |
| + | |
| + // Update angular positions | |
| + angpos.x += angvel0.x * dt; | |
| + angpos.y += angvel0.y * dt; | |
| + angpos.z += angvel0.z * dt; | |
| + | |
| + // Update full-step linear velocity | |
| + vel.x = vel0.x + 0.5 * acc.x * dt; | |
| + vel.y = vel0.y + 0.5 * acc.x * dt; | |
| + vel.z = vel0.z + 0.5 * acc.x * dt; | |
| + | |
| + // Update full-step angular velocity | |
| + angvel.x = angvel0.x + 0.5 * angacc.x * dt; | |
| + angvel.y = angvel0.y + 0.5 * angacc.x * dt; | |
| + angvel.z = angvel0.z + 0.5 * angacc.x * dt; | |
| + | |
| + /* | |
| + //// First-order Euler integration scheme /// | |
| + // Update angular position | |
| + angpos.x += angvel.x * dt; | |
| + angpos.y += angvel.y * dt; | |
| + angpos.z += angvel.z * dt; | |
| + | |
| + // Update position | |
| + x.x += vel.x * dt; | |
| + x.y += vel.y * dt; | |
| + x.z += vel.z * dt; | |
| + */ | |
| + | |
| + /* | |
| + /// Second-order scheme based on Taylor expansion /// | |
| + // Update angular position | |
| + angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5; | |
| + angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5; | |
| + angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5; | |
| + | |
| + // Update position | |
| + x.x += vel.x * dt + acc.x * dt*dt * 0.5; | |
| + x.y += vel.y * dt + acc.y * dt*dt * 0.5; | |
| + x.z += vel.z * dt + acc.z * dt*dt * 0.5; | |
| + */ | |
| + | |
| + /* | |
| + // Update angular velocity | |
| + angvel.x += angacc.x * dt; | |
| + angvel.y += angacc.y * dt; | |
| + angvel.z += angacc.z * dt; | |
| + | |
| + // Update linear velocity | |
| + vel.x += acc.x * dt; | |
| + vel.y += acc.y * dt; | |
| + vel.z += acc.z * dt; | |
| + */ | |
| + | |
| + // Add x-displacement for this time step to | |
| + // sum of x-displacements | |
| + //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
| + xysum.x += vel.x * dt; | |
| + xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f; | |
| + | |
| // Hold threads for coalesced write | |
| __syncthreads(); | |
| // Store data in global memory at original, pre-sort positions | |
| - dev_xysum[orig_idx] += xysum; | |
| - dev_acc[orig_idx] = acc; | |
| - dev_angacc[orig_idx] = angacc; | |
| - dev_angvel[orig_idx] = angvel; | |
| - dev_vel[orig_idx] = vel; | |
| - dev_angpos[orig_idx] = angpos; | |
| - dev_x[orig_idx] = x; | |
| + dev_xysum[orig_idx] += xysum; | |
| + dev_acc[orig_idx] = acc; | |
| + dev_angacc[orig_idx] = angacc; | |
| + dev_angvel[orig_idx] = angvel; | |
| + dev_angvel0[orig_idx] = angvel0; | |
| + dev_vel[orig_idx] = vel; | |
| + dev_vel0[orig_idx] = vel0; | |
| + dev_angpos[orig_idx] = angpos; | |
| + dev_x[orig_idx] = x; | |
| } | |
| } // End of integrate(...) | |
| diff --git a/src/sphere.h b/src/sphere.h | |
| t@@ -54,10 +54,12 @@ class DEM { | |
| Float4 *dev_x; | |
| Float2 *dev_xysum; | |
| Float4 *dev_vel; | |
| + Float4 *dev_vel0; | |
| Float4 *dev_acc; | |
| Float4 *dev_force; | |
| Float4 *dev_angpos; | |
| Float4 *dev_angvel; | |
| + Float4 *dev_angvel0; | |
| Float4 *dev_angacc; | |
| Float4 *dev_torque; | |
| unsigned int *dev_contacts; |