tHalf-step leapfrog Verlet integration scheme added - sphere - GPU-based 3D dis… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit a8f9bb4ed81e686acb8e1a51b4ba60fedcf03dae | |
parent 68987a0bcf02ed1b43919109acc000099694f787 | |
Author: Anders Damsgaard Christensen <[email protected]> | |
Date: Tue, 13 Nov 2012 20:36:03 +0100 | |
Half-step leapfrog Verlet integration scheme added | |
Diffstat: | |
M src/device.cu | 10 ++++++++++ | |
M src/integration.cuh | 154 +++++++++++++++++++----------… | |
M src/sphere.h | 2 ++ | |
3 files changed, 109 insertions(+), 57 deletions(-) | |
--- | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -237,10 +237,12 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
cudaMalloc((void**)&dev_x, memSizeF4); | |
cudaMalloc((void**)&dev_xysum, memSizeF4); | |
cudaMalloc((void**)&dev_vel, memSizeF4); | |
+ cudaMalloc((void**)&dev_vel0, memSizeF4); | |
cudaMalloc((void**)&dev_acc, memSizeF4); | |
cudaMalloc((void**)&dev_force, memSizeF4); | |
cudaMalloc((void**)&dev_angpos, memSizeF4); | |
cudaMalloc((void**)&dev_angvel, memSizeF4); | |
+ cudaMalloc((void**)&dev_angvel0, memSizeF4); | |
cudaMalloc((void**)&dev_angacc, memSizeF4); | |
cudaMalloc((void**)&dev_torque, memSizeF4); | |
t@@ -296,10 +298,12 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
cudaFree(dev_x); | |
cudaFree(dev_xysum); | |
cudaFree(dev_vel); | |
+ cudaFree(dev_vel0); | |
cudaFree(dev_acc); | |
cudaFree(dev_force); | |
cudaFree(dev_angpos); | |
cudaFree(dev_angvel); | |
+ cudaFree(dev_angvel0); | |
cudaFree(dev_angacc); | |
cudaFree(dev_torque); | |
t@@ -352,6 +356,8 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
sizeof(Float2)*np, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_vel, k.vel, | |
memSizeF4, cudaMemcpyHostToDevice); | |
+ cudaMemcpy( dev_vel0, k.vel, | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_acc, k.acc, | |
memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_force, k.force, | |
t@@ -360,6 +366,8 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angvel, k.angvel, | |
memSizeF4, cudaMemcpyHostToDevice); | |
+ cudaMemcpy( dev_angvel0, k.angvel, | |
+ memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_angacc, k.angacc, | |
memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_torque, k.torque, | |
t@@ -695,6 +703,8 @@ __host__ void DEM::startTime() | |
dev_angpos, | |
dev_acc, | |
dev_angacc, | |
+ dev_vel0, | |
+ dev_angvel0, | |
dev_xysum, | |
dev_gridParticleIndex); | |
cudaThreadSynchronize(); | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
t@@ -11,6 +11,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_… | |
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, … | |
Float4* dev_force, Float4* dev_torque, Float4* dev_a… | |
Float4* dev_acc, Float4* dev_angacc, | |
+ Float4* dev_vel0, Float4* dev_angvel0, | |
Float2* dev_xysum, | |
unsigned int* dev_gridParticleIndex) // Input: Sorte… | |
{ | |
t@@ -21,15 +22,17 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de… | |
// Copy data to temporary arrays to avoid any potential read-after-write, | |
// write-after-read, or write-after-write hazards. | |
unsigned int orig_idx = dev_gridParticleIndex[idx]; | |
- Float4 force = dev_force[orig_idx]; | |
- Float4 torque = dev_torque[orig_idx]; | |
- Float4 angpos = dev_angpos[orig_idx]; | |
- Float4 acc = dev_acc[orig_idx]; | |
- Float4 angacc = dev_angacc[orig_idx]; | |
- Float4 x = dev_x_sorted[idx]; | |
- Float4 vel = dev_vel_sorted[idx]; | |
- Float4 angvel = dev_angvel_sorted[idx]; | |
- Float radius = x.w; | |
+ Float4 force = dev_force[orig_idx]; | |
+ Float4 torque = dev_torque[orig_idx]; | |
+ Float4 angpos = dev_angpos[orig_idx]; | |
+ Float4 acc = dev_acc[orig_idx]; | |
+ Float4 angacc = dev_angacc[orig_idx]; | |
+ Float4 vel0 = dev_vel0[orig_idx]; | |
+ Float4 angvel0 = dev_angvel0[orig_idx]; | |
+ Float4 x = dev_x_sorted[idx]; | |
+ Float4 vel = dev_vel_sorted[idx]; | |
+ Float4 angvel = dev_angvel_sorted[idx]; | |
+ Float radius = x.w; | |
Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f); | |
t@@ -42,47 +45,6 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev… | |
// Particle mass | |
Float m = 4.0/3.0 * PI * radius*radius*radius * rho; | |
-#if 0 | |
- //// First-order Euler integration scheme /// | |
- // Update angular position | |
- angpos.x += angvel.x * dt; | |
- angpos.y += angvel.y * dt; | |
- angpos.z += angvel.z * dt; | |
- | |
- // Update position | |
- x.x += vel.x * dt; | |
- x.y += vel.y * dt; | |
- x.z += vel.z * dt; | |
-#else | |
- | |
- /// Second-order scheme based on Taylor expansion /// | |
- // Update angular position | |
- angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5; | |
- angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5; | |
- angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5; | |
- | |
- // Update position | |
- x.x += vel.x * dt + acc.x * dt*dt * 0.5; | |
- x.y += vel.y * dt + acc.y * dt*dt * 0.5; | |
- x.z += vel.z * dt + acc.z * dt*dt * 0.5; | |
-#endif | |
- | |
- // Update angular velocity | |
- angvel.x += angacc.x * dt; | |
- angvel.y += angacc.y * dt; | |
- angvel.z += angacc.z * dt; | |
- | |
- // Update linear velocity | |
- vel.x += acc.x * dt; | |
- vel.y += acc.y * dt; | |
- vel.z += acc.z * dt; | |
- | |
- // Add x-displacement for this time step to | |
- // sum of x-displacements | |
- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
- xysum.x += vel.x * dt; | |
- xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f; | |
- | |
// Update linear acceleration of particle | |
acc.x = force.x / m; | |
acc.y = force.y / m; | |
t@@ -132,17 +94,95 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* d… | |
x.x -= L.x; | |
} | |
+ | |
+ //// Half-step leapfrog Verlet integration scheme //// | |
+ // Update half-step linear velocities | |
+ vel0.x += acc.x * dt; | |
+ vel0.y += acc.y * dt; | |
+ vel0.z += acc.z * dt; | |
+ | |
+ // Update half-step angular velocities | |
+ angvel0.x += angacc.x * dt; | |
+ angvel0.y += angacc.y * dt; | |
+ angvel0.z += angacc.z * dt; | |
+ | |
+ // Update positions | |
+ x.x += vel0.x * dt; | |
+ x.y += vel0.y * dt; | |
+ x.z += vel0.z * dt; | |
+ | |
+ // Update angular positions | |
+ angpos.x += angvel0.x * dt; | |
+ angpos.y += angvel0.y * dt; | |
+ angpos.z += angvel0.z * dt; | |
+ | |
+ // Update full-step linear velocity | |
+ vel.x = vel0.x + 0.5 * acc.x * dt; | |
+ vel.y = vel0.y + 0.5 * acc.x * dt; | |
+ vel.z = vel0.z + 0.5 * acc.x * dt; | |
+ | |
+ // Update full-step angular velocity | |
+ angvel.x = angvel0.x + 0.5 * angacc.x * dt; | |
+ angvel.y = angvel0.y + 0.5 * angacc.x * dt; | |
+ angvel.z = angvel0.z + 0.5 * angacc.x * dt; | |
+ | |
+ /* | |
+ //// First-order Euler integration scheme /// | |
+ // Update angular position | |
+ angpos.x += angvel.x * dt; | |
+ angpos.y += angvel.y * dt; | |
+ angpos.z += angvel.z * dt; | |
+ | |
+ // Update position | |
+ x.x += vel.x * dt; | |
+ x.y += vel.y * dt; | |
+ x.z += vel.z * dt; | |
+ */ | |
+ | |
+ /* | |
+ /// Second-order scheme based on Taylor expansion /// | |
+ // Update angular position | |
+ angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5; | |
+ angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5; | |
+ angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5; | |
+ | |
+ // Update position | |
+ x.x += vel.x * dt + acc.x * dt*dt * 0.5; | |
+ x.y += vel.y * dt + acc.y * dt*dt * 0.5; | |
+ x.z += vel.z * dt + acc.z * dt*dt * 0.5; | |
+ */ | |
+ | |
+ /* | |
+ // Update angular velocity | |
+ angvel.x += angacc.x * dt; | |
+ angvel.y += angacc.y * dt; | |
+ angvel.z += angacc.z * dt; | |
+ | |
+ // Update linear velocity | |
+ vel.x += acc.x * dt; | |
+ vel.y += acc.y * dt; | |
+ vel.z += acc.z * dt; | |
+ */ | |
+ | |
+ // Add x-displacement for this time step to | |
+ // sum of x-displacements | |
+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
+ xysum.x += vel.x * dt; | |
+ xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f; | |
+ | |
// Hold threads for coalesced write | |
__syncthreads(); | |
// Store data in global memory at original, pre-sort positions | |
- dev_xysum[orig_idx] += xysum; | |
- dev_acc[orig_idx] = acc; | |
- dev_angacc[orig_idx] = angacc; | |
- dev_angvel[orig_idx] = angvel; | |
- dev_vel[orig_idx] = vel; | |
- dev_angpos[orig_idx] = angpos; | |
- dev_x[orig_idx] = x; | |
+ dev_xysum[orig_idx] += xysum; | |
+ dev_acc[orig_idx] = acc; | |
+ dev_angacc[orig_idx] = angacc; | |
+ dev_angvel[orig_idx] = angvel; | |
+ dev_angvel0[orig_idx] = angvel0; | |
+ dev_vel[orig_idx] = vel; | |
+ dev_vel0[orig_idx] = vel0; | |
+ dev_angpos[orig_idx] = angpos; | |
+ dev_x[orig_idx] = x; | |
} | |
} // End of integrate(...) | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -54,10 +54,12 @@ class DEM { | |
Float4 *dev_x; | |
Float2 *dev_xysum; | |
Float4 *dev_vel; | |
+ Float4 *dev_vel0; | |
Float4 *dev_acc; | |
Float4 *dev_force; | |
Float4 *dev_angpos; | |
Float4 *dev_angvel; | |
+ Float4 *dev_angvel0; | |
Float4 *dev_angacc; | |
Float4 *dev_torque; | |
unsigned int *dev_contacts; |