Introduction
Introduction Statistics Contact Development Disclaimer Help
tHalf-step leapfrog Verlet integration scheme added - sphere - GPU-based 3D dis…
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit a8f9bb4ed81e686acb8e1a51b4ba60fedcf03dae
parent 68987a0bcf02ed1b43919109acc000099694f787
Author: Anders Damsgaard Christensen <[email protected]>
Date: Tue, 13 Nov 2012 20:36:03 +0100
Half-step leapfrog Verlet integration scheme added
Diffstat:
M src/device.cu | 10 ++++++++++
M src/integration.cuh | 154 +++++++++++++++++++----------…
M src/sphere.h | 2 ++
3 files changed, 109 insertions(+), 57 deletions(-)
---
diff --git a/src/device.cu b/src/device.cu
t@@ -237,10 +237,12 @@ __host__ void DEM::allocateGlobalDeviceMemory(void)
cudaMalloc((void**)&dev_x, memSizeF4);
cudaMalloc((void**)&dev_xysum, memSizeF4);
cudaMalloc((void**)&dev_vel, memSizeF4);
+ cudaMalloc((void**)&dev_vel0, memSizeF4);
cudaMalloc((void**)&dev_acc, memSizeF4);
cudaMalloc((void**)&dev_force, memSizeF4);
cudaMalloc((void**)&dev_angpos, memSizeF4);
cudaMalloc((void**)&dev_angvel, memSizeF4);
+ cudaMalloc((void**)&dev_angvel0, memSizeF4);
cudaMalloc((void**)&dev_angacc, memSizeF4);
cudaMalloc((void**)&dev_torque, memSizeF4);
t@@ -296,10 +298,12 @@ __host__ void DEM::freeGlobalDeviceMemory()
cudaFree(dev_x);
cudaFree(dev_xysum);
cudaFree(dev_vel);
+ cudaFree(dev_vel0);
cudaFree(dev_acc);
cudaFree(dev_force);
cudaFree(dev_angpos);
cudaFree(dev_angvel);
+ cudaFree(dev_angvel0);
cudaFree(dev_angacc);
cudaFree(dev_torque);
t@@ -352,6 +356,8 @@ __host__ void DEM::transferToGlobalDeviceMemory()
sizeof(Float2)*np, cudaMemcpyHostToDevice);
cudaMemcpy( dev_vel, k.vel,
memSizeF4, cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_vel0, k.vel,
+ memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_acc, k.acc,
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_force, k.force,
t@@ -360,6 +366,8 @@ __host__ void DEM::transferToGlobalDeviceMemory()
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_angvel, k.angvel,
memSizeF4, cudaMemcpyHostToDevice);
+ cudaMemcpy( dev_angvel0, k.angvel,
+ memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_angacc, k.angacc,
memSizeF4, cudaMemcpyHostToDevice);
cudaMemcpy( dev_torque, k.torque,
t@@ -695,6 +703,8 @@ __host__ void DEM::startTime()
dev_angpos,
dev_acc,
dev_angacc,
+ dev_vel0,
+ dev_angvel0,
dev_xysum,
dev_gridParticleIndex);
cudaThreadSynchronize();
diff --git a/src/integration.cuh b/src/integration.cuh
t@@ -11,6 +11,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_…
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, …
Float4* dev_force, Float4* dev_torque, Float4* dev_a…
Float4* dev_acc, Float4* dev_angacc,
+ Float4* dev_vel0, Float4* dev_angvel0,
Float2* dev_xysum,
unsigned int* dev_gridParticleIndex) // Input: Sorte…
{
t@@ -21,15 +22,17 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de…
// Copy data to temporary arrays to avoid any potential read-after-write,
// write-after-read, or write-after-write hazards.
unsigned int orig_idx = dev_gridParticleIndex[idx];
- Float4 force = dev_force[orig_idx];
- Float4 torque = dev_torque[orig_idx];
- Float4 angpos = dev_angpos[orig_idx];
- Float4 acc = dev_acc[orig_idx];
- Float4 angacc = dev_angacc[orig_idx];
- Float4 x = dev_x_sorted[idx];
- Float4 vel = dev_vel_sorted[idx];
- Float4 angvel = dev_angvel_sorted[idx];
- Float radius = x.w;
+ Float4 force = dev_force[orig_idx];
+ Float4 torque = dev_torque[orig_idx];
+ Float4 angpos = dev_angpos[orig_idx];
+ Float4 acc = dev_acc[orig_idx];
+ Float4 angacc = dev_angacc[orig_idx];
+ Float4 vel0 = dev_vel0[orig_idx];
+ Float4 angvel0 = dev_angvel0[orig_idx];
+ Float4 x = dev_x_sorted[idx];
+ Float4 vel = dev_vel_sorted[idx];
+ Float4 angvel = dev_angvel_sorted[idx];
+ Float radius = x.w;
Float2 xysum = MAKE_FLOAT2(0.0f, 0.0f);
t@@ -42,47 +45,6 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev…
// Particle mass
Float m = 4.0/3.0 * PI * radius*radius*radius * rho;
-#if 0
- //// First-order Euler integration scheme ///
- // Update angular position
- angpos.x += angvel.x * dt;
- angpos.y += angvel.y * dt;
- angpos.z += angvel.z * dt;
-
- // Update position
- x.x += vel.x * dt;
- x.y += vel.y * dt;
- x.z += vel.z * dt;
-#else
-
- /// Second-order scheme based on Taylor expansion ///
- // Update angular position
- angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
- angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
- angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
-
- // Update position
- x.x += vel.x * dt + acc.x * dt*dt * 0.5;
- x.y += vel.y * dt + acc.y * dt*dt * 0.5;
- x.z += vel.z * dt + acc.z * dt*dt * 0.5;
-#endif
-
- // Update angular velocity
- angvel.x += angacc.x * dt;
- angvel.y += angacc.y * dt;
- angvel.z += angacc.z * dt;
-
- // Update linear velocity
- vel.x += acc.x * dt;
- vel.y += acc.y * dt;
- vel.z += acc.z * dt;
-
- // Add x-displacement for this time step to
- // sum of x-displacements
- //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
- xysum.x += vel.x * dt;
- xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
-
// Update linear acceleration of particle
acc.x = force.x / m;
acc.y = force.y / m;
t@@ -132,17 +94,95 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* d…
x.x -= L.x;
}
+
+ //// Half-step leapfrog Verlet integration scheme ////
+ // Update half-step linear velocities
+ vel0.x += acc.x * dt;
+ vel0.y += acc.y * dt;
+ vel0.z += acc.z * dt;
+
+ // Update half-step angular velocities
+ angvel0.x += angacc.x * dt;
+ angvel0.y += angacc.y * dt;
+ angvel0.z += angacc.z * dt;
+
+ // Update positions
+ x.x += vel0.x * dt;
+ x.y += vel0.y * dt;
+ x.z += vel0.z * dt;
+
+ // Update angular positions
+ angpos.x += angvel0.x * dt;
+ angpos.y += angvel0.y * dt;
+ angpos.z += angvel0.z * dt;
+
+ // Update full-step linear velocity
+ vel.x = vel0.x + 0.5 * acc.x * dt;
+ vel.y = vel0.y + 0.5 * acc.x * dt;
+ vel.z = vel0.z + 0.5 * acc.x * dt;
+
+ // Update full-step angular velocity
+ angvel.x = angvel0.x + 0.5 * angacc.x * dt;
+ angvel.y = angvel0.y + 0.5 * angacc.x * dt;
+ angvel.z = angvel0.z + 0.5 * angacc.x * dt;
+
+ /*
+ //// First-order Euler integration scheme ///
+ // Update angular position
+ angpos.x += angvel.x * dt;
+ angpos.y += angvel.y * dt;
+ angpos.z += angvel.z * dt;
+
+ // Update position
+ x.x += vel.x * dt;
+ x.y += vel.y * dt;
+ x.z += vel.z * dt;
+ */
+
+ /*
+ /// Second-order scheme based on Taylor expansion ///
+ // Update angular position
+ angpos.x += angvel.x * dt + angacc.x * dt*dt * 0.5;
+ angpos.y += angvel.y * dt + angacc.y * dt*dt * 0.5;
+ angpos.z += angvel.z * dt + angacc.z * dt*dt * 0.5;
+
+ // Update position
+ x.x += vel.x * dt + acc.x * dt*dt * 0.5;
+ x.y += vel.y * dt + acc.y * dt*dt * 0.5;
+ x.z += vel.z * dt + acc.z * dt*dt * 0.5;
+ */
+
+ /*
+ // Update angular velocity
+ angvel.x += angacc.x * dt;
+ angvel.y += angacc.y * dt;
+ angvel.z += angacc.z * dt;
+
+ // Update linear velocity
+ vel.x += acc.x * dt;
+ vel.y += acc.y * dt;
+ vel.z += acc.z * dt;
+ */
+
+ // Add x-displacement for this time step to
+ // sum of x-displacements
+ //x.w += vel.x * dt + (acc.x * dt*dt)/2.0f;
+ xysum.x += vel.x * dt;
+ xysum.y += vel.y * dt;// + (acc.y * dt*dt * 0.5f;
+
// Hold threads for coalesced write
__syncthreads();
// Store data in global memory at original, pre-sort positions
- dev_xysum[orig_idx] += xysum;
- dev_acc[orig_idx] = acc;
- dev_angacc[orig_idx] = angacc;
- dev_angvel[orig_idx] = angvel;
- dev_vel[orig_idx] = vel;
- dev_angpos[orig_idx] = angpos;
- dev_x[orig_idx] = x;
+ dev_xysum[orig_idx] += xysum;
+ dev_acc[orig_idx] = acc;
+ dev_angacc[orig_idx] = angacc;
+ dev_angvel[orig_idx] = angvel;
+ dev_angvel0[orig_idx] = angvel0;
+ dev_vel[orig_idx] = vel;
+ dev_vel0[orig_idx] = vel0;
+ dev_angpos[orig_idx] = angpos;
+ dev_x[orig_idx] = x;
}
} // End of integrate(...)
diff --git a/src/sphere.h b/src/sphere.h
t@@ -54,10 +54,12 @@ class DEM {
Float4 *dev_x;
Float2 *dev_xysum;
Float4 *dev_vel;
+ Float4 *dev_vel0;
Float4 *dev_acc;
Float4 *dev_force;
Float4 *dev_angpos;
Float4 *dev_angvel;
+ Float4 *dev_angvel0;
Float4 *dev_angacc;
Float4 *dev_torque;
unsigned int *dev_contacts;
You are viewing proxied material from mx1.adamsgaard.dk. The copyright of proxied material belongs to its original authors. Any comments or complaints in relation to proxied material should be directed to the original authors of the content concerned. Please see the disclaimer for more details.