tAdded half-step leapfrog Verlet integration scheme to walls - sphere - GPU-bas… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 45640191a4d4d51119d37baa4063ee631b342f89 | |
parent 5379ed77ad3917674c12f19a303e21aed69d8cd1 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 29 Nov 2012 10:00:57 +0100 | |
Added half-step leapfrog Verlet integration scheme to walls | |
Diffstat: | |
M src/device.cu | 10 +++++++++- | |
M src/integration.cuh | 16 ++++++++++++++-- | |
M src/sphere.h | 1 + | |
3 files changed, 24 insertions(+), 3 deletions(-) | |
--- | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -283,6 +283,7 @@ __host__ void DEM::allocateGlobalDeviceMemory(void) | |
cudaMalloc((void**)&dev_walls_nx, sizeof(Float4)*walls.nw); | |
cudaMalloc((void**)&dev_walls_mvfd, sizeof(Float4)*walls.nw); | |
cudaMalloc((void**)&dev_walls_force_pp, sizeof(Float)*walls.nw*np); | |
+ cudaMalloc((void**)&dev_walls_vel0, sizeof(Float)*walls.nw); | |
// dev_walls_force_partial allocated later | |
checkForCudaErrors("End of allocateGlobalDeviceMemory"); | |
t@@ -331,6 +332,7 @@ __host__ void DEM::freeGlobalDeviceMemory() | |
cudaFree(dev_walls_mvfd); | |
cudaFree(dev_walls_force_partial); | |
cudaFree(dev_walls_force_pp); | |
+ cudaFree(dev_walls_vel0); | |
checkForCudaErrors("During cudaFree calls"); | |
if (verbose == 1) | |
t@@ -399,6 +401,10 @@ __host__ void DEM::transferToGlobalDeviceMemory() | |
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
cudaMemcpy( dev_walls_mvfd, walls.mvfd, | |
sizeof(Float4)*walls.nw, cudaMemcpyHostToDevice); | |
+ for (int i = 0; i<walls.nw; ++i) { | |
+ cudaMemcpy( &dev_walls_vel0[i], &walls.mvfd[i].y, | |
+ sizeof(Float), cudaMemcpyHostToDevice); | |
+ } | |
checkForCudaErrors("End of transferToGlobalDeviceMemory"); | |
if (verbose == 1) | |
t@@ -736,10 +742,12 @@ __host__ void DEM::startTime() | |
if (PROFILING == 1) | |
startTimer(&kernel_tic); | |
if (walls.nw > 0) { | |
- integrateWalls<<< 1, walls.nw>>>(dev_walls_nx, | |
+ integrateWalls<<< 1, walls.nw>>>( | |
+ dev_walls_nx, | |
dev_walls_mvfd, | |
dev_walls_wmode, | |
dev_walls_force_partial, | |
+ dev_walls_vel0, | |
blocksPerGrid); | |
} | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
t@@ -225,6 +225,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx, | |
Float4* dev_walls_mvfd, | |
int* dev_walls_wmode, | |
Float* dev_walls_force_partial, | |
+ Float* dev_walls_vel0, | |
unsigned int blocksPerGrid) | |
{ | |
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
t@@ -236,6 +237,7 @@ __global__ void integrateWalls(Float4* dev_walls_nx, | |
Float4 w_nx = dev_walls_nx[idx]; | |
Float4 w_mvfd = dev_walls_mvfd[idx]; | |
int wmode = dev_walls_wmode[idx]; // Wall BC, 0: fixed, 1: devs, 2: v… | |
+ Float vel0 = dev_walls_vel0[idx]; | |
Float acc; | |
t@@ -263,20 +265,30 @@ __global__ void integrateWalls(Float4* dev_walls_nx, | |
acc = 0.0; | |
} | |
+ //// Half-step leapfrog Verlet integration scheme //// | |
+ | |
+ // Update half-step velocity | |
+ vel0 += acc * dt; | |
+ | |
// Update position. Second-order scheme based on Taylor expansion | |
- w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0; | |
+ //w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0; | |
+ | |
+ // Update position | |
+ w_nx.w += vel0 * dt; | |
// Update position. First-order Euler integration scheme | |
//w_nx.w += w_mvfd.y * dt; | |
// Update linear velocity | |
- w_mvfd.y += acc * dt; | |
+ //w_mvfd.y += acc * dt; | |
+ w_mvfd.y += vel0 + 0.5 * acc * dt; | |
//cuPrintf("\nwall %d, wmode = %d, force = %f, acc = %f\n", idx, wmode… | |
// Store data in global memory | |
dev_walls_nx[idx] = w_nx; | |
dev_walls_mvfd[idx] = w_mvfd; | |
+ dev_walls_vel0[idx] = vel0; | |
} | |
} // End of integrateWalls(...) | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -82,6 +82,7 @@ class DEM { | |
Float4 *dev_walls_mvfd; // Mass, velocity, force, dev. stress | |
Float *dev_walls_force_partial; // Pre-sum per wall | |
Float *dev_walls_force_pp; // Force per particle per wall | |
+ Float *dev_walls_vel0; // Half-step velocity | |
unsigned char *dev_img; | |
float4 *dev_ray_origo; // Ray data always single precision | |
float4 *dev_ray_direction; |