tAdded angular position information in datafiles and on device - sphere - GPU-b… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 223197848f6e1f56d5003dc84ac7d89623de97d0 | |
parent b742ff213fba24e811e63093723c2b8858c86fae | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 11 Oct 2012 09:28:24 +0200 | |
Added angular position information in datafiles and on device | |
Diffstat: | |
M python/sphere.py | 21 ++++++++++++++++++--- | |
M src/datatypes.h | 4 +++- | |
M src/device.cu | 13 ++++++++++--- | |
M src/file_io.cpp | 10 ++++++++++ | |
M src/integration.cuh | 10 +++++++++- | |
M src/main.cpp | 10 ++++++++++ | |
6 files changed, 60 insertions(+), 8 deletions(-) | |
--- | |
diff --git a/python/sphere.py b/python/sphere.py | |
t@@ -39,6 +39,7 @@ class Spherebin: | |
self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
self.force = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
self.torque = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
self.fixvel = numpy.zeros(self.np, dtype=numpy.float64) | |
self.xsum = numpy.zeros(self.np, dtype=numpy.float64) | |
self.radius = numpy.zeros(self.np, dtype=numpy.float64) | |
t@@ -112,6 +113,7 @@ class Spherebin: | |
self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape… | |
self.force = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape… | |
self.torque = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape… | |
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape… | |
self.fixvel = numpy.zeros(self.np, dtype=numpy.float64) | |
self.xsum = numpy.zeros(self.np, dtype=numpy.float64) | |
self.radius = numpy.zeros(self.np, dtype=numpy.float64) | |
t@@ -145,6 +147,7 @@ class Spherebin: | |
self.angvel[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.force[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
self.torque[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
+ self.angpos[j,i] = numpy.fromfile(fh, dtype=numpy.float64, count=1) | |
# Per-particle single-value parameters | |
for j in range(self.np): | |
t@@ -247,6 +250,7 @@ class Spherebin: | |
fh.write(self.angvel[j,i].astype(numpy.float64)) | |
fh.write(self.force[j,i].astype(numpy.float64)) | |
fh.write(self.torque[j,i].astype(numpy.float64)) | |
+ fh.write(self.angpos[j,i].astype(numpy.float64)) | |
# Per-particle single-value parameters | |
for j in range(self.np): | |
t@@ -549,6 +553,11 @@ class Spherebin: | |
z_adjust = 1.1 # Overheightening of grid. 1.0 = no overheightening | |
self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize) | |
self.L[2] = (z_max-z_min)*z_adjust | |
+ | |
+ # zero kinematics | |
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
# Initialize upper wall | |
self.wmode[0] = 0 | |
t@@ -575,6 +584,11 @@ class Spherebin: | |
self.num[2] = numpy.ceil((z_max-z_min)*z_adjust/cellsize) | |
self.L[2] = (z_max-z_min)*z_adjust | |
+ # zero kinematics | |
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ | |
# Initialize upper wall | |
self.wmode[0] = 1 | |
self.w_n[0,2] = -1.0 | |
t@@ -606,9 +620,10 @@ class Spherebin: | |
# Initialize upper wall | |
self.w_devs[0] = deviatoric_stress | |
- # Zero kinematics | |
- self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
- self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ # zero kinematics | |
+ self.vel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angvel = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
+ self.angpos = numpy.zeros(self.np*self.nd, dtype=numpy.float64).reshape(s… | |
fixheight = 2*cellsize | |
#fixheight = cellsize | |
diff --git a/src/datatypes.h b/src/datatypes.h | |
t@@ -144,7 +144,8 @@ struct Params { | |
int fwritebin(char *target, Particles *p, | |
Float4 *host_x, Float4 *host_vel, | |
Float4 *host_angvel, Float4 *host_force, | |
- Float4 *host_torque, uint4 *host_bonds, | |
+ Float4 *host_torque, Float4 *host_angpos, | |
+ uint4 *host_bonds, | |
Grid *grid, Time *time, Params *params, | |
Float4 *host_w_nx, Float4 *host_w_mvfd); | |
t@@ -160,6 +161,7 @@ void gpuMain(Float4* host_x, | |
Float4* host_angacc, | |
Float4* host_force, | |
Float4* host_torque, | |
+ Float4* host_angpos, | |
uint4* host_bonds, | |
Particles* p, Grid* grid, | |
Time* time, Params* params, | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -161,6 +161,7 @@ __host__ void gpuMain(Float4* host_x, | |
Float4* host_angacc, | |
Float4* host_force, | |
Float4* host_torque, | |
+ Float4* host_angpos, | |
uint4* host_bonds, | |
Particles* p, | |
Grid* grid, | |
t@@ -185,6 +186,7 @@ __host__ void gpuMain(Float4* host_x, | |
Float4* dev_angacc; // Particle angular acceleration | |
Float4* dev_force; // Sum of forces | |
Float4* dev_torque; // Sum of torques | |
+ Float4* dev_angpos; // Particle angular position | |
Float* dev_radius; // Particle radius | |
Float* dev_es_dot; // Current shear energy producion rate | |
Float* dev_ev_dot; // Current viscous energy producion rate | |
t@@ -240,6 +242,7 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMalloc((void**)&dev_angacc, memSizeF4); | |
cudaMalloc((void**)&dev_force, memSizeF4); | |
cudaMalloc((void**)&dev_torque, memSizeF4); | |
+ cudaMalloc((void**)&dev_angpos, memSizeF4); | |
cudaMalloc((void**)&dev_radius, memSizeF); | |
cudaMalloc((void**)&dev_radius_sorted, memSizeF); | |
cudaMalloc((void**)&dev_es_dot, memSizeF); | |
t@@ -280,6 +283,7 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMemcpy(dev_angacc, host_angacc, memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_force, host_force, memSizeF4, cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_torque, host_torque, memSizeF4, cudaMemcpyHostToDevice); | |
+ cudaMemcpy(dev_angpos, host_angpos, memSizeF4, cudaMemcpyHostToDevice); | |
//cudaMemcpy(dev_bonds, host_bonds, sizeof(uint4) * p->np, cudaMemcpyHostToD… | |
cudaMemcpy(dev_radius, p->radius, memSizeF, cudaMemcpyHostToDevice); | |
cudaMemcpy(dev_es_dot, p->es_dot, memSizeF, cudaMemcpyHostToDevice); | |
t@@ -361,7 +365,7 @@ __host__ void gpuMain(Float4* host_x, | |
sprintf(file,"%s/output/%s.output0.bin", cwd, inputbin); | |
if (fwritebin(file, p, host_x, host_vel, | |
host_angvel, host_force, | |
- host_torque, | |
+ host_torque, host_angpos, | |
host_bonds, | |
grid, time, params, | |
host_w_nx, host_w_mvfd) != 0) { | |
t@@ -539,7 +543,7 @@ __host__ void gpuMain(Float4* host_x, | |
integrate<<<dimGrid, dimBlock>>>(dev_x_sorted, dev_vel_sorted, | |
dev_angvel_sorted, dev_radius_sorted, | |
dev_x, dev_vel, dev_angvel, | |
- dev_force, dev_torque, | |
+ dev_force, dev_torque, dev_angpos, | |
dev_gridParticleIndex); | |
cudaThreadSynchronize(); | |
t@@ -598,6 +602,7 @@ __host__ void gpuMain(Float4* host_x, | |
cudaMemcpy(host_angvel, dev_angvel, memSizeF4, cudaMemcpyDeviceToHost); | |
cudaMemcpy(host_force, dev_force, memSizeF4, cudaMemcpyDeviceToHost); | |
cudaMemcpy(host_torque, dev_torque, memSizeF4, cudaMemcpyDeviceToHost); | |
+ cudaMemcpy(host_angpos, dev_angpos, memSizeF4, cudaMemcpyDeviceToHost); | |
//cudaMemcpy(host_bonds, dev_bonds, sizeof(uint4) * p->np, cudaMemcpyDev… | |
cudaMemcpy(p->es_dot, dev_es_dot, memSizeF, cudaMemcpyDeviceToHost); | |
cudaMemcpy(p->ev_dot, dev_ev_dot, memSizeF, cudaMemcpyDeviceToHost); | |
t@@ -620,7 +625,8 @@ __host__ void gpuMain(Float4* host_x, | |
if (fwritebin(file, p, host_x, host_vel, | |
host_angvel, host_force, | |
- host_torque, host_bonds, | |
+ host_torque, host_angpos, | |
+ host_bonds, | |
grid, time, params, | |
host_w_nx, host_w_mvfd) != 0) { | |
cout << "\n Error during fwritebin() in main loop\n"; | |
t@@ -695,6 +701,7 @@ __host__ void gpuMain(Float4* host_x, | |
cudaFree(dev_angacc); | |
cudaFree(dev_force); | |
cudaFree(dev_torque); | |
+ cudaFree(dev_angpos); | |
cudaFree(dev_radius); | |
cudaFree(dev_radius_sorted); | |
cudaFree(dev_es_dot); | |
diff --git a/src/file_io.cpp b/src/file_io.cpp | |
t@@ -17,6 +17,7 @@ int fwritebin(char *target, | |
Float4 *host_angvel, | |
Float4 *host_force, | |
Float4 *host_torque, | |
+ Float4 *host_angpos, | |
uint4 *host_bonds, | |
Grid *grid, | |
Time *time, | |
t@@ -73,6 +74,7 @@ int fwritebin(char *target, | |
fwrite(&host_angvel[j].x, sizeof(Float), 1, fp); | |
fwrite(&host_force[j].x, sizeof(Float), 1, fp); | |
fwrite(&host_torque[j].x, sizeof(Float), 1, fp); | |
+ fwrite(&host_angpos[j].x, sizeof(Float), 1, fp); | |
// y-axis | |
fwrite(&host_x[j].y, sizeof(Float), 1, fp); | |
t@@ -80,6 +82,7 @@ int fwritebin(char *target, | |
fwrite(&host_angvel[j].y, sizeof(Float), 1, fp); | |
fwrite(&host_force[j].y, sizeof(Float), 1, fp); | |
fwrite(&host_torque[j].y, sizeof(Float), 1, fp); | |
+ fwrite(&host_angpos[j].y, sizeof(Float), 1, fp); | |
// z-axis | |
fwrite(&host_x[j].z, sizeof(Float), 1, fp); | |
t@@ -87,6 +90,7 @@ int fwritebin(char *target, | |
fwrite(&host_angvel[j].z, sizeof(Float), 1, fp); | |
fwrite(&host_force[j].z, sizeof(Float), 1, fp); | |
fwrite(&host_torque[j].z, sizeof(Float), 1, fp); | |
+ fwrite(&host_angpos[j].z, sizeof(Float), 1, fp); | |
} | |
// Individual particle values | |
t@@ -200,6 +204,8 @@ int fwritebin(char *target, | |
fwrite(&d, sizeof(d), 1, fp); | |
d = (double)host_torque[j].x; | |
fwrite(&d, sizeof(d), 1, fp); | |
+ d = (double)host_angpos[j].x; | |
+ fwrite(&d, sizeof(d), 1, fp); | |
// y-axis | |
d = (double)host_x[j].y; | |
t@@ -212,6 +218,8 @@ int fwritebin(char *target, | |
fwrite(&d, sizeof(d), 1, fp); | |
d = (double)host_torque[j].y; | |
fwrite(&d, sizeof(d), 1, fp); | |
+ d = (double)host_angpos[j].y; | |
+ fwrite(&d, sizeof(d), 1, fp); | |
// z-axis | |
d = (double)host_x[j].z; | |
t@@ -224,6 +232,8 @@ int fwritebin(char *target, | |
fwrite(&d, sizeof(d), 1, fp); | |
d = (double)host_torque[j].z; | |
fwrite(&d, sizeof(d), 1, fp); | |
+ d = (double)host_angpos[j].z; | |
+ fwrite(&d, sizeof(d), 1, fp); | |
} | |
// Individual particle values | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
t@@ -10,7 +10,7 @@ | |
__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Inp… | |
Float4* dev_angvel_sorted, Float* dev_radius_sorted,… | |
Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, … | |
- Float4* dev_force, Float4* dev_torque, // Input | |
+ Float4* dev_force, Float4* dev_torque, Float4* dev_a… | |
unsigned int* dev_gridParticleIndex) // Input: Sorte… | |
{ | |
unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
t@@ -22,6 +22,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev_… | |
unsigned int orig_idx = dev_gridParticleIndex[idx]; | |
Float4 force = dev_force[orig_idx]; | |
Float4 torque = dev_torque[orig_idx]; | |
+ Float4 angpos = dev_angpos[orig_idx]; | |
// Initialize acceleration vectors to zero | |
Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
t@@ -88,6 +89,12 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* dev… | |
//x.y += vel.y * dt; | |
//x.z += vel.z * dt; | |
+ // Update angular position. Second-order scheme based on Taylor expansion | |
+ // (greater accuracy than the first-order Euler's scheme) | |
+ angpos.x += angvel.x * dt + (angacc.x * dt*dt)/2.0f; | |
+ angpos.y += angvel.y * dt + (angacc.y * dt*dt)/2.0f; | |
+ angpos.z += angvel.z * dt + (angacc.z * dt*dt)/2.0f; | |
+ | |
// Update position. Second-order scheme based on Taylor expansion | |
// (greater accuracy than the first-order Euler's scheme) | |
x.x += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
t@@ -121,6 +128,7 @@ __global__ void integrate(Float4* dev_x_sorted, Float4* de… | |
// Store data in global memory at original, pre-sort positions | |
dev_angvel[orig_idx] = angvel; | |
dev_vel[orig_idx] = vel; | |
+ dev_angpos[orig_idx] = angpos; | |
dev_x[orig_idx] = x; | |
} | |
} // End of integrate(...) | |
diff --git a/src/main.cpp b/src/main.cpp | |
t@@ -190,6 +190,7 @@ int main(int argc, char *argv[]) | |
Float4 *host_angacc = new Float4[p.np]; // Particle angular acceleration ve… | |
Float4 *host_force = new Float4[p.np]; // Particle summed force | |
Float4 *host_torque = new Float4[p.np]; // Particle summed torque | |
+ Float4 *host_angpos = new Float4[p.np]; // Particle angular position | |
uint4 *host_bonds = new uint4[p.np]; // Bonds from particle [i] to two p… | |
t@@ -223,6 +224,9 @@ int main(int argc, char *argv[]) | |
exit(1); | |
if (fread(&host_torque[j].x, sizeof(Float), 1, fp) != 1) | |
exit(1); | |
+ if (fread(&host_angpos[j].x, sizeof(Float), 1, fp) != 1) | |
+ exit(1); | |
+ | |
if (fread(&host_x[j].y, sizeof(Float), 1, fp) != 1) | |
exit(1); // Return unsuccessful exit status | |
t@@ -234,6 +238,8 @@ int main(int argc, char *argv[]) | |
exit(1); | |
if (fread(&host_torque[j].y, sizeof(Float), 1, fp) != 1) | |
exit(1); | |
+ if (fread(&host_angpos[j].y, sizeof(Float), 1, fp) != 1) | |
+ exit(1); | |
if (fread(&host_x[j].z, sizeof(Float), 1, fp) != 1) | |
exit(1); // Return unsuccessful exit status | |
t@@ -245,6 +251,8 @@ int main(int argc, char *argv[]) | |
exit(1); | |
if (fread(&host_torque[j].z, sizeof(Float), 1, fp) != 1) | |
exit(1); | |
+ if (fread(&host_angpos[j].z, sizeof(Float), 1, fp) != 1) | |
+ exit(1); | |
} | |
for (j=0; j<p.np; ++j) { | |
t@@ -438,6 +446,7 @@ int main(int argc, char *argv[]) | |
host_angacc, | |
host_force, | |
host_torque, | |
+ host_angpos, | |
host_bonds, | |
&p, &grid, | |
&time, ¶ms, | |
t@@ -457,6 +466,7 @@ int main(int argc, char *argv[]) | |
delete[] host_angacc; | |
delete[] host_force; | |
delete[] host_torque; | |
+ delete[] host_angpos; | |
// Particle bonds | |
delete[] host_bonds; |