tRemoved redundant zeroing of delta_t - sphere - GPU-based 3D discrete element … | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit 005e46d1edbfa0e3313f7c5f538a2bf265200f51 | |
parent 223197848f6e1f56d5003dc84ac7d89623de97d0 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Thu, 11 Oct 2012 11:05:35 +0200 | |
Removed redundant zeroing of delta_t | |
Diffstat: | |
M raytracer/README | 29 ++++++++++++++--------------- | |
M raytracer/doc/raytracer.pdf | 0 | |
M raytracer/doc/raytracer.tex | 49 +++--------------------------… | |
D raytracer/io.cpp | 31 -----------------------------… | |
D raytracer/render_all_outputs_GPU_c… | 34 ---------------------------… | |
D raytracer/rt_kernel.h | 31 -----------------------------… | |
D raytracer/rt_kernel_cpu.cpp | 259 -----------------------------… | |
D raytracer/rt_kernel_cpu.h | 14 -------------- | |
M src/contactsearch.cuh | 8 +++++--- | |
9 files changed, 23 insertions(+), 432 deletions(-) | |
--- | |
diff --git a/raytracer/README b/raytracer/README | |
t@@ -1,37 +1,36 @@ | |
------------------------------------------------------------------ | |
-This is the README file for the SPHERE ray tracing module. | |
- Last revision: Oct. 17., 2011 | |
+ This is the README file for the ESys-particle ray tracing module | |
+ Last revision: May. 18., 2012 | |
------------------------------------------------------------------ | |
- BACKGROUND INFORMATION | |
+BACKGROUND INFORMATION | |
+====================== | |
-The ray tracing module was developed as an exam project in the | |
-course "Data Parallel Computing", held Q1 at the Department of | |
-Computer Science, Aarhus University, Denmark by Thomas Sangild | |
-Sørensen. | |
+The ray tracing module was initially developed for the Sphere DEM | |
+software: http://github.com/anders-dc/sphere | |
-It is used for visualizing simulation output from the SPHERE | |
-discrete element method software. | |
+This is a port for visualizing simulation output from the | |
+ESys-Particle discrete element method software. | |
-It is developed by Anders Damsgaard Christensen, [email protected], and | |
+It is developed by Anders Damsgaard Christensen, [email protected], and | |
is licenced under the GNU General Public Licence (GPL). | |
Documentation can be found in the file doc/raytracer.pdf | |
------------------------------------------------------------------- | |
- REQUIREMENTS | |
+REQUIREMENTS | |
+============ | |
For compiling the raytracer, the following software is required: | |
- GNU compiler collection | |
- CUDA developer drivers | |
- CUDA toolkit | |
When executing the compiled code, a CUDA compatible GPU is | |
-necessary. | |
+necessary, along with appropriate developer device drivers. | |
------------------------------------------------------------------- | |
- USAGE INSTRUCTIONS | |
+USAGE INSTRUCTIONS | |
+================== | |
The source code is built using GNU make with the command: | |
./make | |
diff --git a/raytracer/doc/raytracer.pdf b/raytracer/doc/raytracer.pdf | |
Binary files differ. | |
diff --git a/raytracer/doc/raytracer.tex b/raytracer/doc/raytracer.tex | |
t@@ -273,17 +273,17 @@ This appendix contains the following source code files: | |
\subsection{CUDA raytracing source code} | |
%\subsubsection{rt\_kernel.h} | |
-\lstinputlisting[caption={rt\_kernel.h},label={rt-kernel.h},numbers=left,break… | |
+\lstinputlisting[caption={rt-kernel.h},label={rt-kernel.h},numbers=left,breakl… | |
%\subsubsection{rt\_kernel.cu} | |
-\lstinputlisting[caption={rt\_kernel.cu},label={rt-kernel.cu},numbers=left,bre… | |
+\lstinputlisting[caption={rt-kernel.cu},label={rt-kernel.cu},numbers=left,brea… | |
\subsection{CPU raytracing source code} | |
%\subsubsection{rt\_kernel\_cpu.h} | |
-\lstinputlisting[caption={rt\_kernel\_cpu.h},label={rt-kernel-cpu.h},numbers=l… | |
+\lstinputlisting[caption={rt-kernel-cpu.h},label={rt-kernel-cpu.h},numbers=lef… | |
%\subsubsection{rt\_kernel\_cpu.cpp} | |
-\lstinputlisting[caption={rt\_kernel\_cpu.cpp},label={rt-kernel-cpu.cpp},numbe… | |
+\lstinputlisting[caption={rt-kernel-cpu.cpp},label={rt-kernel-cpu.cpp},numbers… | |
t@@ -295,47 +295,6 @@ This appendix contains the following source code files: | |
-\section{Bloopers} | |
-This section contains pictures of the unfinished raytracer in a malfunctioning… | |
- | |
-\begin{figure}[!h] | |
-\centering | |
-\includegraphics[width=0.30\textwidth]{bloopers/black.png} | |
-\caption{An end result encountered way too many times, e.g. after inadvertentl… | |
-\label{fig:black} | |
-\end{figure} | |
- | |
-\begin{figure}[!h] | |
-\centering | |
-\includegraphics[width=0.30\textwidth]{bloopers/twisted.png} | |
-\caption{Twisted appearance caused by wrong thread/block layout.} | |
-\label{fig:twisted} | |
-\end{figure} | |
- | |
-\begin{figure}[!h] | |
-\centering | |
-\includegraphics[width=0.30\textwidth]{bloopers/wrong_order.png} | |
-\caption{A problem with the distance determination caused the particles to be … | |
-\label{fig:wrongorder} | |
-\end{figure} | |
- | |
-\begin{figure}[!h] | |
-\centering | |
-\includegraphics[width=0.30\textwidth]{bloopers/colorful.png} | |
-\caption{Colorful result caused by forgetting to normalize the normal vector c… | |
-\label{fig:colorful} | |
-\end{figure} | |
- | |
-\begin{figure}[!h] | |
-\centering | |
-\includegraphics[width=0.30\textwidth]{bloopers/cpu_wrong_colorful.png} | |
-\caption{Another colorful result with wrong order of particles, cause by using… | |
-\label{fig:colorful_cpu} | |
-\end{figure} | |
- | |
- | |
- | |
- | |
% You can push biographies down or up by placing | |
diff --git a/raytracer/io.cpp b/raytracer/io.cpp | |
t@@ -1,31 +0,0 @@ | |
-// io.cpp | |
-// Contains functions to read SPHERE binary files, | |
-// and write images in the PPM format | |
- | |
-#include <iostream> | |
-#include <cstdio> | |
-#include "header.h" | |
- | |
- | |
-// Write image structure to PPM file | |
-int image_to_ppm(rgb* rgb_img, const char* filename, const unsigned int width,… | |
-{ | |
- FILE *fout = fopen(filename, "wb"); | |
- if (fout == NULL) { | |
- std::cout << "File error in img_write() while trying to writing " << filen… | |
- return 1; | |
- } | |
- | |
- fprintf(fout, "P6 %d %d 255\n", width, height); | |
- | |
- // Write pixel array to ppm image file | |
- for (unsigned int i=0; i<height*width; i++) { | |
- putc(rgb_img[i].r, fout); | |
- putc(rgb_img[i].g, fout); | |
- putc(rgb_img[i].b, fout); | |
- } | |
- | |
- fclose(fout); | |
- return 0; | |
-} | |
- | |
diff --git a/raytracer/render_all_outputs_GPU_clever.sh b/raytracer/render_all_… | |
t@@ -1,34 +0,0 @@ | |
-#!/bin/bash | |
- | |
-XRES=800 | |
-YRES=600 | |
- | |
-WORKHORSE=GPU | |
- | |
-echo "# Rendering PPM images" | |
-for F in ../output/*.bin | |
-do | |
- BASE=`basename $F` | |
- OUTFILE=$BASE.ppm.jpg | |
- if [ -e ../img_out/$OUTFILE ] | |
- then | |
- echo $OUTFILE exists. | |
- else | |
- ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev/n… | |
- if [ $? -ne 0 ] ; then | |
- echo "Error rendering $F, trying again..." | |
- ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure 4000 > /dev… | |
- fi | |
- fi | |
-done | |
- | |
-echo "# Converting PPM files to JPEG using ImageMagick in parallel" | |
-for F in ../img_out/*.ppm | |
-do | |
- (BASE=`basename $F`; convert $F $F.jpg &) | |
-done | |
- | |
-sleep 5 | |
-echo "# Removing temporary PPM files" | |
-rm ../img_out/*.ppm | |
- | |
diff --git a/raytracer/rt_kernel.h b/raytracer/rt_kernel.h | |
t@@ -1,31 +0,0 @@ | |
-#ifndef RT_KERNEL_H_ | |
-#define RT_KERNEL_H_ | |
- | |
-#include <vector_functions.h> | |
- | |
-// Constants | |
-__constant__ float4 const_u; | |
-__constant__ float4 const_v; | |
-__constant__ float4 const_w; | |
-__constant__ float4 const_eye; | |
-__constant__ float4 const_imgplane; | |
-__constant__ float const_d; | |
-__constant__ float4 const_light; | |
- | |
- | |
-// Host prototype functions | |
- | |
-extern "C" | |
-void cameraInit(float4 eye, float4 lookat, float imgw, float hw_ratio); | |
- | |
-extern "C" | |
-void checkForCudaErrors(const char* checkpoint_description); | |
- | |
-extern "C" | |
-int rt(float4* p, const unsigned int np, | |
- rgb* img, const unsigned int width, const unsigned int height, | |
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw, | |
- const int visualize, const float max_val, | |
- float* fixvel, float* pres, float* es_dot, float* es); | |
- | |
-#endif | |
diff --git a/raytracer/rt_kernel_cpu.cpp b/raytracer/rt_kernel_cpu.cpp | |
t@@ -1,259 +0,0 @@ | |
-#include <iostream> | |
-#include <cstdio> | |
-#include <cmath> | |
-#include <time.h> | |
-#include <cuda.h> | |
-#include <cutil_math.h> | |
-#include <string.h> | |
-#include "header.h" | |
-#include "rt_kernel_cpu.h" | |
- | |
-// Constants | |
-float3 constc_u; | |
-float3 constc_v; | |
-float3 constc_w; | |
-float3 constc_eye; | |
-float4 constc_imgplane; | |
-float constc_d; | |
-float3 constc_light; | |
- | |
-__inline__ float3 f4_to_f3(float4 in) | |
-{ | |
- return make_float3(in.x, in.y, in.z); | |
-} | |
- | |
-__inline__ float4 f3_to_f4(float3 in) | |
-{ | |
- return make_float4(in.x, in.y, in.z, 0.0f); | |
-} | |
- | |
-__inline__ float lengthf3(float3 in) | |
-{ | |
- return sqrt(in.x*in.x + in.y*in.y + in.z*in.z); | |
-} | |
- | |
-// Kernel for initializing image data | |
-void imageInit_cpu(unsigned char* _img, unsigned int pixels) | |
-{ | |
- for (unsigned int mempos=0; mempos<pixels; mempos++) { | |
- _img[mempos*4] = 0; // Red channel | |
- _img[mempos*4 + 1] = 0; // Green channel | |
- _img[mempos*4 + 2] = 0; // Blue channel | |
- } | |
-} | |
- | |
-// Calculate ray origins and directions | |
-void rayInitPerspective_cpu(float3* _ray_origo, | |
- float3* _ray_direction, | |
- float3 eye, | |
- unsigned int width, | |
- unsigned int height) | |
-{ | |
- unsigned int i; | |
- #pragma omp parallel for | |
- for (i=0; i<width; i++) { | |
- for (unsigned int j=0; j<height; j++) { | |
- | |
- unsigned int mempos = i + j*height; | |
- | |
- // Calculate pixel coordinates in image plane | |
- float p_u = constc_imgplane.x + (constc_imgplane.y - constc_imgplane.x) | |
- * (i + 0.5f) / width; | |
- float p_v = constc_imgplane.z + (constc_imgplane.w - constc_imgplane.z) | |
- * (j + 0.5f) / height; | |
- | |
- // Write ray origo and direction to global memory | |
- _ray_origo[mempos] = constc_eye; | |
- _ray_direction[mempos] = -constc_d*constc_w + p_u*constc_u + p_v*constc_… | |
- } | |
- } | |
-} | |
- | |
-// Check wether the pixel's viewing ray intersects with the spheres, | |
-// and shade the pixel correspondingly | |
-void rayIntersectSpheres_cpu(float3* _ray_origo, | |
- float3* _ray_direction, | |
- float4* _p, | |
- unsigned char* _img, | |
- unsigned int pixels, | |
- unsigned int np) | |
-{ | |
- unsigned int mempos; | |
- #pragma omp parallel for | |
- for (mempos=0; mempos<pixels; mempos++) { | |
- | |
- // Read ray data from global memory | |
- float3 e = _ray_origo[mempos]; | |
- float3 d = _ray_direction[mempos]; | |
- //float step = lengthf3(d); | |
- | |
- // Distance, in ray steps, between object and eye initialized with a large… | |
- float tdist = 1e10f; | |
- | |
- // Surface normal at closest sphere intersection | |
- float3 n; | |
- | |
- // Intersection point coordinates | |
- float3 p; | |
- | |
- // Iterate through all particles | |
- for (unsigned int i=0; i<np; i++) { | |
- | |
- // Read sphere coordinate and radius | |
- float3 c = f4_to_f3(_p[i]); | |
- float R = _p[i].w; | |
- | |
- // Calculate the discriminant: d = B^2 - 4AC | |
- float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2 | |
- - 4.0f*dot(d,d) // -4*A | |
- * (dot((e-c),(e-c)) - R*R); // C | |
- | |
- // If the determinant is positive, there are two solutions | |
- // One where the line enters the sphere, and one where it exits | |
- if (Delta > 0.0f) { | |
- | |
- // Calculate roots, Shirley 2009 p. 77 | |
- float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - do… | |
- * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d)); | |
- | |
- // Check wether intersection is closer than previous values | |
- if (fabs(t_minus) < tdist) { | |
- p = e + t_minus*d; | |
- tdist = fabs(t_minus); | |
- n = normalize(2.0f * (p - c)); // Surface normal | |
- } | |
- | |
- } // End of solution branch | |
- | |
- } // End of particle loop | |
- | |
- // Write pixel color | |
- if (tdist < 1e10) { | |
- | |
- // Lambertian shading parameters | |
- //float dotprod = fabs(dot(n, constc_light)); | |
- float dotprod = fmax(0.0f,dot(n, constc_light)); | |
- float I_d = 40.0f; // Light intensity | |
- float k_d = 5.0f; // Diffuse coefficient | |
- | |
- // Ambient shading | |
- float k_a = 10.0f; | |
- float I_a = 5.0f; | |
- | |
- // Write shading model values to pixel color channels | |
- _img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod | |
- + k_a * I_a)*0.48f); | |
- _img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod | |
- + k_a * I_a)*0.41f); | |
- _img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod | |
- + k_a * I_a)*0.27f); | |
- } | |
- } | |
-} | |
- | |
- | |
-void cameraInit_cpu(float3 eye, float3 lookat, float imgw, float hw_ratio) | |
-{ | |
- // Image dimensions in world space (l, r, b, t) | |
- float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.… | |
- | |
- // The view vector | |
- float3 view = eye - lookat; | |
- | |
- // Construct the camera view orthonormal base | |
- float3 v = make_float3(0.0f, 0.0f, 1.0f); // v: Pointing upward | |
- float3 w = -view/lengthf3(view); // w: Pointing backwards | |
- float3 u = cross(make_float3(v.x, v.y, v.z), make_float3(w.x, w.y, w.z)); //… | |
- | |
- // Focal length 20% of eye vector length | |
- float d = lengthf3(view)*0.8f; | |
- | |
- // Light direction (points towards light source) | |
- float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f)); | |
- | |
- std::cout << " Transfering camera values to constant memory\n"; | |
- | |
- constc_u = u; | |
- constc_v = v; | |
- constc_w = w; | |
- constc_eye = eye; | |
- constc_imgplane = imgplane; | |
- constc_d = d; | |
- constc_light = light; | |
- | |
- std::cout << "Rendering image..."; | |
-} | |
- | |
- | |
-// Wrapper for the rt algorithm | |
-int rt_cpu(float4* p, unsigned int np, | |
- rgb* img, unsigned int width, unsigned int height, | |
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw) { | |
- | |
- using std::cout; | |
- | |
- cout << "Initializing CPU raytracer:\n"; | |
- | |
- // Initialize GPU timestamp recorders | |
- float t1_go, t2_go, t1_stop, t2_stop; | |
- | |
- // Start timer 1 | |
- t1_go = clock(); | |
- | |
- // Allocate memory | |
- cout << " Allocating device memory\n"; | |
- static unsigned char *_img; // RGBw values in image | |
- static float3* _ray_origo; // Ray origo (x,y,z) | |
- static float3* _ray_direction; // Ray direction (x,y,z) | |
- _img = new unsigned char[width*height*4]; | |
- _ray_origo = new float3[width*height]; | |
- _ray_direction = new float3[width*height]; | |
- | |
- // Arrange thread/block structure | |
- unsigned int pixels = width*height; | |
- float hw_ratio = (float)height/(float)width; | |
- | |
- // Start timer 2 | |
- t2_go = clock(); | |
- | |
- // Initialize image to background color | |
- imageInit_cpu(_img, pixels); | |
- | |
- // Initialize camera | |
- cameraInit_cpu(make_float3(eye.x, eye.y, eye.z), | |
- make_float3(lookat.x, lookat.y, lookat.z), | |
- imgw, hw_ratio); | |
- | |
- // Construct rays for perspective projection | |
- rayInitPerspective_cpu( | |
- _ray_origo, _ray_direction, | |
- make_float3(eye.x, eye.y, eye.z), | |
- width, height); | |
- | |
- // Find closest intersection between rays and spheres | |
- rayIntersectSpheres_cpu( | |
- _ray_origo, _ray_direction, | |
- p, _img, pixels, np); | |
- | |
- // Stop timer 2 | |
- t2_stop = clock(); | |
- | |
- memcpy(img, _img, sizeof(unsigned char)*pixels*4); | |
- | |
- // Free dynamically allocated device memory | |
- delete [] _img; | |
- delete [] _ray_origo; | |
- delete [] _ray_direction; | |
- | |
- // Stop timer 1 | |
- t1_stop = clock(); | |
- | |
- // Report time spent | |
- cout << " done.\n" | |
- << " Time spent on entire CPU raytracing routine: " | |
- << (t1_stop-t1_go)/CLOCKS_PER_SEC*1000.0 << " ms\n"; | |
- cout << " - Functions: " << (t2_stop-t2_go)/CLOCKS_PER_SEC*1000.0 << " ms\n… | |
- | |
- // Return successfully | |
- return 0; | |
-} | |
diff --git a/raytracer/rt_kernel_cpu.h b/raytracer/rt_kernel_cpu.h | |
t@@ -1,14 +0,0 @@ | |
-#ifndef RT_KERNEL_CPU_H_ | |
-#define RT_KERNEL_CPU_H_ | |
- | |
-#include <vector_functions.h> | |
- | |
-// Host prototype functions | |
- | |
-void cameraInit(float3 eye, float3 lookat, float imgw, float hw_ratio); | |
- | |
-int rt_cpu(float4* p, const unsigned int np, | |
- rgb* img, const unsigned int width, const unsigned int height, | |
- f3 origo, f3 L, f3 eye, f3 lookat, float imgw); | |
- | |
-#endif | |
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh | |
t@@ -479,14 +479,16 @@ __global__ void interact(unsigned int* dev_gridParticleI… | |
} else { | |
__syncthreads(); | |
// Remove this contact (there is no particle with index=np) | |
- dev_contacts[mempos] = devC_np; | |
+ dev_contacts[mempos] = devC_np; | |
// Zero sum of shear displacement in this position | |
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
} | |
- } else { | |
+ | |
+ }/* else { // if dev_contacts[mempos] == devC_np | |
__syncthreads(); | |
+ // Zero sum of shear displacement in this position | |
dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
- } | |
+ } */ | |
} // Contact loop end | |