tRaytracer incorporated into sphere program, and is working - sphere - GPU-base… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit ede3f29203f93318355cd254be5204f7fe888bb6 | |
parent 490068513c59308c71cd7a008d70b9f532280e60 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Fri, 9 Nov 2012 21:23:30 +0100 | |
Raytracer incorporated into sphere program, and is working | |
Diffstat: | |
M src/Makefile | 1 + | |
A src/colorbar.h | 22 ++++++++++++++++++++++ | |
M src/device.cu | 5 ++++- | |
M src/main.cpp | 36 ++++++++++++++++++++++++-----… | |
A src/raytracer.cuh | 475 +++++++++++++++++++++++++++++… | |
M src/sphere.cpp | 19 +++---------------- | |
M src/sphere.h | 8 ++++---- | |
7 files changed, 537 insertions(+), 29 deletions(-) | |
--- | |
diff --git a/src/Makefile b/src/Makefile | |
t@@ -66,6 +66,7 @@ INCLUDES+=-I. -I$(SDKPATH)/shared/inc -I$(SDKPATH)/inc -I$(C… | |
LDFLAGS+=-L$(CUDA_INSTALL_PATH)/lib | |
LDFLAGS+=-L$(SDKPATH)/../../shared/lib -L$(SDKPATH)/../lib | |
LDFLAGS+=-lcutil_x86_64 -lcuda -lcudart | |
+LDFLAGS+=-fopenmp | |
#all: $(CCFILES) $(CUFILES) $(EXECUTABLE) raytracer | |
all: $(CCFILES) $(CUFILES) $(EXECUTABLE) | |
diff --git a/src/colorbar.h b/src/colorbar.h | |
t@@ -0,0 +1,22 @@ | |
+#ifndef COLORBAR_H_ | |
+#define COLORBAR_H_ | |
+ | |
+// Functions that determine red-, green- and blue color components | |
+// in a blue-white-red colormap. Ratio should be between 0.0-1.0. | |
+ | |
+__inline__ __host__ __device__ float red(float ratio) | |
+{ | |
+ return fmin(1.0f, 0.209f*ratio*ratio*ratio - 2.49f*ratio*ratio + 3.0f*ratio … | |
+}; | |
+ | |
+__inline__ __host__ __device__ float green(float ratio) | |
+{ | |
+ return fmin(1.0f, -2.44f*ratio*ratio + 2.15f*ratio + 0.369f); | |
+}; | |
+ | |
+__inline__ __host__ __device__ float blue(float ratio) | |
+{ | |
+ return fmin(1.0f, -2.21f*ratio*ratio + 1.61f*ratio + 0.573f); | |
+}; | |
+ | |
+#endif | |
diff --git a/src/device.cu b/src/device.cu | |
t@@ -195,7 +195,7 @@ __host__ void DEM::transferToConstantDeviceMemory() | |
using std::cout; | |
if (verbose == 1) | |
- cout << "\n Transfering data to constant device memory: "; | |
+ cout << " Transfering data to constant device memory: "; | |
cudaMemcpyToSymbol("devC_nd", &nd, sizeof(nd)); | |
cudaMemcpyToSymbol("devC_np", &np, sizeof(np)); | |
t@@ -463,6 +463,9 @@ __host__ void DEM::startTime() | |
cudaThreadSynchronize(); | |
checkForCudaErrors("Start of startTime()"); | |
+ // Write initial data to output/<sid>.output00000.bin | |
+ writebin(("output/" + sid + ".output00000.bin").c_str()); | |
+ | |
// Model world variables | |
float tic, toc, filetimeclock, time_spent, dev_time_spent; | |
diff --git a/src/main.cpp b/src/main.cpp | |
t@@ -12,6 +12,7 @@ | |
// Including library files | |
#include <iostream> | |
#include <string> | |
+#include <cstdlib> | |
// Including user files | |
#include "constants.h" | |
t@@ -28,9 +29,11 @@ int main(const int argc, const char *argv[]) | |
{ | |
// Default values | |
int verbose = 1; | |
- int checkConstantVals = 0; | |
- int render = 0; | |
+ int checkVals = 1; | |
+ int render = 0; // whether to render an image | |
+ int method = 0; // visualization method | |
int nfiles = 0; // number of input files | |
+ float max_val = 0.0f; | |
// Process input parameters | |
int i; | |
t@@ -46,7 +49,11 @@ int main(const int argc, const char *argv[]) | |
<< "-V, --version\t\tprint version information and exit\n" | |
<< "-q, --quiet\t\tsuppress status messages to stdout\n" | |
<< "-r, --render\t\trender input files instead of simulating temporal … | |
- << "-cc, --check-constants\t\tcheck values in constant GPU memory" << … | |
+ << "-dcv, --dpnt-check-values\t\tdon't check values before running\n" | |
+ << "Raytracer (-r) specific options:\n" | |
+ << "-m <method> <maxval>, --method <method> <maxval>\tcolor visualizat… | |
+ << "\t\t\t\tpres, vel\n" | |
+ << std::endl; | |
return 0; // Exit with success | |
} | |
t@@ -71,8 +78,17 @@ int main(const int argc, const char *argv[]) | |
else if (argvi == "-r" || argvi == "--render") | |
render = 1; | |
- else if (argvi == "-cc" || argvi == "--check-constants") | |
- checkConstantVals = 1; | |
+ else if (argvi == "-dcv" || argvi == "--dont-check-values") | |
+ checkVals = 0; | |
+ | |
+ else if (argvi == "-m" || argvi == "--method") { | |
+ if (std::string(argv[i+1]) == "pres") | |
+ method = 1; | |
+ //max_val = std::strtof(argv[i+2]); | |
+ max_val = atof(argv[i+2]); | |
+ i += 2; // skip ahead | |
+ } | |
+ | |
// The rest of the values must be input binary files | |
else { | |
t@@ -81,10 +97,14 @@ int main(const int argc, const char *argv[]) | |
std::cout << argv[0] << ": processing input file: " << argvi << std::end… | |
// Create DEM class, read data from input binary, check values | |
- DEM dem(argvi, verbose, checkConstantVals, render); | |
+ DEM dem(argvi, verbose, checkVals); | |
+ | |
+ // Render image if requested | |
+ if (render == 1) | |
+ dem.render(method, max_val); | |
- // Start iterating through time, unless user chose to render image | |
- if (render == 0) | |
+ // Otherwise, start iterating through time | |
+ else | |
dem.startTime(); | |
} | |
} | |
diff --git a/src/raytracer.cuh b/src/raytracer.cuh | |
t@@ -0,0 +1,475 @@ | |
+#ifndef RAYTRACER_CUH_ | |
+#define RAYTRACER_CUH_ | |
+ | |
+#include "colorbar.h" | |
+ | |
+//#include "cuPrintf.cu" | |
+ | |
+// Template for discarding the last term in four-component vector structs | |
+__device__ __inline__ float3 f4_to_f3(float4 in) { | |
+ return make_float3(in.x, in.y, in.z); | |
+} | |
+ | |
+// Kernel for initializing image data | |
+__global__ void imageInit(unsigned char* dev_img, unsigned int pixels) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > pixels) | |
+ return; | |
+ | |
+ dev_img[mempos*4] = 255; // Red channel | |
+ dev_img[mempos*4 + 1] = 255; // Green channel | |
+ dev_img[mempos*4 + 2] = 255; // Blue channel | |
+} | |
+ | |
+// Calculate ray origins and directions | |
+__global__ void rayInitPerspective(float4* dev_ray_origo, | |
+ float4* dev_ray_direction, | |
+ float4 eye, | |
+ unsigned int width, | |
+ unsigned int height) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > width*height) | |
+ return; | |
+ | |
+ // Calculate 2D position from linear index | |
+ unsigned int i = mempos % width; | |
+ unsigned int j = (int)floor((float)mempos/width) % width; | |
+ | |
+ // Calculate pixel coordinates in image plane | |
+ float p_u = devC_imgplane.x + (devC_imgplane.y - devC_imgplane.x) | |
+ * (i + 0.5f) / width; | |
+ float p_v = devC_imgplane.z + (devC_imgplane.w - devC_imgplane.z) | |
+ * (j + 0.5f) / height; | |
+ | |
+ // Write ray origo and direction to global memory | |
+ dev_ray_origo[mempos] = make_float4(devC_eye, 0.0f); | |
+ dev_ray_direction[mempos] = make_float4(-devC_d*devC_w + p_u*devC_u + p_v*de… | |
+} | |
+ | |
+// Check wether the pixel's viewing ray intersects with the spheres, | |
+// and shade the pixel correspondingly | |
+__global__ void rayIntersectSpheres(float4* dev_ray_origo, | |
+ float4* dev_ray_direction, | |
+ Float4* dev_x, | |
+ unsigned char* dev_img) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > devC_pixels) | |
+ return; | |
+ | |
+ // Read ray data from global memory | |
+ float3 e = f4_to_f3(dev_ray_origo[mempos]); | |
+ float3 d = f4_to_f3(dev_ray_direction[mempos]); | |
+ //float step = length(d); | |
+ | |
+ // Distance, in ray steps, between object and eye initialized with a large v… | |
+ float tdist = 1e10f; | |
+ | |
+ // Surface normal at closest sphere intersection | |
+ float3 n; | |
+ | |
+ // Intersection point coordinates | |
+ float3 p; | |
+ | |
+ //cuPrintf("mepos %d\n", mempos); | |
+ | |
+ // Iterate through all particles | |
+ for (unsigned int i=0; i<devC_np; ++i) { | |
+ | |
+ // Read sphere coordinate and radius | |
+ Float4 x = dev_x[i]; | |
+ float3 c = make_float3(x.x, x.y, x.z); | |
+ float R = x.w; | |
+ | |
+ //cuPrintf("particle %d at: %f, %f, %f, radius: %f\n", i, c.x, c.y, c.z, R… | |
+ | |
+ // Calculate the discriminant: d = B^2 - 4AC | |
+ float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2 | |
+ - 4.0f*dot(d,d) // -4*A | |
+ * (dot((e-c),(e-c)) - R*R); // C | |
+ | |
+ // If the determinant is positive, there are two solutions | |
+ // One where the line enters the sphere, and one where it exits | |
+ if (Delta > 0.0f) { | |
+ | |
+ // Calculate roots, Shirley 2009 p. 77 | |
+ float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(… | |
+ * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d)); | |
+ | |
+ // Check wether intersection is closer than previous values | |
+ if (fabs(t_minus) < tdist) { | |
+ p = e + t_minus*d; | |
+ tdist = fabs(t_minus); | |
+ n = normalize(2.0f * (p - c)); // Surface normal | |
+ } | |
+ | |
+ } // End of solution branch | |
+ | |
+ } // End of particle loop | |
+ | |
+ // Write pixel color | |
+ if (tdist < 1e10f) { | |
+ | |
+ // Lambertian shading parameters | |
+ float dotprod = fmax(0.0f,dot(n, devC_light)); | |
+ float I_d = 40.0f; // Light intensity | |
+ float k_d = 5.0f; // Diffuse coefficient | |
+ | |
+ // Ambient shading | |
+ float k_a = 10.0f; | |
+ float I_a = 5.0f; // 5.0 for black background | |
+ | |
+ // Write shading model values to pixel color channels | |
+ dev_img[mempos*4] = | |
+ (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.48f); | |
+ dev_img[mempos*4 + 1] = | |
+ (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.41f); | |
+ dev_img[mempos*4 + 2] = | |
+ (unsigned char) ((k_d * I_d * dotprod + k_a * I_a)*0.27f); | |
+ | |
+ } | |
+} | |
+ | |
+// Check wether the pixel's viewing ray intersects with the spheres, | |
+// and shade the pixel correspondingly using a colormap | |
+__global__ void rayIntersectSpheresColormap(float4* dev_ray_origo, | |
+ float4* dev_ray_direction, | |
+ Float4* dev_x, | |
+ Float4* dev_vel, | |
+ Float* dev_linarr, | |
+ float max_val, | |
+ unsigned char* dev_img) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > devC_pixels) | |
+ return; | |
+ | |
+ // Read ray data from global memory | |
+ float3 e = f4_to_f3(dev_ray_origo[mempos]); | |
+ float3 d = f4_to_f3(dev_ray_direction[mempos]); | |
+ | |
+ // Distance, in ray steps, between object and eye initialized with a large v… | |
+ float tdist = 1e10f; | |
+ | |
+ // Surface normal at closest sphere intersection | |
+ float3 n; | |
+ | |
+ // Intersection point coordinates | |
+ float3 p; | |
+ | |
+ unsigned int hitidx; | |
+ | |
+ // Iterate through all particles | |
+ for (unsigned int i=0; i<devC_np; ++i) { | |
+ | |
+ // Read sphere coordinate and radius | |
+ Float4 x = dev_x[i]; | |
+ float3 c = make_float3(x.x, x.y, x.z); | |
+ float R = x.w; | |
+ | |
+ // Calculate the discriminant: d = B^2 - 4AC | |
+ float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2 | |
+ - 4.0f*dot(d,d) // -4*A | |
+ * (dot((e-c),(e-c)) - R*R); // C | |
+ | |
+ // If the determinant is positive, there are two solutions | |
+ // One where the line enters the sphere, and one where it exits | |
+ if (Delta > 0.0f) { | |
+ | |
+ // Calculate roots, Shirley 2009 p. 77 | |
+ float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(… | |
+ * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d)); | |
+ | |
+ // Check wether intersection is closer than previous values | |
+ if (fabs(t_minus) < tdist) { | |
+ p = e + t_minus*d; | |
+ tdist = fabs(t_minus); | |
+ n = normalize(2.0f * (p - c)); // Surface normal | |
+ hitidx = i; | |
+ } | |
+ | |
+ } // End of solution branch | |
+ | |
+ } // End of particle loop | |
+ | |
+ // Write pixel color | |
+ if (tdist < 1e10) { | |
+ | |
+ // Fetch particle data used for color | |
+ float ratio = dev_linarr[hitidx] / max_val; | |
+ float fixvel = dev_vel[hitidx].w; | |
+ | |
+ // Make sure the ratio doesn't exceed the 0.0-1.0 interval | |
+ if (ratio < 0.01f) | |
+ ratio = 0.01f; | |
+ if (ratio > 0.99f) | |
+ ratio = 0.99f; | |
+ | |
+ // Lambertian shading parameters | |
+ float dotprod = fmax(0.0f,dot(n, devC_light)); | |
+ float I_d = 40.0f; // Light intensity | |
+ float k_d = 5.0f; // Diffuse coefficient | |
+ | |
+ // Ambient shading | |
+ float k_a = 10.0f; | |
+ float I_a = 5.0f; | |
+ | |
+ float redv = red(ratio); | |
+ float greenv = green(ratio); | |
+ float bluev = blue(ratio); | |
+ | |
+ // Make particle dark grey if the horizontal velocity is fixed | |
+ if (fixvel > 0.f) { | |
+ redv = 0.5; | |
+ greenv = 0.5; | |
+ bluev = 0.5; | |
+ } | |
+ | |
+ // Write shading model values to pixel color channels | |
+ dev_img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*redv); | |
+ dev_img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*greenv); | |
+ dev_img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*bluev); | |
+ } | |
+} | |
+ | |
+ | |
+__host__ void DEM::cameraInit( | |
+ const float3 eye, | |
+ const float3 lookat, | |
+ const float imgw, | |
+ const float focalLength) | |
+{ | |
+ float hw_ratio = height/width; | |
+ | |
+ // Image dimensions in world space (l, r, b, t) | |
+ float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.… | |
+ | |
+ // The view vector | |
+ float3 view = eye - lookat; | |
+ | |
+ // Construct the camera view orthonormal base | |
+ float3 up = make_float3(0.0f, 0.0f, 1.0f); // Pointing upward along +z | |
+ float3 w = -view/length(view); // w: Pointing backwards | |
+ float3 u = cross(up, w) / length(cross(up, w)); | |
+ float3 v = cross(w, u); | |
+ | |
+ unsigned int pixels = width*height; | |
+ | |
+ // Focal length 20% of eye vector length | |
+ float d = length(view)*0.8f; | |
+ | |
+ // Light direction (points towards light source) | |
+ float3 light = normalize(-1.0f*eye*make_float3(1.0f, 0.2f, 0.6f)); | |
+ | |
+ if (verbose == 1) | |
+ std::cout << " Transfering camera values to constant memory: "; | |
+ | |
+ cudaMemcpyToSymbol("devC_u", &u, sizeof(u)); | |
+ cudaMemcpyToSymbol("devC_v", &v, sizeof(v)); | |
+ cudaMemcpyToSymbol("devC_w", &w, sizeof(w)); | |
+ cudaMemcpyToSymbol("devC_eye", &eye, sizeof(eye)); | |
+ cudaMemcpyToSymbol("devC_imgplane", &imgplane, sizeof(imgplane)); | |
+ cudaMemcpyToSymbol("devC_d", &d, sizeof(d)); | |
+ cudaMemcpyToSymbol("devC_light", &light, sizeof(light)); | |
+ cudaMemcpyToSymbol("devC_pixels", &pixels, sizeof(pixels)); | |
+ | |
+ if (verbose == 1) | |
+ std::cout << "Done" << std::endl; | |
+ checkForCudaErrors("During cameraInit"); | |
+} | |
+ | |
+// Allocate global device memory | |
+__host__ void DEM::rt_allocateGlobalDeviceMemory(void) | |
+{ | |
+ if (verbose == 1) | |
+ std::cout << " Allocating device memory: "; | |
+ cudaMalloc((void**)&dev_img, width*height*4*sizeof(unsigned char)); | |
+ cudaMalloc((void**)&dev_ray_origo, width*height*sizeof(float4)); | |
+ cudaMalloc((void**)&dev_ray_direction, width*height*sizeof(float4)); | |
+ if (verbose == 1) | |
+ std::cout << "Done" << std::endl; | |
+ checkForCudaErrors("During rt_allocateGlobalDeviceMemory()"); | |
+} | |
+ | |
+ | |
+ // Free dynamically allocated device memory | |
+__host__ void DEM::rt_freeGlobalDeviceMemory(void) | |
+{ | |
+ if (verbose == 1) | |
+ std::cout << " Freeing device memory: "; | |
+ cudaFree(dev_img); | |
+ cudaFree(dev_ray_origo); | |
+ cudaFree(dev_ray_direction); | |
+ if (verbose == 1) | |
+ std::cout << "Done" << std::endl; | |
+ checkForCudaErrors("During rt_freeGlobalDeviceMemory()"); | |
+} | |
+ | |
+// Transfer image data from device to host | |
+__host__ void DEM::rt_transferFromGlobalDeviceMemory(void) | |
+{ | |
+ if (verbose == 1) | |
+ std::cout << " Transfering image data: device -> host: "; | |
+ cudaMemcpy(img, dev_img, width*height*4*sizeof(unsigned char), cudaMemcpyDev… | |
+ if (verbose == 1) | |
+ std::cout << "Done" << std::endl; | |
+ checkForCudaErrors("During rt_transferFromGlobalDeviceMemory()"); | |
+} | |
+ | |
+// Find the max. spatial positions of the particles, and return these as a vec… | |
+__host__ float3 DEM::maxPos() | |
+{ | |
+ int i; | |
+ float3 shared_max = make_float3(0.0f, 0.0f, 0.0f); | |
+ | |
+#pragma omp parallel if(np > 100) | |
+ { | |
+ // Max. val per thread | |
+ float3 max = make_float3(0.0f, 0.0f, 0.0f); | |
+ | |
+#pragma omp for nowait | |
+ // Find max val. per thread | |
+ for (i = 0; i<np; ++i) { | |
+ max.x = std::max(max.x, (float)k.x[i].x); | |
+ max.y = std::max(max.y, (float)k.x[i].y); | |
+ max.z = std::max(max.z, (float)k.x[i].z); | |
+ } | |
+ | |
+ // Find total max, by comparing one thread with the | |
+ // shared result, one at a time | |
+#pragma omp critical | |
+ { | |
+ shared_max.x = std::max(shared_max.x, max.x); | |
+ shared_max.y = std::max(shared_max.y, max.y); | |
+ shared_max.z = std::max(shared_max.z, max.z); | |
+ } | |
+ } | |
+ | |
+ // Return final result | |
+ return shared_max; | |
+} | |
+ | |
+// Wrapper for the rt kernel | |
+__host__ void DEM::render( | |
+ const int method, | |
+ const float maxval, | |
+ const float focalLength, | |
+ const unsigned int img_width, | |
+ const unsigned int img_height) | |
+ /*float4* p, unsigned int np, | |
+ rgba* img, unsigned int width, unsigned int height, | |
+ f3 origo, f3 L, f3 eye, f3 lookat, float imgw, | |
+ int visualize, float max_val, | |
+ float* fixvel, | |
+ float* xsum, | |
+ float* pres, | |
+ float* es_dot, | |
+ float* es, | |
+ float* vel)*/ | |
+{ | |
+ // Namespace directives | |
+ using std::cout; | |
+ using std::cerr; | |
+ using std::endl; | |
+ | |
+ //cudaPrintfInit(); | |
+ | |
+ // Save image dimensions in class object | |
+ width = img_width; | |
+ height = img_height; | |
+ | |
+ // Allocate memory for output image | |
+ img = new rgba[height*width]; | |
+ rt_allocateGlobalDeviceMemory(); | |
+ | |
+ // Arrange thread/block structure | |
+ unsigned int pixels = width*height; | |
+ const unsigned int threadsPerBlock = 256; | |
+ const unsigned int blocksPerGrid = iDivUp(pixels, threadsPerBlock); | |
+ | |
+ // Initialize image to background color | |
+ imageInit<<< blocksPerGrid, threadsPerBlock >>>(dev_img, pixels); | |
+ | |
+ // Initialize camera values and transfer to constant memory | |
+ float imgw = grid.L[0]*1.35f; // Screen width in world coordinates | |
+ float3 lookat = maxPos() / 2.0f; // Look at the centre of the mean positions | |
+ float3 eye = make_float3(grid.L[0] * 0.5f, | |
+ grid.L[1] * -5.0f, | |
+ grid.L[2] * 0.5f); | |
+ cameraInit(eye, lookat, imgw, focalLength); | |
+ | |
+ // Construct rays for perspective projection | |
+ rayInitPerspective<<< blocksPerGrid, threadsPerBlock >>>( | |
+ dev_ray_origo, dev_ray_direction, | |
+ make_float4(eye.x, eye.y, eye.z, 0.0f), width, height); | |
+ cudaThreadSynchronize(); | |
+ | |
+ // Find closest intersection between rays and spheres | |
+ if (method == 1) { // Visualize pressure | |
+ if (verbose == 1) | |
+ cout << " Pressure color map range: [0, " << maxval << "] Pa\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ dev_ray_origo, dev_ray_direction, | |
+ dev_x, dev_vel, | |
+ dev_p, maxval, | |
+ dev_img); | |
+ | |
+ /*} else if (visualize == 2) { // es_dot visualization | |
+ if (verbose == 1) | |
+ cout << " Shear heat production rate color map range: [0, " << maxval <… | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, maxval, _img); | |
+ } else if (visualize == 3) { // es visualization | |
+ if (verbose == 1) | |
+ cout << " Total shear heat color map range: [0, " << maxval << "] J\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, maxval, _img); | |
+ } else if (visualize == 4) { // velocity visualization | |
+ if (verbose == 1) | |
+ cout << " Velocity color map range: [0, " << maxval << "] m/s\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, maxval, _img); | |
+ } else if (visualize == 5) { // xsum visualization | |
+ if (verbose == 1) | |
+ cout << " XSum color map range: [0, " << maxval << "] m\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, maxval, _img); */ | |
+ } else { // Normal visualization | |
+ rayIntersectSpheres<<< blocksPerGrid, threadsPerBlock >>>( | |
+ dev_ray_origo, dev_ray_direction, | |
+ dev_x, dev_img); | |
+ } | |
+ | |
+ // Make sure all threads are done before continuing CPU control sequence | |
+ cudaThreadSynchronize(); | |
+ | |
+ // Check for errors | |
+ checkForCudaErrors("CUDA error after kernel execution"); | |
+ | |
+ // Copy image data from global device memory to host memory | |
+ rt_transferFromGlobalDeviceMemory(); | |
+ | |
+ // Free dynamically allocated global device memory | |
+ rt_freeGlobalDeviceMemory(); | |
+ | |
+ //cudaPrintfDisplay(stdout, true); | |
+ | |
+ // Write image to PPM file | |
+ writePPM(("img_out/" + sid + ".ppm").c_str()); | |
+ | |
+} | |
+ | |
+#endif | |
diff --git a/src/sphere.cpp b/src/sphere.cpp | |
t@@ -2,6 +2,7 @@ | |
#include <string> | |
#include <cstdio> | |
#include <cstdlib> | |
+#include <cmath> | |
#include "typedefs.h" | |
#include "datatypes.h" | |
t@@ -12,8 +13,7 @@ | |
// and reports the values | |
DEM::DEM(const std::string inputbin, | |
const int verbosity, | |
- const int checkVals, | |
- const int render_img) | |
+ const int checkVals) | |
: verbose(verbosity) | |
{ | |
using std::cout; | |
t@@ -39,9 +39,6 @@ DEM::DEM(const std::string inputbin, | |
if (verbose == 1) | |
reportValues(); | |
- // Write initial data to output/<sid>.output00000.bin | |
- writebin(("output/" + sid + ".output00000.bin").c_str()); | |
- | |
// Initialize CUDA | |
initializeGPU(); | |
t@@ -55,16 +52,6 @@ DEM::DEM(const std::string inputbin, | |
// Transfer data from host to gpu device memory | |
transferToGlobalDeviceMemory(); | |
- // Render image using raytracer if requested | |
- if (render_img == 1) { | |
- float3 eye = make_float3( 2.5f * grid.L[0], | |
- -5.0f * grid.L[1], | |
- 0.5f * grid.L[2]); | |
- //float focalLength = 0.8f*grid.L[0]; | |
- //float3 eye = make_float3(0.0f, 0.0f, 0.0f); | |
- render(eye); | |
- } | |
- | |
} | |
// Destructor: Liberates dynamically allocated host memory | |
t@@ -116,7 +103,7 @@ void DEM::checkValues(void) | |
} | |
// Check that the current time | |
- if (time.current < time.total || time.current < 0.0) { | |
+ if (time.current > time.total || time.current < 0.0) { | |
cerr << "Error: time.current = " << time.current | |
<< " s, time.total = " << time.total << " s\n"; | |
exit(1); | |
diff --git a/src/sphere.h b/src/sphere.h | |
t@@ -116,6 +116,8 @@ class DEM { | |
void transferFromGlobalDeviceMemory(void); | |
void rt_transferFromGlobalDeviceMemory(void); | |
+ // Find and return the max. position of any particle in each dimension | |
+ float3 maxPos(void); | |
// Values and functions accessible from the outside | |
t@@ -124,8 +126,7 @@ class DEM { | |
// Constructor, some parameters with default values | |
DEM(std::string inputbin, | |
const int verbosity = 1, | |
- const int checkVals = 1, | |
- const int render = 0); | |
+ const int checkVals = 1); | |
// Destructor | |
~DEM(void); | |
t@@ -148,10 +149,9 @@ class DEM { | |
// Render particles using raytracing | |
void render(//const char *target, | |
- const float3 eye, | |
- const float focalLength = 1.0, | |
const int method = 1, | |
const float maxval = 1.0e3, | |
+ const float focalLength = 1.0, | |
const unsigned int img_width = 800, | |
const unsigned int img_height = 800); | |