tMajor revision: Double precision, kernels split into separate sources - sphere… | |
git clone git://src.adamsgaard.dk/sphere | |
Log | |
Files | |
Refs | |
LICENSE | |
--- | |
commit cd41737e0b69a87e1dbca387031c53793b51ab56 | |
parent d818c179c8ce377aa14d4a4398ff6e6f9907fe75 | |
Author: Anders Damsgaard <[email protected]> | |
Date: Mon, 20 Aug 2012 16:36:05 +0200 | |
Major revision: Double precision, kernels split into separate sources | |
Diffstat: | |
A img_out/about.txt | 2 ++ | |
A input/1e3-test-shear.bin | 0 | |
D mfiles/freadbin.m | 118 -----------------------------… | |
D mfiles/fwritebin.m | 98 -----------------------------… | |
D mfiles/initsetup.m | 180 -----------------------------… | |
D mfiles/plotspheres.m | 99 -----------------------------… | |
D mfiles/status.m | 11 ----------- | |
D mfiles/useoutput.m | 112 -----------------------------… | |
D mfiles/visualize.m | 379 -----------------------------… | |
A python/elastic_collision.py | 46 +++++++++++++++++++++++++++++… | |
A python/inelastic_collision.py | 49 +++++++++++++++++++++++++++++… | |
A raytracer/colorbar.h | 22 ++++++++++++++++++++++ | |
A raytracer/render_all_outputs_CPU.sh | 27 +++++++++++++++++++++++++++ | |
A raytracer/render_all_outputs_GPU.sh | 29 +++++++++++++++++++++++++++++ | |
A raytracer/rt-kernel.cu | 442 +++++++++++++++++++++++++++++… | |
A raytracer/rt_GPU_init_pres.sh | 36 +++++++++++++++++++++++++++++… | |
A raytracer/rt_GPU_pres.sh | 36 +++++++++++++++++++++++++++++… | |
A raytracer/rt_GPU_tall_pres.sh | 36 +++++++++++++++++++++++++++++… | |
A src/cohesion.cuh | 126 +++++++++++++++++++++++++++++… | |
A src/contactmodels.cuh | 411 ++++++++++++++++++++++++++++++ | |
A src/contactsearch.cuh | 625 +++++++++++++++++++++++++++++… | |
A src/integration.cuh | 207 ++++++++++++++++++++++++++++++ | |
A src/sorting.cuh | 127 +++++++++++++++++++++++++++++… | |
A src/vector_arithmetic.h | 994 +++++++++++++++++++++++++++++… | |
24 files changed, 3215 insertions(+), 997 deletions(-) | |
--- | |
diff --git a/img_out/about.txt b/img_out/about.txt | |
t@@ -0,0 +1,2 @@ | |
+This folder will contain images rendered from ../output binaries, and | |
+graphs plotted with ../python/sphere.py. | |
diff --git a/input/1e3-test-shear.bin b/input/1e3-test-shear.bin | |
Binary files differ. | |
diff --git a/mfiles/freadbin.m b/mfiles/freadbin.m | |
t@@ -1,118 +0,0 @@ | |
-function [p, grids, time, params, walls] = freadbin(path, fn) | |
- %path = '../output/'; %Target directory | |
- %fn = 'output0.bin'; %Target binary | |
- | |
- %Open binary file for reading with little endian ordering and unicode char… | |
- %fid = fopen([path fn],'rb','ieee-le','UTF-8'); | |
- fid = fopen([path fn], 'rb', 'ieee-le'); | |
- | |
- % Read the number of dimensions and particles | |
- grids.nd = fread(fid, 1, 'int'); | |
- p.np = fread(fid, 1, 'uint32'); | |
- | |
- % Read the time variables | |
- time.dt = fread(fid, 1, 'float'); | |
- time.current = fread(fid, 1, 'double'); | |
- time.total = fread(fid, 1, 'double'); | |
- time.file_dt = fread(fid, 1, 'float'); | |
- time.step_count = fread(fid, 1, 'uint32'); | |
- | |
- %Initiate variables for faster execution | |
- p.x = zeros(p.np, grids.nd); % Coordinate vector | |
- p.vel = zeros(p.np, grids.nd); % Velocity vector | |
- p.angvel = zeros(p.np, grids.nd); % Acceleration vector | |
- p.force = zeros(p.np, grids.nd); % Linear force vector | |
- p.torque = zeros(p.np, grids.nd); % Rotational torque vector | |
- p.fixvel = zeros(p.np, 1); % Fix horizontal particle veloci… | |
- p.xsum = zeros(p.np, 1); % Total particle displacement al… | |
- p.radius = zeros(p.np, 1); % Particle radius | |
- p.rho = zeros(p.np, 1); % Density | |
- p.k_n = zeros(p.np, 1); % Normal stiffness | |
- p.k_s = zeros(p.np, 1); % Shear stiffness | |
- p.k_r = zeros(p.np, 1); % Rolling stiffness | |
- p.gamma_s = zeros(p.np, 1); % Shear viscosity | |
- p.gamma_r = zeros(p.np, 1); % Rolling viscosity | |
- p.mu_s = zeros(p.np, 1); % Inter-particle contact static shear f… | |
- p.mu_d = zeros(p.np, 1); % Inter-particle contact dynamic shear … | |
- p.mu_r = zeros(p.np, 1); % Inter-particle contact rolling fricti… | |
- p.C = zeros(p.np, 1); % Inter-particle cohesion | |
- p.E = zeros(p.np, 1); % Young's modulus | |
- p.K = zeros(p.np, 1); % Bulk modulus | |
- p.nu = zeros(p.np, 1); % Poisson's ratio | |
- p.es_dot = zeros(p.np, 1); % Current shear energy dissipation | |
- p.es = zeros(p.np, 1); % Total shear energy dissipation | |
- p.p = zeros(p.np, 1); % Pressure on particle | |
- p.bonds = zeros(p.np, 2); % Inter-particle bonds | |
- | |
- % Read remaining data from MATLAB binary | |
- grids.origo = fread(fid, [1, grids.nd], 'float'); | |
- grids.L = fread(fid, [1, grids.nd], 'float'); | |
- grids.num = fread(fid, [1, grids.nd], 'uint32'); | |
- | |
- for j=1:p.np | |
- %p.id = fread(fid, 1, 'uint32'); | |
- | |
- for i=1:grids.nd %Coordinates, velocity, acceleration, pre-velocity | |
- p.x(j,i) = fread(fid, 1, 'float'); | |
- p.vel(j,i) = fread(fid, 1, 'float'); | |
- p.angvel(j,i) = fread(fid, 1, 'float'); | |
- p.force(j,i) = fread(fid, 1, 'float'); | |
- p.torque(j,i) = fread(fid, 1, 'float'); | |
- end | |
- end | |
- | |
- for j=1:p.np %Parameters with one value per particle | |
- p.fixvel(j) = fread(fid, 1, 'float'); | |
- p.xsum(j) = fread(fid, 1, 'float'); | |
- p.radius(j) = fread(fid, 1, 'float'); | |
- p.rho(j) = fread(fid, 1, 'float'); | |
- p.k_n(j) = fread(fid, 1, 'float'); | |
- p.k_s(j) = fread(fid, 1, 'float'); | |
- p.k_r(j) = fread(fid, 1, 'float'); | |
- p.gamma_s(j) = fread(fid, 1, 'float'); | |
- p.gamma_r(j) = fread(fid, 1, 'float'); | |
- p.mu_s(j) = fread(fid, 1, 'float'); | |
- p.mu_d(j) = fread(fid, 1, 'float'); | |
- p.mu_r(j) = fread(fid, 1, 'float'); | |
- p.C(j) = fread(fid, 1, 'float'); | |
- p.E(j) = fread(fid, 1, 'float'); | |
- p.K(j) = fread(fid, 1, 'float'); | |
- p.nu(j) = fread(fid, 1, 'float'); | |
- p.es_dot(j) = fread(fid, 1, 'float'); | |
- p.es(j) = fread(fid, 1, 'float'); | |
- p.p(j) = fread(fid, 1, 'float'); | |
- end | |
- | |
- %Physical, constant parameters | |
- %params.global = fread(fid, 1, 'ubit1'); | |
- params.global = fread(fid, 1, 'int'); | |
- params.g = fread(fid, [1, grids.nd], 'float'); | |
- params.kappa = fread(fid, 1, 'float'); | |
- params.db = fread(fid, 1, 'float'); | |
- params.V_b = fread(fid, 1, 'float'); | |
- params.shearmodel = fread(fid, 1, 'uint32'); | |
- | |
- % Wall data | |
- walls.nw = fread(fid, 1, 'uint32'); | |
- for j=1:walls.nw | |
- for i=1:grids.nd | |
- walls.n(j,i) = fread(fid, 1, 'float'); | |
- end | |
- walls.x(j) = fread(fid, 1, 'float'); | |
- walls.m(j) = fread(fid, 1, 'float'); | |
- walls.vel(j) = fread(fid, 1, 'float'); | |
- walls.force(j) = fread(fid, 1, 'float'); | |
- walls.devs(j) = fread(fid, 1, 'float'); | |
- end | |
- params.periodic = fread(fid, 1, 'int'); | |
- | |
- for j=1:p.np | |
- p.bonds(j,1) = fread(fid, 1, 'uint32'); | |
- p.bonds(j,2) = fread(fid, 1, 'uint32'); | |
- p.bonds(j,3) = fread(fid, 1, 'uint32'); | |
- p.bonds(j,4) = fread(fid, 1, 'uint32'); | |
- end | |
- | |
- fclose(fid); | |
- | |
-end | |
diff --git a/mfiles/fwritebin.m b/mfiles/fwritebin.m | |
t@@ -1,98 +0,0 @@ | |
-function fwritebin(path, fn, p, grids, time, params, walls) | |
- %path = '../input/'; %Target directory | |
- %fn = '3dtest.bin'; %Target binary | |
- | |
- %Open binary file for writing, little endian ordering, unicode char encodi… | |
- %fid = fopen([path fn], 'wb','ieee-le','UTF-8'); | |
- fid = fopen([path fn], 'wb', 'ieee-le'); | |
- | |
- %fwrite(fid, grids.nd, 'ushort'); %Unsigned short int | |
- %fwrite(fid, p.np, 'ulong'); %Unsigned long int | |
- fwrite(fid, grids.nd, 'int'); | |
- fwrite(fid, p.np, 'uint32'); | |
- | |
- %Time parameters | |
- fwrite(fid, time.dt, 'float'); | |
- fwrite(fid, time.current, 'double'); | |
- fwrite(fid, time.total, 'double'); | |
- fwrite(fid, time.file_dt, 'float'); | |
- fwrite(fid, time.step_count, 'uint32'); | |
- | |
- for i=1:grids.nd %Grid origo | |
- fwrite(fid, grids.origo(i), 'float'); | |
- end | |
- | |
- for i=1:grids.nd %Grid dimensions | |
- fwrite(fid, grids.L(i), 'float'); | |
- end | |
- | |
- for i=1:grids.nd %Grid dimensions | |
- fwrite(fid, grids.num(i), 'uint32'); | |
- end | |
- | |
- for j=1:p.np | |
- for i=1:grids.nd %Coordinates, velocity, acceleration, pre-velocity | |
- fwrite(fid, p.x(j,i), 'float'); | |
- fwrite(fid, p.vel(j,i), 'float'); | |
- fwrite(fid, p.angvel(j,i), 'float'); | |
- fwrite(fid, p.force(j,i), 'float'); | |
- fwrite(fid, p.torque(j,i), 'float'); | |
- end | |
- end | |
- | |
- for j=1:p.np %Parameters with one value per particle | |
- fwrite(fid, p.fixvel(j), 'float'); | |
- fwrite(fid, p.xsum(j), 'float'); | |
- fwrite(fid, p.radius(j), 'float'); | |
- fwrite(fid, p.rho(j), 'float'); | |
- fwrite(fid, p.k_n(j), 'float'); | |
- fwrite(fid, p.k_s(j), 'float'); | |
- fwrite(fid, p.k_r(j), 'float'); | |
- fwrite(fid, p.gamma_s(j), 'float'); | |
- fwrite(fid, p.gamma_r(j), 'float'); | |
- fwrite(fid, p.mu_s(j), 'float'); | |
- fwrite(fid, p.mu_d(j), 'float'); | |
- fwrite(fid, p.mu_r(j), 'float'); | |
- fwrite(fid, p.C(j), 'float'); | |
- fwrite(fid, p.E(j), 'float'); | |
- fwrite(fid, p.K(j), 'float'); | |
- fwrite(fid, p.nu(j), 'float'); | |
- fwrite(fid, p.es_dot(j), 'float'); | |
- fwrite(fid, p.es(j), 'float'); | |
- fwrite(fid, p.p(j), 'float'); | |
- end | |
- | |
- %Physical, constant parameters | |
- %fwrite(fid, params.global, 'ubit1'); | |
- fwrite(fid, params.global, 'int'); | |
- for i=1:grids.nd | |
- fwrite(fid, params.g(i), 'float'); | |
- end | |
- fwrite(fid, params.kappa, 'float'); | |
- fwrite(fid, params.db, 'float'); | |
- fwrite(fid, params.V_b, 'float'); | |
- fwrite(fid, params.shearmodel, 'uint32'); | |
- | |
- fwrite(fid, walls.nw, 'uint32'); | |
- for j=1:walls.nw | |
- for i=1:grids.nd | |
- fwrite(fid, walls.n(j,i), 'float'); % Wall normal | |
- end | |
- fwrite(fid, walls.x(j), 'float'); % Wall pos. on axis parallel to wall… | |
- fwrite(fid, walls.m(j), 'float'); % Wall mass | |
- fwrite(fid, walls.vel(j), 'float'); % Wall vel. on axis parallel to wa… | |
- fwrite(fid, walls.force(j), 'float'); % Wall force on axis parallel to… | |
- fwrite(fid, walls.devs(j), 'float'); % Deviatoric stress on wall normal | |
- end | |
- fwrite(fid, params.periodic, 'int'); | |
- | |
- for j=1:p.np | |
- fwrite(fid, p.bonds(i,1), 'uint32'); | |
- fwrite(fid, p.bonds(i,2), 'uint32'); | |
- fwrite(fid, p.bonds(i,3), 'uint32'); | |
- fwrite(fid, p.bonds(i,4), 'uint32'); | |
- end | |
- | |
- fclose(fid); | |
- | |
-end | |
diff --git a/mfiles/initsetup.m b/mfiles/initsetup.m | |
t@@ -1,180 +0,0 @@ | |
- function initsetup(plotfigure) | |
-% initsetup() creates a model setup of particles in 3D with cubic | |
-% packing. Specify PSD and desired number of particles, and the | |
-% function will determine the model dimensions, and fill the space | |
-% with a number of particles from a specified particle size distribution. | |
-close all; | |
- | |
-% Simulation project name | |
-simulation_name = '1e3-init-visc'; | |
- | |
-% Plot particle assembly in MATLAB after initialization | |
-if exist('plotfigure','var') | |
- if plotfigure == 1 | |
- visualize = 1; | |
- else | |
- visualize = 0; | |
- end | |
-else | |
- visualize = 0; | |
-end | |
- | |
-% Physical, constant parameters | |
-params.g = [0.0, 0.0, -9.80665]; %standard gravity, by definition 9.80665 m/s^2 | |
-%params.g = 0.98; | |
- | |
-% No. of dimensions | |
-grids.nd = 3; | |
-grids.origo = [0 0 0]; %Coordinate system origo | |
- | |
-% Number of particles | |
-p.np = 1e3; | |
- | |
-% Create grid | |
-form = 4; % For a cubic grid, use 3. | |
- % For a higher grid, use 4 or more (for cubic end config. use 5). | |
- % For a flatter grid, use 1 or 2. | |
-grids.num(1) = ceil(nthroot(p.np, form)); % Particles in x direction | |
-grids.num(2) = grids.num(1); % Particles in y direction | |
-grids.num(3) = ceil(p.np/(grids.num(1)*grids.num(2))); % Particles in z direct… | |
- | |
-disp(['Grid dimensions: x=' num2str(grids.num(1)) ... | |
- ', y=' num2str(grids.num(2)) ... | |
- ', z=' num2str(grids.num(3))]) | |
- | |
-%grids.num = ceil(nthroot(p.np,grids.nd)) * ones(grids.nd, 1); %Square/cubic | |
-p.np = grids.num(1)*grids.num(2)*grids.num(3); | |
- | |
-% Particle size distribution | |
-p.psd = 'logn'; %PSD: logn or uni | |
- | |
-p.m = 440e-6; %Mean size | |
-p.v = p.m*0.00002; %Variance | |
- | |
-p.radius = zeros(p.np, 1); | |
-p.x = zeros(p.np, grids.nd); | |
- | |
-p.bonds = ones(p.np, 4) * p.np; % No bonds when the values equal the no. of p… | |
- | |
-if strcmp(p.psd,'logn') %Log-normal PSD. Note: Keep variance small. | |
- mu = log((p.m^2)/sqrt(p.v+p.m^2)); | |
- sigma = sqrt(log(p.v/(p.m^2)+1)); | |
- p.radius = lognrnd(mu,sigma,1,p.np); %Array of particle radii | |
-elseif strcmp(p.psd,'uni') %Uniform PSD between rmin and rmax | |
- rmin = p.m - p.v*1e5; | |
- rmax = p.m + p.v*1e5; | |
- %rmin = 0.1*dd; rmax = 0.4*dd; | |
- p.radius = (rmax-rmin)*rand(p.np,1)+rmin; %Array of particle radii | |
-end | |
- | |
-% Display PSD | |
-if visualize == 1 | |
- figure; | |
- hist(p.radius); | |
- title(['PSD: ' num2str(p.np) ' particles, m = ' num2str(p.m) ' m']); | |
-end | |
- | |
-% Friction angles | |
-ang_s = 30; % Angle of static shear resistance | |
-ang_d = 25; % Angle of dynamic shear resistance | |
-ang_r = 35; % Angle of rolling resistance | |
- | |
-% Other particle parameters | |
-p.vel = zeros(p.np, grids.nd); % Velocity vector | |
-p.angvel = zeros(p.np, grids.nd); % Angular velocity | |
-p.fixvel = zeros(p.np, 1); % 0: horizontal particle velocity free, 1… | |
-p.xsum = zeros(p.np, 1); % Total displacement along x-axis | |
-p.force = zeros(p.np, grids.nd); % Force vector | |
-p.torque = zeros(p.np, grids.nd); % Torque vector | |
- | |
-params.global = 1; % 1: Physical parameters global, 0: individual per particle | |
-p.rho = 3600*ones(p.np,1); % Density | |
-p.k_n = 4e5*ones(p.np,1); % Normal stiffness [N/m] | |
-p.k_s = p.k_n(:); % Shear stiffness [N/m] | |
-p.k_r = p.k_s(:).*(10); % Rolling stiffness | |
-params.shearmodel = 1; % Contact model. 1=frictional, viscou… | |
-p.gamma_s = p.k_n./1e3; % Shear viscosity [Ns/m] | |
-p.gamma_r = p.gamma_s(:); % Rolling viscosity [?] | |
-p.mu_s = tand(ang_s)*ones(p.np,1); % Inter-particle shear contact frict… | |
-p.mu_r = tand(ang_r)*ones(p.np,1); % Inter-particle rolling contact fri… | |
-p.C = 0*zeros(p.np,1); % Inter-particle cohesion | |
-p.E = 10e9*ones(p.np,1); % Young's modulus | |
-p.K = 38e9*ones(p.np,1); % Bulk modulus | |
-p.nu = 0.1*2.0*sqrt(min(4/3*pi.*p.radius(:).*p.radius(:).*p.radius(:).*p.… | |
-p.es_dot = zeros(p.np,1); % Current shear energy dissipation | |
-p.es = zeros(p.np,1); % Total shear energy dissipation | |
-p.p = zeros(p.np,1); % Pressure on particle | |
- | |
-% Parameters related to capillary bonds | |
-% Disable capillary cohesion by setting kappa to zero. | |
-enableCapillaryCohesion = 0; | |
-theta = 0.0; % Wettability (0: perfect) | |
-if (enableCapillaryCohesion == 1) | |
- params.kappa = 2*pi*p.gamma_s(1) * cos(theta); % Prefactor | |
- params.V_b = 1e-12; % Liquid volume at bond | |
-else | |
- params.kappa = 0; % Zero capillary force | |
- params.V_b = 0; % Zero liquid volume at bond | |
-end | |
-params.db = (1.0 + theta/2.0) * params.V_b^(1.0/3.0); % Debonding distance | |
- | |
-% Time parameters | |
-%time.dt = 0.2*min(sqrt((p.rho(:).*p.radius(:).^2)./p.K(:))); % Comput… | |
-%time.dt = time.dt*1e1; % Speedup factor | |
-time.dt = 0.17*sqrt(min(4/3*pi.*p.radius(:).*p.radius(:).*p.radius(:).… | |
-time.current = 0.0; | |
-time.total = 1.500+2.0*time.dt; % Total simulation time [s] | |
-time.file_dt = 0.0010; % Interval between output#.bin generation [s] | |
-time.step_count = 0; | |
- | |
-% Calculate particle coordinates | |
-% Grid unit length. Maximum particle diameter determines grid size | |
-GU = 2*max(p.radius)*1.40; % Forty percent margin | |
-grids.L = [GU*grids.num(1) GU*grids.num(2) GU*grids.num(3)]; | |
- | |
-% Particle coordinates by filling grid. | |
-x = linspace(GU/2, grids.L(1)-GU, grids.num(1)); | |
-y = linspace(GU/2, grids.L(2)-GU, grids.num(2)); | |
-z = linspace(GU/2, grids.L(3)-GU, grids.num(3)); | |
- | |
-[X Y Z] = meshgrid(x,y,z); | |
-X=X(:); Y=Y(:); Z=Z(:); | |
- | |
-p.x = [X Y Z]; | |
- | |
-%Particle positions randomly modified by + 10 percent of a grid unit | |
-p.x = p.x + (rand(p.np, grids.nd)*0.49*GU); | |
- | |
-% Walls with friction | |
-% Note that the world boundaries already act as fricionless boundaries | |
-% Upper wall | |
-wno = 1; | |
-walls.n(wno,:) = [0.0, 0.0, -1.0]; % Normal along negative z-axis | |
-walls.x(wno) = grids.L(3); % Positioned at upper boundary | |
-walls.m(wno) = p.rho(1)*p.np*pi*max(p.radius)*max(p.radius)*max(p.radius… | |
-walls.vel(wno) = 0.0; % Wall velocity | |
-walls.force(wno) = 0.0; % Wall force | |
-walls.devs(wno) = 0.0; % Deviatoric stress | |
-walls.nw = wno; | |
- | |
-% Define behavior of x- and y boundaries. | |
-% Uncomment only one! | |
-%params.periodic = 0; % Behave as frictional walls | |
-params.periodic = 1; % Behave as periodic boundaries | |
-%params.periodic = 2; % x: periodic, y: frictional walls | |
- | |
-% Write output binary | |
-fwritebin('../input/', [simulation_name '.bin'], p, grids, time, params, walls… | |
-disp('Writing of binary file complete.'); | |
- | |
-% Plot particles in bubble plot | |
-if visualize == 1 | |
- disp('Waiting for visualization.'); | |
- plotspheres(p, grids, 5, 0, 1); | |
-end | |
- | |
-disp(['Project "' simulation_name '" is now ready for processing with SPHERE.'… | |
-disp(['Call "' pwd '/sphere ' simulation_name '" from the system terminal ']); | |
-disp(['to initiate, and check progress here in MATLAB using "status(''' simula… | |
- | |
-end | |
diff --git a/mfiles/plotspheres.m b/mfiles/plotspheres.m | |
t@@ -1,98 +0,0 @@ | |
-function plotspheres(p, grids, n, quiver, visible) | |
- | |
-if exist('visible','var') | |
- if visible == 0 | |
- % Create a new invisible figure window | |
- figure('visible','off'); | |
- else | |
- % Create a new figure window | |
- figure('Renderer','OpenGL') | |
- end | |
-else | |
- % Create a new figure window | |
- figure('Renderer','OpenGL') | |
-end | |
- | |
-% Generate unit sphere, consisting of n-by-n faces (i.e. the resolution) | |
-if exist('n', 'var') | |
- [x,y,z] = sphere(n); | |
-else | |
- [x,y,z] = sphere(10); | |
-end | |
- | |
-% Iterate through particle data | |
-hold on | |
-for i=1:p.np | |
- spheresurf = surf(x*p.radius(i)+p.x(i,1), ... | |
- y*p.radius(i)+p.x(i,2), ... | |
- z*p.radius(i)+p.x(i,3)); | |
- set(spheresurf,'EdgeColor','none', ... | |
- 'FaceColor',[0.96 0.64 0.38], ... % RGB | |
- 'FaceLighting','phong', ... | |
- 'AmbientStrength',0.3, ... | |
- 'DiffuseStrength',0.8, ... | |
- 'SpecularStrength',0.9, ... | |
- 'SpecularExponent',25, ... | |
- 'BackFaceLighting','lit'); | |
-end | |
-camlight left; | |
-hidden off | |
- | |
-% Optional quiver3 plot (velocity vectors) | |
-if exist('quiver','var') | |
- if quiver == 1 | |
- quiver3(p.x(:,1), p.x(:,2) , p.x(:,3), ... | |
- p.vel(:,1), p.vel(:,2), p.vel(:,3), 3); | |
- end | |
-end | |
- | |
-% Draw walls from 8 vertices and connect these to 6 faces | |
-vertices = [grids.origo(1) grids.origo(2) grids.origo(3); ... % 1 | |
- grids.origo(1) grids.L(2) grids.origo(3); ... % 2 | |
- grids.L(1) grids.L(2) grids.origo(3); ... % 3 | |
- grids.L(1) grids.origo(2) grids.origo(3); ... % 4 | |
- grids.origo(1) grids.origo(2) grids.L(3); ... % 5 | |
- grids.origo(1) grids.L(2) grids.L(3); ... % 6 | |
- grids.L(1) grids.L(2) grids.L(3); ... % 7 | |
- grids.L(1) grids.origo(2) grids.L(3)]; % 8 | |
- | |
-faces = [1 2 3 4; ... % (observing along pos. y axis direction) | |
- 2 6 7 3; ... % | |
- 4 3 7 8; ... % | |
- 1 5 8 4; ... % | |
- 1 2 6 5; ... % | |
- 5 6 7 8]; % | |
- | |
-patch('Faces', faces, 'Vertices', vertices, ... | |
- 'FaceColor','none', 'EdgeColor','black','LineWidth',2); | |
- | |
-% View specifications | |
-%daspect([1 1 1]) | |
- | |
-view([grids.L(1), -2*grids.L(2), grids.L(3)]) | |
-grid on | |
-axis equal | |
-maxr = max(p.radius); | |
-axis([grids.origo(1)-maxr grids.L(1)+maxr ... | |
- grids.origo(2)-maxr grids.L(2)+maxr ... | |
- grids.origo(3)-maxr grids.L(3)+maxr]); | |
- | |
-light('Position', [grids.L(1), -grids.L(2), grids.L(3)]); | |
-material shiny; %shiny dull metal | |
-camlight(45,45); | |
-lighting gouraud; %flat gouraud phone none | |
- | |
-% Remove hidden lines from mesh plot (hidden on) | |
-hidden off | |
- | |
-% Labels | |
-title([num2str(p.np) ' particles']); | |
-xlabel('x [m]'); | |
-ylabel('y [m]'); | |
-zlabel('z [m]'); | |
- | |
-% Add delaunay triangulation | |
-%dt = DelaunayTri(p.x(:,1), p.x(:,2), p.x(:,3)); | |
-%tetramesh(dt,'FaceAlpha',0.02); | |
- | |
-end | |
-\ No newline at end of file | |
diff --git a/mfiles/status.m b/mfiles/status.m | |
t@@ -1,11 +0,0 @@ | |
-function lastfile = status(project) | |
- | |
-% status() writes status of current model run | |
- | |
-st = load(['../output/' project '.status.dat']); | |
- | |
-disp(['Status: time = ',num2str(st(1)),', ',num2str(st(2)),'% done, filenr = '… | |
- | |
-lastfile = st(3); | |
- | |
-end | |
diff --git a/mfiles/useoutput.m b/mfiles/useoutput.m | |
t@@ -1,112 +0,0 @@ | |
-%function useoutput(fn, plotfigure) | |
-% Use particle data from target binary from output directory, | |
-% create a fitting grid, and zero time variables. | |
-% Final configuration is saved in a new filename in input/ directory. | |
- | |
-% Plot particle assembly in MATLAB after initialization | |
-visualize = 0; % No | |
-%visualize = 1; % Yes | |
- | |
-% Input binary file | |
-directory = '../input/'; | |
-inputbin = '5e4-cons_10kPa.bin'; | |
- | |
-% New configuration name | |
-simulation_name = '5e4-shear_10kPa'; | |
- | |
-% Read data | |
-[p, grids, time, params, walls] = freadbin(directory, inputbin); | |
-%[p, grids, time, params, walls] = freadbinold(directory, inputbin); | |
- | |
-% Change temporal parameters (comment out to use previous values" | |
-time.current = 0.0; % New starting time | |
-time.step_count = 0; % First output file number | |
-time.total = 0.25+2*time.dt; % Total simulation time [s] | |
-time.file_dt = 0.01; %Interval between output#.bin generation [s] | |
-time.dt = 0.17*sqrt(min(4/3*pi.*p.radius(:).*p.radius(:).*p.radius(:).… | |
- | |
-% Particle stiffness | |
-p.k_n = 4e5*ones(p.np,1); % Normal stiffness [N/m] | |
-p.gamma_s = p.k_n./1e3; % Shear viscosity [Ns/m] | |
-p.gamma_r = p.gamma_s(:); % Rolling viscosity | |
- | |
-% Friction angles | |
-ang_s = 25; % Angle of shear resistance | |
-ang_r = 35; % Angle of rolling resistance | |
-p.mu_s = tand(ang_s)*ones(p.np,1); % Inter-particle shear contact friction coe… | |
-p.mu_r = tand(ang_r)*ones(p.np,1); % Inter-particle rolling contact friction c… | |
- | |
-% Compute new grid, scaled to fit max.- & min. particle positions | |
-GU = 2*max(p.radius); % Cell size | |
-x_min = min(p.x(:,1));% - p.radius(:)); | |
-x_max = max(p.x(:,1));% + p.radius(:));%*3.0; | |
-%y_min = min(p.x(:,2) - p.radius(:)); | |
-%y_max = max(p.x(:,2) + p.radius(:)); | |
-z_min = min(p.x(:,3) - p.radius(:)); | |
-z_max = max(p.x(:,3) + p.radius(:)); | |
-z_adjust = 1.2; % Specify overheightening of world to allow for shear dilatancy | |
-%grids.num(1) = ceil((x_max-x_min)/GU); | |
-%grids.num(2) = ceil((y_max-y_min)/GU); | |
-grids.num(3) = ceil((z_max-z_min)*z_adjust/GU); | |
-%grids.L = [(x_max-x_min) (y_max-y_min) (z_max-z_min)*z_adjust]; | |
-grids.L(3) = (z_max-z_min)*z_adjust; | |
- | |
-% Parameters related to capillary bonds | |
-% Disable capillary cohesion by setting kappa to zero. | |
-enableCapillaryCohesion = 0; | |
-theta = 0.0; % Wettability (0: perfect) | |
-if (enableCapillaryCohesion == 1) | |
- params.kappa = 2*pi*p.gamma_s(1) * cos(theta); % Prefactor | |
- params.V_b = 1e-12; % Liquid volume at bond | |
-else | |
- params.kappa = 0; % Zero capillary force | |
- params.V_b = 0; % Zero liquid volume at bond | |
-end | |
-params.db = (1.0 + theta/2.0) * params.V_b^(1.0/3.0); % Debonding distance | |
- | |
-% Move mobile upper wall to top of domain | |
-walls.x(1) = max(p.x(:,3)+p.radius(:)); | |
- | |
-% Define new deviatoric stress [Pa] | |
-%walls.devs(1) = 0.0; | |
-walls.devs(1) = 10.0e3; | |
- | |
-% Let x- and y boundaries be periodic | |
-params.periodic = 1; % x- and y boundaries periodic | |
-%params.periodic = 2; % Only x-boundaries periodic | |
- | |
-% By default, all particles are free to move horizontally | |
-p.fixvel = zeros(p.np, 1); % Start off by defining all particles as free | |
-shearing = 1; % 1: true, 0: false | |
- | |
-if shearing == 1 | |
- % Fix horizontal velocity to 0.0 of lowermost particles | |
- I = find(p.x(:,3) < (z_max-z_min)*0.1); % Find the lower 10% | |
- p.fixvel(I) = 1; % Fix horiz. velocity | |
- p.vel(I,1) = 0.0; % x-value | |
- p.vel(I,2) = 0.0; % y-value | |
- | |
- % Fix horizontal velocity to 0.0 of uppermost particles | |
- I = find(p.x(:,3) > (z_max-z_min)*0.9); % Find the upper 10% | |
- p.fixvel(I) = 1; % Fix horiz. velocity | |
- p.vel(I,1) = (x_max-x_min)*1.0; % x-value: One grid length per second | |
- p.vel(I,2) = 0.0; % y-value | |
-end | |
- | |
-% Zero x-axis displacement | |
-p.xsum = zeros(p.np, 1); | |
- | |
-% Write output binary | |
-fwritebin('../input/', [simulation_name '.bin'], p, grids, time, params, walls… | |
-disp('Writing of binary file complete.'); | |
- | |
-% Plot particles in bubble plot | |
-if visualize == 1 | |
- disp('Waiting for visualization.'); | |
- plotspheres(p, grids, 5, 0, 1); | |
-end | |
- | |
-disp(['Project "' simulation_name '" is now ready for processing with SPHERE.'… | |
-disp(['Call "' pwd '/sphere ' simulation_name '" from the system terminal ']); | |
-disp(['to initiate, and check progress here in MATLAB using "status(''' simula… | |
- | |
diff --git a/mfiles/visualize.m b/mfiles/visualize.m | |
t@@ -1,379 +0,0 @@ | |
-function visualize(project, method, file_nr) | |
- | |
- lastfile = status(project); | |
- | |
- if exist('file_nr','var') | |
- filenr = file_nr; | |
- else | |
- filenr = lastfile; | |
- end | |
- | |
- figure; | |
- | |
- % Plot sum of kinetic energy vs. time | |
- if strcmpi(method, 'energy') | |
- | |
- disp('Energy') | |
- | |
- Epot = zeros(1,lastfile); | |
- Ekin = zeros(1,lastfile); | |
- Erot = zeros(1,lastfile); | |
- Es = zeros(1,lastfile); | |
- Es_dot = zeros(1,lastfile); | |
- Esum = zeros(1,lastfile); | |
- | |
- % Load data from all output files | |
- for i = 1:lastfile | |
- fn = [project '.output' num2str(i) '.bin']; | |
- disp(fn); | |
- [p, ~, time, params, ~] = freadbin('../output/', fn); | |
- for j = 1:p.np | |
- r = p.radius(j); | |
- m = (4/3*pi*r*r*r*p.rho(j)); | |
- Epot(i) = Epot(i) + m * norm(params.g) * p.x(j,3); | |
- Ekin(i) = Ekin(i) + 0.5 * m ... | |
- * norm(p.vel(j,:)) * norm(p.vel(j,:)); | |
- Erot(i) = Erot(i) + 0.5 * (2/5 * m * r*r) ... | |
- * norm(p.angvel(j,:)) * norm(p.angvel(j,:)); | |
- end | |
- Es(i) = sum(p.es); | |
- Es_dot(i) = sum(p.es_dot); | |
- | |
- Esum(i) = Epot(i) + Ekin(i) + Erot(i) + Es(i); | |
- %Esum(i) = Epot(i) + Ekin(i) + Es(i); | |
- end | |
- | |
- % Time axis | |
- t = linspace(time.file_dt, time.current, length(Ekin)); | |
- | |
- %disp(num2str(Ekin(:))); | |
- | |
- % Visualization, E_pot | |
- subplot(2,3,1); | |
- plot(t, Epot, '+-'); | |
- title('Potential energy'); | |
- xlabel('Time [s]'); | |
- ylabel('Total potential energy [J]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualization, E_kin | |
- subplot(2,3,2); | |
- plot(t, Ekin, '+-'); | |
- title('Kinetic energy'); | |
- xlabel('Time [s]'); | |
- ylabel('Total kinetic energy [J]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualization, E_rot | |
- subplot(2,3,3); | |
- plot(t, Erot, '+-'); | |
- title('Rotational energy'); | |
- xlabel('Time [s]'); | |
- ylabel('Total rotational energy [J]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualizaiton, E_s_dot (current shear energy) | |
- subplot(2,3,4); | |
- plot(t, Es_dot, '+-'); | |
- title('Shear energy rate'); | |
- xlabel('Time [s]'); | |
- ylabel('Shear energy rate [W]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualizaiton, E_s (total shear energy) | |
- subplot(2,3,5); | |
- plot(t, Es, '+-'); | |
- title('Total shear energy'); | |
- xlabel('Time [s]'); | |
- ylabel('Total shear energy [J]'); | |
- box on; | |
- grid on; | |
- | |
- % Total energy, Esum | |
- subplot(2,3,6); | |
- plot(t, Esum, '+-'); | |
- title('Total energy: Pot + Kin + Rot + Shear'); | |
- xlabel('Time [s]'); | |
- ylabel('Total energy [J]'); | |
- box on; | |
- grid on; | |
- | |
- end % End visualize energy | |
- | |
- | |
- % Visualize wall kinematics and force | |
- if strcmpi(method, 'walls') | |
- disp('Walls') | |
- | |
- [~, ~, ~, ~, walls] = freadbin('../output/', [project '.output0.bin']); | |
- | |
- wforce = zeros(lastfile, walls.nw); | |
- wvel = zeros(lastfile, walls.nw); | |
- wpos = zeros(lastfile, walls.nw); | |
- wdevs = zeros(lastfile, walls.nw); | |
- | |
- % Load data from all output files | |
- for i = 1:lastfile | |
- fn = [project '.output' num2str(i) '.bin']; | |
- disp(fn); | |
- [~, ~, time, ~, walls] = freadbin('../output/', fn); | |
- | |
- for j = 1:walls.nw | |
- wforce(i,j) = walls.force(j); | |
- wvel(i,j) = walls.vel(j); | |
- wpos(i,j) = walls.x(j); | |
- wdevs(i,j) = walls.devs(j); | |
- end | |
- end | |
- | |
- % Time axis | |
- t = linspace(time.file_dt, time.current, lastfile); | |
- | |
- % Visualization, wall position | |
- subplot(2,2,1); | |
- hold on | |
- for i=1:walls.nw | |
- p = plot(t, wpos(:,i), '+-'); | |
- if i==2 | |
- set(p,'Color','red') | |
- end | |
- end | |
- hold off | |
- title('Wall positions'); | |
- xlabel('Time [s]'); | |
- ylabel('Position [m]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualization, wall force | |
- subplot(2,2,2); | |
- hold on | |
- for i=1:walls.nw | |
- p = plot(t, wforce(:,i), '+-'); | |
- if i==2 | |
- set(p,'Color','red') | |
- end | |
- end | |
- hold off | |
- title('Wall forces'); | |
- xlabel('Time [s]'); | |
- ylabel('Force [N]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualization, wall velocity | |
- subplot(2,2,3); | |
- hold on | |
- for i=1:walls.nw | |
- p = plot(t, wvel(:,i), '+-'); | |
- if i==2 | |
- set(p,'Color','red') | |
- end | |
- end | |
- hold off | |
- title('Wall velocities'); | |
- xlabel('Time [s]'); | |
- ylabel('Velocity [m/s]'); | |
- box on; | |
- grid on; | |
- | |
- % Visualization, wall deviatoric stresses | |
- subplot(2,2,4); | |
- hold on | |
- for i=1:walls.nw | |
- p = plot(t, wdevs(:,i), '+-'); | |
- if i==2 | |
- set(p,'Color','red') | |
- end | |
- end | |
- hold off | |
- title('Wall deviatoric stresses'); | |
- xlabel('Time [s]'); | |
- ylabel('Stress [Pa]'); | |
- box on; | |
- grid on; | |
- | |
- end % End visualize walls | |
- | |
- | |
- % Visualize first output file with plotspheres | |
- if strcmpi(method, 'first') | |
- disp('Visualizing first output file') | |
- fn = [project '.output0.bin']; | |
- [p, grids, ~, ~] = freadbin('../output/', fn); | |
- plotspheres(p, grids, 5, 0, 1); | |
- end % End visualize last file | |
- | |
- | |
- % Visualize last output file with plotspheres | |
- if strcmpi(method, 'last') | |
- disp('Visualizing last output file') | |
- fn = [project '.output' num2str(lastfile) '.bin']; | |
- [p, grids, ~, ~] = freadbin('../output/', fn); | |
- plotspheres(p, grids, 5, 0, 1); | |
- end % End visualize last file | |
- | |
- | |
- % Visualize horizontal velocities (velocity profile) | |
- if strcmpi(method, 'veloprofile') | |
- | |
- disp('Visualizing velocity profile'); | |
- | |
- % Read data | |
- fn = [project '.output' num2str(filenr) '.bin']; | |
- disp(fn); | |
- [p, ~, time, params, ~] = freadbin('../output/', fn); | |
- | |
- horiz_velo = zeros(2, p.np); % x,y velocities for each particle | |
- | |
- for j = 1:p.np | |
- horiz_velo(1, j) = p.vel(j, 1); % x-velocity | |
- horiz_velo(2, j) = p.vel(j, 2); % y-velocity | |
- end | |
- | |
- % Find shear velocity (for scaling first axis) | |
- fixidx = find(p.fixvel > 0.0); | |
- shearvel = max(p.vel(fixidx,1)); | |
- | |
- % Plot x- and y velocities vs. z position | |
- hold on; | |
- plot(horiz_velo(2,:), p.x(:,3), '.b'); % y-velocity (blue) | |
- plot(horiz_velo(1,:), p.x(:,3), '.r'); % x-velocity (red) | |
- axis([-0.5*shearvel, shearvel*1.5, min(p.x(:,3)), max(p.x(:,3))]); | |
- title(['Velocity profile, t = ' num2str(time.current) ' s']); | |
- xlabel('Horizontal velocity [m/s]'); | |
- ylabel('Vertical position [m]'); | |
- legend('y', 'x', 'Location', 'SouthEast'); | |
- box on; | |
- grid on; | |
- hold off; | |
- | |
- end % End visualize velocity profile | |
- | |
- if strcmpi(method, 'sheardisp') | |
- disp('Visualizing shear displacement, 1D'); | |
- | |
- % Read first datafile | |
- [p, ~, ~, ~, ~] = freadbin('../output/',[project '.output0.bin']); | |
- | |
- % Read original z-position at t = 0 s. | |
- zpos = p.x(:,3); | |
- | |
- % Read last datafile | |
- [p, ~, ~, ~, ~] = freadbin('../output/',[project '.output' num2str(lastfil… | |
- | |
- % Plot | |
- plot(p.xsum(:), zpos(:), 'o'); | |
- title(project); | |
- xlabel('Shear displacement [m]'); | |
- ylabel('Initial vertical position [m]'); | |
- box on; | |
- grid on; | |
- | |
- end % End visualize shear displacement | |
- | |
- if strcmpi(method, 'sheardisp2d') | |
- disp('Visualizing shear displacement, 2D'); | |
- | |
- % Read first datafile | |
- [p, ~, ~, ~, ~] = freadbin('../output/',[project '.output0.bin']); | |
- | |
- % Read original z-position at t = 0 s. | |
- zpos = p.x(:,3); | |
- | |
- % Read last datafile | |
- [p, ~, ~, ~, ~] = freadbin('../output/',[project '.output' num2str(lastfil… | |
- | |
- | |
- | |
- % Plot | |
- colormap(jet) | |
- title(project); | |
- xlabel('Shear displacement [m]'); | |
- ylabel('y [m]'); | |
- zlabel('z [m]'); | |
- box on; | |
- grid on; | |
- axis equal; | |
- axis vis3d; | |
- rotate3d on; | |
- | |
- end % End visualize shear displacement 2D | |
- | |
- if strcmpi(method, 'sheardisp3d') | |
- disp('Visualizing shear displacement, 3D'); | |
- | |
- % Read last datafile | |
- [p, ~, ~, ~, ~] = freadbin('../output/',[project '.output' num2str(lastfil… | |
- | |
- % Plot | |
- scatter3(p.xsum(:), p.x(:,2), p.x(:,3), 30, p.x(:,3), 'filled'); | |
- colormap(jet) | |
- title(project); | |
- xlabel('Shear displacement [m]'); | |
- ylabel('y [m]'); | |
- zlabel('z [m]'); | |
- box on; | |
- grid on; | |
- axis equal; | |
- axis vis3d; | |
- rotate3d on; | |
- | |
- end % End visualize shear displacement 3D | |
- | |
- % Visualize shear stresses | |
- if strcmpi(method, 'shear') | |
- | |
- [p, grids, time, params, walls] = freadbin('../output/',[project '.output0… | |
- | |
- disp('Visualizing shear stress dynamics') | |
- xdisp = zeros(1, lastfile+1); % Shear displacement | |
- sigma = zeros(1, lastfile+1); % Effective normal stress | |
- tau = zeros(1, lastfile+1); % Shear stress | |
- dilation = zeros(1, lastfile+1); % Dilation | |
- | |
- % Calculate the shear velocity | |
- fixedidx_all = find(p.fixvel(:) > 0.0); % All particles with a fixed horiz… | |
- shearvel = max(p.vel(fixedidx_all,1)); | |
- | |
- % Surface area | |
- A = grids.L(1) * grids.L(2); % x-length * y-length | |
- | |
- % Load data from all output files | |
- for i = 0:lastfile | |
- fn = [project '.output' num2str(i) '.bin']; | |
- disp(fn); | |
- [p, ~, time, ~, walls] = freadbin('../output/', fn); | |
- | |
- xdisp(i+1) = time.current * shearvel; | |
- sigma(i+1) = walls.force(1) / A; | |
- tau(i+1) = shearstress(p, A); | |
- dilation(i+1) = walls.x; | |
- end | |
- | |
- % Plot stresses | |
- subplot(2,1,1); | |
- plot(xdisp, sigma, 'o-g', xdisp, tau, '+-r'); | |
- title('Stress dynamics'); | |
- xlabel('Shear distance [m]'); | |
- ylabel('Stress [Pa]'); | |
- legend('\sigma`','\tau'); | |
- box on; | |
- grid on; | |
- | |
- % Plot dilation | |
- subplot(2,1,2); | |
- plot(xdisp, dilation, '+-b'); | |
- title('Dilation'); | |
- xlabel('Shear distance [m]'); | |
- ylabel('Upper wall pos. [m]'); | |
- box on; | |
- grid on; | |
- | |
- end | |
- | |
-end % End function | |
diff --git a/python/elastic_collision.py b/python/elastic_collision.py | |
t@@ -0,0 +1,46 @@ | |
+#!/usr/bin/env python | |
+import subprocess | |
+import numpy | |
+import matplotlib.pyplot as plt | |
+ | |
+# Import sphere functionality | |
+from sphere import * | |
+ | |
+# New class | |
+init = Spherebin(np = 2, nd = 3) | |
+ | |
+#init.generateRadii(radius_mean = 0.5, radius_variance = 1e-15, histogram = 0) | |
+init.radius[0] = numpy.ones(1, dtype=numpy.float64) * 0.5; | |
+init.radius[1] = numpy.ones(1, dtype=numpy.float64) * 0.52; | |
+ | |
+init.defaultparams() | |
+ | |
+# The world should never be less than 3 cells in ANY direction, due to contact… | |
+init.initsetup(gridnum = numpy.array([12,4,4]), periodic = 1, shearmodel = 2, … | |
+ | |
+init.x[0] = numpy.array([1, 2, 2]) | |
+init.x[1] = numpy.array([6, 2, 2]) | |
+#init.x[2] = numpy.array([7, 2, 2]) | |
+#init.x[3] = numpy.array([8, 2, 2]) | |
+ | |
+init.nu = numpy.zeros(init.np, dtype=numpy.float64) | |
+ | |
+#for i in range(init.np): | |
+# init.x[i] = numpy.array([4+i*init.radius[i]*2, 2, 2]) | |
+ | |
+init.vel[0] = numpy.array([10.0, 0.0, 0.0]) | |
+ | |
+ | |
+init.initTemporal(total = 1.0) | |
+ | |
+init.writebin("../input/nc-test.bin") | |
+#render("../input/nc-test.bin", out = "~/Desktop/nc-test") | |
+ | |
+subprocess.call("cd ..; rm output/*; ./sphere_darwin_X86_64 nc-test", shell=Tr… | |
+ | |
+visualize("nc-test", "energy", savefig=True, outformat='png') | |
+#visualize("nc-test", "walls", savefig=True) | |
+ | |
+subprocess.call("rm ../img_out/*; cd ../raytracer; ./render_all_outputs_GPU_cl… | |
+ | |
+ | |
diff --git a/python/inelastic_collision.py b/python/inelastic_collision.py | |
t@@ -0,0 +1,49 @@ | |
+#!/usr/bin/env python | |
+import subprocess | |
+import numpy | |
+import matplotlib.pyplot as plt | |
+ | |
+# Import sphere functionality | |
+from sphere import * | |
+ | |
+# New class | |
+init = Spherebin(np = 2, nd = 3) | |
+ | |
+init.radius = numpy.ones(init.np, dtype=numpy.float64) * 0.5; | |
+ | |
+init.defaultparams() | |
+ | |
+# The world should never be less than 3 cells in ANY direction, due to contact… | |
+init.initsetup(gridnum = numpy.array([12,4,4]), periodic = 1, shearmodel = 2, … | |
+ | |
+init.x[0] = numpy.array([1, 2, 2]) | |
+init.x[1] = numpy.array([6, 2, 2]) | |
+#init.x[2] = numpy.array([7, 2, 2]) | |
+#init.x[3] = numpy.array([8, 2, 2]) | |
+ | |
+# Set fraction of critical damping (0 = elastic, 1 = completely inelastic) | |
+damping_fraction = 1.0 | |
+init.nu = numpy.ones(init.np, dtype=numpy.float64) \ | |
+ * damping_fraction * 2.0 * math.sqrt(4.0/3.0 * math.pi * init.radius… | |
+ * init.rho[0] * init.k_n[0]) | |
+ | |
+ | |
+#for i in range(init.np): | |
+# init.x[i] = numpy.array([4+i*init.radius[i]*2, 2, 2]) | |
+ | |
+init.vel[0] = numpy.array([10.0, 0.0, 0.0]) | |
+ | |
+ | |
+init.initTemporal(total = 1.0) | |
+ | |
+init.writebin("../input/nc-test.bin") | |
+#render("../input/nc-test.bin", out = "~/Desktop/nc-test") | |
+ | |
+subprocess.call("cd ..; rm output/*; ./sphere_darwin_X86_64 nc-test", shell=Tr… | |
+ | |
+visualize("nc-test", "energy", savefig=True, outformat='png') | |
+#visualize("nc-test", "walls", savefig=True) | |
+ | |
+subprocess.call("rm ../img_out/*; cd ../raytracer; ./render_all_outputs_GPU_cl… | |
+ | |
+ | |
diff --git a/raytracer/colorbar.h b/raytracer/colorbar.h | |
t@@ -0,0 +1,22 @@ | |
+#ifndef COLORBAR_H_ | |
+#define COLORBAR_H_ | |
+ | |
+// Functions that determine red-, green- and blue color components | |
+// in a blue-white-red colormap. Ratio should be between 0.0-1.0. | |
+ | |
+__inline__ __host__ __device__ float red(float ratio) | |
+{ | |
+ return fmin(1.0f, 0.209f*ratio*ratio*ratio - 2.49f*ratio*ratio + 3.0f*ratio … | |
+}; | |
+ | |
+__inline__ __host__ __device__ float green(float ratio) | |
+{ | |
+ return fmin(1.0f, -2.44f*ratio*ratio + 2.15f*ratio + 0.369f); | |
+}; | |
+ | |
+__inline__ __host__ __device__ float blue(float ratio) | |
+{ | |
+ return fmin(1.0f, -2.21f*ratio*ratio + 1.61f*ratio + 0.573f); | |
+}; | |
+ | |
+#endif | |
diff --git a/raytracer/render_all_outputs_CPU.sh b/raytracer/render_all_outputs… | |
t@@ -0,0 +1,27 @@ | |
+#!/bin/bash | |
+ | |
+FILES=`ls ../output/*.bin` | |
+ | |
+XRES=800 | |
+YRES=800 | |
+ | |
+WORKHORSE=CPU | |
+ | |
+echo "# Rendering PPM images and converting to JPG" | |
+for F in ../output/*.bin | |
+do | |
+ (BASE=`basename $F`; | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm; | |
+ convert ../img_out/$BASE.ppm ../img_out/$BASE.jpg;) | |
+ #rm ../img_out/$BASE.ppm &) | |
+done | |
+ | |
+#echo "# Converting PPM files to JPG using ImageMagick in parallel" | |
+#for F in ../img_out/*.ppm | |
+#do | |
+# (BASE=`basename $F`; convert $F $F.jpg &) | |
+#done | |
+ | |
+#echo "# Removed temporary files" | |
+#rm ../img_out/*.ppm | |
+ | |
diff --git a/raytracer/render_all_outputs_GPU.sh b/raytracer/render_all_outputs… | |
t@@ -0,0 +1,29 @@ | |
+#!/bin/bash | |
+ | |
+FILES=`ls ../output/*.bin` | |
+ | |
+XRES=800 | |
+YRES=800 | |
+ | |
+WORKHORSE=GPU | |
+ | |
+echo "# Rendering PPM images" | |
+for F in ../output/*.bin | |
+do | |
+ BASE=`basename $F` | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm > /dev/null | |
+ if [ $? -ne 0 ] ; then | |
+ echo "Error rendering $F, trying again..." | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm > /dev/null | |
+ fi | |
+done | |
+ | |
+echo "# Converting PPM files to JPEG using ImageMagick in parallel" | |
+for F in ../img_out/*.ppm | |
+do | |
+ (BASE=`basename $F`; convert $F $F.jpg &) | |
+done | |
+ | |
+#echo "# Removed temporary files" | |
+#rm ../img_out/*.ppm | |
+ | |
diff --git a/raytracer/rt-kernel.cu b/raytracer/rt-kernel.cu | |
t@@ -0,0 +1,442 @@ | |
+#include <iostream> | |
+#include <cutil_math.h> | |
+#include "header.h" | |
+#include "rt-kernel.h" | |
+#include "colorbar.h" | |
+ | |
+unsigned int iDivUp (unsigned int a, unsigned int b) { | |
+ return (a % b != 0) ? (a / b + 1) : (a / b); | |
+} | |
+ | |
+__inline__ __host__ __device__ float3 f4_to_f3(float4 in) | |
+{ | |
+ return make_float3(in.x, in.y, in.z); | |
+} | |
+ | |
+__inline__ __host__ __device__ float4 f3_to_f4(float3 in) | |
+{ | |
+ return make_float4(in.x, in.y, in.z, 0.0f); | |
+} | |
+ | |
+// Kernel for initializing image data | |
+__global__ void imageInit(unsigned char* _img, unsigned int pixels) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > pixels) | |
+ return; | |
+ | |
+ _img[mempos*4] = 255; // Red channel | |
+ _img[mempos*4 + 1] = 255; // Green channel | |
+ _img[mempos*4 + 2] = 255; // Blue channel | |
+} | |
+ | |
+// Calculate ray origins and directions | |
+__global__ void rayInitPerspective(float4* _ray_origo, | |
+ float4* _ray_direction, | |
+ float4 eye, | |
+ unsigned int width, | |
+ unsigned int height) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > width*height) | |
+ return; | |
+ | |
+ // Calculate 2D position from linear index | |
+ unsigned int i = mempos % width; | |
+ unsigned int j = (int)floor((float)mempos/width) % width; | |
+ | |
+ // Calculate pixel coordinates in image plane | |
+ float p_u = const_imgplane.x + (const_imgplane.y - const_imgplane.x) | |
+ * (i + 0.5f) / width; | |
+ float p_v = const_imgplane.z + (const_imgplane.w - const_imgplane.z) | |
+ * (j + 0.5f) / height; | |
+ | |
+ // Write ray origo and direction to global memory | |
+ _ray_origo[mempos] = const_eye; | |
+ _ray_direction[mempos] = -const_d*const_w + p_u*const_u + p_v*const_v; | |
+} | |
+ | |
+// Check wether the pixel's viewing ray intersects with the spheres, | |
+// and shade the pixel correspondingly | |
+__global__ void rayIntersectSpheres(float4* _ray_origo, | |
+ float4* _ray_direction, | |
+ float4* _p, | |
+ unsigned char* _img) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > const_pixels) | |
+ return; | |
+ | |
+ // Read ray data from global memory | |
+ float3 e = f4_to_f3(_ray_origo[mempos]); | |
+ float3 d = f4_to_f3(_ray_direction[mempos]); | |
+ //float step = length(d); | |
+ | |
+ // Distance, in ray steps, between object and eye initialized with a large v… | |
+ float tdist = 1e10f; | |
+ | |
+ // Surface normal at closest sphere intersection | |
+ float3 n; | |
+ | |
+ // Intersection point coordinates | |
+ float3 p; | |
+ | |
+ // Iterate through all particles | |
+ for (Inttype i=0; i<const_np; ++i) { | |
+ | |
+ // Read sphere coordinate and radius | |
+ float3 c = f4_to_f3(_p[i]); | |
+ float R = _p[i].w; | |
+ | |
+ // Calculate the discriminant: d = B^2 - 4AC | |
+ float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2 | |
+ - 4.0f*dot(d,d) // -4*A | |
+ * (dot((e-c),(e-c)) - R*R); // C | |
+ | |
+ // If the determinant is positive, there are two solutions | |
+ // One where the line enters the sphere, and one where it exits | |
+ if (Delta > 0.0f) { | |
+ | |
+ // Calculate roots, Shirley 2009 p. 77 | |
+ float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(… | |
+ * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d)); | |
+ | |
+ // Check wether intersection is closer than previous values | |
+ if (fabs(t_minus) < tdist) { | |
+ p = e + t_minus*d; | |
+ tdist = fabs(t_minus); | |
+ n = normalize(2.0f * (p - c)); // Surface normal | |
+ } | |
+ | |
+ } // End of solution branch | |
+ | |
+ } // End of particle loop | |
+ | |
+ // Write pixel color | |
+ if (tdist < 1e10f) { | |
+ | |
+ // Lambertian shading parameters | |
+ float dotprod = fmax(0.0f,dot(n, f4_to_f3(const_light))); | |
+ float I_d = 40.0f; // Light intensity | |
+ float k_d = 5.0f; // Diffuse coefficient | |
+ | |
+ // Ambient shading | |
+ float k_a = 10.0f; | |
+ float I_a = 5.0f; // 5.0 for black background | |
+ | |
+ // Write shading model values to pixel color channels | |
+ _img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*0.48f); | |
+ _img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*0.41f); | |
+ _img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*0.27f); | |
+ | |
+ } | |
+} | |
+ | |
+// Check wether the pixel's viewing ray intersects with the spheres, | |
+// and shade the pixel correspondingly using a colormap | |
+__global__ void rayIntersectSpheresColormap(float4* _ray_origo, | |
+ float4* _ray_direction, | |
+ float4* _p, | |
+ float* _fixvel, | |
+ float* _linarr, | |
+ float max_val, | |
+ unsigned char* _img) | |
+{ | |
+ // Compute pixel position from threadIdx/blockIdx | |
+ unsigned int mempos = threadIdx.x + blockIdx.x * blockDim.x; | |
+ if (mempos > const_pixels) | |
+ return; | |
+ | |
+ // Read ray data from global memory | |
+ float3 e = f4_to_f3(_ray_origo[mempos]); | |
+ float3 d = f4_to_f3(_ray_direction[mempos]); | |
+ | |
+ // Distance, in ray steps, between object and eye initialized with a large v… | |
+ float tdist = 1e10f; | |
+ | |
+ // Surface normal at closest sphere intersection | |
+ float3 n; | |
+ | |
+ // Intersection point coordinates | |
+ float3 p; | |
+ | |
+ unsigned int hitidx; | |
+ | |
+ // Iterate through all particles | |
+ for (Inttype i=0; i<const_np; ++i) { | |
+ | |
+ // Read sphere coordinate and radius | |
+ float3 c = f4_to_f3(_p[i]); | |
+ float R = _p[i].w; | |
+ | |
+ // Calculate the discriminant: d = B^2 - 4AC | |
+ float Delta = (2.0f*dot(d,(e-c)))*(2.0f*dot(d,(e-c))) // B^2 | |
+ - 4.0f*dot(d,d) // -4*A | |
+ * (dot((e-c),(e-c)) - R*R); // C | |
+ | |
+ // If the determinant is positive, there are two solutions | |
+ // One where the line enters the sphere, and one where it exits | |
+ if (Delta > 0.0f) { | |
+ | |
+ // Calculate roots, Shirley 2009 p. 77 | |
+ float t_minus = ((dot(-d,(e-c)) - sqrt( dot(d,(e-c))*dot(d,(e-c)) - dot(… | |
+ * (dot((e-c),(e-c)) - R*R) ) ) / dot(d,d)); | |
+ | |
+ // Check wether intersection is closer than previous values | |
+ if (fabs(t_minus) < tdist) { | |
+ p = e + t_minus*d; | |
+ tdist = fabs(t_minus); | |
+ n = normalize(2.0f * (p - c)); // Surface normal | |
+ hitidx = i; | |
+ } | |
+ | |
+ } // End of solution branch | |
+ | |
+ } // End of particle loop | |
+ | |
+ // Write pixel color | |
+ if (tdist < 1e10) { | |
+ | |
+ // Fetch particle data used for color | |
+ float ratio = _linarr[hitidx] / max_val; | |
+ float fixvel = _fixvel[hitidx]; | |
+ | |
+ // Make sure the ratio doesn't exceed the 0.0-1.0 interval | |
+ if (ratio < 0.01f) | |
+ ratio = 0.01f; | |
+ if (ratio > 0.99f) | |
+ ratio = 0.99f; | |
+ | |
+ // Lambertian shading parameters | |
+ float dotprod = fmax(0.0f,dot(n, f4_to_f3(const_light))); | |
+ float I_d = 40.0f; // Light intensity | |
+ float k_d = 5.0f; // Diffuse coefficient | |
+ | |
+ // Ambient shading | |
+ float k_a = 10.0f; | |
+ float I_a = 5.0f; | |
+ | |
+ float redv = red(ratio); | |
+ float greenv = green(ratio); | |
+ float bluev = blue(ratio); | |
+ | |
+ // Make particle dark grey if the horizontal velocity is fixed | |
+ if (fixvel > 0.f) { | |
+ redv = 0.5; | |
+ greenv = 0.5; | |
+ bluev = 0.5; | |
+ } | |
+ | |
+ // Write shading model values to pixel color channels | |
+ _img[mempos*4] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*redv); | |
+ _img[mempos*4 + 1] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*greenv); | |
+ _img[mempos*4 + 2] = (unsigned char) ((k_d * I_d * dotprod | |
+ + k_a * I_a)*bluev); | |
+ } | |
+} | |
+ | |
+extern "C" | |
+__host__ void cameraInit(float4 eye, float4 lookat, float imgw, float hw_ratio, | |
+ unsigned int pixels, Inttype np) | |
+{ | |
+ // Image dimensions in world space (l, r, b, t) | |
+ float4 imgplane = make_float4(-0.5f*imgw, 0.5f*imgw, -0.5f*imgw*hw_ratio, 0.… | |
+ | |
+ // The view vector | |
+ float4 view = eye - lookat; | |
+ | |
+ // Construct the camera view orthonormal base | |
+ //float4 up = make_float4(0.0f, 1.0f, 0.0f, 0.0f); // Pointing upward along… | |
+ float4 up = make_float4(0.0f, 0.0f, 1.0f, 0.0f); // Pointing upward along +z | |
+ float4 w = -view/length(view); // w: Pointing backwards | |
+ float4 u = make_float4(cross(make_float3(up.x, up.y, up.z), | |
+ make_float3(w.x, w.y, w.z)), 0.0f) | |
+ / length(cross(make_float3(up.x, up.y, up.z), make_float3(w.x, w.y… | |
+ float4 v = make_float4(cross(make_float3(w.x, w.y, w.z), make_float3(u.x, u.… | |
+ | |
+ // Focal length 20% of eye vector length | |
+ float d = length(view)*0.8f; | |
+ | |
+ // Light direction (points towards light source) | |
+ float4 light = normalize(-1.0f*eye*make_float4(1.0f, 0.2f, 0.6f, 0.0f)); | |
+ | |
+ std::cout << " Transfering camera values to constant memory\n"; | |
+ | |
+ cudaMemcpyToSymbol("const_u", &u, sizeof(u)); | |
+ cudaMemcpyToSymbol("const_v", &v, sizeof(v)); | |
+ cudaMemcpyToSymbol("const_w", &w, sizeof(w)); | |
+ cudaMemcpyToSymbol("const_eye", &eye, sizeof(eye)); | |
+ cudaMemcpyToSymbol("const_imgplane", &imgplane, sizeof(imgplane)); | |
+ cudaMemcpyToSymbol("const_d", &d, sizeof(d)); | |
+ cudaMemcpyToSymbol("const_light", &light, sizeof(light)); | |
+ cudaMemcpyToSymbol("const_pixels", &pixels, sizeof(pixels)); | |
+ cudaMemcpyToSymbol("const_np", &np, sizeof(np)); | |
+} | |
+ | |
+// Check for CUDA errors | |
+extern "C" | |
+__host__ void checkForCudaErrors(const char* checkpoint_description) | |
+{ | |
+ cudaError_t err = cudaGetLastError(); | |
+ if (err != cudaSuccess) { | |
+ std::cout << "\nCuda error detected, checkpoint: " << checkpoint_descripti… | |
+ << "\nError string: " << cudaGetErrorString(err) << "\n"; | |
+ exit(EXIT_FAILURE); | |
+ } | |
+} | |
+ | |
+ | |
+// Wrapper for the rt kernel | |
+extern "C" | |
+__host__ int rt(float4* p, Inttype np, | |
+ rgb* img, unsigned int width, unsigned int height, | |
+ f3 origo, f3 L, f3 eye, f3 lookat, float imgw, | |
+ int visualize, float max_val, | |
+ float* fixvel, float* pres, float* es_dot, float* es, float* v… | |
+{ | |
+ using std::cout; | |
+ | |
+ cout << "Initializing CUDA:\n"; | |
+ | |
+ // Initialize GPU timestamp recorders | |
+ float t1, t2; | |
+ cudaEvent_t t1_go, t2_go, t1_stop, t2_stop; | |
+ cudaEventCreate(&t1_go); | |
+ cudaEventCreate(&t2_go); | |
+ cudaEventCreate(&t2_stop); | |
+ cudaEventCreate(&t1_stop); | |
+ | |
+ // Start timer 1 | |
+ cudaEventRecord(t1_go, 0); | |
+ | |
+ // Allocate memory | |
+ cout << " Allocating device memory\n"; | |
+ static float4 *_p; // Particle positions (x,y,z) and … | |
+ static float *_fixvel; // Indicates whether a particle has a … | |
+ static float *_linarr; // Array for linear values to color t… | |
+ static unsigned char *_img; // RGBw values in image | |
+ static float4 *_ray_origo; // Ray origo (x,y,z) | |
+ static float4 *_ray_direction; // Ray direction (x,y,z) | |
+ cudaMalloc((void**)&_p, np*sizeof(float4)); | |
+ cudaMalloc((void**)&_fixvel, np*sizeof(float)); | |
+ cudaMalloc((void**)&_linarr, np*sizeof(float)); // 0 size if visualize = 0; | |
+ cudaMalloc((void**)&_img, width*height*4*sizeof(unsigned char)); | |
+ cudaMalloc((void**)&_ray_origo, width*height*sizeof(float4)); | |
+ cudaMalloc((void**)&_ray_direction, width*height*sizeof(float4)); | |
+ | |
+ // Transfer particle data | |
+ cout << " Transfering particle data: host -> device\n"; | |
+ cudaMemcpy(_p, p, np*sizeof(float4), cudaMemcpyHostToDevice); | |
+ cudaMemcpy(_fixvel, fixvel, np*sizeof(float), cudaMemcpyHostToDevice); | |
+ if (visualize == 1 || visualize == 4) | |
+ cudaMemcpy(_linarr, pres, np*sizeof(float), cudaMemcpyHostToDevice); | |
+ if (visualize == 2) | |
+ cudaMemcpy(_linarr, es_dot, np*sizeof(float), cudaMemcpyHostToDevice); | |
+ if (visualize == 3) | |
+ cudaMemcpy(_linarr, es, np*sizeof(float), cudaMemcpyHostToDevice); | |
+ if (visualize == 4) | |
+ cudaMemcpy(_linarr, vel, np*sizeof(float), cudaMemcpyHostToDevice); | |
+ | |
+ // Check for errors after memory allocation | |
+ checkForCudaErrors("CUDA error after memory allocation"); | |
+ | |
+ // Arrange thread/block structure | |
+ unsigned int pixels = width*height; | |
+ float hw_ratio = (float)height/(float)width; | |
+ const unsigned int threadsPerBlock = 256; | |
+ const unsigned int blocksPerGrid = iDivUp(pixels, threadsPerBlock); | |
+ | |
+ // Start timer 2 | |
+ cudaEventRecord(t2_go, 0); | |
+ | |
+ // Initialize image to background color | |
+ imageInit<<< blocksPerGrid, threadsPerBlock >>>(_img, pixels); | |
+ | |
+ // Initialize camera | |
+ cameraInit(make_float4(eye.x, eye.y, eye.z, 0.0f), | |
+ make_float4(lookat.x, lookat.y, lookat.z, 0.0f), | |
+ imgw, hw_ratio, pixels, np); | |
+ checkForCudaErrors("CUDA error after cameraInit"); | |
+ | |
+ // Construct rays for perspective projection | |
+ rayInitPerspective<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ make_float4(eye.x, eye.y, eye.z, 0.0f), | |
+ width, height); | |
+ | |
+ cudaThreadSynchronize(); | |
+ | |
+ // Find closest intersection between rays and spheres | |
+ if (visualize == 1) { // Visualize pressure | |
+ cout << " Pressure color map range: [0, " << max_val << "] Pa\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, max_val, _img); | |
+ } else if (visualize == 2) { // es_dot visualization | |
+ cout << " Shear heat production rate color map range: [0, " << max_val <<… | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, max_val, _img); | |
+ } else if (visualize == 3) { // es visualization | |
+ cout << " Total shear heat color map range: [0, " << max_val << "] J\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, max_val, _img); | |
+ } else if (visualize == 4) { // velocity visualization | |
+ cout << " Velocity color map range: [0, " << max_val << "] m/s\n"; | |
+ rayIntersectSpheresColormap<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _fixvel, _linarr, max_val, _img); | |
+ } else { // Normal visualization | |
+ rayIntersectSpheres<<< blocksPerGrid, threadsPerBlock >>>( | |
+ _ray_origo, _ray_direction, | |
+ _p, _img); | |
+ } | |
+ | |
+ // Make sure all threads are done before continuing CPU control sequence | |
+ cudaThreadSynchronize(); | |
+ | |
+ // Check for errors | |
+ checkForCudaErrors("CUDA error after kernel execution"); | |
+ | |
+ // Stop timer 2 | |
+ cudaEventRecord(t2_stop, 0); | |
+ cudaEventSynchronize(t2_stop); | |
+ | |
+ // Transfer image data from device to host | |
+ cout << " Transfering image data: device -> host\n"; | |
+ cudaMemcpy(img, _img, width*height*4*sizeof(unsigned char), cudaMemcpyDevice… | |
+ | |
+ // Free dynamically allocated device memory | |
+ cudaFree(_p); | |
+ cudaFree(_fixvel); | |
+ cudaFree(_linarr); | |
+ cudaFree(_img); | |
+ cudaFree(_ray_origo); | |
+ cudaFree(_ray_direction); | |
+ | |
+ // Stop timer 1 | |
+ cudaEventRecord(t1_stop, 0); | |
+ cudaEventSynchronize(t1_stop); | |
+ | |
+ // Calculate time spent in t1 and t2 | |
+ cudaEventElapsedTime(&t1, t1_go, t1_stop); | |
+ cudaEventElapsedTime(&t2, t2_go, t2_stop); | |
+ | |
+ // Report time spent | |
+ cout << " Time spent on entire GPU routine: " | |
+ << t1 << " ms\n"; | |
+ cout << " - Kernels: " << t2 << " ms\n" | |
+ << " - Memory alloc. and transfer: " << t1-t2 << " ms\n"; | |
+ | |
+ // Return successfully | |
+ return 0; | |
+} | |
diff --git a/raytracer/rt_GPU_init_pres.sh b/raytracer/rt_GPU_init_pres.sh | |
t@@ -0,0 +1,36 @@ | |
+#!/bin/bash | |
+ | |
+# Pass max. pressure as command line argument | |
+ | |
+XRES=400 | |
+YRES=1600 | |
+ | |
+WORKHORSE=GPU | |
+ | |
+echo "# Rendering PPM images" | |
+for F in ../output/*init*.bin | |
+do | |
+ BASE=`basename $F` | |
+ OUTFILE=$BASE.ppm.jpg | |
+ if [ -e ../img_out/$OUTFILE ] | |
+ then | |
+ echo "$OUTFILE exists." > /dev/null | |
+ else | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/null | |
+ if [ $? -ne 0 ] ; then | |
+ echo "Error rendering $F, trying again..." | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/n… | |
+ fi | |
+ fi | |
+done | |
+ | |
+echo "# Converting PPM files to JPEG using ImageMagick in parallel" | |
+for F in ../img_out/*.ppm | |
+do | |
+ (BASE=`basename $F`; convert $F $F.jpg > /dev/null &) | |
+done | |
+ | |
+sleep 5 | |
+echo "# Removing temporary PPM files" | |
+rm ../img_out/*.ppm | |
+ | |
diff --git a/raytracer/rt_GPU_pres.sh b/raytracer/rt_GPU_pres.sh | |
t@@ -0,0 +1,36 @@ | |
+#!/bin/bash | |
+ | |
+# Pass max. pressure as command line argument | |
+ | |
+XRES=800 | |
+YRES=800 | |
+ | |
+WORKHORSE=GPU | |
+ | |
+echo "# Rendering PPM images" | |
+for F in ../output/*.bin | |
+do | |
+ BASE=`basename $F` | |
+ OUTFILE=$BASE.ppm.jpg | |
+ if [ -e ../img_out/$OUTFILE ] | |
+ then | |
+ echo "$OUTFILE exists." > /dev/null | |
+ else | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/null | |
+ if [ $? -ne 0 ] ; then | |
+ echo "Error rendering $F, trying again..." | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/n… | |
+ fi | |
+ fi | |
+done | |
+ | |
+echo "# Converting PPM files to JPEG using ImageMagick in parallel" | |
+for F in ../img_out/*.ppm | |
+do | |
+ (BASE=`basename $F`; convert $F $F.jpg > /dev/null &) | |
+done | |
+ | |
+sleep 5 | |
+echo "# Removing temporary PPM files" | |
+rm ../img_out/*.ppm | |
+ | |
diff --git a/raytracer/rt_GPU_tall_pres.sh b/raytracer/rt_GPU_tall_pres.sh | |
t@@ -0,0 +1,36 @@ | |
+#!/bin/bash | |
+ | |
+# Pass max. pressure as command line argument | |
+ | |
+XRES=800 | |
+YRES=1200 | |
+ | |
+WORKHORSE=GPU | |
+ | |
+echo "# Rendering PPM images" | |
+for F in ../output/*.bin | |
+do | |
+ BASE=`basename $F` | |
+ OUTFILE=$BASE.ppm.jpg | |
+ if [ -e ../img_out/$OUTFILE ] | |
+ then | |
+ echo "$OUTFILE exists." > /dev/null | |
+ else | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/null | |
+ if [ $? -ne 0 ] ; then | |
+ echo "Error rendering $F, trying again..." | |
+ ./rt $WORKHORSE $F $XRES $YRES ../img_out/$BASE.ppm pressure $@ > /dev/n… | |
+ fi | |
+ fi | |
+done | |
+ | |
+echo "# Converting PPM files to JPEG using ImageMagick in parallel" | |
+for F in ../img_out/*.ppm | |
+do | |
+ (BASE=`basename $F`; convert $F $F.jpg > /dev/null &) | |
+done | |
+ | |
+sleep 5 | |
+echo "# Removing temporary PPM files" | |
+rm ../img_out/*.ppm | |
+ | |
diff --git a/src/cohesion.cuh b/src/cohesion.cuh | |
t@@ -0,0 +1,126 @@ | |
+#ifndef COHESION_CUH_ | |
+#define COHESION_CUH_ | |
+ | |
+// cohesion.cuh | |
+// Functions governing attractive forces between contacts | |
+ | |
+ | |
+// Linear-elastic bond: Attractive force with normal- and shear components | |
+// acting upon particle A in a bonded particle pair | |
+__device__ void bondLinear(Float3* N, Float3* T, Float* es_dot, Float* p, | |
+ unsigned int idx_a, unsigned int idx_b, | |
+ Float4* dev_x_sorted, Float4* dev_vel_sorted, | |
+ Float4* dev_angvel_sorted, | |
+ Float radius_a, Float radius_b, | |
+ Float3 x_ab, Float x_ab_length, | |
+ Float delta_ab) | |
+{ | |
+ | |
+ // If particles are not overlapping, apply bond force | |
+ if (delta_ab > 0.0f) { | |
+ | |
+ // Allocate variables and fetch missing time=t values for particle A and B | |
+ Float4 vel_a = dev_vel_sorted[idx_a]; | |
+ Float4 vel_b = dev_vel_sorted[idx_b]; | |
+ Float4 angvel4_a = dev_angvel_sorted[idx_a]; | |
+ Float4 angvel4_b = dev_angvel_sorted[idx_b]; | |
+ | |
+ // Convert to Float3's | |
+ Float3 angvel_a = MAKE_FLOAT3(angvel4_a.x, angvel4_a.y, angvel4_a.z); | |
+ Float3 angvel_b = MAKE_FLOAT3(angvel4_b.x, angvel4_b.y, angvel4_b.z); | |
+ | |
+ // Normal vector of contact | |
+ Float3 n_ab = x_ab/x_ab_length; | |
+ | |
+ // Relative contact interface velocity, w/o rolling | |
+ Float3 vel_ab_linear = MAKE_FLOAT3(vel_a.x - vel_b.x, | |
+ vel_a.y - vel_b.y, | |
+ vel_a.z - vel_b.z); | |
+ | |
+ // Relative contact interface velocity of particle surfaces at | |
+ // the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10) | |
+ Float3 vel_ab = vel_ab_linear | |
+ + radius_a * cross(n_ab, angvel_a) | |
+ + radius_b * cross(n_ab, angvel_b); | |
+ | |
+ // Relative contact interface rolling velocity | |
+ //Float3 angvel_ab = angvel_a - angvel_b; | |
+ //Float angvel_ab_length = length(angvel_ab); | |
+ | |
+ // Normal component of the relative contact interface velocity | |
+ //Float vel_n_ab = dot(vel_ab_linear, n_ab); | |
+ | |
+ // Tangential component of the relative contact interface velocity | |
+ // Hinrichsen and Wolf 2004, eq. 13.9 | |
+ Float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab)); | |
+ //Float vel_t_ab_length = length(vel_t_ab); | |
+ | |
+ Float3 f_n = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ Float3 f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Mean radius | |
+ Float R_bar = (radius_a + radius_b)/2.0f; | |
+ | |
+ // Normal force component: Elastic | |
+ f_n = devC_k_n * delta_ab * n_ab; | |
+ | |
+ if (length(vel_t_ab) > 0.f) { | |
+ // Shear force component: Viscous | |
+ f_t = -1.0f * devC_gamma_s * vel_t_ab; | |
+ | |
+ // Shear friction production rate [W] | |
+ //*es_dot += -dot(vel_t_ab, f_t); | |
+ } | |
+ | |
+ // Add force components from this bond to total force for particle | |
+ *N += f_n + f_t; | |
+ *T += -R_bar * cross(n_ab, f_t); | |
+ | |
+ // Pressure excerted onto the particle from this bond | |
+ *p += length(f_n) / (4.0f * PI * radius_a*radius_a); | |
+ | |
+ } | |
+} // End of bondLinear() | |
+ | |
+ | |
+// Capillary cohesion after Richefeu et al. (2006) | |
+__device__ void capillaryCohesion_exp(Float3* N, Float radius_a, | |
+ Float radius_b, Float delta_ab, | |
+ Float3 x_ab, Float x_ab_length, | |
+ Float kappa) | |
+{ | |
+ | |
+ // Normal vector | |
+ Float3 n_ab = x_ab/x_ab_length; | |
+ | |
+ Float3 f_c; | |
+ Float lambda, R_geo, R_har, r, h; | |
+ | |
+ // Determine the ratio; r = max{Ri/Rj;Rj/Ri} | |
+ if ((radius_a/radius_b) > (radius_b/radius_a)) | |
+ r = radius_a/radius_b; | |
+ else | |
+ r = radius_b/radius_a; | |
+ | |
+ // Exponential decay function | |
+ h = -sqrtf(r); | |
+ | |
+ // The harmonic mean | |
+ R_har = (2.0f * radius_a * radius_b) / (radius_a + radius_b); | |
+ | |
+ // The geometrical mean | |
+ R_geo = sqrtf(radius_a * radius_b); | |
+ | |
+ // The exponential falloff of the capillary force with distance | |
+ lambda = 0.9f * h * sqrtf(devC_V_b/R_har); | |
+ | |
+ // Calculate cohesional force | |
+ f_c = -kappa * R_geo * expf(-delta_ab/lambda) * n_ab; | |
+ | |
+ // Add force components from this collision to total force for particle | |
+ *N += f_c; | |
+ | |
+} // End of capillaryCohesion_exp | |
+ | |
+ | |
+#endif | |
diff --git a/src/contactmodels.cuh b/src/contactmodels.cuh | |
t@@ -0,0 +1,411 @@ | |
+#ifndef CONTACTMODELS_CUH_ | |
+#define CONTACTMODELS_CUH_ | |
+ | |
+// contactmodels.cuh | |
+// Functions governing repulsive forces between contacts | |
+ | |
+ | |
+// Linear viscoelastic contact model for particle-wall interactions | |
+// with tangential friction and rolling resistance | |
+__device__ Float contactLinear_wall(Float3* N, Float3* T, Float* es_dot, Float… | |
+ unsigned int idx_a, Float radius_a, | |
+ Float4* dev_vel_sorted, Float4* dev_angvel… | |
+ Float3 n, Float delta, Float wvel) | |
+{ | |
+ // Fetch particle velocities from global memory | |
+ Float4 linvel_tmp = dev_vel_sorted[idx_a]; | |
+ Float4 angvel_tmp = dev_angvel_sorted[idx_a]; | |
+ | |
+ // Convert velocities to three-component vectors | |
+ Float3 linvel = MAKE_FLOAT3(linvel_tmp.x, | |
+ linvel_tmp.y, | |
+ linvel_tmp.z); | |
+ Float3 angvel = MAKE_FLOAT3(angvel_tmp.x, | |
+ angvel_tmp.y, | |
+ angvel_tmp.z); | |
+ | |
+ // Store the length of the angular velocity for later use | |
+ Float angvel_length = length(angvel); | |
+ | |
+ // Contact velocity is the sum of the linear and | |
+ // rotational components | |
+ Float3 vel = linvel + radius_a * cross(n, angvel) + wvel; | |
+ | |
+ // Normal component of the contact velocity | |
+ Float vel_n = dot(vel, n); | |
+ | |
+ // The tangential velocity is the contact velocity | |
+ // with the normal component subtracted | |
+ Float3 vel_t = vel - n * (dot(vel, n)); | |
+ Float vel_t_length = length(vel_t); | |
+ | |
+ // Calculate elastic normal component | |
+ //Float3 f_n = -devC_k_n * delta * n; | |
+ | |
+ // Normal force component: Elastic - viscous damping | |
+ Float3 f_n = (-devC_k_n * delta - devC_gamma_wn * vel_n) * n; | |
+ | |
+ // Make sure the viscous damping doesn't exceed the elastic component, | |
+ // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n) | |
+ if (dot(f_n, n) < 0.0f) | |
+ f_n = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ Float f_n_length = length(f_n); // Save length for later use | |
+ | |
+ // Initialize vectors | |
+ Float3 f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ Float3 T_res = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Check that the tangential velocity is high enough to avoid | |
+ // divide by zero (producing a NaN) | |
+ if (vel_t_length > 0.f) { | |
+ | |
+ Float f_t_visc = devC_gamma_ws * vel_t_length; // Tangential force by vis… | |
+ Float f_t_limit = devC_mu_s * f_n_length; // Max. friction | |
+ | |
+ // If the shear force component exceeds the friction, | |
+ // the particle slips and energy is dissipated | |
+ if (f_t_visc < f_t_limit) { | |
+ f_t = -1.0f * f_t_visc * vel_t/vel_t_length; | |
+ | |
+ } else { // Dynamic friction, friction failure | |
+ f_t = -1.0f * f_t_limit * vel_t/vel_t_length; | |
+ | |
+ // Shear energy production rate [W] | |
+ //*es_dot += -dot(vel_t, f_t); | |
+ } | |
+ } | |
+ | |
+/* if (angvel_length > 0.f) { | |
+ // Apply rolling resistance (Zhou et al. 1999) | |
+ //T_res = -angvel_a/angvel_length * devC_mu_r * radius_a * f_n_length; | |
+ | |
+ // New rolling resistance model | |
+ T_res = -1.0f * fmin(devC_gamma_r * radius_a * angvel_length, | |
+ devC_mu_r * radius_a * f_n_length) | |
+ * angvel_a/angvel_length; | |
+ }*/ | |
+ | |
+ // Total force from wall | |
+ *N += f_n + f_t; | |
+ | |
+ // Total torque from wall | |
+ *T += -radius_a * cross(n, f_t) + T_res; | |
+ | |
+ // Pressure excerted onto particle from this contact | |
+ *p += f_n_length / (4.0f * PI * radius_a*radius_a); | |
+ | |
+ // Return force excerted onto the wall | |
+ //return -dot(*N, n); | |
+ return dot(f_n, n); | |
+} | |
+ | |
+ | |
+// Linear vicoelastic contact model for particle-particle interactions | |
+// with tangential friction and rolling resistance | |
+__device__ void contactLinearViscous(Float3* N, Float3* T, Float* es_dot, Floa… | |
+ unsigned int idx_a, unsigned in… | |
+ Float4* dev_vel_sorted, | |
+ Float4* dev_angvel_sorted, | |
+ Float radius_a, Float radius_b, | |
+ Float3 x_ab, Float x_ab_length, | |
+ Float delta_ab, Float kappa) | |
+{ | |
+ | |
+ // Allocate variables and fetch missing time=t values for particle A and B | |
+ Float4 vel_a = dev_vel_sorted[idx_a]; | |
+ Float4 vel_b = dev_vel_sorted[idx_b]; | |
+ Float4 angvel4_a = dev_angvel_sorted[idx_a]; | |
+ Float4 angvel4_b = dev_angvel_sorted[idx_b]; | |
+ | |
+ // Convert to Float3's | |
+ Float3 angvel_a = MAKE_FLOAT3(angvel4_a.x, angvel4_a.y, angvel4_a.z); | |
+ Float3 angvel_b = MAKE_FLOAT3(angvel4_b.x, angvel4_b.y, angvel4_b.z); | |
+ | |
+ // Force between grain pair decomposed into normal- and tangential part | |
+ Float3 f_n, f_t, f_c, T_res; | |
+ | |
+ // Normal vector of contact | |
+ Float3 n_ab = x_ab/x_ab_length; | |
+ | |
+ // Relative contact interface velocity, w/o rolling | |
+ Float3 vel_ab_linear = MAKE_FLOAT3(vel_a.x - vel_b.x, | |
+ vel_a.y - vel_b.y, | |
+ vel_a.z - vel_b.z); | |
+ | |
+ // Relative contact interface velocity of particle surfaces at | |
+ // the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10) | |
+ Float3 vel_ab = vel_ab_linear | |
+ + radius_a * cross(n_ab, angvel_a) | |
+ + radius_b * cross(n_ab, angvel_b); | |
+ | |
+ // Relative contact interface rolling velocity | |
+ Float3 angvel_ab = angvel_a - angvel_b; | |
+ Float angvel_ab_length = length(angvel_ab); | |
+ | |
+ // Normal component of the relative contact interface velocity | |
+ Float vel_n_ab = dot(vel_ab_linear, n_ab); | |
+ | |
+ // Tangential component of the relative contact interface velocity | |
+ // Hinrichsen and Wolf 2004, eq. 13.9 | |
+ Float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab)); | |
+ Float vel_t_ab_length = length(vel_t_ab); | |
+ | |
+ // Compute the normal stiffness of the contact | |
+ //Float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b); | |
+ | |
+ // Calculate rolling radius | |
+ Float R_bar = (radius_a + radius_b) / 2.0f; | |
+ | |
+ // Normal force component: linear-elastic approximation (Augier 2009, eq. 3) | |
+ // with velocity dependant damping | |
+ // Damping coefficient: alpha = 0.8 | |
+ //f_n = (-k_n_ab * delta_ab + 2.0f * 0.8f * sqrtf(m_eff*k_n_ab) * vel_ab) * … | |
+ | |
+ // Linear spring for normal component (Renzo 2004, eq. 35) | |
+ // Dissipation due to plastic deformation is modelled by using a different | |
+ // unloading spring constant (Walton and Braun 1986) | |
+ // Here the factor in the second term determines the relative strength of the | |
+ // unloading spring relative to the loading spring. | |
+ /* if (vel_n_ab > 0.0f) { // Loading | |
+ f_n = (-k_n_ab * delta_ab) * n_ab; | |
+ } else { // Unloading | |
+ f_n = (-k_n_ab * 0.90f * delta_ab) * n_ab; | |
+ } // f_n is OK! */ | |
+ | |
+ // Normal force component: Elastic | |
+ //f_n = -k_n_ab * delta_ab * n_ab; | |
+ | |
+ // Normal force component: Elastic - viscous damping | |
+ f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab; | |
+ | |
+ // Make sure the viscous damping doesn't exceed the elastic component, | |
+ // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n) | |
+ if (dot(f_n, n_ab) < 0.0f) | |
+ f_n = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ Float f_n_length = length(f_n); | |
+ | |
+ // Add max. capillary force | |
+ f_c = -kappa * sqrtf(radius_a * radius_b) * n_ab; | |
+ | |
+ // Initialize force vectors to zero | |
+ f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ T_res = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Shear force component: Nonlinear relation | |
+ // Coulomb's law of friction limits the tangential force to less or equal | |
+ // to the normal force | |
+ if (vel_t_ab_length > 0.f) { | |
+ | |
+ // Tangential force by viscous model | |
+ Float f_t_visc = devC_gamma_s * vel_t_ab_length; | |
+ | |
+ // Determine max. friction | |
+ Float f_t_limit; | |
+ if (vel_t_ab_length > 0.001f) { // Dynamic | |
+ f_t_limit = devC_mu_d * length(f_n-f_c); | |
+ } else { // Static | |
+ f_t_limit = devC_mu_s * length(f_n-f_c); | |
+ } | |
+ | |
+ // If the shear force component exceeds the friction, | |
+ // the particle slips and energy is dissipated | |
+ if (f_t_visc < f_t_limit) { // Static | |
+ f_t = -1.0f * f_t_visc * vel_t_ab/vel_t_ab_length; | |
+ | |
+ } else { // Dynamic, friction failure | |
+ f_t = -1.0f * f_t_limit * vel_t_ab/vel_t_ab_length; | |
+ | |
+ // Shear friction production rate [W] | |
+ //*es_dot += -dot(vel_t_ab, f_t); | |
+ } | |
+ } | |
+ | |
+/* if (angvel_ab_length > 0.f) { | |
+ // Apply rolling resistance (Zhou et al. 1999) | |
+ //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n); | |
+ | |
+ // New rolling resistance model | |
+ T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length, | |
+ devC_mu_r * R_bar * f_n_length) | |
+ * angvel_ab/angvel_ab_length; | |
+ } | |
+*/ | |
+ | |
+ // Add force components from this collision to total force for particle | |
+ *N += f_n + f_t + f_c; | |
+ *T += -R_bar * cross(n_ab, f_t) + T_res; | |
+ | |
+ // Pressure excerted onto the particle from this contact | |
+ *p += f_n_length / (4.0f * PI * radius_a*radius_a); | |
+ | |
+} // End of contactLinearViscous() | |
+ | |
+ | |
+// Linear elastic contact model for particle-particle interactions | |
+__device__ void contactLinear(Float3* N, Float3* T, | |
+ Float* es_dot, Float* p, | |
+ unsigned int idx_a_orig, | |
+ unsigned int idx_b_orig, | |
+ Float4 vel_a, | |
+ Float4* dev_vel, | |
+ Float3 angvel_a, | |
+ Float4* dev_angvel, | |
+ Float radius_a, Float radius_b, | |
+ Float3 x_ab, Float x_ab_length, | |
+ Float delta_ab, Float4* dev_delta_t, | |
+ unsigned int mempos) | |
+{ | |
+ | |
+ // Allocate variables and fetch missing time=t values for particle A and B | |
+ Float4 vel_b = dev_vel[idx_b_orig]; | |
+ Float4 angvel4_b = dev_angvel[idx_b_orig]; | |
+ | |
+ // Fetch previous sum of shear displacement for the contact pair | |
+ Float4 delta_t0_4 = dev_delta_t[mempos]; | |
+ | |
+ Float3 delta_t0_uncor = MAKE_FLOAT3(delta_t0_4.x, | |
+ delta_t0_4.y, | |
+ delta_t0_4.z); | |
+ | |
+ // Convert to Float3 | |
+ Float3 angvel_b = MAKE_FLOAT3(angvel4_b.x, angvel4_b.y, angvel4_b.z); | |
+ | |
+ // Force between grain pair decomposed into normal- and tangential part | |
+ Float3 f_n, f_t, f_c, T_res; | |
+ | |
+ // Normal vector of contact | |
+ Float3 n_ab = x_ab/x_ab_length; | |
+ | |
+ // Relative contact interface velocity, w/o rolling | |
+ Float3 vel_ab_linear = MAKE_FLOAT3(vel_a.x - vel_b.x, | |
+ vel_a.y - vel_b.y, | |
+ vel_a.z - vel_b.z); | |
+ | |
+ // Relative contact interface velocity of particle surfaces at | |
+ // the contact, with rolling (Hinrichsen and Wolf 2004, eq. 13.10) | |
+ Float3 vel_ab = vel_ab_linear | |
+ + radius_a * cross(n_ab, angvel_a) | |
+ + radius_b * cross(n_ab, angvel_b); | |
+ | |
+ // Relative contact interface rolling velocity | |
+ Float3 angvel_ab = angvel_a - angvel_b; | |
+ Float angvel_ab_length = length(angvel_ab); | |
+ | |
+ // Normal component of the relative contact interface velocity | |
+ Float vel_n_ab = dot(vel_ab_linear, n_ab); | |
+ | |
+ // Tangential component of the relative contact interface velocity | |
+ // Hinrichsen and Wolf 2004, eq. 13.9 | |
+ Float3 vel_t_ab = vel_ab - (n_ab * dot(vel_ab, n_ab)); | |
+ Float vel_t_ab_length = length(vel_t_ab); | |
+ | |
+ // Correct tangential displacement vector, which is | |
+ // necessary if the tangential plane rotated | |
+ Float3 delta_t0 = delta_t0_uncor - (n_ab * dot(delta_t0_uncor, n_ab)); | |
+ Float delta_t0_length = length(delta_t0); | |
+ | |
+ // New tangential displacement vector | |
+ Float3 delta_t; | |
+ | |
+ // Compute the normal stiffness of the contact | |
+ //Float k_n_ab = k_n_a * k_n_b / (k_n_a + k_n_b); | |
+ | |
+ // Calculate rolling radius | |
+ Float R_bar = (radius_a + radius_b) / 2.0f; | |
+ | |
+ // Normal force component: Elastic | |
+ //f_n = -devC_k_n * delta_ab * n_ab; | |
+ | |
+ // Normal force component: Elastic - viscous damping | |
+ f_n = (-devC_k_n * delta_ab - devC_gamma_n * vel_n_ab) * n_ab; | |
+ | |
+ // Make sure the viscous damping doesn't exceed the elastic component, | |
+ // i.e. the damping factor doesn't exceed the critical damping, 2*sqrt(m*k_n) | |
+ if (dot(f_n, n_ab) < 0.0f) | |
+ f_n = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ Float f_n_length = length(f_n); | |
+ | |
+ // Add max. capillary force | |
+ f_c = -devC_kappa * sqrtf(radius_a * radius_b) * n_ab; | |
+ | |
+ // Initialize force vectors to zero | |
+ f_t = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ T_res = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Apply a tangential force if the previous tangential displacement | |
+ // is non-zero, or the current sliding velocity is non-zero. | |
+ if (delta_t0_length > 0.f || vel_t_ab_length > 0.f) { | |
+ | |
+ // Shear force: Visco-Elastic, limited by Coulomb friction | |
+ Float3 f_t_elast = -1.0f * devC_k_s * delta_t0; | |
+ Float3 f_t_visc = -1.0f * devC_gamma_s * vel_t_ab; | |
+ | |
+ Float f_t_limit; | |
+ | |
+ if (vel_t_ab_length > 0.001f) { // Dynamic friciton | |
+ f_t_limit = devC_mu_d * length(f_n-f_c); | |
+ } else { // Static friction | |
+ f_t_limit = devC_mu_s * length(f_n-f_c); | |
+ } | |
+ | |
+ // Tangential force before friction limit correction | |
+ f_t = f_t_elast + f_t_visc; | |
+ Float f_t_length = length(f_t); | |
+ | |
+ // If failure criterion is not met, contact is viscous-linear elastic. | |
+ // If failure criterion is met, contact force is limited, | |
+ // resulting in a slip and energy dissipation | |
+ if (f_t_length > f_t_limit) { // Dynamic case | |
+ | |
+ // Frictional force is reduced to equal the limit | |
+ f_t *= f_t_limit/f_t_length; | |
+ | |
+ // A slip event zeros the displacement vector | |
+ //delta_t = make_Float3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // In a slip event, the tangential spring is adjusted to a | |
+ // length which is consistent with Coulomb's equation | |
+ // (Hinrichsen and Wolf, 2004) | |
+ delta_t = -1.0f/devC_k_s * f_t + devC_gamma_s * vel_t_ab; | |
+ | |
+ // Shear friction heat production rate: | |
+ // The energy lost from the tangential spring is dissipated as heat | |
+ //*es_dot += -dot(vel_t_ab, f_t); | |
+ *es_dot += length(delta_t0 - delta_t) * devC_k_s / devC_dt; // Seen in E… | |
+ | |
+ } else { // Static case | |
+ | |
+ // No correction of f_t is required | |
+ | |
+ // Add tangential displacement to total tangential displacement | |
+ delta_t = delta_t0 + vel_t_ab * devC_dt; | |
+ } | |
+ } | |
+ | |
+ if (angvel_ab_length > 0.f) { | |
+ // Apply rolling resistance (Zhou et al. 1999) | |
+ //T_res = -angvel_ab/angvel_ab_length * devC_mu_r * R_bar * length(f_n); | |
+ | |
+ // New rolling resistance model | |
+ T_res = -1.0f * fmin(devC_gamma_r * R_bar * angvel_ab_length, | |
+ devC_mu_r * R_bar * f_n_length) | |
+ * angvel_ab/angvel_ab_length; | |
+ } | |
+ | |
+ // Add force components from this collision to total force for particle | |
+ *N += f_n + f_t + f_c; | |
+ *T += -R_bar * cross(n_ab, f_t) + T_res; | |
+ | |
+ // Pressure excerted onto the particle from this contact | |
+ *p += f_n_length / (4.0f * PI * radius_a*radius_a); | |
+ | |
+ // Store sum of tangential displacements | |
+ dev_delta_t[mempos] = MAKE_FLOAT4(delta_t.x, delta_t.y, delta_t.z, 0.0f); | |
+ | |
+} // End of contactLinear() | |
+ | |
+ | |
+#endif | |
diff --git a/src/contactsearch.cuh b/src/contactsearch.cuh | |
t@@ -0,0 +1,625 @@ | |
+#ifndef CONTACTSEARCH_CUH_ | |
+#define CONTACTSEARCH_CUH_ | |
+ | |
+// contactsearch.cuh | |
+// Functions for identifying contacts and processing boundaries | |
+ | |
+ | |
+// Find overlaps between particle no. 'idx' and particles in cell 'gridpos' | |
+// Kernel executed on device, and callable from device only. | |
+__device__ void overlapsInCell(int3 targetCell, | |
+ unsigned int idx_a, | |
+ Float4 x_a, Float radius_a, | |
+ Float3* N, Float3* T, | |
+ Float* es_dot, Float* p, | |
+ Float4* dev_x_sorted, | |
+ Float* dev_radius_sorted, | |
+ Float4* dev_vel_sorted, | |
+ Float4* dev_angvel_sorted, | |
+ unsigned int* dev_cellStart, | |
+ unsigned int* dev_cellEnd, | |
+ Float4* dev_w_nx, | |
+ Float4* dev_w_mvfd) | |
+ //uint4 bonds) | |
+{ | |
+ | |
+ // Variable containing modifier for interparticle | |
+ // vector, if it crosses a periodic boundary | |
+ Float3 distmod = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Check whether x- and y boundaries are to be treated as periodic | |
+ // 1: x- and y boundaries periodic | |
+ // 2: x boundaries periodic | |
+ if (devC_periodic == 1) { | |
+ | |
+ // Periodic x-boundary | |
+ if (targetCell.x < 0) { | |
+ targetCell.x = devC_num[0] - 1; | |
+ distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ if (targetCell.x == devC_num[0]) { | |
+ targetCell.x = 0; | |
+ distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ | |
+ // Periodic y-boundary | |
+ if (targetCell.y < 0) { | |
+ targetCell.y = devC_num[1] - 1; | |
+ distmod += MAKE_FLOAT3(0.0f, devC_L[1], 0.0f); | |
+ } | |
+ if (targetCell.y == devC_num[1]) { | |
+ targetCell.y = 0; | |
+ distmod -= MAKE_FLOAT3(0.0f, devC_L[1], 0.0f); | |
+ } | |
+ | |
+ } else if (devC_periodic == 2) { | |
+ | |
+ // Periodic x-boundary | |
+ if (targetCell.x < 0) { | |
+ targetCell.x = devC_num[0] - 1; | |
+ distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ if (targetCell.x == devC_num[0]) { | |
+ targetCell.x = 0; | |
+ distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ | |
+ // Hande out-of grid cases on y-axis | |
+ if (targetCell.y < 0 || targetCell.y == devC_num[1]) | |
+ return; | |
+ | |
+ } else { | |
+ | |
+ // Hande out-of grid cases on x- and y-axes | |
+ if (targetCell.x < 0 || targetCell.x == devC_num[0]) | |
+ return; | |
+ if (targetCell.y < 0 || targetCell.y == devC_num[1]) | |
+ return; | |
+ } | |
+ | |
+ // Handle out-of-grid cases on z-axis | |
+ if (targetCell.z < 0 || targetCell.z == devC_num[2]) | |
+ return; | |
+ | |
+ | |
+ //// Check and process particle-particle collisions | |
+ | |
+ // Calculate linear cell ID | |
+ unsigned int cellID = targetCell.x | |
+ + __umul24(targetCell.y, devC_num[0]) | |
+ + __umul24(__umul24(devC_num[0], | |
+ devC_num[1]), | |
+ targetCell.z); | |
+ | |
+ // Lowest particle index in cell | |
+ unsigned int startIdx = dev_cellStart[cellID]; | |
+ | |
+ // Make sure cell is not empty | |
+ if (startIdx != 0xffffffff) { | |
+ | |
+ // Highest particle index in cell + 1 | |
+ unsigned int endIdx = dev_cellEnd[cellID]; | |
+ | |
+ // Iterate over cell particles | |
+ for (unsigned int idx_b = startIdx; idx_b<endIdx; ++idx_b) { | |
+ if (idx_b != idx_a) { // Do not collide particle with itself | |
+ | |
+ // Fetch position and velocity of particle B. | |
+ Float4 x_b = dev_x_sorted[idx_b]; | |
+ Float radius_b = dev_radius_sorted[idx_b]; | |
+ Float kappa = devC_kappa; | |
+ | |
+ // Distance between particle centers (Float4 -> Float3) | |
+ Float3 x_ab = MAKE_FLOAT3(x_a.x - x_b.x, | |
+ x_a.y - x_b.y, | |
+ x_a.z - x_b.z); | |
+ | |
+ // Adjust interparticle vector if periodic boundary/boundaries | |
+ // are crossed | |
+ x_ab += distmod; | |
+ | |
+ Float x_ab_length = length(x_ab); | |
+ | |
+ // Distance between particle perimeters | |
+ Float delta_ab = x_ab_length - (radius_a + radius_b); | |
+ | |
+ // Check for particle overlap | |
+ if (delta_ab < 0.0f) { | |
+ contactLinearViscous(N, T, es_dot, p, | |
+ idx_a, idx_b, | |
+ dev_vel_sorted, | |
+ dev_angvel_sorted, | |
+ radius_a, radius_b, | |
+ x_ab, x_ab_length, | |
+ delta_ab, kappa); | |
+ } else if (delta_ab < devC_db) { | |
+ // Check wether particle distance satisfies the capillary bond dista… | |
+ capillaryCohesion_exp(N, radius_a, radius_b, delta_ab, | |
+ x_ab, x_ab_length, kappa); | |
+ } | |
+ | |
+ // Check wether particles are bonded together | |
+ /*if (bonds.x == idx_b || bonds.y == idx_b || | |
+ bonds.z == idx_b || bonds.w == idx_b) { | |
+ bondLinear(N, T, es_dot, p, | |
+ idx_a, idx_b, | |
+ dev_x_sorted, dev_vel_sorted, | |
+ dev_angvel_sorted, | |
+ radius_a, radius_b, | |
+ x_ab, x_ab_length, | |
+ delta_ab); | |
+ }*/ | |
+ | |
+ } // Do not collide particle with itself end | |
+ } // Iterate over cell particles end | |
+ } // Check wether cell is empty end | |
+} // End of overlapsInCell(...) | |
+ | |
+// Find overlaps between particle no. 'idx' and particles in cell 'gridpos' | |
+// Write the indexes of the overlaps in array contacts[] | |
+// Kernel executed on device, and callable from device only. | |
+__device__ void findContactsInCell(int3 targetCell, | |
+ unsigned int idx_a, | |
+ Float4 x_a, Float radius_a, | |
+ Float4* dev_x_sorted, | |
+ Float* dev_radius_sorted, | |
+ unsigned int* dev_cellStart, | |
+ unsigned int* dev_cellEnd, | |
+ unsigned int* dev_gridParticleIndex, | |
+ int* nc, | |
+ unsigned int* dev_contacts, | |
+ Float4* dev_distmod) | |
+{ | |
+ // Variable containing modifier for interparticle | |
+ // vector, if it crosses a periodic boundary | |
+ Float3 distmod = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Check whether x- and y boundaries are to be treated as periodic | |
+ // 1: x- and y boundaries periodic | |
+ // 2: x boundaries periodic | |
+ if (devC_periodic == 1) { | |
+ | |
+ // Periodic x-boundary | |
+ if (targetCell.x < 0) { | |
+ targetCell.x = devC_num[0] - 1; | |
+ distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ if (targetCell.x == devC_num[0]) { | |
+ targetCell.x = 0; | |
+ distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ | |
+ // Periodic y-boundary | |
+ if (targetCell.y < 0) { | |
+ targetCell.y = devC_num[1] - 1; | |
+ distmod += MAKE_FLOAT3(0.0f, devC_L[1], 0.0f); | |
+ } | |
+ if (targetCell.y == devC_num[1]) { | |
+ targetCell.y = 0; | |
+ distmod -= MAKE_FLOAT3(0.0f, devC_L[1], 0.0f); | |
+ } | |
+ | |
+ } else if (devC_periodic == 2) { | |
+ | |
+ // Periodic x-boundary | |
+ if (targetCell.x < 0) { | |
+ targetCell.x = devC_num[0] - 1; | |
+ distmod += MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ if (targetCell.x == devC_num[0]) { | |
+ targetCell.x = 0; | |
+ distmod -= MAKE_FLOAT3(devC_L[0], 0.0f, 0.0f); | |
+ } | |
+ | |
+ // Hande out-of grid cases on y-axis | |
+ if (targetCell.y < 0 || targetCell.y == devC_num[1]) | |
+ return; | |
+ | |
+ } else { | |
+ | |
+ // Hande out-of grid cases on x- and y-axes | |
+ if (targetCell.x < 0 || targetCell.x == devC_num[0]) | |
+ return; | |
+ if (targetCell.y < 0 || targetCell.y == devC_num[1]) | |
+ return; | |
+ } | |
+ | |
+ // Handle out-of-grid cases on z-axis | |
+ if (targetCell.z < 0 || targetCell.z == devC_num[2]) | |
+ return; | |
+ | |
+ | |
+ //// Check and process particle-particle collisions | |
+ | |
+ // Calculate linear cell ID | |
+ unsigned int cellID = targetCell.x | |
+ + __umul24(targetCell.y, devC_num[0]) | |
+ + __umul24(__umul24(devC_num[0], | |
+ devC_num[1]), | |
+ targetCell.z); | |
+ | |
+ // Lowest particle index in cell | |
+ unsigned int startIdx = dev_cellStart[cellID]; | |
+ | |
+ // Make sure cell is not empty | |
+ if (startIdx != 0xffffffff) { | |
+ | |
+ // Highest particle index in cell + 1 | |
+ unsigned int endIdx = dev_cellEnd[cellID]; | |
+ | |
+ // Read the original index of particle A | |
+ unsigned int idx_a_orig = dev_gridParticleIndex[idx_a]; | |
+ | |
+ // Iterate over cell particles | |
+ for (unsigned int idx_b = startIdx; idx_b<endIdx; ++idx_b) { | |
+ if (idx_b != idx_a) { // Do not collide particle with itself | |
+ | |
+ // Fetch position and radius of particle B. | |
+ Float4 x_b = dev_x_sorted[idx_b]; | |
+ Float radius_b = dev_radius_sorted[idx_b]; | |
+ | |
+ // Read the original index of particle B | |
+ unsigned int idx_b_orig = dev_gridParticleIndex[idx_b]; | |
+ | |
+ __syncthreads(); | |
+ | |
+ // Distance between particle centers (Float4 -> Float3) | |
+ Float3 x_ab = MAKE_FLOAT3(x_a.x - x_b.x, | |
+ x_a.y - x_b.y, | |
+ x_a.z - x_b.z); | |
+ | |
+ // Adjust interparticle vector if periodic boundary/boundaries | |
+ // are crossed | |
+ x_ab += distmod; | |
+ | |
+ Float x_ab_length = length(x_ab); | |
+ | |
+ // Distance between particle perimeters | |
+ Float delta_ab = x_ab_length - (radius_a + radius_b); | |
+ | |
+ // Check for particle overlap | |
+ if (delta_ab < 0.0f) { | |
+ | |
+ // If the particle is not yet registered in the contact list, | |
+ // use the next position in the array | |
+ int cpos = *nc; | |
+ unsigned int cidx; | |
+ | |
+ // Find out, if particle is already registered in contact list | |
+ for (int i=0; i<devC_nc; ++i) { | |
+ __syncthreads(); | |
+ cidx = dev_contacts[(unsigned int)(idx_a_orig*devC_nc+i)]; | |
+ if (cidx == idx_b_orig) | |
+ cpos = i; | |
+ } | |
+ | |
+ __syncthreads(); | |
+ | |
+ // Write the particle index to the relevant position, | |
+ // no matter if it already is there or not (concurrency of write) | |
+ dev_contacts[(unsigned int)(idx_a_orig*devC_nc+cpos)] = idx_b_orig; | |
+ | |
+ | |
+ // Write the interparticle vector and radius of particle B | |
+ //dev_x_ab_r_b[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_Float4… | |
+ dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = MAKE_FLOAT4(d… | |
+ | |
+ // Increment contact counter | |
+ ++*nc; | |
+ } | |
+ | |
+ // Write the inter-particle position vector correction and radius of p… | |
+ //dev_distmod[(unsigned int)(idx_a_orig*devC_nc+cpos)] = make_Float4(d… | |
+ | |
+ // Check wether particles are bonded together | |
+ /*if (bonds.x == idx_b || bonds.y == idx_b || | |
+ bonds.z == idx_b || bonds.w == idx_b) { | |
+ bondLinear(N, T, es_dot, p, | |
+ idx_a, idx_b, | |
+ dev_x_sorted, dev_vel_sorted, | |
+ dev_angvel_sorted, | |
+ radius_a, radius_b, | |
+ x_ab, x_ab_length, | |
+ delta_ab); | |
+ }*/ | |
+ | |
+ } // Do not collide particle with itself end | |
+ } // Iterate over cell particles end | |
+ } // Check wether cell is empty end | |
+} // End of findContactsInCell(...) | |
+ | |
+ | |
+// For a single particle: | |
+// Search for neighbors to particle 'idx' inside the 27 closest cells, | |
+// and save the contact pairs in global memory. | |
+__global__ void topology(unsigned int* dev_cellStart, | |
+ unsigned int* dev_cellEnd, // Input: Particles in… | |
+ unsigned int* dev_gridParticleIndex, // Input: Unsort… | |
+ Float4* dev_x_sorted, Float* dev_radius_sorted, | |
+ unsigned int* dev_contacts, | |
+ Float4* dev_distmod) | |
+{ | |
+ // Thread index equals index of particle A | |
+ unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x; | |
+ if (idx_a < devC_np) { | |
+ // Fetch particle data in global read | |
+ Float4 x_a = dev_x_sorted[idx_a]; | |
+ Float radius_a = dev_radius_sorted[idx_a]; | |
+ | |
+ // Count the number of contacts in this time step | |
+ int nc = 0; | |
+ | |
+ // Grid position of host and neighbor cells in uniform, cubic grid | |
+ int3 gridPos; | |
+ int3 targetPos; | |
+ | |
+ // Calculate cell address in grid from position of particle | |
+ gridPos.x = floor((x_a.x - devC_origo[0]) / (devC_L[0]/devC_num[0])); | |
+ gridPos.y = floor((x_a.y - devC_origo[1]) / (devC_L[1]/devC_num[1])); | |
+ gridPos.z = floor((x_a.z - devC_origo[2]) / (devC_L[2]/devC_num[2])); | |
+ | |
+ // Find overlaps between particle no. idx and all particles | |
+ // from its own cell + 26 neighbor cells | |
+ for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis | |
+ for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis | |
+ for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis | |
+ targetPos = gridPos + make_int3(x_dim, y_dim, z_dim); | |
+ findContactsInCell(targetPos, idx_a, x_a, radius_a, | |
+ dev_x_sorted, dev_radius_sorted, | |
+ dev_cellStart, dev_cellEnd, | |
+ dev_gridParticleIndex, | |
+ &nc, dev_contacts, dev_distmod); | |
+ } | |
+ } | |
+ } | |
+ } | |
+} // End of topology(...) | |
+ | |
+ | |
+// For a single particle: | |
+// Search for neighbors to particle 'idx' inside the 27 closest cells, | |
+// and compute the resulting normal- and shear force on it. | |
+// Collide with top- and bottom walls, save resulting force on upper wall. | |
+// Kernel is executed on device, and is callable from host only. | |
+__global__ void interact(unsigned int* dev_gridParticleIndex, // Input: Unsort… | |
+ unsigned int* dev_cellStart, | |
+ unsigned int* dev_cellEnd, | |
+ Float4* dev_x, Float* dev_radius, | |
+ Float4* dev_x_sorted, Float* dev_radius_sorted, | |
+ Float4* dev_vel_sorted, Float4* dev_angvel_sorted, | |
+ Float4* dev_vel, Float4* dev_angvel, | |
+ Float4* dev_force, Float4* dev_torque, | |
+ Float* dev_es_dot, Float* dev_es, Float* dev_p, | |
+ Float4* dev_w_nx, Float4* dev_w_mvfd, | |
+ Float* dev_w_force, //uint4* dev_bonds_sorted, | |
+ unsigned int* dev_contacts, | |
+ Float4* dev_distmod, | |
+ Float4* dev_delta_t) | |
+{ | |
+ // Thread index equals index of particle A | |
+ unsigned int idx_a = __umul24(blockIdx.x, blockDim.x) + threadIdx.x; | |
+ | |
+ if (idx_a < devC_np) { | |
+ | |
+ // Fetch particle data in global read | |
+ unsigned int idx_a_orig = dev_gridParticleIndex[idx_a]; | |
+ Float4 x_a = dev_x_sorted[idx_a]; | |
+ Float radius_a = dev_radius_sorted[idx_a]; | |
+ | |
+ // Fetch wall data in global read | |
+ Float4 w_up_nx = dev_w_nx[0]; | |
+ Float4 w_up_mvfd = dev_w_mvfd[0]; | |
+ | |
+ // Fetch world dimensions in constant memory read | |
+ Float3 origo = MAKE_FLOAT3(devC_origo[0], | |
+ devC_origo[1], | |
+ devC_origo[2]); | |
+ Float3 L = MAKE_FLOAT3(devC_L[0], | |
+ devC_L[1], | |
+ devC_L[2]); | |
+ | |
+ // Index of particle which is bonded to particle A. | |
+ // The index is equal to the particle no (p.np) | |
+ // if particle A is bond-less. | |
+ //uint4 bonds = dev_bonds_sorted[idx_a]; | |
+ | |
+ // Initiate shear friction loss rate at 0.0 | |
+ Float es_dot = 0.0f; | |
+ | |
+ // Initiate pressure on particle at 0.0 | |
+ Float p = 0.0f; | |
+ | |
+ // Allocate memory for temporal force/torque vector values | |
+ Float3 N = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ Float3 T = MAKE_FLOAT3(0.0f, 0.0f, 0.0f); | |
+ | |
+ // Apply linear elastic, frictional contact model to registered contacts | |
+ if (devC_shearmodel == 2) { | |
+ unsigned int idx_b_orig, mempos; | |
+ Float delta_n, x_ab_length, radius_b; | |
+ Float3 x_ab; | |
+ Float4 x_b, distmod; | |
+ Float4 vel_a = dev_vel_sorted[idx_a]; | |
+ Float4 angvel4_a = dev_angvel_sorted[idx_a]; | |
+ Float3 angvel_a = MAKE_FLOAT3(angvel4_a.x, angvel4_a.y, angvel4_a.z); | |
+ | |
+ // Loop over all possible contacts, and remove contacts | |
+ // that no longer are valid (delta_n > 0.0) | |
+ for (int i = 0; i<(int)devC_nc; ++i) { | |
+ mempos = (unsigned int)(idx_a_orig * devC_nc + i); | |
+ __syncthreads(); | |
+ idx_b_orig = dev_contacts[mempos]; | |
+ distmod = dev_distmod[mempos]; | |
+ x_b = dev_x[idx_b_orig]; | |
+ //radius_b = dev_radius[idx_b_orig]; | |
+ radius_b = distmod.w; | |
+ | |
+ // Inter-particle vector, corrected for periodic boundaries | |
+ x_ab = MAKE_FLOAT3(x_a.x - x_b.x + distmod.x, | |
+ x_a.y - x_b.y + distmod.y, | |
+ x_a.z - x_b.z + distmod.z); | |
+ | |
+ x_ab_length = length(x_ab); | |
+ delta_n = x_ab_length - (radius_a + radius_b); | |
+ | |
+ | |
+ if (idx_b_orig != (unsigned int)devC_np) { | |
+ | |
+ // Process collision if the particles are overlapping | |
+ if (delta_n < 0.0f) { | |
+ //cuPrintf("\nProcessing contact, idx_a_orig = %u, idx_b_orig = %u… | |
+ // idx_a_orig, idx_b_orig, i, delta_n); | |
+ contactLinear(&N, &T, &es_dot, &p, | |
+ idx_a_orig, | |
+ idx_b_orig, | |
+ vel_a, | |
+ dev_vel, | |
+ angvel_a, | |
+ dev_angvel, | |
+ radius_a, radius_b, | |
+ x_ab, x_ab_length, | |
+ delta_n, dev_delta_t, | |
+ mempos); | |
+ } else { | |
+ __syncthreads(); | |
+ // Remove this contact (there is no particle with index=np) | |
+ dev_contacts[mempos] = devC_np; | |
+ // Zero sum of shear displacement in this position | |
+ dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ } | |
+ } else { | |
+ __syncthreads(); | |
+ dev_delta_t[mempos] = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ } | |
+ } // Contact loop end | |
+ | |
+ } else if (devC_shearmodel == 1) { | |
+ | |
+ int3 gridPos; | |
+ int3 targetPos; | |
+ | |
+ // Calculate address in grid from position | |
+ gridPos.x = floor((x_a.x - devC_origo[0]) / (devC_L[0]/devC_num[0])); | |
+ gridPos.y = floor((x_a.y - devC_origo[1]) / (devC_L[1]/devC_num[1])); | |
+ gridPos.z = floor((x_a.z - devC_origo[2]) / (devC_L[2]/devC_num[2])); | |
+ | |
+ // Find overlaps between particle no. idx and all particles | |
+ // from its own cell + 26 neighbor cells. | |
+ // Calculate resulting normal- and shear-force components and | |
+ // torque for the particle on the base of contactLinearViscous() | |
+ for (int z_dim=-1; z_dim<2; ++z_dim) { // z-axis | |
+ for (int y_dim=-1; y_dim<2; ++y_dim) { // y-axis | |
+ for (int x_dim=-1; x_dim<2; ++x_dim) { // x-axis | |
+ targetPos = gridPos + make_int3(x_dim, y_dim, z_dim); | |
+ overlapsInCell(targetPos, idx_a, x_a, radius_a, | |
+ &N, &T, &es_dot, &p, | |
+ dev_x_sorted, dev_radius_sorted, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ dev_cellStart, dev_cellEnd, | |
+ dev_w_nx, dev_w_mvfd); | |
+ } | |
+ } | |
+ } | |
+ | |
+ } | |
+ | |
+ //// Interact with walls | |
+ Float delta_w; // Overlap distance | |
+ Float3 w_n; // Wall surface normal | |
+ Float w_force = 0.0f; // Force on wall from particle A | |
+ | |
+ // Upper wall (idx 0) | |
+ delta_w = w_up_nx.w - (x_a.z + radius_a); | |
+ w_n = MAKE_FLOAT3(0.0f, 0.0f, -1.0f); | |
+ if (delta_w < 0.0f) { | |
+ w_force = contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, w_up_mvfd.y); | |
+ } | |
+ | |
+ // Lower wall (force on wall not stored) | |
+ delta_w = x_a.z - radius_a - origo.z; | |
+ w_n = MAKE_FLOAT3(0.0f, 0.0f, 1.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ | |
+ if (devC_periodic == 0) { | |
+ | |
+ // Left wall | |
+ delta_w = x_a.x - radius_a - origo.x; | |
+ w_n = MAKE_FLOAT3(1.0f, 0.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ // Right wall | |
+ delta_w = L.x - (x_a.x + radius_a); | |
+ w_n = MAKE_FLOAT3(-1.0f, 0.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ // Front wall | |
+ delta_w = x_a.y - radius_a - origo.y; | |
+ w_n = MAKE_FLOAT3(0.0f, 1.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ // Back wall | |
+ delta_w = L.y - (x_a.y + radius_a); | |
+ w_n = MAKE_FLOAT3(0.0f, -1.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ } else if (devC_periodic == 2) { | |
+ | |
+ // Front wall | |
+ delta_w = x_a.y - radius_a - origo.y; | |
+ w_n = MAKE_FLOAT3(0.0f, 1.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ | |
+ // Back wall | |
+ delta_w = L.y - (x_a.y + radius_a); | |
+ w_n = MAKE_FLOAT3(0.0f, -1.0f, 0.0f); | |
+ if (delta_w < 0.0f) { | |
+ (void)contactLinear_wall(&N, &T, &es_dot, &p, idx_a, radius_a, | |
+ dev_vel_sorted, dev_angvel_sorted, | |
+ w_n, delta_w, 0.0f); | |
+ } | |
+ } | |
+ | |
+ | |
+ // Hold threads for coalesced write | |
+ __syncthreads(); | |
+ | |
+ // Write force to unsorted position | |
+ unsigned int orig_idx = dev_gridParticleIndex[idx_a]; | |
+ dev_force[orig_idx] = MAKE_FLOAT4(N.x, N.y, N.z, 0.0f); | |
+ dev_torque[orig_idx] = MAKE_FLOAT4(T.x, T.y, T.z, 0.0f); | |
+ dev_es_dot[orig_idx] = es_dot; | |
+ dev_es[orig_idx] += es_dot * devC_dt; | |
+ dev_p[orig_idx] = p; | |
+ dev_w_force[orig_idx] = w_force; | |
+ } | |
+} // End of interact(...) | |
+ | |
+ | |
+#endif | |
diff --git a/src/integration.cuh b/src/integration.cuh | |
t@@ -0,0 +1,207 @@ | |
+#ifndef INTEGRATION_CUH_ | |
+#define INTEGRATION_CUH_ | |
+ | |
+// integration.cuh | |
+// Functions responsible for temporal integration | |
+ | |
+ | |
+// Second order integration scheme based on Taylor expansion of particle kinem… | |
+// Kernel executed on device, and callable from host only. | |
+__global__ void integrate(Float4* dev_x_sorted, Float4* dev_vel_sorted, // Inp… | |
+ Float4* dev_angvel_sorted, Float* dev_radius_sorted,… | |
+ Float4* dev_x, Float4* dev_vel, Float4* dev_angvel, … | |
+ Float4* dev_force, Float4* dev_torque, // Input | |
+ unsigned int* dev_gridParticleIndex) // Input: Sorte… | |
+{ | |
+ unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
+ | |
+ if (idx < devC_np) { // Condition prevents block size error | |
+ | |
+ // Copy data to temporary arrays to avoid any potential read-after-write, | |
+ // write-after-read, or write-after-write hazards. | |
+ unsigned int orig_idx = dev_gridParticleIndex[idx]; | |
+ Float4 force = dev_force[orig_idx]; | |
+ Float4 torque = dev_torque[orig_idx]; | |
+ | |
+ // Initialize acceleration vectors to zero | |
+ Float4 acc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ Float4 angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ | |
+ // Fetch particle position and velocity values from global read | |
+ Float4 x = dev_x_sorted[idx]; | |
+ Float4 vel = dev_vel_sorted[idx]; | |
+ Float4 angvel = dev_angvel_sorted[idx]; | |
+ Float radius = dev_radius_sorted[idx]; | |
+ | |
+ // Coherent read from constant memory to registers | |
+ Float dt = devC_dt; | |
+ Float3 origo = MAKE_FLOAT3(devC_origo[0], devC_origo[1], devC_origo[2]); | |
+ Float3 L = MAKE_FLOAT3(devC_L[0], devC_L[1], devC_L[2]); | |
+ Float rho = devC_rho; | |
+ | |
+ // Particle mass | |
+ Float m = 4.0f/3.0f * PI * radius*radius*radius * rho; | |
+ | |
+ // Update linear acceleration of particle | |
+ acc.x = force.x / m; | |
+ acc.y = force.y / m; | |
+ acc.z = force.z / m; | |
+ | |
+ // Update angular acceleration of particle | |
+ // (angacc = (total moment)/Intertia, intertia = 2/5*m*r^2) | |
+ angacc.x = torque.x * 1.0f / (2.0f/5.0f * m * radius*radius); | |
+ angacc.y = torque.y * 1.0f / (2.0f/5.0f * m * radius*radius); | |
+ angacc.z = torque.z * 1.0f / (2.0f/5.0f * m * radius*radius); | |
+ | |
+ // Add gravity | |
+ acc.x += devC_g[0]; | |
+ acc.y += devC_g[1]; | |
+ acc.z += devC_g[2]; | |
+ | |
+ // Check if particle has a fixed horizontal velocity | |
+ if (vel.w > 0.0f) { | |
+ | |
+ // Zero horizontal acceleration and disable | |
+ // gravity to counteract segregation. | |
+ // Particles may move in the z-dimension, | |
+ // to allow for dilation. | |
+ acc.x = 0.0f; | |
+ acc.y = 0.0f; | |
+ acc.z -= devC_g[2]; | |
+ | |
+ // Zero the angular acceleration | |
+ angacc = MAKE_FLOAT4(0.0f, 0.0f, 0.0f, 0.0f); | |
+ } | |
+ | |
+ // Update angular velocity | |
+ angvel.x += angacc.x * dt; | |
+ angvel.y += angacc.y * dt; | |
+ angvel.z += angacc.z * dt; | |
+ | |
+ // Update linear velocity | |
+ vel.x += acc.x * dt; | |
+ vel.y += acc.y * dt; | |
+ vel.z += acc.z * dt; | |
+ | |
+ // Update position. First-order Euler's scheme: | |
+ //x.x += vel.x * dt; | |
+ //x.y += vel.y * dt; | |
+ //x.z += vel.z * dt; | |
+ | |
+ // Update position. Second-order scheme based on Taylor expansion | |
+ // (greater accuracy than the first-order Euler's scheme) | |
+ x.x += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
+ x.y += vel.y * dt + (acc.y * dt*dt)/2.0f; | |
+ x.z += vel.z * dt + (acc.z * dt*dt)/2.0f; | |
+ | |
+ // Add x-displacement for this time step to | |
+ // sum of x-displacements | |
+ x.w += vel.x * dt + (acc.x * dt*dt)/2.0f; | |
+ | |
+ // Move particle across boundary if it is periodic | |
+ if (devC_periodic == 1) { | |
+ if (x.x < origo.x) | |
+ x.x += L.x; | |
+ if (x.x > L.x) | |
+ x.x -= L.x; | |
+ if (x.y < origo.y) | |
+ x.y += L.y; | |
+ if (x.y > L.y) | |
+ x.y -= L.y; | |
+ } else if (devC_periodic == 2) { | |
+ if (x.x < origo.x) | |
+ x.x += L.x; | |
+ if (x.x > L.x) | |
+ x.x -= L.x; | |
+ } | |
+ | |
+ // Hold threads for coalesced write | |
+ __syncthreads(); | |
+ | |
+ // Store data in global memory at original, pre-sort positions | |
+ dev_angvel[orig_idx] = angvel; | |
+ dev_vel[orig_idx] = vel; | |
+ dev_x[orig_idx] = x; | |
+ } | |
+} // End of integrate(...) | |
+ | |
+ | |
+// Reduce wall force contributions from particles to a single value per wall | |
+__global__ void summation(Float* in, Float *out) | |
+{ | |
+ __shared__ Float cache[256]; | |
+ unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; | |
+ unsigned int cacheIdx = threadIdx.x; | |
+ | |
+ Float temp = 0.0f; | |
+ while (idx < devC_np) { | |
+ temp += in[idx]; | |
+ idx += blockDim.x * gridDim.x; | |
+ } | |
+ | |
+ // Set the cache values | |
+ cache[cacheIdx] = temp; | |
+ | |
+ __syncthreads(); | |
+ | |
+ // For reductions, threadsPerBlock must be a power of two | |
+ // because of the following code | |
+ unsigned int i = blockDim.x/2; | |
+ while (i != 0) { | |
+ if (cacheIdx < i) | |
+ cache[cacheIdx] += cache[cacheIdx + i]; | |
+ __syncthreads(); | |
+ i /= 2; | |
+ } | |
+ | |
+ // Write sum for block to global memory | |
+ if (cacheIdx == 0) | |
+ out[blockIdx.x] = cache[0]; | |
+} | |
+ | |
+// Update wall positions | |
+__global__ void integrateWalls(Float4* dev_w_nx, | |
+ Float4* dev_w_mvfd, | |
+ Float* dev_w_force_partial, | |
+ unsigned int blocksPerGrid) | |
+{ | |
+ unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
+ | |
+ if (idx < devC_nw) { // Condition prevents block size error | |
+ | |
+ // Copy data to temporary arrays to avoid any potential read-after-write, | |
+ // write-after-read, or write-after-write hazards. | |
+ Float4 w_nx = dev_w_nx[idx]; | |
+ Float4 w_mvfd = dev_w_mvfd[idx]; | |
+ | |
+ // Find the final sum of forces on wall | |
+ w_mvfd.z = 0.0f; | |
+ for (int i=0; i<blocksPerGrid; ++i) { | |
+ w_mvfd.z += dev_w_force_partial[i]; | |
+ } | |
+ | |
+ Float dt = devC_dt; | |
+ | |
+ // Normal load = Deviatoric stress times wall surface area, | |
+ // directed downwards. | |
+ Float N = -w_mvfd.w*devC_L[0]*devC_L[1]; | |
+ | |
+ // Calculate resulting acceleration of wall | |
+ // (Wall mass is stored in w component of position Float4) | |
+ Float acc = (w_mvfd.z+N)/w_mvfd.x; | |
+ | |
+ // Update linear velocity | |
+ w_mvfd.y += acc * dt; | |
+ | |
+ // Update position. Second-order scheme based on Taylor expansion | |
+ // (greater accuracy than the first-order Euler's scheme) | |
+ w_nx.w += w_mvfd.y * dt + (acc * dt*dt)/2.0f; | |
+ | |
+ // Store data in global memory | |
+ dev_w_nx[idx] = w_nx; | |
+ dev_w_mvfd[idx] = w_mvfd; | |
+ } | |
+} // End of integrateWalls(...) | |
+ | |
+ | |
+#endif | |
diff --git a/src/sorting.cuh b/src/sorting.cuh | |
t@@ -0,0 +1,127 @@ | |
+// Returns the cellID containing the particle, based cubic grid | |
+// See Bayraktar et al. 2009 | |
+// Kernel is executed on the device, and is callable from the device only | |
+__device__ int calcCellID(Float3 x) | |
+{ | |
+ unsigned int i_x, i_y, i_z; | |
+ | |
+ // Calculate integral coordinates: | |
+ i_x = floor((x.x - devC_origo[0]) / (devC_L[0]/devC_num[0])); | |
+ i_y = floor((x.y - devC_origo[1]) / (devC_L[1]/devC_num[1])); | |
+ i_z = floor((x.z - devC_origo[2]) / (devC_L[2]/devC_num[2])); | |
+ | |
+ // Integral coordinates are converted to 1D coordinate: | |
+ return __umul24(__umul24(i_z, devC_num[1]), | |
+ devC_num[0]) | |
+ + __umul24(i_y, devC_num[0]) + i_x; | |
+ | |
+} // End of calcCellID(...) | |
+ | |
+ | |
+// Calculate hash value for each particle, based on position in grid. | |
+// Kernel executed on device, and callable from host only. | |
+__global__ void calcParticleCellID(unsigned int* dev_gridParticleCellID, | |
+ unsigned int* dev_gridParticleIndex, | |
+ Float4* dev_x) | |
+{ | |
+ //unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; // Thread id | |
+ unsigned int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x; | |
+ | |
+ if (idx < devC_np) { // Condition prevents block size error | |
+ | |
+ //volatile Float4 x = dev_x[idx]; // Ensure coalesced read | |
+ Float4 x = dev_x[idx]; // Ensure coalesced read | |
+ | |
+ unsigned int cellID = calcCellID(MAKE_FLOAT3(x.x, x.y, x.z)); | |
+ | |
+ // Store values | |
+ dev_gridParticleCellID[idx] = cellID; | |
+ dev_gridParticleIndex[idx] = idx; | |
+ | |
+ } | |
+} // End of calcParticleCellID(...) | |
+ | |
+ | |
+// Reorder particle data into sorted order, and find the start and end particl… | |
+// of each cell in the sorted hash array. | |
+// Kernel executed on device, and callable from host only. | |
+__global__ void reorderArrays(unsigned int* dev_cellStart, unsigned int* dev_c… | |
+ unsigned int* dev_gridParticleCellID, | |
+ unsigned int* dev_gridParticleIndex, | |
+ Float4* dev_x, Float4* dev_vel, | |
+ Float4* dev_angvel, Float* dev_radius, | |
+ //uint4* dev_bonds, | |
+ Float4* dev_x_sorted, Float4* dev_vel_sorted, | |
+ Float4* dev_angvel_sorted, Float* dev_radius_sor… | |
+ //uint4* dev_bonds_sorted) | |
+{ | |
+ | |
+ // Create hash array in shared on-chip memory. The size of the array | |
+ // (threadsPerBlock + 1) is determined at launch time (extern notation). | |
+ extern __shared__ unsigned int shared_data[]; | |
+ | |
+ // Thread index in block | |
+ unsigned int tidx = threadIdx.x; | |
+ | |
+ // Thread index in grid | |
+ unsigned int idx = threadIdx.x + blockIdx.x * blockDim.x; | |
+ //unsigned int idx = __umul24(blockIdx.x, blockDim.x) + threadIdx.x; | |
+ | |
+ // CellID hash value of particle idx | |
+ unsigned int cellID; | |
+ | |
+ // Read cellID data and store it in shared memory (shared_data) | |
+ if (idx < devC_np) { // Condition prevents block size error | |
+ cellID = dev_gridParticleCellID[idx]; | |
+ | |
+ // Load hash data into shared memory, allowing access to neighbor particle… | |
+ shared_data[tidx+1] = cellID; | |
+ | |
+ if (idx > 0 && tidx == 0) { | |
+ // First thread in block must load neighbor particle hash | |
+ shared_data[0] = dev_gridParticleCellID[idx-1]; | |
+ } | |
+ } | |
+ | |
+ // Pause completed threads in this block, until all | |
+ // threads are done loading data into shared memory | |
+ __syncthreads(); | |
+ | |
+ // Find lowest and highest particle index in each cell | |
+ if (idx < devC_np) { // Condition prevents block size error | |
+ // If this particle has a different cell index to the previous particle, i… | |
+ // particle in the cell -> Store the index of this particle in the cell. | |
+ // The previous particle must be the last particle in the previous cell. | |
+ if (idx == 0 || cellID != shared_data[tidx]) { | |
+ dev_cellStart[cellID] = idx; | |
+ if (idx > 0) | |
+ dev_cellEnd[shared_data[tidx]] = idx; | |
+ } | |
+ | |
+ // Check wether the thread is the last one | |
+ if (idx == (devC_np - 1)) | |
+ dev_cellEnd[cellID] = idx + 1; | |
+ | |
+ | |
+ // Use the sorted index to reorder the position and velocity data | |
+ unsigned int sortedIndex = dev_gridParticleIndex[idx]; | |
+ | |
+ // Fetch from global read | |
+ Float4 x = dev_x[sortedIndex]; | |
+ Float4 vel = dev_vel[sortedIndex]; | |
+ Float4 angvel = dev_angvel[sortedIndex]; | |
+ Float radius = dev_radius[sortedIndex]; | |
+ //uint4 bonds = dev_bonds[sortedIndex]; | |
+ | |
+ __syncthreads(); | |
+ // Write sorted data to global memory | |
+ dev_x_sorted[idx] = x; | |
+ dev_vel_sorted[idx] = vel; | |
+ dev_angvel_sorted[idx] = angvel; | |
+ dev_radius_sorted[idx] = radius; | |
+ //dev_bonds_sorted[idx] = bonds; | |
+ } | |
+} // End of reorderArrays(...) | |
+ | |
+ | |
+ | |
diff --git a/src/vector_arithmetic.h b/src/vector_arithmetic.h | |
t@@ -0,0 +1,994 @@ | |
+#ifndef VECTOR_ARITHMETIC_H_ | |
+#define VECTOR_ARITHMETIC_H_ | |
+ | |
+#include "cuda_runtime.h" | |
+#include "datatypes.h" | |
+ | |
+typedef unsigned int uint; | |
+typedef unsigned short ushort; | |
+ | |
+#ifndef __CUDACC__ | |
+#include <math.h> | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// host implementations of CUDA functions | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline Float fminf(Float a, Float b) | |
+{ | |
+ return a < b ? a : b; | |
+} | |
+ | |
+inline Float fmaxf(Float a, Float b) | |
+{ | |
+ return a > b ? a : b; | |
+} | |
+ | |
+inline int max(int a, int b) | |
+{ | |
+ return a > b ? a : b; | |
+} | |
+ | |
+inline int min(int a, int b) | |
+{ | |
+ return a < b ? a : b; | |
+} | |
+ | |
+inline Float rsqrtf(Float x) | |
+{ | |
+ return 1.0f / sqrtf(x); | |
+} | |
+#endif | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// constructors | |
+//////////////////////////////////////////////////////////////////////////////… | |
+/*inline __host__ __device__ Float3 MAKE_FLOAT3(Float s) | |
+{ | |
+ return MAKE_FLOAT3(s, s, s); | |
+} | |
+inline __host__ __device__ Float3 MAKE_FLOAT3(Float4 a) | |
+{ | |
+ return MAKE_FLOAT3(a.x, a.y, a.z); | |
+} | |
+inline __host__ __device__ Float3 MAKE_FLOAT3(int3 a) | |
+{ | |
+ return MAKE_FLOAT3(Float(a.x), Float(a.y), Float(a.z)); | |
+} | |
+inline __host__ __device__ Float3 MAKE_FLOAT3(uint3 a) | |
+{ | |
+ return MAKE_FLOAT3(Float(a.x), Float(a.y), Float(a.z)); | |
+} | |
+ | |
+inline __host__ __device__ Float4 MAKE_FLOAT4(Float s) | |
+{ | |
+ return MAKE_FLOAT4(s, s, s, s); | |
+} | |
+inline __host__ __device__ Float4 MAKE_FLOAT4(Float3 a) | |
+{ | |
+ return MAKE_FLOAT4(a.x, a.y, a.z, 0.0f); | |
+} | |
+inline __host__ __device__ Float4 MAKE_FLOAT4(Float3 a, Float w) | |
+{ | |
+ return MAKE_FLOAT4(a.x, a.y, a.z, w); | |
+} | |
+inline __host__ __device__ Float4 MAKE_FLOAT4(int4 a) | |
+{ | |
+ return MAKE_FLOAT4(Float(a.x), Float(a.y), Float(a.z), Float(a.w)); | |
+} | |
+inline __host__ __device__ Float4 MAKE_FLOAT4(uint4 a) | |
+{ | |
+ return MAKE_FLOAT4(Float(a.x), Float(a.y), Float(a.z), Float(a.w)); | |
+} | |
+*/ | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// negate | |
+//////////////////////////////////////////////////////////////////////////////… | |
+inline __host__ __device__ Float3 operator-(Float3 &a) | |
+{ | |
+ return MAKE_FLOAT3(-a.x, -a.y, -a.z); | |
+} | |
+inline __host__ __device__ Float4 operator-(Float4 &a) | |
+{ | |
+ return MAKE_FLOAT4(-a.x, -a.y, -a.z, -a.w); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// addition | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ int2 operator+(int2 a, int2 b) | |
+{ | |
+ return make_int2(a.x + b.x, a.y + b.y); | |
+} | |
+inline __host__ __device__ void operator+=(int2 &a, int2 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; | |
+} | |
+inline __host__ __device__ int2 operator+(int2 a, int b) | |
+{ | |
+ return make_int2(a.x + b, a.y + b); | |
+} | |
+inline __host__ __device__ int2 operator+(int b, int2 a) | |
+{ | |
+ return make_int2(a.x + b, a.y + b); | |
+} | |
+inline __host__ __device__ void operator+=(int2 &a, int b) | |
+{ | |
+ a.x += b; a.y += b; | |
+} | |
+ | |
+inline __host__ __device__ uint2 operator+(uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(a.x + b.x, a.y + b.y); | |
+} | |
+inline __host__ __device__ void operator+=(uint2 &a, uint2 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; | |
+} | |
+inline __host__ __device__ uint2 operator+(uint2 a, uint b) | |
+{ | |
+ return make_uint2(a.x + b, a.y + b); | |
+} | |
+inline __host__ __device__ uint2 operator+(uint b, uint2 a) | |
+{ | |
+ return make_uint2(a.x + b, a.y + b); | |
+} | |
+inline __host__ __device__ void operator+=(uint2 &a, uint b) | |
+{ | |
+ a.x += b; a.y += b; | |
+} | |
+ | |
+ | |
+inline __host__ __device__ Float3 operator+(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(a.x + b.x, a.y + b.y, a.z + b.z); | |
+} | |
+inline __host__ __device__ void operator+=(Float3 &a, Float3 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; | |
+} | |
+inline __host__ __device__ Float3 operator+(Float3 a, Float b) | |
+{ | |
+ return MAKE_FLOAT3(a.x + b, a.y + b, a.z + b); | |
+} | |
+inline __host__ __device__ void operator+=(Float3 &a, Float b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; | |
+} | |
+ | |
+inline __host__ __device__ int3 operator+(int3 a, int3 b) | |
+{ | |
+ return make_int3(a.x + b.x, a.y + b.y, a.z + b.z); | |
+} | |
+inline __host__ __device__ void operator+=(int3 &a, int3 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; | |
+} | |
+inline __host__ __device__ int3 operator+(int3 a, int b) | |
+{ | |
+ return make_int3(a.x + b, a.y + b, a.z + b); | |
+} | |
+inline __host__ __device__ void operator+=(int3 &a, int b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; | |
+} | |
+ | |
+inline __host__ __device__ uint3 operator+(uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(a.x + b.x, a.y + b.y, a.z + b.z); | |
+} | |
+inline __host__ __device__ void operator+=(uint3 &a, uint3 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; | |
+} | |
+inline __host__ __device__ uint3 operator+(uint3 a, uint b) | |
+{ | |
+ return make_uint3(a.x + b, a.y + b, a.z + b); | |
+} | |
+inline __host__ __device__ void operator+=(uint3 &a, uint b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; | |
+} | |
+ | |
+inline __host__ __device__ int3 operator+(int b, int3 a) | |
+{ | |
+ return make_int3(a.x + b, a.y + b, a.z + b); | |
+} | |
+inline __host__ __device__ uint3 operator+(uint b, uint3 a) | |
+{ | |
+ return make_uint3(a.x + b, a.y + b, a.z + b); | |
+} | |
+inline __host__ __device__ Float3 operator+(Float b, Float3 a) | |
+{ | |
+ return MAKE_FLOAT3(a.x + b, a.y + b, a.z + b); | |
+} | |
+ | |
+inline __host__ __device__ Float4 operator+(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | |
+} | |
+inline __host__ __device__ void operator+=(Float4 &a, Float4 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; | |
+} | |
+inline __host__ __device__ Float4 operator+(Float4 a, Float b) | |
+{ | |
+ return MAKE_FLOAT4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ Float4 operator+(Float b, Float4 a) | |
+{ | |
+ return MAKE_FLOAT4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ void operator+=(Float4 &a, Float b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; a.w += b; | |
+} | |
+ | |
+inline __host__ __device__ int4 operator+(int4 a, int4 b) | |
+{ | |
+ return make_int4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | |
+} | |
+inline __host__ __device__ void operator+=(int4 &a, int4 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; | |
+} | |
+inline __host__ __device__ int4 operator+(int4 a, int b) | |
+{ | |
+ return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ int4 operator+(int b, int4 a) | |
+{ | |
+ return make_int4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ void operator+=(int4 &a, int b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; a.w += b; | |
+} | |
+ | |
+inline __host__ __device__ uint4 operator+(uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); | |
+} | |
+inline __host__ __device__ void operator+=(uint4 &a, uint4 b) | |
+{ | |
+ a.x += b.x; a.y += b.y; a.z += b.z; a.w += b.w; | |
+} | |
+inline __host__ __device__ uint4 operator+(uint4 a, uint b) | |
+{ | |
+ return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ uint4 operator+(uint b, uint4 a) | |
+{ | |
+ return make_uint4(a.x + b, a.y + b, a.z + b, a.w + b); | |
+} | |
+inline __host__ __device__ void operator+=(uint4 &a, uint b) | |
+{ | |
+ a.x += b; a.y += b; a.z += b; a.w += b; | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// subtract | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+ | |
+inline __host__ __device__ int2 operator-(int2 a, int2 b) | |
+{ | |
+ return make_int2(a.x - b.x, a.y - b.y); | |
+} | |
+inline __host__ __device__ void operator-=(int2 &a, int2 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; | |
+} | |
+inline __host__ __device__ int2 operator-(int2 a, int b) | |
+{ | |
+ return make_int2(a.x - b, a.y - b); | |
+} | |
+inline __host__ __device__ int2 operator-(int b, int2 a) | |
+{ | |
+ return make_int2(b - a.x, b - a.y); | |
+} | |
+inline __host__ __device__ void operator-=(int2 &a, int b) | |
+{ | |
+ a.x -= b; a.y -= b; | |
+} | |
+ | |
+inline __host__ __device__ uint2 operator-(uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(a.x - b.x, a.y - b.y); | |
+} | |
+inline __host__ __device__ void operator-=(uint2 &a, uint2 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; | |
+} | |
+inline __host__ __device__ uint2 operator-(uint2 a, uint b) | |
+{ | |
+ return make_uint2(a.x - b, a.y - b); | |
+} | |
+inline __host__ __device__ uint2 operator-(uint b, uint2 a) | |
+{ | |
+ return make_uint2(b - a.x, b - a.y); | |
+} | |
+inline __host__ __device__ void operator-=(uint2 &a, uint b) | |
+{ | |
+ a.x -= b; a.y -= b; | |
+} | |
+ | |
+inline __host__ __device__ Float3 operator-(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(a.x - b.x, a.y - b.y, a.z - b.z); | |
+} | |
+inline __host__ __device__ void operator-=(Float3 &a, Float3 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; | |
+} | |
+inline __host__ __device__ Float3 operator-(Float3 a, Float b) | |
+{ | |
+ return MAKE_FLOAT3(a.x - b, a.y - b, a.z - b); | |
+} | |
+inline __host__ __device__ Float3 operator-(Float b, Float3 a) | |
+{ | |
+ return MAKE_FLOAT3(b - a.x, b - a.y, b - a.z); | |
+} | |
+inline __host__ __device__ void operator-=(Float3 &a, Float b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; | |
+} | |
+ | |
+inline __host__ __device__ int3 operator-(int3 a, int3 b) | |
+{ | |
+ return make_int3(a.x - b.x, a.y - b.y, a.z - b.z); | |
+} | |
+inline __host__ __device__ void operator-=(int3 &a, int3 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; | |
+} | |
+inline __host__ __device__ int3 operator-(int3 a, int b) | |
+{ | |
+ return make_int3(a.x - b, a.y - b, a.z - b); | |
+} | |
+inline __host__ __device__ int3 operator-(int b, int3 a) | |
+{ | |
+ return make_int3(b - a.x, b - a.y, b - a.z); | |
+} | |
+inline __host__ __device__ void operator-=(int3 &a, int b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; | |
+} | |
+ | |
+inline __host__ __device__ uint3 operator-(uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(a.x - b.x, a.y - b.y, a.z - b.z); | |
+} | |
+inline __host__ __device__ void operator-=(uint3 &a, uint3 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; | |
+} | |
+inline __host__ __device__ uint3 operator-(uint3 a, uint b) | |
+{ | |
+ return make_uint3(a.x - b, a.y - b, a.z - b); | |
+} | |
+inline __host__ __device__ uint3 operator-(uint b, uint3 a) | |
+{ | |
+ return make_uint3(b - a.x, b - a.y, b - a.z); | |
+} | |
+inline __host__ __device__ void operator-=(uint3 &a, uint b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; | |
+} | |
+ | |
+inline __host__ __device__ Float4 operator-(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | |
+} | |
+inline __host__ __device__ void operator-=(Float4 &a, Float4 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; | |
+} | |
+inline __host__ __device__ Float4 operator-(Float4 a, Float b) | |
+{ | |
+ return MAKE_FLOAT4(a.x - b, a.y - b, a.z - b, a.w - b); | |
+} | |
+inline __host__ __device__ void operator-=(Float4 &a, Float b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; a.w -= b; | |
+} | |
+ | |
+inline __host__ __device__ int4 operator-(int4 a, int4 b) | |
+{ | |
+ return make_int4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | |
+} | |
+inline __host__ __device__ void operator-=(int4 &a, int4 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; | |
+} | |
+inline __host__ __device__ int4 operator-(int4 a, int b) | |
+{ | |
+ return make_int4(a.x - b, a.y - b, a.z - b, a.w - b); | |
+} | |
+inline __host__ __device__ int4 operator-(int b, int4 a) | |
+{ | |
+ return make_int4(b - a.x, b - a.y, b - a.z, b - a.w); | |
+} | |
+inline __host__ __device__ void operator-=(int4 &a, int b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; a.w -= b; | |
+} | |
+ | |
+inline __host__ __device__ uint4 operator-(uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); | |
+} | |
+inline __host__ __device__ void operator-=(uint4 &a, uint4 b) | |
+{ | |
+ a.x -= b.x; a.y -= b.y; a.z -= b.z; a.w -= b.w; | |
+} | |
+inline __host__ __device__ uint4 operator-(uint4 a, uint b) | |
+{ | |
+ return make_uint4(a.x - b, a.y - b, a.z - b, a.w - b); | |
+} | |
+inline __host__ __device__ uint4 operator-(uint b, uint4 a) | |
+{ | |
+ return make_uint4(b - a.x, b - a.y, b - a.z, b - a.w); | |
+} | |
+inline __host__ __device__ void operator-=(uint4 &a, uint b) | |
+{ | |
+ a.x -= b; a.y -= b; a.z -= b; a.w -= b; | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// multiply | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ int2 operator*(int2 a, int2 b) | |
+{ | |
+ return make_int2(a.x * b.x, a.y * b.y); | |
+} | |
+inline __host__ __device__ void operator*=(int2 &a, int2 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; | |
+} | |
+inline __host__ __device__ int2 operator*(int2 a, int b) | |
+{ | |
+ return make_int2(a.x * b, a.y * b); | |
+} | |
+inline __host__ __device__ int2 operator*(int b, int2 a) | |
+{ | |
+ return make_int2(b * a.x, b * a.y); | |
+} | |
+inline __host__ __device__ void operator*=(int2 &a, int b) | |
+{ | |
+ a.x *= b; a.y *= b; | |
+} | |
+ | |
+inline __host__ __device__ uint2 operator*(uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(a.x * b.x, a.y * b.y); | |
+} | |
+inline __host__ __device__ void operator*=(uint2 &a, uint2 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; | |
+} | |
+inline __host__ __device__ uint2 operator*(uint2 a, uint b) | |
+{ | |
+ return make_uint2(a.x * b, a.y * b); | |
+} | |
+inline __host__ __device__ uint2 operator*(uint b, uint2 a) | |
+{ | |
+ return make_uint2(b * a.x, b * a.y); | |
+} | |
+inline __host__ __device__ void operator*=(uint2 &a, uint b) | |
+{ | |
+ a.x *= b; a.y *= b; | |
+} | |
+ | |
+inline __host__ __device__ Float3 operator*(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(a.x * b.x, a.y * b.y, a.z * b.z); | |
+} | |
+inline __host__ __device__ void operator*=(Float3 &a, Float3 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; | |
+} | |
+inline __host__ __device__ Float3 operator*(Float3 a, Float b) | |
+{ | |
+ return MAKE_FLOAT3(a.x * b, a.y * b, a.z * b); | |
+} | |
+inline __host__ __device__ Float3 operator*(Float b, Float3 a) | |
+{ | |
+ return MAKE_FLOAT3(b * a.x, b * a.y, b * a.z); | |
+} | |
+inline __host__ __device__ void operator*=(Float3 &a, Float b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; | |
+} | |
+ | |
+inline __host__ __device__ int3 operator*(int3 a, int3 b) | |
+{ | |
+ return make_int3(a.x * b.x, a.y * b.y, a.z * b.z); | |
+} | |
+inline __host__ __device__ void operator*=(int3 &a, int3 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; | |
+} | |
+inline __host__ __device__ int3 operator*(int3 a, int b) | |
+{ | |
+ return make_int3(a.x * b, a.y * b, a.z * b); | |
+} | |
+inline __host__ __device__ int3 operator*(int b, int3 a) | |
+{ | |
+ return make_int3(b * a.x, b * a.y, b * a.z); | |
+} | |
+inline __host__ __device__ void operator*=(int3 &a, int b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; | |
+} | |
+ | |
+inline __host__ __device__ uint3 operator*(uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(a.x * b.x, a.y * b.y, a.z * b.z); | |
+} | |
+inline __host__ __device__ void operator*=(uint3 &a, uint3 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; | |
+} | |
+inline __host__ __device__ uint3 operator*(uint3 a, uint b) | |
+{ | |
+ return make_uint3(a.x * b, a.y * b, a.z * b); | |
+} | |
+inline __host__ __device__ uint3 operator*(uint b, uint3 a) | |
+{ | |
+ return make_uint3(b * a.x, b * a.y, b * a.z); | |
+} | |
+inline __host__ __device__ void operator*=(uint3 &a, uint b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; | |
+} | |
+ | |
+inline __host__ __device__ Float4 operator*(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | |
+} | |
+inline __host__ __device__ void operator*=(Float4 &a, Float4 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; | |
+} | |
+inline __host__ __device__ Float4 operator*(Float4 a, Float b) | |
+{ | |
+ return MAKE_FLOAT4(a.x * b, a.y * b, a.z * b, a.w * b); | |
+} | |
+inline __host__ __device__ Float4 operator*(Float b, Float4 a) | |
+{ | |
+ return MAKE_FLOAT4(b * a.x, b * a.y, b * a.z, b * a.w); | |
+} | |
+inline __host__ __device__ void operator*=(Float4 &a, Float b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; a.w *= b; | |
+} | |
+ | |
+inline __host__ __device__ int4 operator*(int4 a, int4 b) | |
+{ | |
+ return make_int4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | |
+} | |
+inline __host__ __device__ void operator*=(int4 &a, int4 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; | |
+} | |
+inline __host__ __device__ int4 operator*(int4 a, int b) | |
+{ | |
+ return make_int4(a.x * b, a.y * b, a.z * b, a.w * b); | |
+} | |
+inline __host__ __device__ int4 operator*(int b, int4 a) | |
+{ | |
+ return make_int4(b * a.x, b * a.y, b * a.z, b * a.w); | |
+} | |
+inline __host__ __device__ void operator*=(int4 &a, int b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; a.w *= b; | |
+} | |
+ | |
+inline __host__ __device__ uint4 operator*(uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); | |
+} | |
+inline __host__ __device__ void operator*=(uint4 &a, uint4 b) | |
+{ | |
+ a.x *= b.x; a.y *= b.y; a.z *= b.z; a.w *= b.w; | |
+} | |
+inline __host__ __device__ uint4 operator*(uint4 a, uint b) | |
+{ | |
+ return make_uint4(a.x * b, a.y * b, a.z * b, a.w * b); | |
+} | |
+inline __host__ __device__ uint4 operator*(uint b, uint4 a) | |
+{ | |
+ return make_uint4(b * a.x, b * a.y, b * a.z, b * a.w); | |
+} | |
+inline __host__ __device__ void operator*=(uint4 &a, uint b) | |
+{ | |
+ a.x *= b; a.y *= b; a.z *= b; a.w *= b; | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// divide | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 operator/(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(a.x / b.x, a.y / b.y, a.z / b.z); | |
+} | |
+inline __host__ __device__ void operator/=(Float3 &a, Float3 b) | |
+{ | |
+ a.x /= b.x; a.y /= b.y; a.z /= b.z; | |
+} | |
+inline __host__ __device__ Float3 operator/(Float3 a, Float b) | |
+{ | |
+ return MAKE_FLOAT3(a.x / b, a.y / b, a.z / b); | |
+} | |
+inline __host__ __device__ void operator/=(Float3 &a, Float b) | |
+{ | |
+ a.x /= b; a.y /= b; a.z /= b; | |
+} | |
+inline __host__ __device__ Float3 operator/(Float b, Float3 a) | |
+{ | |
+ return MAKE_FLOAT3(b / a.x, b / a.y, b / a.z); | |
+} | |
+ | |
+inline __host__ __device__ Float4 operator/(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w); | |
+} | |
+inline __host__ __device__ void operator/=(Float4 &a, Float4 b) | |
+{ | |
+ a.x /= b.x; a.y /= b.y; a.z /= b.z; a.w /= b.w; | |
+} | |
+inline __host__ __device__ Float4 operator/(Float4 a, Float b) | |
+{ | |
+ return MAKE_FLOAT4(a.x / b, a.y / b, a.z / b, a.w / b); | |
+} | |
+inline __host__ __device__ void operator/=(Float4 &a, Float b) | |
+{ | |
+ a.x /= b; a.y /= b; a.z /= b; a.w /= b; | |
+} | |
+inline __host__ __device__ Float4 operator/(Float b, Float4 a){ | |
+ return MAKE_FLOAT4(b / a.x, b / a.y, b / a.z, b / a.w); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// min | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 fminf(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z)); | |
+} | |
+inline __host__ __device__ Float4 fminf(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(fminf(a.x,b.x), fminf(a.y,b.y), fminf(a.z,b.z), fmi… | |
+} | |
+ | |
+inline __host__ __device__ int2 min(int2 a, int2 b) | |
+{ | |
+ return make_int2(min(a.x,b.x), min(a.y,b.y)); | |
+} | |
+inline __host__ __device__ int3 min(int3 a, int3 b) | |
+{ | |
+ return make_int3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); | |
+} | |
+inline __host__ __device__ int4 min(int4 a, int4 b) | |
+{ | |
+ return make_int4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); | |
+} | |
+ | |
+inline __host__ __device__ uint2 min(uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(min(a.x,b.x), min(a.y,b.y)); | |
+} | |
+inline __host__ __device__ uint3 min(uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z)); | |
+} | |
+inline __host__ __device__ uint4 min(uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(min(a.x,b.x), min(a.y,b.y), min(a.z,b.z), min(a.w,b.w)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// max | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 fmaxf(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z)); | |
+} | |
+inline __host__ __device__ Float4 fmaxf(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(fmaxf(a.x,b.x), fmaxf(a.y,b.y), fmaxf(a.z,b.z), fma… | |
+} | |
+ | |
+inline __host__ __device__ int2 max(int2 a, int2 b) | |
+{ | |
+ return make_int2(max(a.x,b.x), max(a.y,b.y)); | |
+} | |
+inline __host__ __device__ int3 max(int3 a, int3 b) | |
+{ | |
+ return make_int3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); | |
+} | |
+inline __host__ __device__ int4 max(int4 a, int4 b) | |
+{ | |
+ return make_int4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); | |
+} | |
+ | |
+inline __host__ __device__ uint2 max(uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(max(a.x,b.x), max(a.y,b.y)); | |
+} | |
+inline __host__ __device__ uint3 max(uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z)); | |
+} | |
+inline __host__ __device__ uint4 max(uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(max(a.x,b.x), max(a.y,b.y), max(a.z,b.z), max(a.w,b.w)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// lerp | |
+// - linear interpolation between a and b, based on value t in [0, 1] range | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __device__ __host__ Float lerp(Float a, Float b, Float t) | |
+{ | |
+ return a + t*(b-a); | |
+} | |
+inline __device__ __host__ Float3 lerp(Float3 a, Float3 b, Float t) | |
+{ | |
+ return a + t*(b-a); | |
+} | |
+inline __device__ __host__ Float4 lerp(Float4 a, Float4 b, Float t) | |
+{ | |
+ return a + t*(b-a); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// clamp | |
+// - clamp the value v to be in the range [a, b] | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __device__ __host__ Float clamp(Float f, Float a, Float b) | |
+{ | |
+ return fmaxf(a, fminf(f, b)); | |
+} | |
+inline __device__ __host__ int clamp(int f, int a, int b) | |
+{ | |
+ return max(a, min(f, b)); | |
+} | |
+inline __device__ __host__ uint clamp(uint f, uint a, uint b) | |
+{ | |
+ return max(a, min(f, b)); | |
+} | |
+ | |
+inline __device__ __host__ Float3 clamp(Float3 v, Float a, Float b) | |
+{ | |
+ return MAKE_FLOAT3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); | |
+} | |
+inline __device__ __host__ Float3 clamp(Float3 v, Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, … | |
+} | |
+inline __device__ __host__ Float4 clamp(Float4 v, Float a, Float b) | |
+{ | |
+ return MAKE_FLOAT4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), c… | |
+} | |
+inline __device__ __host__ Float4 clamp(Float4 v, Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, … | |
+} | |
+ | |
+inline __device__ __host__ int2 clamp(int2 v, int a, int b) | |
+{ | |
+ return make_int2(clamp(v.x, a, b), clamp(v.y, a, b)); | |
+} | |
+inline __device__ __host__ int2 clamp(int2 v, int2 a, int2 b) | |
+{ | |
+ return make_int2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); | |
+} | |
+inline __device__ __host__ int3 clamp(int3 v, int a, int b) | |
+{ | |
+ return make_int3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); | |
+} | |
+inline __device__ __host__ int3 clamp(int3 v, int3 a, int3 b) | |
+{ | |
+ return make_int3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.… | |
+} | |
+inline __device__ __host__ int4 clamp(int4 v, int a, int b) | |
+{ | |
+ return make_int4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), cla… | |
+} | |
+inline __device__ __host__ int4 clamp(int4 v, int4 a, int4 b) | |
+{ | |
+ return make_int4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a.… | |
+} | |
+ | |
+inline __device__ __host__ uint2 clamp(uint2 v, uint a, uint b) | |
+{ | |
+ return make_uint2(clamp(v.x, a, b), clamp(v.y, a, b)); | |
+} | |
+inline __device__ __host__ uint2 clamp(uint2 v, uint2 a, uint2 b) | |
+{ | |
+ return make_uint2(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y)); | |
+} | |
+inline __device__ __host__ uint3 clamp(uint3 v, uint a, uint b) | |
+{ | |
+ return make_uint3(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b)); | |
+} | |
+inline __device__ __host__ uint3 clamp(uint3 v, uint3 a, uint3 b) | |
+{ | |
+ return make_uint3(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a… | |
+} | |
+inline __device__ __host__ uint4 clamp(uint4 v, uint a, uint b) | |
+{ | |
+ return make_uint4(clamp(v.x, a, b), clamp(v.y, a, b), clamp(v.z, a, b), cl… | |
+} | |
+inline __device__ __host__ uint4 clamp(uint4 v, uint4 a, uint4 b) | |
+{ | |
+ return make_uint4(clamp(v.x, a.x, b.x), clamp(v.y, a.y, b.y), clamp(v.z, a… | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// dot product | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float dot(Float3 a, Float3 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z; | |
+} | |
+inline __host__ __device__ Float dot(Float4 a, Float4 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; | |
+} | |
+ | |
+inline __host__ __device__ int dot(int2 a, int2 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y; | |
+} | |
+inline __host__ __device__ int dot(int3 a, int3 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z; | |
+} | |
+inline __host__ __device__ int dot(int4 a, int4 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; | |
+} | |
+ | |
+inline __host__ __device__ uint dot(uint2 a, uint2 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y; | |
+} | |
+inline __host__ __device__ uint dot(uint3 a, uint3 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z; | |
+} | |
+inline __host__ __device__ uint dot(uint4 a, uint4 b) | |
+{ | |
+ return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// length | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float length(Float3 v) | |
+{ | |
+ return sqrtf(dot(v, v)); | |
+} | |
+inline __host__ __device__ Float length(Float4 v) | |
+{ | |
+ return sqrtf(dot(v, v)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// normalize | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 normalize(Float3 v) | |
+{ | |
+ Float invLen = rsqrtf(dot(v, v)); | |
+ return v * invLen; | |
+} | |
+inline __host__ __device__ Float4 normalize(Float4 v) | |
+{ | |
+ Float invLen = rsqrtf(dot(v, v)); | |
+ return v * invLen; | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// floor | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 floorf(Float3 v) | |
+{ | |
+ return MAKE_FLOAT3(floorf(v.x), floorf(v.y), floorf(v.z)); | |
+} | |
+inline __host__ __device__ Float4 floorf(Float4 v) | |
+{ | |
+ return MAKE_FLOAT4(floorf(v.x), floorf(v.y), floorf(v.z), floorf(v.w)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// frac - returns the fractional portion of a scalar or each vector component | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float fracf(Float v) | |
+{ | |
+ return v - floorf(v); | |
+} | |
+inline __host__ __device__ Float3 fracf(Float3 v) | |
+{ | |
+ return MAKE_FLOAT3(fracf(v.x), fracf(v.y), fracf(v.z)); | |
+} | |
+inline __host__ __device__ Float4 fracf(Float4 v) | |
+{ | |
+ return MAKE_FLOAT4(fracf(v.x), fracf(v.y), fracf(v.z), fracf(v.w)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// fmod | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 fmodf(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z)); | |
+} | |
+inline __host__ __device__ Float4 fmodf(Float4 a, Float4 b) | |
+{ | |
+ return MAKE_FLOAT4(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z), fmod… | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// absolute value | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 fabs(Float3 v) | |
+{ | |
+ return MAKE_FLOAT3(fabs(v.x), fabs(v.y), fabs(v.z)); | |
+} | |
+inline __host__ __device__ Float4 fabs(Float4 v) | |
+{ | |
+ return MAKE_FLOAT4(fabs(v.x), fabs(v.y), fabs(v.z), fabs(v.w)); | |
+} | |
+ | |
+inline __host__ __device__ int2 abs(int2 v) | |
+{ | |
+ return make_int2(abs(v.x), abs(v.y)); | |
+} | |
+inline __host__ __device__ int3 abs(int3 v) | |
+{ | |
+ return make_int3(abs(v.x), abs(v.y), abs(v.z)); | |
+} | |
+inline __host__ __device__ int4 abs(int4 v) | |
+{ | |
+ return make_int4(abs(v.x), abs(v.y), abs(v.z), abs(v.w)); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// reflect | |
+// - returns reflection of incident ray I around surface normal N | |
+// - N should be normalized, reflected vector's length is equal to length of I | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 reflect(Float3 i, Float3 n) | |
+{ | |
+ return i - 2.0f * n * dot(n,i); | |
+} | |
+ | |
+//////////////////////////////////////////////////////////////////////////////… | |
+// cross product | |
+//////////////////////////////////////////////////////////////////////////////… | |
+ | |
+inline __host__ __device__ Float3 cross(Float3 a, Float3 b) | |
+{ | |
+ return MAKE_FLOAT3(a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x… | |
+} | |
+ | |
+#endif |