implemented ndem parameter for DEM/CFD computation ratio - sphere - GPU-based …
git clone git://src.adamsgaard.dk/sphere
Log
Files
Refs
LICENSE
---
commit f8ab5f09ac2e8cd7d89525a6e619d6f8c382fa1d
parent e567138fef8fd2ec15afe7a583c3274c757839d0
Author: Anders Damsgaard <[email protected]>
Date: Thu, 1 May 2014 10:51:45 +0200
implemented ndem parameter for DEM/CFD computation ratio
Diffstat:
M src/device.cu | 839 ++++++++++++++++-------------…
M src/navierstokes.cuh | 30 +++++++++++++++++-------------
2 files changed, 440 insertions(+), 429 deletions(-)
---
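The ns.ndem parameter sets how many DEM time steps are computed per CFD (Navier-Stokes) update: the fluid solver in DEM::startTime() now only runs when iter % ns.ndem == 0, and the fluid kernels integrate over an effective time step of ndem*devC_dt. A minimal host-side sketch of the interleaving, assuming simplified placeholder names (run_dem_step, run_cfd_step and n_steps are illustrative, not symbols from sphere):

// Sketch of the DEM/CFD interleaving controlled by ndem.
// run_dem_step() and run_cfd_step() are placeholders for the particle
// and fluid computations in DEM::startTime().
static void run_dem_step(double dt)     { /* particle contacts and integration */ }
static void run_cfd_step(double dt_cfd) { /* Navier-Stokes solution */ }

int main()
{
    const double dt = 1.0e-6;         // DEM time step
    const unsigned int ndem = 10;     // DEM steps per CFD step
    const unsigned int n_steps = 1000;

    for (unsigned int iter = 0; iter < n_steps; ++iter) {
        if ((iter % ndem) == 0)
            run_cfd_step(ndem*dt);    // fluid advanced with the effective step ndem*dt
        run_dem_step(dt);             // particle mechanics runs every iteration
    }
    return 0;
}

With ndem = 1 the previous behaviour is recovered; larger values run the fluid solution less often, which helps when the DEM contact mechanics require a much smaller stable time step than the fluid solver.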
diff --git a/src/device.cu b/src/device.cu
@@ -761,7 +761,6 @@ __host__ void DEM::startTime()
dev_contacts,
dev_distmod);
-
// Synchronization point
cudaThreadSynchronize();
if (PROFILING == 1)
@@ -774,7 +773,6 @@ __host__ void DEM::startTime()
" too large?", iter);
}
-
// For each particle process collisions and compute resulting forc…
//cudaPrintfInit();
if (PROFILING == 1)
@@ -897,484 +895,493 @@ __host__ void DEM::startTime()
}
#endif
- // Initial guess for the top epsilon values. These may be changed …
- // setUpperPressureNS
- Float pressure = ns.p[idx(0,0,ns.nz-1)];
- Float pressure_new = pressure; // Dirichlet
- Float epsilon_value = pressure_new - ns.beta*pressure;
- setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- epsilon_value);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSepsilonTop", iter);
-
- // Modulate the pressures at the upper boundary cells
- if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
- ns.p_mod_f > 1.0e-7) {
- Float new_pressure = ns.p[idx(0,0,ns.nz-1)] // original pressu…
- + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
- + ns.p_mod_phi);
- setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
+ if ((iter % ns.ndem) == 0) {
+ // Initial guess for the top epsilon values. These may be
+ // changed in setUpperPressureNS
+ Float pressure = ns.p[idx(0,0,ns.nz-1)];
+ Float pressure_new = pressure; // Dirichlet
+ Float epsilon_value = pressure_new - ns.beta*pressure;
+ setNSepsilonTop<<<dimGridFluid, dimBlockFluid>>>(
dev_ns_epsilon,
dev_ns_epsilon_new,
- ns.beta,
- new_pressure);
+ epsilon_value);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setUpperPressureNS", iter);
-
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
- << "\n###### EPSILON AFTER setUpperPressureNS ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
+ checkForCudaErrorsIter("Post setNSepsilonTop", iter);
+
+ // Modulate the pressures at the upper boundary cells
+ if ((ns.p_mod_A > 1.0e-5 || ns.p_mod_A < -1.0e-5) &&
+ ns.p_mod_f > 1.0e-7) {
+ // original pressure
+ Float new_pressure = ns.p[idx(0,0,ns.nz-1)]
+ + ns.p_mod_A*sin(2.0*M_PI*ns.p_mod_f*time.current
+ + ns.p_mod_phi);
+ setUpperPressureNS<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ ns.beta,
+ new_pressure);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setUpperPressureNS", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
+ << "\n###### EPSILON AFTER setUpperPressureNS "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
}
- }
-
- // Set the values of the ghost nodes in the grid
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_epsilon);*/
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi, ns.bc_bot, ns.bc_top);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_dphi, ns.bc_bot, ns.bc_top);
-
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSghostNodesDev);
- checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
- /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");*/
-
-
- // Find the fluid stress tensor, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v,
- dev_ns_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSstressTensor);
- checkForCudaErrorsIter("Post findNSstressTensor", iter);
-
- // Set stress tensor values in the ghost nodes
- setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_tau,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
- // Find the divergence of phi*vi*v, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_v,
- dev_ns_div_phi_vi_v);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivphiviv);
- checkForCudaErrorsIter("Post findNSdivphiviv", iter);
-
- // Find the divergence of phi*tau, needed for predicting the fluid
- // velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_phi,
- dev_ns_tau,
- dev_ns_div_phi_tau);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSdivphitau);
- checkForCudaErrorsIter("Post findNSdivphitau", iter);
+ // Set the values of the ghost nodes in the grid
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ /*setNSghostNodesDev<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_v_p,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_epsilon);*/
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p, ns.bc_bot, ns.bc_top);
- // Predict the fluid velocities on the base of the old pressure
- // field and ignoring the incompressibility constraint
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_phi,
- dev_ns_dphi,
- dev_ns_div_phi_vi_v,
- dev_ns_div_phi_tau,
- ns.bc_bot,
- ns.bc_top,
- ns.beta,
- dev_ns_fi,
- dev_ns_v_p);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findPredNSvelocities);
- checkForCudaErrorsIter("Post findPredNSvelocities", iter);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v, ns.bc_bot, ns.bc_top);
- setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_v_p);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)",
- iter);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_p, ns.bc_bot, ns.bc_top);
- // In the first iteration of the sphere program, we'll need to
- // manually estimate the values of epsilon. In the subsequent
- // iterations, the previous values are used.
- if (iter == 0) {
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_phi, ns.bc_bot, ns.bc_top);
- // Define the first estimate of the values of epsilon.
- // The initial guess depends on the value of ns.beta.
- Float pressure = ns.p[idx(2,2,2)];
- Float pressure_new = pressure; // Guess p_current = p_new
- Float epsilon_value = pressure_new - ns.beta*pressure;
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon, epsilon_value);
- cudaThreadSynchronize();
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_dphi, ns.bc_bot, ns.bc_top);
- setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm);
cudaThreadSynchronize();
-
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSepsilon);
- checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
-
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n###### EPSILON AFTER setNSepsilonInterior ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- // Set the epsilon values at the lower boundary
- pressure = ns.p[idx(0,0,0)];
- pressure_new = pressure; // Guess p_current = p_new
- epsilon_value = pressure_new - ns.beta*pressure;
+ &t_setNSghostNodesDev);
+ checkForCudaErrorsIter("Post setNSghostNodesDev", iter);
+ /*std::cout << "\n###### EPSILON AFTER setNSghostNodesDev ####…
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");*/
+
+ // Find the fluid stress tensor, needed for predicting the flu…
+ // velocities
if (PROFILING == 1)
startTimer(&kernel_tic);
- setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- epsilon_value);
+ findNSstressTensor<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v,
+ dev_ns_tau);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_setNSdirichlet);
- checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
+ &t_findNSstressTensor);
+ checkForCudaErrorsIter("Post findNSstressTensor", iter);
- if (report_even_more_epsilon == 1) {
- std::cout
- << "\n###### EPSILON AFTER setNSepsilonBottom ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon);
- cudaThreadSynchronize();
- checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_epsilon…
- iter);*/
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
+ // Set stress tensor values in the ghost nodes
+ setNSghostNodes_tau<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_tau,
+ ns.bc_bot,
+ ns.bc_top);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
- iter);
+ checkForCudaErrorsIter("Post setNSghostNodes_tau", iter);
- if (report_even_more_epsilon == 1) {
- std::cout <<
- "\n###### EPSILON AFTER setNSghostNodes(epsilon) #####…
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
- }
-
- // Solve the system of epsilon using a Jacobi iterative solver.
- // The average normalized residual is initialized to a large value.
- //double avg_norm_res;
- double max_norm_res;
-
- // Write a log file of the normalized residuals during the Jacobi
- // iterations
- std::ofstream reslog;
- if (write_res_log == 1)
- reslog.open("max_res_norm.dat");
-
- // transfer normalized residuals from GPU to CPU
- if (report_epsilon == 1) {
- std::cout << "\n###### BEFORE FIRST JACOBI ITERATION ######"
- << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
-
- // Only grad(epsilon) changes during the Jacobi iterations. The
- // remaining terms of the forcing function are only calculated
- // during the first iteration.
+ // Find the divergence of phi*vi*v, needed for predicting the
+ // fluid velocities
if (PROFILING == 1)
startTimer(&kernel_tic);
- findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_f1,
- dev_ns_f2,
- dev_ns_f,
+ findNSdivphiviv<<<dimGridFluid, dimBlockFluid>>>(
dev_ns_phi,
- dev_ns_dphi,
- dev_ns_v_p,
- nijac);
+ dev_ns_v,
+ dev_ns_div_phi_vi_v);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_findNSforcing);
- checkForCudaErrorsIter("Post findNSforcing", iter);
- /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_f1,
- dev_ns_f2,
- dev_ns_f,
- nijac);
- cudaThreadSynchronize();
- checkForCudaErrors("Post setNSghostNodesForcing", iter);*/
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
- iter);
-
- if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after setNSghostNodes(epsilon,2) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- /*smoothing<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post smoothing", iter);
-
- if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after smoothing(epsilon) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(3)",
- iter);
- */
-
- /*if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac
- << " after setNSghostNodesEpsilon(epsilon,3) ######"
- << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");
- }*/
+ &t_findNSdivphiviv);
+ checkForCudaErrorsIter("Post findNSdivphiviv", iter);
- // Store old values
- /*copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_old);
+ // Find the divergence of phi*tau, needed for predicting the
+ // fluid velocities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findNSdivphitau<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_phi,
+ dev_ns_tau,
+ dev_ns_div_phi_tau);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post copyValues (epsilon->epsilon_old)…
- iter);*/
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findNSdivphitau);
+ checkForCudaErrorsIter("Post findNSdivphitau", iter);
- // Perform a single Jacobi iteration
+ // Predict the fluid velocities on the base of the old pressure
+ // field and ignoring the incompressibility constraint
if (PROFILING == 1)
startTimer(&kernel_tic);
- jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- dev_ns_epsilon_new,
- dev_ns_norm,
- dev_ns_f,
+ findPredNSvelocities<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_div_phi_vi_v,
+ dev_ns_div_phi_tau,
ns.bc_bot,
ns.bc_top,
- ns.theta);
+ ns.beta,
+ dev_ns_fi,
+ ns.ndem,
+ dev_ns_v_p);
cudaThreadSynchronize();
if (PROFILING == 1)
stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_jacobiIterationNS);
- checkForCudaErrorsIter("Post jacobiIterationNS", iter);
-
- // Flip flop: swap new and current array pointers
- /*Float* tmp = dev_ns_epsilon;
- dev_ns_epsilon = dev_ns_epsilon_new;
- dev_ns_epsilon_new = tmp;*/
+ &t_findPredNSvelocities);
+ checkForCudaErrorsIter("Post findPredNSvelocities", iter);
- // Copy new values to current values
- copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon_new,
- dev_ns_epsilon);
+ setNSghostNodes<Float3><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_v_p);
cudaThreadSynchronize();
- checkForCudaErrorsIter("Post copyValues (epsilon_new->epsilon)…
+ checkForCudaErrorsIter("Post setNSghostNodesFloat3(dev_ns_v_p)…
iter);
- /*findNormalizedResiduals<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon_old,
- dev_ns_epsilon,
- dev_ns_norm,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post findNormalizedResiduals",
- iter);*/
+ // In the first iteration of the sphere program, we'll need to
+ // manually estimate the values of epsilon. In the subsequent
+ // iterations, the previous values are used.
+ if (iter == 0) {
+
+ // Define the first estimate of the values of epsilon.
+ // The initial guess depends on the value of ns.beta.
+ Float pressure = ns.p[idx(2,2,2)];
+ Float pressure_new = pressure; // Guess p_current = p_new
+ Float epsilon_value = pressure_new - ns.beta*pressure;
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ setNSepsilonInterior<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon, epsilon_value);
+ cudaThreadSynchronize();
+
+ setNSnormZero<<<dimGridFluid, dimBlockFluid>>>(dev_ns_norm…
+ cudaThreadSynchronize();
+
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_setNSepsilon);
+ checkForCudaErrorsIter("Post setNSepsilonInterior", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n###### EPSILON AFTER setNSepsilonInterior "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+ // Set the epsilon values at the lower boundary
+ pressure = ns.p[idx(0,0,0)];
+ pressure_new = pressure; // Guess p_current = p_new
+ epsilon_value = pressure_new - ns.beta*pressure;
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ setNSepsilonBottom<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ epsilon_value);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_setNSdirichlet);
+ checkForCudaErrorsIter("Post setNSepsilonBottom", iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout
+ << "\n###### EPSILON AFTER setNSepsilonBottom "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ /*setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post setNSghostNodesFloat(dev_ns_eps…
+ iter);*/
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(1)",
+ iter);
+
+ if (report_even_more_epsilon == 1) {
+ std::cout <<
+ "\n###### EPSILON AFTER setNSghostNodes(epsilon) "
+ << "######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+ }
+
+ // Solve the system of epsilon using a Jacobi iterative solver.
+ // The average normalized residual is initialized to a large
+ // value.
+ //double avg_norm_res;
+ double max_norm_res;
+
+ // Write a log file of the normalized residuals during the Jac…
+ // iterations
+ std::ofstream reslog;
+ if (write_res_log == 1)
+ reslog.open("max_res_norm.dat");
+
+ // transfer normalized residuals from GPU to CPU
if (report_epsilon == 1) {
- std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after jacobiIterationNS ######"
+ std::cout << "\n###### BEFORE FIRST JACOBI ITERATION #####…
+ << "\n@@@@@@ TIME STEP " << iter << " @@@@@@"
<< std::endl;
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");
}
- if (nijac % nijacnorm == 0) {
-
- // Read the normalized residuals from the device
- transferNSnormFromGlobalDeviceMemory();
-
- // Write the normalized residuals to the terminal
- //printNSarray(stdout, ns.norm, "norm");
-
- // Find the maximum value of the normalized residuals
- max_norm_res = maxNormResNS();
-
- // Write the Jacobi iteration number and maximum value of
- // the normalized residual to the log file
- if (write_res_log == 1)
- reslog << nijac << '\t' << max_norm_res << std::endl;
- }
+ for (unsigned int nijac = 0; nijac<ns.maxiter; ++nijac) {
+
+ // Only grad(epsilon) changes during the Jacobi iterations.
+ // The remaining terms of the forcing function are only
+ // calculated during the first iteration.
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ findNSforcing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_f1,
+ dev_ns_f2,
+ dev_ns_f,
+ dev_ns_phi,
+ dev_ns_dphi,
+ dev_ns_v_p,
+ nijac,
+ ns.ndem);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_findNSforcing);
+ checkForCudaErrorsIter("Post findNSforcing", iter);
+ /*setNSghostNodesForcing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_f1,
+ dev_ns_f2,
+ dev_ns_f,
+ nijac);
+ cudaThreadSynchronize();
+ checkForCudaErrors("Post setNSghostNodesForcing", iter);…
+
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(2)",
+ iter);
- if (max_norm_res < ns.tolerance) {
-
- if (write_conv_log == 1 && iter % conv_log_interval == 0)
- convlog << iter << '\t' << nijac << std::endl;
-
- // Apply smoothing if requested
- if (ns.gamma > 0.0) {
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>…
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(4)…
- iter);
-
- smoothing<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_epsilon,
- ns.gamma,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post smoothing", iter);
-
- setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>…
- dev_ns_epsilon,
- ns.bc_bot, ns.bc_top);
- cudaThreadSynchronize();
- checkForCudaErrorsIter("Post setNSghostNodesEpsilon(4)…
- iter);
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac
+ << " after setNSghostNodes(epsilon,2) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
}
+ /*smoothing<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post smoothing", iter);
+
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac << " after smoothing(epsilon) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ setNSghostNodes<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post setNSghostNodesEpsilon(3)",
+ iter);
+ */
+
+ /*if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac
+ << " after setNSghostNodesEpsilon(epsilon,3) ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }*/
+
+ // Store old values
+ /*copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_old);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post copyValues (epsilon->epsilon_old)", iter);*/
+
+ // Perform a single Jacobi iteration
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ jacobiIterationNS<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ dev_ns_epsilon_new,
+ dev_ns_norm,
+ dev_ns_f,
+ ns.bc_bot,
+ ns.bc_top,
+ ns.theta);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_jacobiIterationNS);
+ checkForCudaErrorsIter("Post jacobiIterationNS", iter);
+
+ // Flip flop: swap new and current array pointers
+ /*Float* tmp = dev_ns_epsilon;
+ dev_ns_epsilon = dev_ns_epsilon_new;
+ dev_ns_epsilon_new = tmp;*/
+
+ // Copy new values to current values
+ copyValues<Float><<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon_new,
+ dev_ns_epsilon);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post copyValues (epsilon_new->epsilon)", iter);
+
+ /*findNormalizedResiduals<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon_old,
+ dev_ns_epsilon,
+ dev_ns_norm,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post findNormalizedResiduals",
+ iter);*/
+
if (report_epsilon == 1) {
std::cout << "\n###### JACOBI ITERATION "
- << nijac << " after smoothing ######"
+ << nijac << " after jacobiIterationNS ######"
<< std::endl;
transferNSepsilonFromGlobalDeviceMemory();
printNSarray(stdout, ns.epsilon, "epsilon");
}
- break; // solution has converged, exit Jacobi iter. loop
- }
+ if (nijac % nijacnorm == 0) {
- if (nijac >= ns.maxiter-1) {
+ // Read the normalized residuals from the device
+ transferNSnormFromGlobalDeviceMemory();
- if (write_conv_log == 1)
- convlog << iter << '\t' << nijac << std::endl;
+ // Write the normalized residuals to the terminal
+ //printNSarray(stdout, ns.norm, "norm");
- std::cerr << "\nIteration " << iter << ", time "
- << iter*time.dt << " s: "
- "Error, the epsilon solution in the fluid "
- "calculations did not converge. Try increasing the "
- "value of 'ns.maxiter' (" << ns.maxiter
- << ") or increase 'ns.tolerance' ("
- << ns.tolerance << ")." << std::endl;
- }
- //break; // end after Jacobi first iteration
- } // end Jacobi iteration loop
+ // Find the maximum value of the normalized residuals
+ max_norm_res = maxNormResNS();
- if (write_res_log == 1)
- reslog.close();
+ // Write the Jacobi iteration number and maximum value
+ // of the normalized residual to the log file
+ if (write_res_log == 1)
+ reslog << nijac << '\t' << max_norm_res << std::en…
+ }
- // Find the new pressures and velocities
- if (PROFILING == 1)
- startTimer(&kernel_tic);
- updateNSvelocityPressure<<<dimGridFluid, dimBlockFluid>>>(
- dev_ns_p,
- dev_ns_v,
- dev_ns_v_p,
- dev_ns_phi,
- dev_ns_epsilon,
- ns.beta,
- ns.bc_bot,
- ns.bc_top);
- cudaThreadSynchronize();
- if (PROFILING == 1)
- stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
- &t_updateNSvelocityPressure);
- checkForCudaErrorsIter("Post updateNSvelocityPressure", iter);
- }
+ if (max_norm_res < ns.tolerance) {
+
+ if (write_conv_log == 1 && iter % conv_log_interval ==…
+ convlog << iter << '\t' << nijac << std::endl;
+
+ // Apply smoothing if requested
+ if (ns.gamma > 0.0) {
+ setNSghostNodes<Float>
+ <<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post setNSghostNodesEpsilon(4)", iter);
+
+ smoothing<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.gamma,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter("Post smoothing", iter);
+
+ setNSghostNodes<Float>
+ <<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_epsilon,
+ ns.bc_bot, ns.bc_top);
+ cudaThreadSynchronize();
+ checkForCudaErrorsIter
+ ("Post setNSghostNodesEpsilon(4)", iter);
+ }
+
+ if (report_epsilon == 1) {
+ std::cout << "\n###### JACOBI ITERATION "
+ << nijac << " after smoothing ######"
+ << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");
+ }
+
+ break; // solution has converged, exit Jacobi loop
+ }
- /*std::cout << "\n###### ITERATION "
- << iter << " ######" << std::endl;
- transferNSepsilonFromGlobalDeviceMemory();
- printNSarray(stdout, ns.epsilon, "epsilon");*/
- //transferNSepsilonNewFromGlobalDeviceMemory();
- //printNSarray(stdout, ns.epsilon_new, "epsilon_new");
+ if (nijac >= ns.maxiter-1) {
+
+ if (write_conv_log == 1)
+ convlog << iter << '\t' << nijac << std::endl;
+
+ std::cerr << "\nIteration " << iter << ", time "
+ << iter*time.dt << " s: "
+ "Error, the epsilon solution in the fluid "
+ "calculations did not converge. Try increasing the…
+ "value of 'ns.maxiter' (" << ns.maxiter
+ << ") or increase 'ns.tolerance' ("
+ << ns.tolerance << ")." << std::endl;
+ }
+ //break; // end after Jacobi first iteration
+ } // end Jacobi iteration loop
+
+ if (write_res_log == 1)
+ reslog.close();
+
+ // Find the new pressures and velocities
+ if (PROFILING == 1)
+ startTimer(&kernel_tic);
+ updateNSvelocityPressure<<<dimGridFluid, dimBlockFluid>>>(
+ dev_ns_p,
+ dev_ns_v,
+ dev_ns_v_p,
+ dev_ns_phi,
+ dev_ns_epsilon,
+ ns.beta,
+ ns.bc_bot,
+ ns.bc_top,
+ ns.ndem);
+ cudaThreadSynchronize();
+ if (PROFILING == 1)
+ stopTimer(&kernel_tic, &kernel_toc, &kernel_elapsed,
+ &t_updateNSvelocityPressure);
+ checkForCudaErrorsIter("Post updateNSvelocityPressure", iter);
+ }
+
+ /*std::cout << "\n###### ITERATION "
+ << iter << " ######" << std::endl;
+ transferNSepsilonFromGlobalDeviceMemory();
+ printNSarray(stdout, ns.epsilon, "epsilon");*/
+ //transferNSepsilonNewFromGlobalDeviceMemory();
+ //printNSarray(stdout, ns.epsilon_new, "epsilon_new");
+ }
if (np > 0) {
// Update particle kinematics
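The block wrapped in if ((iter % ns.ndem) == 0) above keeps the existing pressure-correction scheme: compute the forcing function, update the ghost nodes, run Jacobi iterations on epsilon, and stop once the maximum normalized residual falls below ns.tolerance or ns.maxiter is reached. A compact, self-contained sketch of that convergence loop, with stub functions standing in for the kernel launches (jacobi_sweep, max_norm_residual and the residual variable are illustrative, not sphere symbols):

#include <cstdio>

// Stand-ins for the jacobiIterationNS kernel launch and the
// maxNormResNS() reduction; the stub bodies only make the sketch
// self-contained.
static double residual = 1.0;
static void jacobi_sweep() { residual *= 0.5; }
static double max_norm_residual() { return residual; }

static void solve_epsilon(unsigned int maxiter, double tolerance)
{
    for (unsigned int nijac = 0; nijac < maxiter; ++nijac) {
        jacobi_sweep();                        // update epsilon from its neighbours
        if (max_norm_residual() < tolerance)
            return;                            // converged, exit the Jacobi loop
        if (nijac >= maxiter - 1)
            fprintf(stderr, "epsilon solution did not converge\n");
    }
}

int main() { solve_epsilon(100, 1.0e-6); return 0; }

In device.cu the residual is only read back from the device every nijacnorm iterations, and an optional smoothing pass (ns.gamma > 0) is applied after convergence; those details are omitted here.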
diff --git a/src/navierstokes.cuh b/src/navierstokes.cuh
@@ -1059,6 +1059,7 @@ __global__ void findPorositiesVelocitiesDiametersSpheric…
Float3* dev_ns_vp_avg,
Float* dev_ns_d_avg,
const unsigned int iteration,
+ const unsigned int ndem,
const unsigned int np)
{
// 3D thread index
@@ -1207,8 +1208,8 @@ __global__ void findPorositiesVelocitiesDiametersSpheric…
dot_epsilon_ii.x + dot_epsilon_ii.y + dot_epsilon_ii.z;
const Float dphi =
- (1.0 - fmin(phi_0,0.99))*dot_epsilon_kk*devC_dt;
- phi = phi_0 + dphi/devC_dt;
+ (1.0 - fmin(phi_0,0.99))*dot_epsilon_kk*ndem*devC_dt;
+ phi = phi_0 + dphi/(ndem*devC_dt);
//if (dot_epsilon_kk != 0.0)
//printf("%d,%d,%d\tdot_epsilon_kk = %f\tdphi = %f\tphi = %f\n…
@@ -1911,6 +1912,7 @@ __global__ void findPredNSvelocities(
int bc_top, // in
Float beta, // in
Float3* dev_ns_fi, // in
+ unsigned int ndem, // in
Float3* dev_ns_v_p) // out
{
// 3D thread index
@@ -1959,18 +1961,18 @@ __global__ void findPredNSvelocities(
Float3 pressure_term = MAKE_FLOAT3(0.0, 0.0, 0.0);
if (beta > 0.0) {
grad_p = gradient(dev_ns_p, x, y, z, dx, dy, dz);
- pressure_term = -beta/devC_params.rho_f*grad_p*devC_dt/phi;
+ pressure_term = -beta/devC_params.rho_f*grad_p*ndem*devC_dt/phi;
}
// Calculate the predicted velocity
Float3 v_p = v
+ pressure_term
- + 1.0/devC_params.rho_f*div_phi_tau*devC_dt/phi
+ + 1.0/devC_params.rho_f*div_phi_tau*ndem*devC_dt/phi
+ MAKE_FLOAT3(devC_params.g[0], devC_params.g[1], devC_params.g[2])
- *devC_dt
- - devC_dt/(devC_params.rho_f*phi)*f_i
+ *ndem*devC_dt
+ - ndem*devC_dt/(devC_params.rho_f*phi)*f_i
- v*dphi/phi
- - div_phi_vi_v*devC_dt/phi // advection term
+ - div_phi_vi_v*ndem*devC_dt/phi // advection term
;
// Report velocity components to stdout for debugging
@@ -2017,7 +2019,8 @@ __global__ void findNSforcing(
Float* dev_ns_phi,
Float* dev_ns_dphi,
Float3* dev_ns_v_p,
- unsigned int nijac)
+ unsigned int nijac,
+ unsigned int ndem)
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2066,9 +2069,9 @@ __global__ void findNSforcing(
+ dot(grad_phi, v_p)*devC_params.rho_f/(devC_dt*phi)
+ dphi*devC_params.rho_f/(devC_dt*devC_dt*phi);
f2 = grad_phi/phi;*/
- f1 = div_v_p*devC_params.rho_f*phi/devC_dt
- + dot(grad_phi, v_p)*devC_params.rho_f/devC_dt
- + dphi*devC_params.rho_f/(devC_dt*devC_dt);
+ f1 = div_v_p*devC_params.rho_f*phi/(ndem*devC_dt)
+ + dot(grad_phi, v_p)*devC_params.rho_f/(ndem*devC_dt)
+ + dphi*devC_params.rho_f/(ndem*devC_dt*devC_dt);
f2 = grad_phi/phi;
// Report values terms in the forcing function for debugging
@@ -2392,7 +2395,8 @@ __global__ void updateNSvelocityPressure(
Float* dev_ns_epsilon,
Float beta,
int bc_bot,
- int bc_top)
+ int bc_top,
+ unsigned int ndem)
{
// 3D thread index
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
@@ -2431,7 +2435,7 @@ __global__ void updateNSvelocityPressure(
// Find new velocity
//Float3 v = v_p - devC_dt/devC_params.rho_f*grad_epsilon;
- Float3 v = v_p - devC_dt/(devC_params.rho_f*phi)*grad_epsilon;
+ Float3 v = v_p - ndem*devC_dt/(devC_params.rho_f*phi)*grad_epsilon;
// Print values for debugging
/* if (z == 0) {
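In the kernels above, every fluid time derivative is now integrated over the effective fluid step ndem*devC_dt instead of devC_dt; the same factor appears in findPredNSvelocities, findNSforcing and updateNSvelocityPressure. A self-contained CUDA sketch of the velocity-correction step, assuming explicit kernel arguments in place of the constant-memory values devC_dt and devC_params.rho_f (all names below are placeholders):

// Sketch of the corrected-velocity update with the ndem scaling.
// The real updateNSvelocityPressure kernel reads devC_dt and
// devC_params.rho_f from constant memory and also updates the pressure.
__global__ void correctVelocities(
    const float3* __restrict__ v_p,      // predicted cell velocities
    const float3* __restrict__ grad_eps, // gradient of the pressure correction
    const float*  __restrict__ phi,      // cell porosities
    float3* v,                           // out: corrected velocities
    float dt,                            // DEM time step
    unsigned int ndem,                   // DEM steps per CFD step
    float rho_f,                         // fluid density
    unsigned int n)                      // number of fluid cells
{
    const unsigned int i = blockDim.x*blockIdx.x + threadIdx.x;
    if (i < n) {
        // The fluid is advanced ndem DEM steps at a time, so the
        // correction uses the effective time step ndem*dt.
        const float c = ndem*dt/(rho_f*phi[i]);
        v[i] = make_float3(v_p[i].x - c*grad_eps[i].x,
                           v_p[i].y - c*grad_eps[i].y,
                           v_p[i].z - c*grad_eps[i].z);
    }
}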