diff --git a/include/Level/level.h b/include/Level/level.h index fdfb265c..dd4ffab0 100644 --- a/include/Level/level.h +++ b/include/Level/level.h @@ -114,7 +114,6 @@ class LevelCache explicit LevelCache(const PolarGrid& grid, const DensityProfileCoefficients& density_profile_coefficients, const DomainGeometry& domain_geometry, const bool cache_density_profile_coefficients, const bool cache_domain_geometry); - explicit LevelCache(const Level& previous_level, const PolarGrid& current_grid); const DomainGeometry& domainGeometry() const; const DensityProfileCoefficients& densityProfileCoefficients() const; @@ -132,18 +131,8 @@ class LevelCache inline void obtainValues(const int i_r, const int i_theta, const int global_index, double r, double theta, double& coeff_beta, double& arr, double& att, double& art, double& detDF) const { - if (cache_density_profile_coefficients_) - coeff_beta = coeff_beta_[global_index]; - else - coeff_beta = density_profile_coefficients_.beta(r, theta); - - double coeff_alpha; - if (!cache_domain_geometry_) { - if (cache_density_profile_coefficients_) - coeff_alpha = coeff_alpha_[global_index]; - else - coeff_alpha = density_profile_coefficients_.alpha(r, theta); - } + coeff_beta = cache_density_profile_coefficients_ ? coeff_beta_[global_index] + : density_profile_coefficients_.beta(r, theta); if (cache_domain_geometry_) { arr = arr_[global_index]; @@ -152,6 +141,9 @@ class LevelCache detDF = detDF_[global_index]; } else { + double coeff_alpha = cache_density_profile_coefficients_ ? coeff_alpha_[global_index] + : density_profile_coefficients_.alpha(r, theta); + compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); } } @@ -160,7 +152,7 @@ class LevelCache const DomainGeometry& domain_geometry_; const DensityProfileCoefficients& density_profile_coefficients_; - bool cache_density_profile_coefficients_; // cache alpha(r_i), beta(r_i) + bool cache_density_profile_coefficients_; // cache alpha(r, theta), beta(r, theta) Vector coeff_alpha_; Vector coeff_beta_; diff --git a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp index 034c8ee5..8124c66f 100644 --- a/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp +++ b/src/ExtrapolatedSmoother/ExtrapolatedSmootherGive/smootherSolver.cpp @@ -826,30 +826,8 @@ void ExtrapolatedSmootherGive::applyAscOrthoCircleSection(const int i_r, const S const double theta = grid_.theta(i_theta); const int index = grid_.index(i_r, i_theta); - double coeff_beta; - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_beta = level_cache_.coeff_beta()[index]; - else - coeff_beta = density_profile_coefficients_.beta(r, theta); - - double coeff_alpha; - if (!level_cache_.cacheDomainGeometry()) { - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_alpha = level_cache_.coeff_alpha()[index]; - else - coeff_alpha = density_profile_coefficients_.alpha(r, theta); - } - - double arr, att, art, detDF; - if (level_cache_.cacheDomainGeometry()) { - arr = level_cache_.arr()[index]; - att = level_cache_.att()[index]; - art = level_cache_.art()[index]; - detDF = level_cache_.detDF()[index]; - } - else { - compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); - } + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node NODE_APPLY_ASC_ORTHO_CIRCLE_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, @@ -868,30 +846,8 @@ void ExtrapolatedSmootherGive::applyAscOrthoRadialSection(const int i_theta, con const double r = grid_.radius(i_r); const int index = grid_.index(i_r, i_theta); - double coeff_beta; - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_beta = level_cache_.coeff_beta()[index]; - else - coeff_beta = density_profile_coefficients_.beta(r, theta); - - double coeff_alpha; - if (!level_cache_.cacheDomainGeometry()) { - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_alpha = level_cache_.coeff_alpha()[index]; - else - coeff_alpha = density_profile_coefficients_.alpha(r, theta); - } - - double arr, att, art, detDF; - if (level_cache_.cacheDomainGeometry()) { - arr = level_cache_.arr()[index]; - att = level_cache_.att()[index]; - art = level_cache_.art()[index]; - detDF = level_cache_.detDF()[index]; - } - else { - compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); - } + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node NODE_APPLY_ASC_ORTHO_RADIAL_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, diff --git a/src/GMGPolar/setup.cpp b/src/GMGPolar/setup.cpp index 3c897a1b..8ff5a2e7 100644 --- a/src/GMGPolar/setup.cpp +++ b/src/GMGPolar/setup.cpp @@ -61,8 +61,10 @@ void GMGPolar::setup() levels_.emplace_back(level_depth, std::move(finest_grid), std::move(finest_levelCache), extrapolation_, FMG_); for (level_depth = 1; level_depth < number_of_levels_; level_depth++) { - auto current_grid = std::make_unique(coarseningGrid(levels_[level_depth - 1].grid())); - auto current_levelCache = std::make_unique(levels_[level_depth - 1], *current_grid); + auto current_grid = std::make_unique(coarseningGrid(levels_[level_depth - 1].grid())); + auto current_levelCache = + std::make_unique(*current_grid, density_profile_coefficients_, domain_geometry_, + cache_density_profile_coefficients_, cache_domain_geometry_); levels_.emplace_back(level_depth, std::move(current_grid), std::move(current_levelCache), extrapolation_, FMG_); } diff --git a/src/Level/levelCache.cpp b/src/Level/levelCache.cpp index c8d31243..8271e293 100644 --- a/src/Level/levelCache.cpp +++ b/src/Level/levelCache.cpp @@ -16,6 +16,8 @@ LevelCache::LevelCache(const PolarGrid& grid, const DensityProfileCoefficients& , art_("art", cache_domain_geometry ? grid.numberOfNodes() : 0) , detDF_("detDF", cache_domain_geometry ? grid.numberOfNodes() : 0) { + // Pre-compute and store alpha/beta coefficients at all grid nodes to avoid + // repeated expensive evaluations during runtime computations if (cache_density_profile_coefficients_) { #pragma omp parallel for for (int i_r = 0; i_r < grid.nr(); i_r++) { @@ -31,15 +33,20 @@ LevelCache::LevelCache(const PolarGrid& grid, const DensityProfileCoefficients& } } + // Pre-compute and store Jacobian matrix elements (arr, att, art, detDF) at all grid nodes + // to avoid repeated coordinate transformation calculations during domain operations if (cache_domain_geometry_) { + // We split the loops into two regions to better respect the + // access patterns of the smoother and improve cache locality + + // Circular Indexing Section #pragma omp parallel for for (int i_r = 0; i_r < grid.numberSmootherCircles(); i_r++) { const double r = grid.radius(i_r); for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { - const double theta = grid.theta(i_theta); - const int index = grid.index(i_r, i_theta); - - double coeff_alpha = density_profile_coefficients.alpha(r, theta); + const double theta = grid.theta(i_theta); + const int index = grid.index(i_r, i_theta); + const double coeff_alpha = density_profile_coefficients.alpha(r, theta); double arr, att, art, detDF; compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); @@ -49,21 +56,14 @@ LevelCache::LevelCache(const PolarGrid& grid, const DensityProfileCoefficients& art_(index) = art; } } - + // Radial Indexing Section #pragma omp parallel for for (int i_theta = 0; i_theta < grid.ntheta(); i_theta++) { const double theta = grid.theta(i_theta); for (int i_r = grid.numberSmootherCircles(); i_r < grid.nr(); i_r++) { - const double r = grid.radius(i_r); - const int index = grid.index(i_r, i_theta); - - double coeff_alpha; - if (cache_density_profile_coefficients_ && !cache_domain_geometry_) { - coeff_alpha = coeff_alpha_(index); - } - else { - coeff_alpha = density_profile_coefficients.alpha(r, theta); - } + const double r = grid.radius(i_r); + const int index = grid.index(i_r, i_theta); + const double coeff_alpha = density_profile_coefficients.alpha(r, theta); double arr, att, art, detDF; compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); @@ -76,62 +76,6 @@ LevelCache::LevelCache(const PolarGrid& grid, const DensityProfileCoefficients& } } -LevelCache::LevelCache(const Level& previous_level, const PolarGrid& current_grid) - : domain_geometry_(previous_level.levelCache().domainGeometry()) - , density_profile_coefficients_(previous_level.levelCache().densityProfileCoefficients()) - , cache_density_profile_coefficients_(previous_level.levelCache().cacheDensityProfileCoefficients()) - , coeff_alpha_("coeff_alpha", - previous_level.levelCache().coeff_alpha().size() > 0 ? current_grid.numberOfNodes() : 0) - , coeff_beta_("coeff_beta", previous_level.levelCache().coeff_beta().size() > 0 ? current_grid.numberOfNodes() : 0) - , cache_domain_geometry_(previous_level.levelCache().cacheDomainGeometry()) - , arr_("arr", previous_level.levelCache().arr().size() > 0 ? current_grid.numberOfNodes() : 0) - , att_("att", previous_level.levelCache().att().size() > 0 ? current_grid.numberOfNodes() : 0) - , art_("art", previous_level.levelCache().art().size() > 0 ? current_grid.numberOfNodes() : 0) - , detDF_("detDF", previous_level.levelCache().detDF().size() > 0 ? current_grid.numberOfNodes() : 0) -{ - const auto& previous_level_cache = previous_level.levelCache(); - - if (previous_level_cache.cacheDensityProfileCoefficients()) { -#pragma omp parallel for - for (int i_r = 0; i_r < current_grid.nr(); i_r++) { - for (int i_theta = 0; i_theta < current_grid.ntheta(); i_theta++) { - const int current_index = current_grid.index(i_r, i_theta); - const int previous_index = previous_level.grid().index(2 * i_r, 2 * i_theta); - - if (!previous_level_cache.cacheDomainGeometry()) { - coeff_alpha_[current_index] = previous_level_cache.coeff_alpha()[previous_index]; - } - coeff_beta_[current_index] = previous_level_cache.coeff_beta()[previous_index]; - } - } - } - - if (previous_level_cache.cacheDomainGeometry()) { -#pragma omp parallel for - for (int i_r = 0; i_r < current_grid.numberSmootherCircles(); i_r++) { - for (int i_theta = 0; i_theta < current_grid.ntheta(); i_theta++) { - const int current_index = current_grid.index(i_r, i_theta); - const int previous_index = previous_level.grid().index(2 * i_r, 2 * i_theta); - arr_[current_index] = previous_level_cache.arr()[previous_index]; - att_[current_index] = previous_level_cache.att()[previous_index]; - art_[current_index] = previous_level_cache.art()[previous_index]; - detDF_[current_index] = previous_level_cache.detDF()[previous_index]; - } - } -#pragma omp parallel for - for (int i_theta = 0; i_theta < current_grid.ntheta(); i_theta++) { - for (int i_r = current_grid.numberSmootherCircles(); i_r < current_grid.nr(); i_r++) { - const int current_index = current_grid.index(i_r, i_theta); - const int previous_index = previous_level.grid().index(2 * i_r, 2 * i_theta); - arr_[current_index] = previous_level_cache.arr()[previous_index]; - att_[current_index] = previous_level_cache.att()[previous_index]; - art_[current_index] = previous_level_cache.art()[previous_index]; - detDF_[current_index] = previous_level_cache.detDF()[previous_index]; - } - } - } -} - const DensityProfileCoefficients& LevelCache::densityProfileCoefficients() const { return density_profile_coefficients_; diff --git a/src/Smoother/SmootherGive/smootherSolver.cpp b/src/Smoother/SmootherGive/smootherSolver.cpp index 86e4474f..28843757 100644 --- a/src/Smoother/SmootherGive/smootherSolver.cpp +++ b/src/Smoother/SmootherGive/smootherSolver.cpp @@ -334,30 +334,8 @@ void SmootherGive::applyAscOrthoCircleSection(const int i_r, const SmootherColor const double theta = grid_.theta(i_theta); const int index = grid_.index(i_r, i_theta); - double coeff_beta; - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_beta = level_cache_.coeff_beta()[index]; - else - coeff_beta = density_profile_coefficients_.beta(r, theta); - - double coeff_alpha; - if (!level_cache_.cacheDomainGeometry()) { - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_alpha = level_cache_.coeff_alpha()[index]; - else - coeff_alpha = density_profile_coefficients_.alpha(r, theta); - } - - double arr, att, art, detDF; - if (level_cache_.cacheDomainGeometry()) { - arr = level_cache_.arr()[index]; - att = level_cache_.att()[index]; - art = level_cache_.art()[index]; - detDF = level_cache_.detDF()[index]; - } - else { - compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); - } + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); // Apply Asc Ortho at the current node NODE_APPLY_ASC_ORTHO_CIRCLE_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, @@ -375,30 +353,9 @@ void SmootherGive::applyAscOrthoRadialSection(const int i_theta, const SmootherC const double r = grid_.radius(i_r); const int index = grid_.index(i_r, i_theta); - double coeff_beta; - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_beta = level_cache_.coeff_beta()[index]; - else - coeff_beta = density_profile_coefficients_.beta(r, theta); - - double coeff_alpha; - if (!level_cache_.cacheDomainGeometry()) { - if (level_cache_.cacheDensityProfileCoefficients()) - coeff_alpha = level_cache_.coeff_alpha()[index]; - else - coeff_alpha = density_profile_coefficients_.alpha(r, theta); - } + double coeff_beta, arr, att, art, detDF; + level_cache_.obtainValues(i_r, i_theta, index, r, theta, coeff_beta, arr, att, art, detDF); - double arr, att, art, detDF; - if (level_cache_.cacheDomainGeometry()) { - arr = level_cache_.arr()[index]; - att = level_cache_.att()[index]; - art = level_cache_.art()[index]; - detDF = level_cache_.detDF()[index]; - } - else { - compute_jacobian_elements(domain_geometry_, r, theta, coeff_alpha, arr, att, art, detDF); - } // Apply Asc Ortho at the current node NODE_APPLY_ASC_ORTHO_RADIAL_GIVE(i_r, i_theta, r, theta, grid_, DirBC_Interior_, smoother_color, x, rhs, temp, arr, att, art, detDF, coeff_beta);