From b41cda3eb66219006e0ef8cbf3087237a23bb941 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 15 Oct 2024 14:46:53 -0400 Subject: [PATCH 01/31] add annealer.cpp/.h --- vpr/src/base/ShowSetup.cpp | 10 +-- vpr/src/base/read_options.cpp | 6 +- vpr/src/base/read_options.h | 2 +- vpr/src/base/vpr_types.h | 4 +- vpr/src/place/annealer.cpp | 145 ++++++++++++++++++++++++++++++++++ vpr/src/place/annealer.h | 135 +++++++++++++++++++++++++++++++ vpr/src/place/place.cpp | 6 +- vpr/src/place/place_util.cpp | 140 +------------------------------- vpr/src/place/place_util.h | 124 ----------------------------- 9 files changed, 295 insertions(+), 277 deletions(-) create mode 100644 vpr/src/place/annealer.cpp create mode 100644 vpr/src/place/annealer.h diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index f288be8f865..68aa073759d 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -207,13 +207,13 @@ void writeClusteredNetlistStats(const std::string& block_usage_filename) { static void ShowAnnealSched(const t_annealing_sched& AnnealSched) { VTR_LOG("AnnealSched.type: "); switch (AnnealSched.type) { - case AUTO_SCHED: + case e_sched_type::AUTO_SCHED: VTR_LOG("AUTO_SCHED\n"); break; - case USER_SCHED: + case e_sched_type::USER_SCHED: VTR_LOG("USER_SCHED\n"); break; - case DUSTY_SCHED: + case e_sched_type::DUSTY_SCHED: VTR_LOG("DUSTY_SCHED\n"); break; default: @@ -222,11 +222,11 @@ static void ShowAnnealSched(const t_annealing_sched& AnnealSched) { VTR_LOG("AnnealSched.inner_num: %f\n", AnnealSched.inner_num); - if (USER_SCHED == AnnealSched.type) { + if (e_sched_type::USER_SCHED == AnnealSched.type) { VTR_LOG("AnnealSched.init_t: %f\n", AnnealSched.init_t); VTR_LOG("AnnealSched.alpha_t: %f\n", AnnealSched.alpha_t); VTR_LOG("AnnealSched.exit_t: %f\n", AnnealSched.exit_t); - } else if (DUSTY_SCHED == AnnealSched.type) { + } else if (e_sched_type::DUSTY_SCHED == AnnealSched.type) { VTR_LOG("AnnealSched.alpha_min: %f\n", 
AnnealSched.alpha_min); VTR_LOG("AnnealSched.alpha_max: %f\n", AnnealSched.alpha_max); VTR_LOG("AnnealSched.alpha_decay: %f\n", AnnealSched.alpha_decay); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index fa7084a9b07..eeb4bbfaee0 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -3141,13 +3141,13 @@ void set_conditional_defaults(t_options& args) { || args.PlaceAlphaDecay.provenance() == Provenance::SPECIFIED || args.PlaceSuccessMin.provenance() == Provenance::SPECIFIED || args.PlaceSuccessTarget.provenance() == Provenance::SPECIFIED) { - args.anneal_sched_type.set(DUSTY_SCHED, Provenance::INFERRED); + args.anneal_sched_type.set(e_sched_type::DUSTY_SCHED, Provenance::INFERRED); } else if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule || args.PlaceExitT.provenance() == Provenance::SPECIFIED || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { - args.anneal_sched_type.set(USER_SCHED, Provenance::INFERRED); + args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { - args.anneal_sched_type.set(AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule + args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule } /* diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index c07762350d5..673694fd80c 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -124,7 +124,7 @@ struct t_options { argparse::ArgValue PlaceAlphaDecay; argparse::ArgValue PlaceSuccessMin; argparse::ArgValue PlaceSuccessTarget; - argparse::ArgValue anneal_sched_type; + argparse::ArgValue anneal_sched_type; argparse::ArgValue PlaceAlgorithm; argparse::ArgValue PlaceQuenchAlgorithm; argparse::ArgValue pad_loc_type; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 98f17e898a1..84432ed7181 100644 --- 
a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -460,7 +460,7 @@ constexpr int NUM_PL_MOVE_TYPES = 7; constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3; /* Timing data structures end */ -enum sched_type { +enum class e_sched_type { AUTO_SCHED, DUSTY_SCHED, USER_SCHED @@ -836,7 +836,7 @@ struct t_packer_opts { * the obvious meanings. */ struct t_annealing_sched { - enum sched_type type; + e_sched_type type; float inner_num; float init_t; float alpha_t; diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp new file mode 100644 index 00000000000..b820acdaf92 --- /dev/null +++ b/vpr/src/place/annealer.cpp @@ -0,0 +1,145 @@ + +#include "annealer.h" + +#include +#include + +#include "globals.h" +#include "draw_global.h" +#include "vpr_types.h" +#include "place_util.h" + +///@brief Constructor: Initialize all annealing state variables and macros. +t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, + float first_t, + float first_rlim, + int first_move_lim, + float first_crit_exponent) { + num_temps = 0; + alpha = annealing_sched.alpha_min; + t = first_t; + restart_t = first_t; + rlim = first_rlim; + move_lim_max = first_move_lim; + crit_exponent = first_crit_exponent; + + /* Determine the current move_lim based on the schedule type */ + if (annealing_sched.type == e_sched_type::DUSTY_SCHED) { + move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target)); + } else { + move_lim = move_lim_max; + } + + /* Store this inverse value for speed when updating crit_exponent. */ + INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); + + /* The range limit cannot exceed the largest grid size. 
*/ + const auto& grid = g_vpr_ctx.device().grid; + UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1); +} + +bool t_annealing_state::outer_loop_update(float success_rate, + const t_placer_costs& costs, + const t_placer_opts& placer_opts, + const t_annealing_sched& annealing_sched) { +#ifndef NO_GRAPHICS + t_draw_state* draw_state = get_draw_state_vars(); + if (!draw_state->list_of_breakpoints.empty()) { + /* Update temperature in the current information variable. */ + get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; + } +#endif + + if (annealing_sched.type == e_sched_type::USER_SCHED) { + /* Update t with user specified alpha. */ + t *= annealing_sched.alpha_t; + + /* Check if the exit criterion is met. */ + bool exit_anneal = t >= annealing_sched.exit_t; + + return exit_anneal; + } + + /* Automatically determine exit temperature. */ + auto& cluster_ctx = g_vpr_ctx.clustering(); + float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + + if (annealing_sched.type == e_sched_type::DUSTY_SCHED) { + /* May get nan if there are no nets */ + bool restart_temp = t < t_exit || std::isnan(t_exit); + + /* If the success rate or the temperature is * + * too low, reset the temperature and alpha. */ + if (success_rate < annealing_sched.success_min || restart_temp) { + /* Only exit anneal when alpha gets too large. */ + if (alpha > annealing_sched.alpha_max) { + return false; + } + /* Take a half step from the restart temperature. */ + t = restart_t / sqrt(alpha); + /* Update alpha. */ + alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay); + } else { + /* If the success rate is promising, next time * + * reset t to the current annealing temperature. */ + if (success_rate > annealing_sched.success_target) { + restart_t = t; + } + /* Update t. */ + t *= alpha; + } + + /* Update move lim. 
*/ + update_move_lim(annealing_sched.success_target, success_rate); + } else { + VTR_ASSERT_SAFE(annealing_sched.type == e_sched_type::AUTO_SCHED); + /* Automatically adjust alpha according to success rate. */ + if (success_rate > 0.96) { + alpha = 0.5; + } else if (success_rate > 0.8) { + alpha = 0.9; + } else if (success_rate > 0.15 || rlim > 1.) { + alpha = 0.95; + } else { + alpha = 0.8; + } + /* Update temp. */ + t *= alpha; + /* Must be duplicated to retain previous behavior. */ + if (t < t_exit || std::isnan(t_exit)) { + return false; + } + } + + /* Update the range limiter. */ + update_rlim(success_rate); + + /* If using timing driven algorithm, update the crit_exponent. */ + if (placer_opts.place_algorithm.is_timing_driven()) { + update_crit_exponent(placer_opts); + } + + /* Continues the annealing. */ + return true; +} + +void t_annealing_state::update_rlim(float success_rate) { + rlim *= (1. - 0.44 + success_rate); + rlim = std::min(rlim, UPPER_RLIM); + rlim = std::max(rlim, FINAL_RLIM); +} + +void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { + /* If rlim == FINAL_RLIM, then scale == 0. */ + float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; + + /* Apply the scaling factor on crit_exponent. 
*/ + crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + + placer_opts.td_place_exp_first; +} + +void t_annealing_state::update_move_lim(float success_target, float success_rate) { + move_lim = move_lim_max * (success_target / success_rate); + move_lim = std::min(move_lim, move_lim_max); + move_lim = std::max(move_lim, 1); +} \ No newline at end of file diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h new file mode 100644 index 00000000000..0c046bcdf5f --- /dev/null +++ b/vpr/src/place/annealer.h @@ -0,0 +1,135 @@ + +#pragma once + +class t_placer_costs; +struct t_placer_opts; +struct t_annealing_sched; + +/** + * @brief Stores variables that are used by the annealing process. + * + * This structure is updated by update_annealing_state() on each outer + * loop iteration. It stores various important variables that need to + * be accessed during the placement inner loop. + * + * Private variables are not given accessor functions. They serve as + * macros originally defined in place.cpp as global scope variables. + * + * Public members: + * @param t + * Temperature for simulated annealing. + * @param restart_t + * Temperature used after restart due to minimum success ratio. + * Currently only used and updated by DUSTY_SCHED. + * @param alpha + * Temperature decays factor (multiplied each outer loop iteration). + * @param num_temps + * The count of how many temperature iterations have passed. + * + * @param rlim + * Range limit for block swaps. + * Currently only updated by DUSTY_SCHED and AUTO_SCHED. + * @param crit_exponent + * Used by timing-driven placement to "sharpen" the timing criticality. + * Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED. + * @param move_lim + * Current block move limit. + * Currently only updated by DUSTY_SCHED. + * @param move_lim_max + * Maximum block move limit. + * + * Private members: + * @param UPPER_RLIM + * The upper limit for the range limiter value. 
+ * @param FINAL_RLIM + * The final rlim (range limit) is 1, which is the smallest value that + * can still make progress, since an rlim of 0 wouldn't allow any swaps. + * @param INVERSE_DELTA_RLIM + * Used to update crit_exponent. See update_rlim() for more. + * + * Mutators: + * @param outer_loop_update() + * Update the annealing state variables in the placement outer loop. + * @param update_rlim(), update_crit_exponent(), update_move_lim() + * Inline subroutines used by the main routine outer_loop_update(). + */ +class t_annealing_state { + public: + float t; + float restart_t; + float alpha; + int num_temps; + + float rlim; + float crit_exponent; + int move_lim; + int move_lim_max; + + private: + float UPPER_RLIM; + float FINAL_RLIM = 1.; + float INVERSE_DELTA_RLIM; + + public: //Constructor + t_annealing_state() = delete; + t_annealing_state(const t_annealing_sched& annealing_sched, + float first_t, + float first_rlim, + int first_move_lim, + float first_crit_exponent); + + public: //Mutator + /** + * @brief Update the annealing state according to the annealing schedule selected. + * + * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. + * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. + * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. + * See doc/src/vpr/dusty_sa.rst for more details. + * + * @return True->continues the annealing. False->exits the annealing. + */ + bool outer_loop_update(float success_rate, + const t_placer_costs& costs, + const t_placer_opts& placer_opts, + const t_annealing_sched& annealing_sched); + + private: //Mutator + /** + * @brief Update the range limiter to keep acceptance prob. near 0.44. + * + * Use a floating point rlim to allow gradual transitions at low temps. + * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). 
+ */ + inline void update_rlim(float success_rate); + + /** + * @brief Update the criticality exponent. + * + * When rlim shrinks towards the FINAL_RLIM value (indicating + * that we are fine-tuning a more optimized placement), we can + * focus more on a smaller number of critical connections. + * To achieve this, we make the crit_exponent sharper, so that + * critical connections would become more critical than before. + * + * We calculate how close rlim is to its final value comparing + * to its initial value. Then, we apply the same scaling factor + * on the crit_exponent so that it lands on the suitable value + * between td_place_exp_first and td_place_exp_last. The scaling + * factor is calculated and applied linearly. + */ + inline void update_crit_exponent(const t_placer_opts& placer_opts); + + /** + * @brief Update the move limit based on the success rate. + * + * The value is bounded between 1 and move_lim_max. + */ + inline void update_move_lim(float success_target, float success_rate); +}; + +class PlacementAnnealer { + + private: + t_annealing_state annealing_state_; +}; \ No newline at end of file diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index aaa5620af50..90b566fb753 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -24,6 +24,7 @@ #include "globals.h" #include "place.h" +#include "annealer.h" #include "read_place.h" #include "draw.h" #include "place_and_route.h" @@ -688,8 +689,7 @@ void try_place(const Netlist<>& net_list, EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved first_rlim, first_move_lim, - first_crit_exponent, - device_ctx.grid.get_num_layers()); + first_crit_exponent); /* Update the starting temperature for placement annealing to a more appropriate value */ state.t = starting_t(&state, &costs, annealing_sched, @@ -1173,7 +1173,7 @@ static float starting_t(const t_annealing_state* state, PlacerState& placer_state, NetCostHandler& net_cost_handler, 
std::optional& noc_cost_handler) { - if (annealing_sched.type == USER_SCHED) { + if (annealing_sched.type == e_sched_type::USER_SCHED) { return (annealing_sched.init_t); } diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index ce24914b7f2..ec7ecb8982e 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -71,38 +71,6 @@ t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) { return *this; } -///@brief Constructor: Initialize all annealing state variables and macros. -t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, - float first_rlim, - int first_move_lim, - float first_crit_exponent, - int num_laters) { - num_temps = 0; - alpha = annealing_sched.alpha_min; - t = first_t; - restart_t = first_t; - rlim = first_rlim; - move_lim_max = first_move_lim; - crit_exponent = first_crit_exponent; - - /* Determine the current move_lim based on the schedule type */ - if (annealing_sched.type == DUSTY_SCHED) { - move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target)); - } else { - move_lim = move_lim_max; - } - - NUM_LAYERS = num_laters; - - /* Store this inverse value for speed when updating crit_exponent. */ - INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); - - /* The range limit cannot exceed the largest grid size. 
*/ - auto& grid = g_vpr_ctx.device().grid; - UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1); -} - int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) { const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -126,112 +94,6 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch return move_lim; } -bool t_annealing_state::outer_loop_update(float success_rate, - const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched) { -#ifndef NO_GRAPHICS - t_draw_state* draw_state = get_draw_state_vars(); - if (!draw_state->list_of_breakpoints.empty()) { - /* Update temperature in the current information variable. */ - get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; - } -#endif - - if (annealing_sched.type == USER_SCHED) { - /* Update t with user specified alpha. */ - t *= annealing_sched.alpha_t; - - /* Check if the exit criterion is met. */ - bool exit_anneal = t >= annealing_sched.exit_t; - - return exit_anneal; - } - - /* Automatically determine exit temperature. */ - auto& cluster_ctx = g_vpr_ctx.clustering(); - float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); - - if (annealing_sched.type == DUSTY_SCHED) { - /* May get nan if there are no nets */ - bool restart_temp = t < t_exit || std::isnan(t_exit); - - /* If the success rate or the temperature is * - * too low, reset the temperature and alpha. */ - if (success_rate < annealing_sched.success_min || restart_temp) { - /* Only exit anneal when alpha gets too large. */ - if (alpha > annealing_sched.alpha_max) { - return false; - } - /* Take a half step from the restart temperature. */ - t = restart_t / sqrt(alpha); - /* Update alpha. 
*/ - alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay); - } else { - /* If the success rate is promising, next time * - * reset t to the current annealing temperature. */ - if (success_rate > annealing_sched.success_target) { - restart_t = t; - } - /* Update t. */ - t *= alpha; - } - - /* Update move lim. */ - update_move_lim(annealing_sched.success_target, success_rate); - } else { - VTR_ASSERT_SAFE(annealing_sched.type == AUTO_SCHED); - /* Automatically adjust alpha according to success rate. */ - if (success_rate > 0.96) { - alpha = 0.5; - } else if (success_rate > 0.8) { - alpha = 0.9; - } else if (success_rate > 0.15 || rlim > 1.) { - alpha = 0.95; - } else { - alpha = 0.8; - } - /* Update temp. */ - t *= alpha; - /* Must be duplicated to retain previous behavior. */ - if (t < t_exit || std::isnan(t_exit)) { - return false; - } - } - - /* Update the range limiter. */ - update_rlim(success_rate); - - /* If using timing driven algorithm, update the crit_exponent. */ - if (placer_opts.place_algorithm.is_timing_driven()) { - update_crit_exponent(placer_opts); - } - - /* Continues the annealing. */ - return true; -} - -void t_annealing_state::update_rlim(float success_rate) { - rlim *= (1. - 0.44 + success_rate); - rlim = std::min(rlim, UPPER_RLIM); - rlim = std::max(rlim, FINAL_RLIM); -} - -void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { - /* If rlim == FINAL_RLIM, then scale == 0. */ - float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; - - /* Apply the scaling factor on crit_exponent. */ - crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) - + placer_opts.td_place_exp_first; -} - -void t_annealing_state::update_move_lim(float success_target, float success_rate) { - move_lim = move_lim_max * (success_target / success_rate); - move_lim = std::min(move_lim, move_lim_max); - move_lim = std::max(move_lim, 1); -} - ///@brief Clear all data fields. 
void t_placer_statistics::reset() { av_cost = 0.; @@ -390,7 +252,7 @@ bool macro_can_be_placed(const t_pl_macro& pl_macro, } } - return (mac_can_be_placed); + return mac_can_be_placed; } NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest) diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 60d4a86b1c5..49f4246dbe5 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -126,130 +126,6 @@ class t_placer_costs { t_place_algorithm place_algorithm; }; -/** - * @brief Stores variables that are used by the annealing process. - * - * This structure is updated by update_annealing_state() on each outer - * loop iteration. It stores various important variables that need to - * be accessed during the placement inner loop. - * - * Private variables are not given accessor functions. They serve as - * macros originally defined in place.cpp as global scope variables. - * - * Public members: - * @param t - * Temperature for simulated annealing. - * @param restart_t - * Temperature used after restart due to minimum success ratio. - * Currently only used and updated by DUSTY_SCHED. - * @param alpha - * Temperature decays factor (multiplied each outer loop iteration). - * @param num_temps - * The count of how many temperature iterations have passed. - * - * @param rlim - * Range limit for block swaps. - * Currently only updated by DUSTY_SCHED and AUTO_SCHED. - * @param crit_exponent - * Used by timing-driven placement to "sharpen" the timing criticality. - * Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED. - * @param move_lim - * Current block move limit. - * Currently only updated by DUSTY_SCHED. - * @param move_lim_max - * Maximum block move limit. - * - * Private members: - * @param UPPER_RLIM - * The upper limit for the range limiter value. 
- * @param FINAL_RLIM - * The final rlim (range limit) is 1, which is the smallest value that - * can still make progress, since an rlim of 0 wouldn't allow any swaps. - * @param INVERSE_DELTA_RLIM - * Used to update crit_exponent. See update_rlim() for more. - * - * Mutators: - * @param outer_loop_update() - * Update the annealing state variables in the placement outer loop. - * @param update_rlim(), update_crit_exponent(), update_move_lim() - * Inline subroutines used by the main routine outer_loop_update(). - */ -class t_annealing_state { - public: - float t; - float restart_t; - float alpha; - int num_temps; - - float rlim; - float crit_exponent; - int move_lim; - int move_lim_max; - - private: - float UPPER_RLIM; - float FINAL_RLIM = 1.; - float INVERSE_DELTA_RLIM; - int NUM_LAYERS = 1; - - public: //Constructor - t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, - float first_rlim, - int first_move_lim, - float first_crit_exponent, - int num_layers); - - public: //Mutator - /** - * @brief Update the annealing state according to the annealing schedule selected. - * - * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. - * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. - * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. - * See doc/src/vpr/dusty_sa.rst for more details. - * - * @return True->continues the annealing. False->exits the annealing. - */ - bool outer_loop_update(float success_rate, - const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched); - - private: //Mutator - /** - * @brief Update the range limiter to keep acceptance prob. near 0.44. - * - * Use a floating point rlim to allow gradual transitions at low temps. - * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). 
- */ - inline void update_rlim(float success_rate); - - /** - * @brief Update the criticality exponent. - * - * When rlim shrinks towards the FINAL_RLIM value (indicating - * that we are fine-tuning a more optimized placement), we can - * focus more on a smaller number of critical connections. - * To achieve this, we make the crit_exponent sharper, so that - * critical connections would become more critical than before. - * - * We calculate how close rlim is to its final value comparing - * to its initial value. Then, we apply the same scaling factor - * on the crit_exponent so that it lands on the suitable value - * between td_place_exp_first and td_place_exp_last. The scaling - * factor is calculated and applied linearly. - */ - inline void update_crit_exponent(const t_placer_opts& placer_opts); - - /** - * @brief Update the move limit based on the success rate. - * - * The value is bounded between 1 and move_lim_max. - */ - inline void update_move_lim(float success_target, float success_rate); -}; - /** * @brief Stores statistics produced by a single annealing iteration. * From 32a2726602c4471372a80c3a84dfc22891e84bf7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 15 Oct 2024 19:33:47 -0400 Subject: [PATCH 02/31] commit before I go home. --- vpr/src/base/vpr_types.h | 2 +- vpr/src/place/annealer.cpp | 911 +++++++++++++++++++++++++++++++++++ vpr/src/place/annealer.h | 104 +++- vpr/src/place/move_utils.h | 12 - vpr/src/place/place.cpp | 462 +----------------- vpr/src/place/place.h | 1 - vpr/src/place/placer_state.h | 8 + 7 files changed, 1032 insertions(+), 468 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 84432ed7181..cb280ff36ec 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1066,6 +1066,7 @@ enum class e_move_type; struct t_placer_opts { t_place_algorithm place_algorithm; t_place_algorithm place_quench_algorithm; + t_annealing_sched anneal_sched; ///name, (to_type ? 
to_type->name : "EMPTY"), \ + affected_blocks.moved_blocks.size()); \ + } \ + } while (false) + +# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ + outcome, reason) \ + do { \ + if (f_move_stats_file) { \ + fprintf(f_move_stats_file.get(), \ + "%g,%g,%g," \ + "%s,%s\n", \ + delta_cost, delta_bb_cost, delta_td_cost, \ + outcome, reason); \ + } \ + } while (false) + +#else + +# define LOG_MOVE_STATS_HEADER() \ + do { \ + fprintf(move_stats_file_.get(), \ + "VTR_ENABLE_DEBUG_LOGGING disabled " \ + "-- No move stats recorded\n"); \ + } while (false) + +# define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \ + do { \ + } while (false) + +# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ + outcome, reason) \ + do { \ + } while (false) + +#endif + + +/** + * @brief Invalidates the connections affected by the specified block moves. + * + * All the connections recorded in blocks_affected.affected_pins have different + * values for `proposed_connection_delay` and `connection_delay`. + * + * Invalidate all the timing graph edges associated with these connections via + * the NetPinTimingInvalidator class. + */ +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + NetPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info); + +/** + * @brief Update the connection_timing_cost values from the temporary + * values for all connections that have/haven't changed. + * + * All the connections have already been gathered by blocks_affected.affected_pins + * after running the routine find_affected_nets_and_update_costs() in try_swap(). + */ +static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerState& placer_state); + +/** + * @brief Check if the setup slack has gotten better or worse due to block swap. + * + * Get all the modified slack values via the PlacerSetupSlacks class, and compare + * then with the original values at these connections. 
Sort them and compare them + * one by one, and return the difference of the first different pair. + * + * If the new slack value is larger(better), than return a negative value so that + * the move will be accepted. If the new slack value is smaller(worse), return a + * positive value so that the move will be rejected. + * + * If no slack values have changed, then return an arbitrary positive number. A + * move resulting in no change in the slack values should probably be unnecessary. + * + * The sorting is need to prevent in the unlikely circumstances that a bad slack + * value suddenly got very good due to the block move, while a good slack value + * got very bad, perhaps even worse than the original worse slack value. + */ +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state); + +static e_move_result assess_swap(double delta_c, double t); + +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + NetPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info) { + VTR_ASSERT_SAFE(timing_info); + VTR_ASSERT_SAFE(pin_tedges_invalidator); + + // Invalidate timing graph edges affected by the move + for (ClusterPinId pin : blocks_affected.affected_pins) { + pin_tedges_invalidator->invalidate_connection(pin, timing_info); + } +} + +static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerState& placer_state) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + auto& p_timing_ctx = placer_state.mutable_timing(); + auto& connection_delay = p_timing_ctx.connection_delay; + auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; + auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; + auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; + + //Go through all the sink pins affected + for (ClusterPinId pin_id : 
blocks_affected.affected_pins) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + int ipin = clb_nlist.pin_net_index(pin_id); + + //Commit the timing delay and cost values + connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } +} + +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, + const PlacerState& placer_state) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + const auto& p_timing_ctx = placer_state.timing(); + const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack; + + //Find the original/proposed setup slacks of pins with modified values + std::vector original_setup_slacks, proposed_setup_slacks; + + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId clb_pin : clb_pins_modified) { + ClusterNetId net_id = clb_nlist.pin_net(clb_pin); + size_t ipin = clb_nlist.pin_net_index(clb_pin); + + original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); + proposed_setup_slacks.push_back( + setup_slacks->setup_slack(net_id, ipin)); + } + + //Sort in ascending order, from the worse slack value to the best + std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end()); + std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); + + //Check the first pair of slack values that are different + //If found, return their difference + for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { + float slack_diff = original_setup_slacks[idiff] + - proposed_setup_slacks[idiff]; + + if (slack_diff != 0) { + return slack_diff; + } + } + + //If all slack values are identical (or no modified slack values), + //reject this move by returning an 
arbitrary positive number as cost. + return 1; +} + +static e_move_result assess_swap(double delta_c, double t) { + /* Returns: 1 -> move accepted, 0 -> rejected. */ + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); + if (delta_c <= 0) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); + return ACCEPTED; + } + + if (t == 0.) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n"); + return REJECTED; + } + + float fnum = vtr::frand(); + float prob_fac = std::exp(-delta_c / t); + if (prob_fac > fnum) { + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n"); + return ACCEPTED; + } + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); + return REJECTED; +} + +//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on +//the move proposed in blocks_affected +static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, + PlacerTimingContext& p_timing_ctx) { +#ifndef VTR_ASSERT_SAFE_ENABLED + (void)blocks_affected; + (void)p_timing_ctx; +#else + //Invalidate temp delay & timing cost values to match sanity checks in + //comp_td_connection_cost() + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; + auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; + + for (ClusterPinId pin : blocks_affected.affected_pins) { + ClusterNetId net = clb_nlist.pin_net(pin); + int ipin = clb_nlist.pin_net_index(pin); + proposed_connection_delay[net][ipin] = INVALID_DELAY; + proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; + } +#endif +} + +/** + * @brief Compute the total normalized cost for a given placement. 
This + * computation will vary depending on the placement modes. + * + * @param costs The current placement cost components and their normalization + * factors + * @param placer_opts Determines the placement mode + * @param noc_opts Determines if placement includes the NoC + * @return double The computed total cost of the current placement + */ +static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { + double total_cost = 0.0; + + if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { + // in bounding box mode we only care about wirelength + total_cost = costs->bb_cost * costs->bb_cost_norm; + } else if (placer_opts.place_algorithm.is_timing_driven()) { + // in timing mode we include both wirelength and timing costs + total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm); + } + + if (noc_opts.noc) { + // in noc mode we include noc aggregate bandwidth and noc latency + total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts); + } + + return total_cost; +} + +/** + * @brief Updates all the cost normalization factors during the outer + * loop iteration of the placement. At each temperature change, these + * values are updated so that we can balance the tradeoff between the + * different placement cost components (timing, wirelength and NoC). + * Depending on the placement mode the corresponding normalization factors are + * updated. 
+ * + * @param costs Contains the normalization factors which need to be updated + * @param placer_opts Determines the placement mode + * @param noc_opts Determines if placement includes the NoC + * @param noc_cost_handler Computes normalization factors for NoC-related cost terms + */ +static void update_placement_cost_normalization_factors(t_placer_costs* costs, + const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + const std::optional& noc_cost_handler) { + /* Update the cost normalization factors */ + costs->update_norm_factors(); + + // update the noc normalization factors if the placement includes the NoC + if (noc_opts.noc) { + noc_cost_handler->update_noc_normalization_factors(*costs); + } + + // update the current total placement cost + costs->cost = get_total_cost(costs, placer_opts, noc_opts); +} ///@brief Constructor: Initialize all annealing state variables and macros. t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, @@ -142,4 +450,607 @@ void t_annealing_state::update_move_lim(float success_target, float success_rate move_lim = move_lim_max * (success_target / success_rate); move_lim = std::min(move_lim, move_lim_max); move_lim = std::max(move_lim, 1); +} + +PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, + PlacerState& placer_state, + t_placer_costs& costs, + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler, + const t_noc_opts& noc_opts, + MoveGenerator& move_generator_1, + MoveGenerator& move_generator_2, + ManualMoveGenerator& manual_move_generator, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + int move_lim) + : placer_opts_(placer_opts) + , placer_state_(placer_state) + , costs_(costs) + , net_cost_handler_(net_cost_handler) + , noc_cost_handler_(noc_cost_handler) + , noc_opts_(noc_opts) + , 
move_generator_1_(move_generator_1) + , move_generator_2_(move_generator_2) + , manual_move_generator_(manual_move_generator) + , delay_model_(delay_model) + , criticalities_(criticalities) + , setup_slacks_(setup_slacks) + , timing_info_(timing_info) + , pin_timing_invalidator_(pin_timing_invalidator) + , move_stats_file_(nullptr, vtr::fclose) + , outer_crit_iter_count_(1) + , blocks_affected_(placer_state.block_locs().size()) +{ + const auto& device_ctx = g_vpr_ctx.device(); + + float first_crit_exponent; + if (placer_opts.place_algorithm.is_timing_driven()) { + first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ + } else { + first_crit_exponent = 0.f; + } + + int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched); + + int inner_recompute_limit; + if (placer_opts.inner_loop_recompute_divider != 0) { + inner_recompute_limit = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); + } else { + // don't do an inner recompute + inner_recompute_limit = first_move_lim + 1; + } + + /* calculate the number of moves in the quench that we should recompute timing after based on the value of * + * the commandline option quench_recompute_divider */ + int quench_recompute_limit; + if (placer_opts.quench_recompute_divider != 0) { + quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); + } else { + /*don't do an quench recompute */ + quench_recompute_limit = first_move_lim + 1; + } + + // Get the first range limiter + placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); + + annealing_state_ = t_annealing_state(placer_opts_.anneal_sched, + EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved + placer_state_.move().first_rlim, + first_move_lim, + first_crit_exponent); + + if 
(!placer_opts.move_stats_file.empty()) { + move_stats_file_ = std::unique_ptr( + vtr::fopen(placer_opts.move_stats_file.c_str(), "w"), + vtr::fclose); + LOG_MOVE_STATS_HEADER(); + } + + //allocate move type statistics vectors + move_type_stats_.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + move_type_stats_.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); + + // Update the starting temperature for placement annealing to a more appropriate value + annealing_state_.t = estimate_starting_temperature(); +} + +float PlacementAnnealer::estimate_starting_temperature() { + if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) { + return placer_opts_.anneal_sched.init_t; + } + + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + // Use to calculate the average of cost when swap is accepted. + int num_accepted = 0; + + // Use double types to avoid round off. + double av = 0., sum_of_squares = 0.; + + // Determines the block swap loop count. 
+ int move_lim = std::min(annealing_state_.move_lim_max, (int)cluster_ctx.clb_nlist.blocks().size()); + + bool manual_move_enabled = false; + + for (int i = 0; i < move_lim; i++) { +#ifndef NO_GRAPHICS + // Checks manual move flag for manual move feature + t_draw_state* draw_state = get_draw_state_vars(); + if (draw_state->show_graphics) { + manual_move_enabled = manual_move_is_selected(); + } +#endif /*NO_GRAPHICS*/ + + // TODO: remove this + constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; + + // Will not deploy setup slack analysis, so omit crit_exponent and setup_slack + e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm, + REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled); + + if (swap_result == ACCEPTED) { + num_accepted++; + av += costs_.cost; + sum_of_squares += costs_.cost * costs_.cost; + swap_stats_.num_swap_accepted++; + } else if (swap_result == ABORTED) { + swap_stats_.num_swap_aborted++; + } else { + swap_stats_.num_swap_rejected++; + } + } + + // Take the average of the accepted swaps' cost values. + av = num_accepted > 0 ? (av / num_accepted) : 0.; + + // Get the standard deviation. + double std_dev = get_std_dev(num_accepted, sum_of_squares, av); + + // Print warning if not all swaps are accepted. + if (num_accepted != move_lim) { + VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n", + num_accepted, move_lim); + } + + // Improved initial placement uses a fast SA for NoC routers and centroid placement + // for other blocks. The temperature is reduced to prevent SA from destroying the initial placement + float init_temp = std_dev / 64; + +#ifdef VERBOSE + /* Print stats related to finding the initial temp. The logged starting temp is the value actually returned (std_dev / 64). */ + VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, init_temp); +#endif + + return init_temp; +} + + +/** + * @brief Pick some block and moves it to another spot. + * + * If the new location is empty, directly move the block.
If the new location + * is occupied, switch the blocks. Due to the different sizes of the blocks, + * this block switching may occur for multiple times. It might also cause the + * current swap attempt to abort due to inability to find suitable locations + * for moved blocks. + * + * The move generator will record all the switched blocks in the variable + * `blocks_affected`. Afterwards, the move will be assessed by the chosen + * cost formulation. Currently, there are three ways to assess move cost, + * which are stored in the enum type `t_place_algorithm`. + * + * @return Whether the block swap is accepted, rejected or aborted. + */ +e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + float timing_bb_factor, + bool manual_move_enabled) { + /* Picks some block and moves it to another spot. If this spot is + * occupied, switch the blocks. Assess the change in cost function. + * rlim is the range limiter. + * Returns whether the swap is accepted, rejected or aborted. + * Passes back the new value of the cost functions. + */ + auto& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); + + float rlim_escape_fraction = placer_opts_.rlim_escape_fraction; + float timing_tradeoff = placer_opts_.timing_tradeoff; + + PlaceCritParams crit_params; + crit_params.crit_exponent = annealing_state_.crit_exponent; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + // move type and block type chosen by the agent + t_propose_action proposed_action{e_move_type::UNIFORM, -1}; + + swap_stats_.num_ts_called++; + + MoveOutcomeStats move_outcome_stats; + + /* I'm using negative values of proposed_net_cost as a flag, + * so DO NOT use cost functions that can go negative. */ + + double delta_c = 0; //Change in cost due to this swap. + double bb_delta_c = 0; //Change in the bounding box (wiring) cost. + double timing_delta_c = 0; //Change in the timing cost (delay * criticality). 
+ + // Determine whether we need to force swap two router blocks + bool router_block_move = false; + if (noc_opts_.noc) { + router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage); + } + + /* Allow some fraction of moves to not be restricted by rlim, + * in the hopes of better escaping local minima. */ + float rlim; + if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) { + rlim = std::numeric_limits::infinity(); + } else { + rlim = annealing_state_.rlim; + } + + e_create_move create_move_outcome = e_create_move::ABORT; + + //When manual move toggle button is active, the manual move window asks the user for input. + if (manual_move_enabled) { +#ifndef NO_GRAPHICS + create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_, + proposed_action.move_type, rlim, placer_opts_, + criticalities_); +#endif //NO_GRAPHICS + } else if (router_block_move) { + // generate a move where two random router blocks are swapped + create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry); + proposed_action.move_type = e_move_type::UNIFORM; + } else { + //Generate a new move (perturbation) used to explore the space of possible placements + create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_); + } + + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + LOG_MOVE_STATS_PROPOSED(annealing_state_.t, blocks_affected_); + + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", + costs_.cost, costs_.bb_cost, costs_.timing_cost); + + e_move_result move_outcome = e_move_result::ABORTED; + + if (create_move_outcome == e_create_move::ABORT) { + LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(),
std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN(), "ABORTED", + "illegal move"); + + move_outcome = ABORTED; + + } else { + VTR_ASSERT(create_move_outcome == e_create_move::VALID); + + /* + * To make evaluating the move simpler (e.g. calculating changed bounding box), + * we first move the blocks to their new locations (apply the move to + * blk_loc_registry.block_locs) and then compute the change in cost. If the move + * is accepted, the inverse look-up in place_ctx.grid_blocks is updated + * (committing the move). If the move is rejected, the blocks are returned to + * their original positions (reverting blk_loc_registry.block_locs to its original state). + * + * Note that the inverse look-up place_ctx.grid_blocks is only updated after + * move acceptance is determined, so it should not be used when evaluating a move. + */ + + /* Update the block positions */ + blk_loc_registry.apply_move_blocks(blocks_affected_); + + //Find all the nets affected by this swap and update the wiring costs. + //This cost value doesn't depend on the timing info. + // + //Also find all the pins affected by the swap, and calculates new connection + //delays and timing costs and store them in proposed_* data structures. + net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_, + bb_delta_c, timing_delta_c); + + //For setup slack analysis, we first do a timing analysis to get the newest + //slack values resulted from the proposed block moves. If the move turns out + //to be accepted, we keep the updated slack values and commit the block moves. + //If rejected, we reject the proposed block moves and revert this timing analysis. + if (place_algorithm == SLACK_TIMING_PLACE) { + // Invalidates timing of modified connections for incremental timing updates. 
+ invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + + /* Update the connection_timing_cost and connection_delay * + * values from the temporary values. */ + commit_td_cost(blocks_affected_, placer_state_); + + /* Update timing information. Since we are analyzing setup slacks, * + * we only update those values and keep the criticalities stale * + * so as not to interfere with the original timing driven algorithm. * + * + * Note: the timing info must be updated after applying block moves * + * and committing the timing driven delays and costs. * + * If we wish to revert this timing update due to move rejection, * + * we need to revert block moves and restore the timing values. */ + criticalities_->disable_update(); + setup_slacks_->enable_update(); + update_timing_classes(crit_params, timing_info_, criticalities_, + setup_slacks_, pin_timing_invalidator_, placer_state_); + + /* Get the setup slack analysis cost */ + //TODO: calculate a weighted average of the slack cost and wiring cost + delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm; + } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { + /* Take delta_c as a combination of timing and wiring cost. 
In + * addition to `timing_tradeoff`, we normalize the cost values */ + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " + "timing_delta_c %e, timing_cost_norm %e\n", + bb_delta_c, + costs_.bb_cost_norm, + timing_tradeoff, + timing_delta_c, + costs_.timing_cost_norm); + delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; + } else { + VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", + bb_delta_c, + costs_.bb_cost_norm); + delta_c = bb_delta_c * costs_.bb_cost_norm; + } + + NocCostTerms noc_delta_c; // change in NoC cost + /* Update the NoC data structure and costs*/ + if (noc_opts_.noc) { + VTR_ASSERT_SAFE(noc_cost_handler_.has_value()); + noc_cost_handler_->find_affected_noc_routers_and_update_noc_costs(blocks_affected_, noc_delta_c); + + // Include the NoC delta costs in the total cost change for this swap + delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_); + } + + /* 1 -> move accepted, 0 -> rejected. */ + move_outcome = assess_swap(delta_c, annealing_state_.t); + + //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. 
+#ifndef NO_GRAPHICS + if (manual_move_enabled) { + move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome); + } +#endif //NO_GRAPHICS + + if (move_outcome == ACCEPTED) { + costs_.cost += delta_c; + costs_.bb_cost += bb_delta_c; + + if (place_algorithm == SLACK_TIMING_PLACE) { + // Update the timing driven cost as usual + costs_.timing_cost += timing_delta_c; + + // Commit the setup slack information + // The timing delay and cost values should be committed already + commit_setup_slacks(setup_slacks_, placer_state_); + } + + if (place_algorithm == CRITICALITY_TIMING_PLACE) { + costs_.timing_cost += timing_delta_c; + + /* Invalidates timing of modified connections for incremental * + * timing updates. These invalidations are accumulated for a * + * big timing update in the outer loop. */ + invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + + /* Update the connection_timing_cost and connection_delay * + * values from the temporary values. */ + commit_td_cost(blocks_affected_, placer_state_); + } + + /* Update net cost functions and reset flags. */ + net_cost_handler_.update_move_nets(); + + /* Update clb data structures since we kept the move. */ + blk_loc_registry.commit_move_blocks(blocks_affected_); + + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + if (noc_opts_.noc){ + noc_cost_handler_->commit_noc_costs(); + costs_ += noc_delta_c; + } + + //Highlights the new block when manual move is selected. +#ifndef NO_GRAPHICS + if (manual_move_enabled) { + manual_move_highlight_new_block_location(); + } +#endif //NO_GRAPHICS + + } else { + VTR_ASSERT_SAFE(move_outcome == REJECTED); + + // Reset the net cost function flags first. 
+ net_cost_handler_.reset_move_nets(); + + // Restore the blk_loc_registry.block_locs data structures to their state before the move. + blk_loc_registry.revert_move_blocks(blocks_affected_); + + if (place_algorithm == SLACK_TIMING_PLACE) { + /* Revert the timing delays and costs to pre-update values. */ + /* These routines must be called after reverting the block moves. */ + //TODO: make this process incremental + comp_td_connection_delays(delay_model_, placer_state_); + comp_td_costs(delay_model_, *criticalities_, placer_state_, &costs_.timing_cost); + + /* Re-invalidate the affected sink pins since the proposed + * move is rejected, and the same blocks are reverted to + * their original positions. */ + invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + + // Revert the timing update + update_timing_classes(crit_params, timing_info_, criticalities_, + setup_slacks_, pin_timing_invalidator_, placer_state_); + + VTR_ASSERT_SAFE_MSG( + verify_connection_setup_slacks(setup_slacks_, placer_state_), + "The current setup slacks should be identical to the values before the try swap timing info update."); + } + + if (place_algorithm == CRITICALITY_TIMING_PLACE) { + // Un-stage the values stored in proposed_* data structures + revert_td_cost(blocks_affected_, placer_state_.mutable_timing()); + } + + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + /* Revert the traffic flow routes within the NoC*/ + if (noc_opts_.noc) { + noc_cost_handler_->revert_noc_traffic_flow_routes(blocks_affected_); + } + } + + move_outcome_stats.delta_cost_norm = delta_c; + move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs_.bb_cost_norm; + move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs_.timing_cost_norm; + + move_outcome_stats.delta_bb_cost_abs 
= bb_delta_c; + move_outcome_stats.delta_timing_cost_abs = timing_delta_c; + + LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); + } + move_outcome_stats.outcome = move_outcome; + + // If we force a router block move then it was not proposed by the + // move generator, so we should not calculate the reward and update + // the move generators status since this outcome is not a direct + // consequence of the move generator + if (!router_block_move) { + move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor); + } + +#ifdef VTR_ENABLE_DEBUG_LOGGING +# ifndef NO_GRAPHICS + stop_placement_and_check_breakpoints(blocks_affected_, move_outcome, delta_c, bb_delta_c, timing_delta_c); +# endif +#endif + + // Clear the data structure containing block move info + blocks_affected_.clear_move_blocks(); + +#if 0 + // Check that each accepted swap yields a valid placement. This will + // greatly slow the placer, but can debug some issues.
+ check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); +#endif + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", + costs_.cost, costs_.bb_cost, costs_.timing_cost); + return move_outcome; +} + +/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ +void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) { + if (placer_opts_.place_algorithm.is_timing_driven()) { + /* At each temperature change we update these values to be used + * for normalizing the tradeoff between timing and wirelength (bb) */ + if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter + || placer_opts_.inner_loop_recompute_divider != 0) { +#ifdef VERBOSE + VTR_LOG("Outer loop recompute criticalities\n"); +#endif + // Avoid division by zero + num_connections = std::max(num_connections, 1); + VTR_ASSERT(num_connections > 0); + + PlaceCritParams crit_params; + crit_params.crit_exponent = annealing_state_.crit_exponent; + crit_params.crit_limit = placer_opts_.place_crit_limit; + + //Update all timing related classes + perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_, + pin_timing_invalidator_, timing_info_, &costs_, placer_state_); + + outer_crit_iter_count_ = 0; + } + outer_crit_iter_count_++; + } + + // Update the cost normalization factors + update_placement_cost_normalization_factors(&costs_, placer_opts_, noc_opts_, noc_cost_handler_); +} + +/* Function which contains the inner loop of the simulated annealing */ +void placement_inner_loop(int inner_recompute_limit, + t_placer_statistics* stats, + + int* moves_since_cost_recompute, + PlacerSetupSlacks* setup_slacks, + MoveGenerator& move_generator, + float timing_bb_factor + ) { + // How many times have we dumped placement to a file this temperature? 
+ int inner_placement_save_count = 0; + + stats->reset(); + + bool manual_move_enabled = false; + + // Inner loop begins + for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) { + e_move_result swap_result = try_swap(move_generator, + placer_opts, noc_opts, move_type_stat, place_algorithm, + timing_bb_factor, manual_move_enabled); + + if (swap_result == ACCEPTED) { + /* Move was accepted. Update statistics that are useful for the annealing schedule. */ + stats->single_swap_update(*costs); + swap_stats.num_swap_accepted++; + } else if (swap_result == ABORTED) { + swap_stats.num_swap_aborted++; + } else { // swap_result == REJECTED + swap_stats.num_swap_rejected++; + } + + if (place_algorithm.is_timing_driven()) { + /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? + * We do this only once in a while, since it is expensive. + */ + if (inner_crit_iter_count >= inner_recompute_limit + && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */ + + inner_crit_iter_count = 0; +#ifdef VERBOSE + VTR_LOG("Inner loop recompute criticalities\n"); +#endif + + PlaceCritParams crit_params; + crit_params.crit_exponent = state->crit_exponent; + crit_params.crit_limit = placer_opts.place_crit_limit; + + //Update all timing related classes + perform_full_timing_update(crit_params, delay_model, criticalities, + setup_slacks, pin_timing_invalidator, + timing_info, costs, placer_state); + } + inner_crit_iter_count++; + } + + /* Lines below prevent too much round-off error from accumulating + * in the cost over many iterations (due to incremental updates). + * This round-off can lead to error checks failing because the cost + * is different from what you get when you recompute from scratch. 
+ */ + ++(*moves_since_cost_recompute); + if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { + net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs); + + if (noc_cost_handler.has_value()) { + noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs); + } + + *moves_since_cost_recompute = 0; + } + + if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0 && state->move_lim >= placer_opts.placement_saves_per_temperature /* keeps the save interval below >= 1 so the modulo never divides by zero */ + && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { + std::string filename = vtr::string_fmt("placement_%03d_%03d.place", + state->num_temps + 1, inner_placement_save_count); + VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", + inner_iter, state->move_lim, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs()); + ++inner_placement_save_count; + } + } + + /* Calculate the success_rate and std_dev of the costs. */ + stats->calc_iteration_stats(*costs, state->move_lim); } \ No newline at end of file diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 0c046bcdf5f..b19ef8f5968 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -1,9 +1,32 @@ #pragma once +#include "vpr_types.h" + +#include "move_generator.h" // movestats +#include "net_cost_handler.h" + +#include + +class PlacerState; class t_placer_costs; struct t_placer_opts; -struct t_annealing_sched; + +class NocCostHandler; +class ManualMoveGenerator; +class NetPinTimingInvalidator; + +/** + * These variables keep track of the number of swaps + * rejected, accepted or aborted. The total number of swap attempts + * is the sum of the three number. + */ +struct t_swap_stats { + int num_swap_rejected = 0; + int num_swap_accepted = 0; + int num_swap_aborted = 0; + int num_ts_called = 0; +}; /** * @brief Stores variables that are used by the annealing process.
@@ -71,7 +94,7 @@ class t_annealing_state { float INVERSE_DELTA_RLIM; public: //Constructor - t_annealing_state() = delete; + t_annealing_state() = default; t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, @@ -128,8 +151,83 @@ class t_annealing_state { inline void update_move_lim(float success_target, float success_rate); }; + class PlacementAnnealer { + public: + PlacementAnnealer(const t_placer_opts& placer_opts, + PlacerState& placer_state, + t_placer_costs& costs, + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler, + const t_noc_opts& noc_opts, + MoveGenerator& move_generator_1, + MoveGenerator& move_generator_2, + ManualMoveGenerator& manual_move_generator, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + int move_lim); + + void placement_inner_loop(const t_annealing_state* state, + const t_placer_opts& placer_opts, + const t_noc_opts& noc_opts, + int inner_recompute_limit, + t_placer_statistics* stats, + t_placer_costs* costs, + int* moves_since_cost_recompute, + NetPinTimingInvalidator* pin_timing_invalidator, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + MoveGenerator& move_generator, + ManualMoveGenerator& manual_move_generator, + t_pl_blocks_to_be_moved& blocks_affected, + SetupTimingInfo* timing_info, + const t_place_algorithm& place_algorithm, + MoveTypeStat& move_type_stat, + float timing_bb_factor, + t_swap_stats& swap_stats, + PlacerState& placer_state, + NetCostHandler& net_cost_handler, + std::optional& noc_cost_handler); + + void outer_loop_update_timing_info(int num_connections); + + e_move_result try_swap(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + float timing_bb_factor, + bool manual_move_enabled); + + public: + const t_placer_opts& 
placer_opts_; + PlacerState& placer_state_; + t_placer_costs& costs_; + NetCostHandler& net_cost_handler_; + std::optional& noc_cost_handler_; + const t_noc_opts& noc_opts_; + + MoveGenerator& move_generator_1_; + MoveGenerator& move_generator_2_; + ManualMoveGenerator& manual_move_generator_; + + const PlaceDelayModel* delay_model_; + PlacerCriticalities* criticalities_; + PlacerSetupSlacks* setup_slacks_; + SetupTimingInfo* timing_info_; + NetPinTimingInvalidator* pin_timing_invalidator_; + std::unique_ptr move_stats_file_; + int outer_crit_iter_count_; - private: t_annealing_state annealing_state_; + /// Swap statistics keep record of the number accepted/rejected/aborted swaps. + t_swap_stats swap_stats_; + MoveTypeStat move_type_stats_; + + t_pl_blocks_to_be_moved blocks_affected_; + + + private: + float estimate_starting_temperature(); }; \ No newline at end of file diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 0c221f89c4a..99151695dab 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -91,18 +91,6 @@ struct t_range_limiters { float dm_rlim; }; -/** - * These variables keep track of the number of swaps - * rejected, accepted or aborted. The total number of swap attempts - * is the sum of the three number. 
- */ -struct t_swap_stats { - int num_swap_rejected = 0; - int num_swap_accepted = 0; - int num_swap_aborted = 0; - int num_ts_called = 0; -}; - e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to, diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 90b566fb753..33fc09fa342 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -90,84 +90,6 @@ static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); -/********************** Variables local to place.c ***************************/ - - -std::unique_ptr f_move_stats_file(nullptr, - vtr::fclose); - -#ifdef VTR_ENABLE_DEBUG_LOGGIING -# define LOG_MOVE_STATS_HEADER() \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "temp,from_blk,to_blk,from_type,to_type," \ - "blk_count," \ - "delta_cost,delta_bb_cost,delta_td_cost," \ - "outcome,reason\n"); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, affected_blocks) \ - do { \ - if (f_move_stats_file) { \ - auto& place_ctx = g_vpr_ctx.placement(); \ - auto& cluster_ctx = g_vpr_ctx.clustering(); \ - ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num; \ - \ - t_pl_loc to = affected_blocks.moved_blocks[0].new_loc; \ - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; \ - \ - t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \ - t_logical_block_type_ptr to_type = nullptr; \ - if (b_to) { \ - to_type = cluster_ctx.clb_nlist.block_type(b_to); \ - } \ - \ - fprintf(f_move_stats_file.get(), \ - "%g," \ - "%d,%d," \ - "%s,%s," \ - "%d,", \ - t, \ - int(b_from), int(b_to), \ - from_type->name, (to_type ? 
to_type->name : "EMPTY"), \ - affected_blocks.moved_blocks.size()); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "%g,%g,%g," \ - "%s,%s\n", \ - delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason); \ - } \ - } while (false) - -#else - -# define LOG_MOVE_STATS_HEADER() \ - do { \ - fprintf(f_move_stats_file.get(), \ - "VTR_ENABLE_DEBUG_LOGGING disabled " \ - "-- No move stats recorded\n"); \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \ - do { \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - } while (false) - -#endif - /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE void print_clb_placement(const char* fname); @@ -254,43 +176,9 @@ static float starting_t(const t_annealing_state* state, static int count_connections(); -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state); - -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerTimingContext& p_timing_ctx); - -static void invalidate_affected_connections( - const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info); - static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state); -static e_move_result assess_swap(double delta_c, double t); - -static void update_placement_cost_normalization_factors(t_placer_costs* costs, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::optional& noc_cost_handler); - -static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); - -static void outer_loop_update_timing_info(const t_placer_opts& 
placer_opts, - const t_noc_opts& noc_opts, - t_placer_costs* costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - PlacerState& placer_state, - const std::optional& noc_cost_handler); - static void placement_inner_loop(const t_annealing_state* state, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts, @@ -346,7 +234,6 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry); /*****************************************************************************/ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, - t_annealing_sched annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, @@ -372,8 +259,6 @@ void try_place(const Netlist<>& net_list, auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count; - float first_crit_exponent; - t_placer_costs costs(placer_opts.place_algorithm); @@ -395,9 +280,6 @@ void try_place(const Netlist<>& net_list, t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size()); - // Swap statistics keep record of the number accepted/rejected/aborted swaps. - t_swap_stats swap_stats; - if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_delay_model(net_list, @@ -422,7 +304,7 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (cube_bb ? 
"Cube" : "Per-layer")); VTR_LOG("\n"); - int move_lim = (int)(annealing_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); + int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); PlacerState placer_state; auto& place_move_ctx = placer_state.mutable_move(); @@ -490,8 +372,6 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_algorithm.is_timing_driven()) { costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ - num_connections = count_connections(); VTR_LOG("\n"); VTR_LOG("There are %d point to point connections in this circuit.\n", @@ -530,7 +410,7 @@ void try_place(const Netlist<>& net_list, //First time compute timing and costs, compute from scratch PlaceCritParams crit_params; - crit_params.crit_exponent = first_crit_exponent; + crit_params.crit_exponent = placer_opts.td_place_exp_first; crit_params.crit_limit = placer_opts.place_crit_limit; initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(), @@ -574,7 +454,6 @@ void try_place(const Netlist<>& net_list, /* Other initializations */ outer_crit_iter_count = 0; num_connections = 0; - first_crit_exponent = 0; } if (noc_opts.noc) { @@ -650,62 +529,6 @@ void try_place(const Netlist<>& net_list, print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - int first_move_lim = get_initial_move_lim(placer_opts, annealing_sched); - - int inner_recompute_limit; - if (placer_opts.inner_loop_recompute_divider != 0) { - inner_recompute_limit = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); - } else { - /*don't do an inner recompute */ - inner_recompute_limit = first_move_lim + 1; - } - - /* calculate the number of moves in the quench that we should recompute timing after based on the value of * - * the commandline option 
quench_recompute_divider */ - int quench_recompute_limit; - if (placer_opts.quench_recompute_divider != 0) { - quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); - } else { - /*don't do an quench recompute */ - quench_recompute_limit = first_move_lim + 1; - } - - //allocate helper vectors that are used by many move generators - place_move_ctx.X_coord.resize(10, 0); - place_move_ctx.Y_coord.resize(10, 0); - place_move_ctx.layer_coord.resize(10, 0); - - //allocate move type statistics vectors - MoveTypeStat move_type_stat; - move_type_stat.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - move_type_stat.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - move_type_stat.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); - - /* Get the first range limiter */ - float first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - place_move_ctx.first_rlim = first_rlim; - - t_annealing_state state(annealing_sched, - EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved - first_rlim, - first_move_lim, - first_crit_exponent); - - /* Update the starting temperature for placement annealing to a more appropriate value */ - state.t = starting_t(&state, &costs, annealing_sched, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), timing_info.get(), *move_generator, - manual_move_generator, pin_timing_invalidator.get(), - blocks_affected, placer_opts, noc_opts, move_type_stat, - swap_stats, placer_state, net_cost_handler, noc_cost_handler); - - if (!placer_opts.move_stats_file.empty()) { - f_move_stats_file = std::unique_ptr( - vtr::fopen(placer_opts.move_stats_file.c_str(), "w"), - vtr::fclose); - LOG_MOVE_STATS_HEADER(); - } - tot_iter = 0; 
moves_since_cost_recompute = 0; @@ -727,6 +550,10 @@ void try_place(const Netlist<>& net_list, //Define the timing bb weight factor for the agent's reward function float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT; + PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, + noc_opts, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(), + placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); + if (!skip_anneal) { //Table header VTR_LOG("\n"); @@ -736,21 +563,15 @@ void try_place(const Netlist<>& net_list, do { vtr::Timer temperature_timer; - outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections, - state.crit_exponent, &outer_crit_iter_count, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state, noc_cost_handler); + annealer.outer_loop_update_timing_info(num_connections); if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); - //see if we should save the current placement solution as a checkpoint - - if (placer_opts.place_checkpointing - && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) { + // see if we should save the current placement solution as a checkpoint + if (placer_opts.place_checkpointing && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) { save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), placement_checkpoint, timing_info, costs, critical_path.delay()); @@ -761,7 +582,7 @@ void try_place(const Netlist<>& net_list, assign_current_move_generator(move_generator, move_generator2, agent_state, placer_opts, false, current_move_generator); - //do a complete inner loop iteration + // do a complete inner loop iteration 
placement_inner_loop(&state, placer_opts, noc_opts, inner_recompute_limit, &stats, &costs, &moves_since_cost_recompute, @@ -818,11 +639,7 @@ void try_place(const Netlist<>& net_list, vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections, - state.crit_exponent, &outer_crit_iter_count, - place_delay_model.get(), placer_criticalities.get(), - placer_setup_slacks.get(), pin_timing_invalidator.get(), - timing_info.get(), placer_state, noc_cost_handler); + annealer.outer_loop_update_timing_info(num_connections); //move the appropriate move_generator to be the current used move generator assign_current_move_generator(move_generator, move_generator2, @@ -991,47 +808,7 @@ void try_place(const Netlist<>& net_list, copy_locs_to_global_state(blk_loc_registry); } -/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ -static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - t_placer_costs* costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - PlacerState& placer_state, - const std::optional& noc_cost_handler) { - if (placer_opts.place_algorithm.is_timing_driven()) { - /*at each temperature change we update these values to be used */ - /*for normalizing the tradeoff between timing and wirelength (bb) */ - if (*outer_crit_iter_count >= placer_opts.recompute_crit_iter - || placer_opts.inner_loop_recompute_divider != 0) { -#ifdef VERBOSE - VTR_LOG("Outer loop recompute criticalities\n"); -#endif - num_connections = std::max(num_connections, 1); //Avoid division by zero - VTR_ASSERT(num_connections > 0); - - PlaceCritParams crit_params; - crit_params.crit_exponent = 
crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, setup_slacks, - pin_timing_invalidator, timing_info, costs, placer_state); - - *outer_crit_iter_count = 0; - } - (*outer_crit_iter_count)++; - } - - /* Update the cost normalization factors */ - update_placement_cost_normalization_factors(costs, placer_opts, noc_opts, noc_cost_handler); -} /* Function which contains the inner loop of the simulated annealing */ static void placement_inner_loop(const t_annealing_state* state, @@ -1618,223 +1395,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, return cube_bb; } -/** - * @brief Updates all the cost normalization factors during the outer - * loop iteration of the placement. At each temperature change, these - * values are updated so that we can balance the tradeoff between the - * different placement cost components (timing, wirelength and NoC). - * Depending on the placement mode the corresponding normalization factors are - * updated. 
- * - * @param costs Contains the normalization factors which need to be updated - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @param noc_cost_handler Computes normalization factors for NoC-related cost terms - */ -static void update_placement_cost_normalization_factors(t_placer_costs* costs, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::optional& noc_cost_handler) { - /* Update the cost normalization factors */ - costs->update_norm_factors(); - - // update the noc normalization factors if the placement includes the NoC - if (noc_opts.noc) { - noc_cost_handler->update_noc_normalization_factors(*costs); - } - - // update the current total placement cost - costs->cost = get_total_cost(costs, placer_opts, noc_opts); -} - -/** - * @brief Compute the total normalized cost for a given placement. This - * computation will vary depending on the placement modes. - * - * @param costs The current placement cost components and their normalization - * factors - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @return double The computed total cost of the current placement - */ -static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { - double total_cost = 0.0; - - if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { - // in bounding box mode we only care about wirelength - total_cost = costs->bb_cost * costs->bb_cost_norm; - } else if (placer_opts.place_algorithm.is_timing_driven()) { - // in timing mode we include both wirelength and timing costs - total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm); - } - - if (noc_opts.noc) { - // in noc mode we include noc aggregate bandwidth and noc latency - total_cost += 
calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts); - } - - return total_cost; -} - -/** - * @brief Check if the setup slack has gotten better or worse due to block swap. - * - * Get all the modified slack values via the PlacerSetupSlacks class, and compare - * then with the original values at these connections. Sort them and compare them - * one by one, and return the difference of the first different pair. - * - * If the new slack value is larger(better), than return a negative value so that - * the move will be accepted. If the new slack value is smaller(worse), return a - * positive value so that the move will be rejected. - * - * If no slack values have changed, then return an arbitrary positive number. A - * move resulting in no change in the slack values should probably be unnecessary. - * - * The sorting is need to prevent in the unlikely circumstances that a bad slack - * value suddenly got very good due to the block move, while a good slack value - * got very bad, perhaps even worse than the original worse slack value. 
- */ -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, - const PlacerState& placer_state) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& clb_nlist = cluster_ctx.clb_nlist; - - const auto& p_timing_ctx = placer_state.timing(); - const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack; - - //Find the original/proposed setup slacks of pins with modified values - std::vector original_setup_slacks, proposed_setup_slacks; - - auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); - for (ClusterPinId clb_pin : clb_pins_modified) { - ClusterNetId net_id = clb_nlist.pin_net(clb_pin); - size_t ipin = clb_nlist.pin_net_index(clb_pin); - - original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); - proposed_setup_slacks.push_back( - setup_slacks->setup_slack(net_id, ipin)); - } - - //Sort in ascending order, from the worse slack value to the best - std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end()); - std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); - - //Check the first pair of slack values that are different - //If found, return their difference - for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { - float slack_diff = original_setup_slacks[idiff] - - proposed_setup_slacks[idiff]; - - if (slack_diff != 0) { - return slack_diff; - } - } - - //If all slack values are identical (or no modified slack values), - //reject this move by returning an arbitrary positive number as cost. - return 1; -} - -static e_move_result assess_swap(double delta_c, double t) { - /* Returns: 1 -> move accepted, 0 -> rejected. */ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); - if (delta_c <= 0) { - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); - return ACCEPTED; - } - - if (t == 0.) 
{ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n"); - return REJECTED; - } - - float fnum = vtr::frand(); - float prob_fac = std::exp(-delta_c / t); - if (prob_fac > fnum) { - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n"); - return ACCEPTED; - } - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); - return REJECTED; -} - -/** - * @brief Update the connection_timing_cost values from the temporary - * values for all connections that have/haven't changed. - * - * All the connections have already been gathered by blocks_affected.affected_pins - * after running the routine find_affected_nets_and_update_costs() in try_swap(). - */ -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& p_timing_ctx = placer_state.mutable_timing(); - auto& connection_delay = p_timing_ctx.connection_delay; - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - //Go through all the sink pins affected - for (ClusterPinId pin_id : blocks_affected.affected_pins) { - ClusterNetId net_id = clb_nlist.pin_net(pin_id); - int ipin = clb_nlist.pin_net_index(pin_id); - - //Commit the timing delay and cost values - connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; - proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; - proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } -} - -//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on -//the move proposed in blocks_affected 
-static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerTimingContext& p_timing_ctx) { -#ifndef VTR_ASSERT_SAFE_ENABLED - (void)blocks_affected; - (void)p_timing_ctx; -#else - //Invalidate temp delay & timing cost values to match sanity checks in - //comp_td_connection_cost() - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - for (ClusterPinId pin : blocks_affected.affected_pins) { - ClusterNetId net = clb_nlist.pin_net(pin); - int ipin = clb_nlist.pin_net_index(pin); - proposed_connection_delay[net][ipin] = INVALID_DELAY; - proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; - } -#endif -} - -/** - * @brief Invalidates the connections affected by the specified block moves. - * - * All the connections recorded in blocks_affected.affected_pins have different - * values for `proposed_connection_delay` and `connection_delay`. - * - * Invalidate all the timing graph edges associated with these connections via - * the NetPinTimingInvalidator class. - */ -static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { - VTR_ASSERT_SAFE(timing_info); - VTR_ASSERT_SAFE(pin_tedges_invalidator); - - /* Invalidate timing graph edges affected by the move */ - for (ClusterPinId pin : blocks_affected.affected_pins) { - pin_tedges_invalidator->invalidate_connection(pin, timing_info); - } -} - /* Allocates the major structures needed only by the placer, primarily for * * computing costs quickly and such. 
*/ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts, diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h index 138c6cdd05d..210663823a8 100644 --- a/vpr/src/place/place.h +++ b/vpr/src/place/place.h @@ -5,7 +5,6 @@ void try_place(const Netlist<>& net_list, const t_placer_opts& placer_opts, - t_annealing_sched annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 97941f639b1..344839a1bd5 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -119,6 +119,14 @@ struct PlacerMoveContext : public Context { // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option) std::vector> highly_crit_pins; + + public: + PlacerMoveContext() { + // allocate helper vectors that are used by many move generators + X_coord.resize(10, 0); + Y_coord.resize(10, 0); + layer_coord.resize(10, 0); + } }; /** From 5db48fc81a176dd73548e1e88b2d36424e559beb Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 18 Oct 2024 17:02:23 -0400 Subject: [PATCH 03/31] solve compilation errors --- vpr/src/place/annealer.cpp | 216 +++++----- vpr/src/place/annealer.h | 69 ++-- vpr/src/place/move_generator.cpp | 2 +- vpr/src/place/move_generator.h | 2 +- vpr/src/place/place.cpp | 690 ++----------------------------- vpr/src/place/place_util.cpp | 20 + vpr/src/place/place_util.h | 12 + 7 files changed, 200 insertions(+), 811 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 0fdcd9dbcb9..3203d79576f 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -14,6 +14,7 @@ #include "noc_place_utils.h" #include "NetPinTimingInvalidator.h" #include "place_timing_update.h" +#include "read_place.h" #ifdef VTR_ENABLE_DEBUG_LOGGIING # define LOG_MOVE_STATS_HEADER() \ @@ -259,35 +260,6 @@ static void 
revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, #endif } -/** - * @brief Compute the total normalized cost for a given placement. This - * computation will vary depending on the placement modes. - * - * @param costs The current placement cost components and their normalization - * factors - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @return double The computed total cost of the current placement - */ -static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { - double total_cost = 0.0; - - if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { - // in bounding box mode we only care about wirelength - total_cost = costs->bb_cost * costs->bb_cost_norm; - } else if (placer_opts.place_algorithm.is_timing_driven()) { - // in timing mode we include both wirelength and timing costs - total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm); - } - - if (noc_opts.noc) { - // in noc mode we include noc aggregate bandwidth and noc latency - total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts); - } - - return total_cost; -} - /** * @brief Updates all the cost normalization factors during the outer * loop iteration of the placement. At each temperature change, these @@ -314,7 +286,7 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs, } // update the current total placement cost - costs->cost = get_total_cost(costs, placer_opts, noc_opts); + costs->cost = costs->get_total_cost(placer_opts, noc_opts); } ///@brief Constructor: Initialize all annealing state variables and macros. 
@@ -348,60 +320,60 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, bool t_annealing_state::outer_loop_update(float success_rate, const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched) { + const t_placer_opts& placer_opts) { #ifndef NO_GRAPHICS t_draw_state* draw_state = get_draw_state_vars(); if (!draw_state->list_of_breakpoints.empty()) { - /* Update temperature in the current information variable. */ + // Update temperature in the current information variable. get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; } #endif - if (annealing_sched.type == e_sched_type::USER_SCHED) { - /* Update t with user specified alpha. */ - t *= annealing_sched.alpha_t; + if (placer_opts.anneal_sched.type == e_sched_type::USER_SCHED) { + // Update t with user specified alpha. + t *= placer_opts.anneal_sched.alpha_t; - /* Check if the exit criterion is met. */ - bool exit_anneal = t >= annealing_sched.exit_t; + // Check if the exit criterion is met. + bool exit_anneal = t >= placer_opts.anneal_sched.exit_t; return exit_anneal; } - /* Automatically determine exit temperature. */ + // Automatically determine exit temperature. auto& cluster_ctx = g_vpr_ctx.clustering(); float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); - if (annealing_sched.type == e_sched_type::DUSTY_SCHED) { - /* May get nan if there are no nets */ + if (placer_opts.anneal_sched.type == e_sched_type::DUSTY_SCHED) { + // May get nan if there are no nets bool restart_temp = t < t_exit || std::isnan(t_exit); /* If the success rate or the temperature is * * too low, reset the temperature and alpha. */ - if (success_rate < annealing_sched.success_min || restart_temp) { - /* Only exit anneal when alpha gets too large. */ - if (alpha > annealing_sched.alpha_max) { + if (success_rate < placer_opts.anneal_sched.success_min || restart_temp) { + // Only exit anneal when alpha gets too large. 
+ if (alpha > placer_opts.anneal_sched.alpha_max) { return false; } - /* Take a half step from the restart temperature. */ + + // Take a half step from the restart temperature. t = restart_t / sqrt(alpha); - /* Update alpha. */ - alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay); + // Update alpha. + alpha = 1.0 - ((1.0 - alpha) * placer_opts.anneal_sched.alpha_decay); } else { /* If the success rate is promising, next time * * reset t to the current annealing temperature. */ - if (success_rate > annealing_sched.success_target) { + if (success_rate > placer_opts.anneal_sched.success_target) { restart_t = t; } - /* Update t. */ + // Update t. t *= alpha; } - /* Update move lim. */ - update_move_lim(annealing_sched.success_target, success_rate); + // Update move lim. + update_move_lim(placer_opts.anneal_sched.success_target, success_rate); } else { - VTR_ASSERT_SAFE(annealing_sched.type == e_sched_type::AUTO_SCHED); - /* Automatically adjust alpha according to success rate. */ + VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); + // Automatically adjust alpha according to success rate. if (success_rate > 0.96) { alpha = 0.5; } else if (success_rate > 0.8) { @@ -411,23 +383,23 @@ bool t_annealing_state::outer_loop_update(float success_rate, } else { alpha = 0.8; } - /* Update temp. */ + // Update temp. t *= alpha; - /* Must be duplicated to retain previous behavior. */ + // Must be duplicated to retain previous behavior. if (t < t_exit || std::isnan(t_exit)) { return false; } } - /* Update the range limiter. */ + // Update the range limiter. update_rlim(success_rate); - /* If using timing driven algorithm, update the crit_exponent. */ + // If using timing driven algorithm, update the crit_exponent. if (placer_opts.place_algorithm.is_timing_driven()) { update_crit_exponent(placer_opts); } - /* Continues the annealing. */ + // Continues the annealing. 
return true; } @@ -496,13 +468,15 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched); - int inner_recompute_limit; + if (placer_opts.inner_loop_recompute_divider != 0) { - inner_recompute_limit = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); + inner_recompute_limit_ = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); } else { // don't do an inner recompute - inner_recompute_limit = first_move_lim + 1; + inner_recompute_limit_ = first_move_lim + 1; } + moves_since_cost_recompute_ = 0; + tot_iter_ = 0; /* calculate the number of moves in the quench that we should recompute timing after based on the value of * * the commandline option quench_recompute_divider */ @@ -510,7 +484,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, if (placer_opts.quench_recompute_divider != 0) { quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); } else { - /*don't do an quench recompute */ + // don't do an quench recompute quench_recompute_limit = first_move_lim + 1; } @@ -609,23 +583,6 @@ float PlacementAnnealer::estimate_starting_temperature() { return init_temp; } - -/** - * @brief Pick some block and moves it to another spot. - * - * If the new location is empty, directly move the block. If the new location - * is occupied, switch the blocks. Due to the different sizes of the blocks, - * this block switching may occur for multiple times. It might also cause the - * current swap attempt to abort due to inability to find suitable locations - * for moved blocks. - * - * The move generator will record all the switched blocks in the variable - * `blocks_affected`. Afterwards, the move will be assessed by the chosen - * cost formulation. 
Currently, there are three ways to assess move cost, - * which are stored in the enum type `t_place_algorithm`. - * - * @return Whether the block swap is accepted, rejected or aborted. - */ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, const t_place_algorithm& place_algorithm, float timing_bb_factor, @@ -969,43 +926,37 @@ void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) { } /* Function which contains the inner loop of the simulated annealing */ -void placement_inner_loop(int inner_recompute_limit, - t_placer_statistics* stats, - - int* moves_since_cost_recompute, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - float timing_bb_factor - ) { +void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, + float timing_bb_factor) { // How many times have we dumped placement to a file this temperature? int inner_placement_save_count = 0; - stats->reset(); + placer_stats_.reset(); bool manual_move_enabled = false; // Inner loop begins - for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) { - e_move_result swap_result = try_swap(move_generator, - placer_opts, noc_opts, move_type_stat, place_algorithm, + for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) { + e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, timing_bb_factor, manual_move_enabled); if (swap_result == ACCEPTED) { - /* Move was accepted. Update statistics that are useful for the annealing schedule. */ - stats->single_swap_update(*costs); - swap_stats.num_swap_accepted++; + // Move was accepted. Update statistics that are useful for the annealing schedule. 
+ placer_stats_.single_swap_update(costs_); + swap_stats_.num_swap_accepted++; } else if (swap_result == ABORTED) { - swap_stats.num_swap_aborted++; + swap_stats_.num_swap_aborted++; } else { // swap_result == REJECTED - swap_stats.num_swap_rejected++; + swap_stats_.num_swap_rejected++; } - if (place_algorithm.is_timing_driven()) { + if (placer_opts_.place_algorithm.is_timing_driven()) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ - if (inner_crit_iter_count >= inner_recompute_limit - && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */ + + // on last iteration don't recompute + if (inner_crit_iter_count >= inner_recompute_limit_ && inner_iter != annealing_state_.move_lim - 1) { inner_crit_iter_count = 0; #ifdef VERBOSE @@ -1013,13 +964,13 @@ void placement_inner_loop(int inner_recompute_limit, #endif PlaceCritParams crit_params; - crit_params.crit_exponent = state->crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; + crit_params.crit_exponent = annealing_state_.crit_exponent; + crit_params.crit_limit = placer_opts_.place_crit_limit; - //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, - setup_slacks, pin_timing_invalidator, - timing_info, costs, placer_state); + // Update all timing related classes + perform_full_timing_update(crit_params, delay_model_, criticalities_, + setup_slacks_, pin_timing_invalidator_, + timing_info_, &costs_, placer_state_); } inner_crit_iter_count++; } @@ -1029,28 +980,55 @@ void placement_inner_loop(int inner_recompute_limit, * This round-off can lead to error checks failing because the cost * is different from what you get when you recompute from scratch. 
*/ - ++(*moves_since_cost_recompute); - if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { - net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs); + moves_since_cost_recompute_++; + if (moves_since_cost_recompute_ > MAX_MOVES_BEFORE_RECOMPUTE) { + net_cost_handler_.recompute_costs_from_scratch(delay_model_, criticalities_, costs_); - if (noc_cost_handler.has_value()) { - noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs); + if (noc_cost_handler_.has_value()) { + noc_cost_handler_->recompute_costs_from_scratch(noc_opts_, costs_); } - *moves_since_cost_recompute = 0; + moves_since_cost_recompute_ = 0; } - if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0 - && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { + if (placer_opts_.placement_saves_per_temperature >= 1 && inner_iter > 0 + && (inner_iter + 1) % (annealing_state_.move_lim / placer_opts_.placement_saves_per_temperature) == 0) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - state->num_temps + 1, inner_placement_save_count); + annealing_state_.num_temps + 1, inner_placement_save_count); VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", - inner_iter, state->move_lim, filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs()); + inner_iter, annealing_state_.move_lim, filename.c_str()); + print_place(nullptr, nullptr, filename.c_str(), placer_state_.block_locs()); ++inner_placement_save_count; } } - /* Calculate the success_rate and std_dev of the costs. */ - stats->calc_iteration_stats(*costs, state->move_lim); -} \ No newline at end of file + // Calculate the success_rate and std_dev of the costs. 
+ placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim); + + tot_iter_ += annealing_state_.move_lim; + ++annealing_state_.num_temps; +} + +int PlacementAnnealer::get_total_iteration() const { + return tot_iter_; +} + +const t_annealing_state& PlacementAnnealer::get_annealing_state() const { + return annealing_state_; +} + +bool PlacementAnnealer::outer_loop_update_state() { + return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_); +} + +void PlacementAnnealer::start_quench() { + // Freeze out: only accept solutions that improve placement. + annealing_state_.t = 0; + + //Revert the move limit to initial value. + annealing_state_.move_lim = annealing_state_.move_lim_max; +} + +std::tuple PlacementAnnealer::get_stats() const { + return {swap_stats_, move_type_stats_, placer_stats_}; +} diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index b19ef8f5968..02b48967525 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -7,6 +7,7 @@ #include "net_cost_handler.h" #include +#include class PlacerState; class t_placer_costs; @@ -114,8 +115,7 @@ class t_annealing_state { */ bool outer_loop_update(float success_rate, const t_placer_costs& costs, - const t_placer_opts& placer_opts, - const t_annealing_sched& annealing_sched); + const t_placer_opts& placer_opts); private: //Mutator /** @@ -170,36 +170,43 @@ class PlacementAnnealer { NetPinTimingInvalidator* pin_timing_invalidator, int move_lim); - void placement_inner_loop(const t_annealing_state* state, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - int inner_recompute_limit, - t_placer_statistics* stats, - t_placer_costs* costs, - int* moves_since_cost_recompute, - NetPinTimingInvalidator* pin_timing_invalidator, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - 
t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info, - const t_place_algorithm& place_algorithm, - MoveTypeStat& move_type_stat, - float timing_bb_factor, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler); + /* Function which contains the inner loop of the simulated annealing */ + void placement_inner_loop(MoveGenerator& move_generator, + float timing_bb_factor); void outer_loop_update_timing_info(int num_connections); + bool outer_loop_update_state(); + + /** + * @brief Pick some block and moves it to another spot. + * + * If the new location is empty, directly move the block. If the new location + * is occupied, switch the blocks. Due to the different sizes of the blocks, + * this block switching may occur for multiple times. It might also cause the + * current swap attempt to abort due to inability to find suitable locations + * for moved blocks. + * + * The move generator will record all the switched blocks in the variable + * `blocks_affected`. Afterwards, the move will be assessed by the chosen + * cost formulation. Currently, there are three ways to assess move cost, + * which are stored in the enum type `t_place_algorithm`. + * + * @return Whether the block swap is accepted, rejected or aborted. + */ e_move_result try_swap(MoveGenerator& move_generator, const t_place_algorithm& place_algorithm, float timing_bb_factor, bool manual_move_enabled); + int get_total_iteration() const; + + const t_annealing_state& get_annealing_state() const; + + std::tuple get_stats() const; + + void start_quench(); + public: const t_placer_opts& placer_opts_; PlacerState& placer_state_; @@ -224,10 +231,24 @@ class PlacementAnnealer { /// Swap statistics keep record of the number accepted/rejected/aborted swaps. 
t_swap_stats swap_stats_; MoveTypeStat move_type_stats_; + t_placer_statistics placer_stats_; t_pl_blocks_to_be_moved blocks_affected_; + private: + + /** + * @brief The maximum number of swap attempts before invoking the + * once-in-a-while placement legality check as well as floating point + * variables round-offs check. + */ + static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; + + int inner_recompute_limit_; + int moves_since_cost_recompute_; + int tot_iter_; private: + ///@brief Find the starting temperature for the annealing loop. float estimate_starting_temperature(); }; \ No newline at end of file diff --git a/vpr/src/place/move_generator.cpp b/vpr/src/place/move_generator.cpp index 2d1bcfbc64a..59795a63cc6 100644 --- a/vpr/src/place/move_generator.cpp +++ b/vpr/src/place/move_generator.cpp @@ -51,7 +51,7 @@ void MoveGenerator::calculate_reward_and_process_outcome(const MoveOutcomeStats& } } -void MoveTypeStat::print_placement_move_types_stats() { +void MoveTypeStat::print_placement_move_types_stats() const { VTR_LOG("\n\nPlacement perturbation distribution by block and move type: \n"); VTR_LOG( diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 0c83bb9d5eb..14f76a5b136 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -37,7 +37,7 @@ struct MoveTypeStat { /** * @brief Prints placement perturbation distribution by block and move type. 
*/ - void print_placement_move_types_stats(); + void print_placement_move_types_stats() const; }; /** diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 33fc09fa342..f4ccaf083f1 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -82,11 +82,6 @@ static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; #endif /************** Types and defines local to place.c ***************************/ -/* This defines the maximum number of swap attempts before invoking the * - * once-in-a-while placement legality check as well as floating point * - * variables round-offs check. */ -static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; - constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); @@ -112,28 +107,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac static void free_placement_structs(); -static e_move_result try_swap(const t_annealing_state* state, - t_placer_costs* costs, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - SetupTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - const t_place_algorithm& place_algorithm, - float timing_bb_factor, - bool manual_move_enabled, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler); - - static void check_place(const t_placer_costs& costs, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -155,53 +128,11 @@ static int check_placement_consistency(const BlkLocRegistry& blk_loc_registry); static int check_block_placement_consistency(const BlkLocRegistry& blk_loc_registry); 
static int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_registry); -static float starting_t(const t_annealing_state* state, - t_placer_costs* costs, - t_annealing_sched annealing_sched, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - SetupTimingInfo* timing_info, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler); - static int count_connections(); static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state); -static void placement_inner_loop(const t_annealing_state* state, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - int inner_recompute_limit, - t_placer_statistics* stats, - t_placer_costs* costs, - int* moves_since_cost_recompute, - NetPinTimingInvalidator* pin_timing_invalidator, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info, - const t_place_algorithm& place_algorithm, - MoveTypeStat& move_type_stat, - float timing_bb_factor, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler); - static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, @@ -258,7 +189,7 @@ void try_place(const Netlist<>& net_list, auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; 
- int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count; + int num_connections, outer_crit_iter_count; t_placer_costs costs(placer_opts.place_algorithm); @@ -267,7 +198,6 @@ void try_place(const Netlist<>& net_list, float sWNS = NAN; char msg[vtr::bufsize]; - t_placer_statistics stats; t_placement_checkpoint placement_checkpoint; @@ -469,7 +399,7 @@ void try_place(const Netlist<>& net_list, } // set the starting total placement cost - costs.cost = get_total_cost(&costs, placer_opts, noc_opts); + costs.cost = costs.get_total_cost(placer_opts, noc_opts); //Sanity check that initial placement is legal check_place(costs, @@ -529,9 +459,6 @@ void try_place(const Netlist<>& net_list, print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } - tot_iter = 0; - moves_since_cost_recompute = 0; - bool skip_anneal = false; #ifdef ENABLE_ANALYTIC_PLACE @@ -554,6 +481,9 @@ void try_place(const Netlist<>& net_list, noc_opts, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); + const t_annealing_state& annealing_state = annealer.get_annealing_state(); + const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats(); + if (!skip_anneal) { //Table header VTR_LOG("\n"); @@ -583,40 +513,28 @@ void try_place(const Netlist<>& net_list, agent_state, placer_opts, false, current_move_generator); // do a complete inner loop iteration - placement_inner_loop(&state, placer_opts, noc_opts, - inner_recompute_limit, - &stats, &costs, &moves_since_cost_recompute, - pin_timing_invalidator.get(), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - *current_move_generator, manual_move_generator, - blocks_affected, timing_info.get(), - placer_opts.place_algorithm, move_type_stat, - timing_bb_factor, swap_stats, placer_state, - net_cost_handler, 
noc_cost_handler); - + annealer.placement_inner_loop(*current_move_generator, + timing_bb_factor); //move the update used move_generator to its original variable update_move_generator(move_generator, move_generator2, agent_state, placer_opts, false, current_move_generator); - tot_iter += state.move_lim; - ++state.num_temps; - - print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter, + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); if (placer_opts.place_algorithm.is_timing_driven() && placer_opts.place_agent_multistate && agent_state == e_agent_state::EARLY_IN_THE_ANNEAL) { - if (state.alpha < 0.85 && state.alpha > 0.6) { + if (annealing_state.alpha < 0.85 && annealing_state.alpha > 0.6) { agent_state = e_agent_state::LATE_IN_THE_ANNEAL; VTR_LOG("Agent's 2nd state: \n"); } } sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", - costs.cost, costs.bb_cost, costs.timing_cost, state.t); + costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); @@ -625,14 +543,12 @@ void try_place(const Netlist<>& net_list, // print_clb_placement("first_iteration_clb_placement.echo"); // } //#endif - } while (state.outer_loop_update(stats.success_rate, costs, placer_opts, - annealing_sched)); + } while (annealer.outer_loop_update_state()); /* Outer loop of the simulated annealing ends */ } //skip_anneal ends - /* Start Quench */ - state.t = 0; //Freeze out: only accept solutions that improve placement. - state.move_lim = state.move_lim_max; //Revert the move limit to initial value. 
+ // Start Quench + annealer.start_quench(); auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ @@ -647,40 +563,27 @@ void try_place(const Netlist<>& net_list, /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ - placement_inner_loop(&state, placer_opts, noc_opts, - quench_recompute_limit, - &stats, &costs, &moves_since_cost_recompute, - pin_timing_invalidator.get(), place_delay_model.get(), - placer_criticalities.get(), placer_setup_slacks.get(), - *current_move_generator, manual_move_generator, - blocks_affected, timing_info.get(), - placer_opts.place_quench_algorithm, move_type_stat, - timing_bb_factor, swap_stats, placer_state, - net_cost_handler, noc_cost_handler); - - - //move the update used move_generator to its original variable + annealer.placement_inner_loop(*current_move_generator, timing_bb_factor); + + // move the update used move_generator to its original variable update_move_generator(move_generator, move_generator2, agent_state, placer_opts, true, current_move_generator); - tot_iter += state.move_lim; - ++state.num_temps; - if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); } - print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter, + print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), + critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); } auto post_quench_timing_stats = timing_ctx.stats; //Final timing analysis PlaceCritParams crit_params; - crit_params.crit_exponent = state.crit_exponent; + crit_params.crit_exponent = annealing_state.crit_exponent; crit_params.crit_limit = placer_opts.place_crit_limit; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -700,7 
+603,7 @@ void try_place(const Netlist<>& net_list, if (placer_opts.placement_saves_per_temperature >= 1) { std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - state.num_temps + 1, 0); + annealing_state.num_temps + 1, 0); VTR_LOG("Saving final placement to file: %s\n", filename.c_str()); print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs()); } @@ -784,9 +687,9 @@ void try_place(const Netlist<>& net_list, // Print out swap statistics print_resources_utilization(blk_loc_registry); - print_placement_swaps_stats(state, swap_stats); + print_placement_swaps_stats(annealing_state, swap_stats); - move_type_stat.print_placement_move_types_stats(); + move_type_stats.print_placement_move_types_stats(); if (noc_opts.noc) { write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs()); @@ -808,112 +711,6 @@ void try_place(const Netlist<>& net_list, copy_locs_to_global_state(blk_loc_registry); } - - -/* Function which contains the inner loop of the simulated annealing */ -static void placement_inner_loop(const t_annealing_state* state, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - int inner_recompute_limit, - t_placer_statistics* stats, - t_placer_costs* costs, - int* moves_since_cost_recompute, - NetPinTimingInvalidator* pin_timing_invalidator, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info, - const t_place_algorithm& place_algorithm, - MoveTypeStat& move_type_stat, - float timing_bb_factor, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler) { - //How many times have we dumped placement to a file this temperature? 
- int inner_placement_save_count = 0; - - stats->reset(); - - bool manual_move_enabled = false; - - /* Inner loop begins */ - for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) { - e_move_result swap_result = try_swap(state, costs, move_generator, - manual_move_generator, timing_info, pin_timing_invalidator, - blocks_affected, delay_model, criticalities, setup_slacks, - placer_opts, noc_opts, move_type_stat, place_algorithm, - timing_bb_factor, manual_move_enabled, swap_stats, - placer_state, net_cost_handler, noc_cost_handler); - - if (swap_result == ACCEPTED) { - /* Move was accepted. Update statistics that are useful for the annealing schedule. */ - stats->single_swap_update(*costs); - swap_stats.num_swap_accepted++; - } else if (swap_result == ABORTED) { - swap_stats.num_swap_aborted++; - } else { // swap_result == REJECTED - swap_stats.num_swap_rejected++; - } - - if (place_algorithm.is_timing_driven()) { - /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? - * We do this only once in a while, since it is expensive. - */ - if (inner_crit_iter_count >= inner_recompute_limit - && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */ - - inner_crit_iter_count = 0; -#ifdef VERBOSE - VTR_LOG("Inner loop recompute criticalities\n"); -#endif - - PlaceCritParams crit_params; - crit_params.crit_exponent = state->crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - //Update all timing related classes - perform_full_timing_update(crit_params, delay_model, criticalities, - setup_slacks, pin_timing_invalidator, - timing_info, costs, placer_state); - } - inner_crit_iter_count++; - } - - /* Lines below prevent too much round-off error from accumulating - * in the cost over many iterations (due to incremental updates). 
- * This round-off can lead to error checks failing because the cost - * is different from what you get when you recompute from scratch. - */ - ++(*moves_since_cost_recompute); - if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) { - net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs); - - if (noc_cost_handler.has_value()) { - noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs); - } - - *moves_since_cost_recompute = 0; - } - - if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0 - && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { - std::string filename = vtr::string_fmt("placement_%03d_%03d.place", - state->num_temps + 1, inner_placement_save_count); - VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n", - inner_iter, state->move_lim, filename.c_str()); - print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs()); - ++inner_placement_save_count; - } - } - - /* Calculate the success_rate and std_dev of the costs. */ - stats->calc_iteration_stats(*costs, state->move_lim); -} - /*only count non-global connections */ static int count_connections() { auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -928,446 +725,7 @@ static int count_connections() { count += cluster_ctx.clb_nlist.net_sinks(net_id).size(); } - return (count); -} - -///@brief Find the starting temperature for the annealing loop. 
-static float starting_t(const t_annealing_state* state, - t_placer_costs* costs, - t_annealing_sched annealing_sched, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - SetupTimingInfo* timing_info, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler) { - if (annealing_sched.type == e_sched_type::USER_SCHED) { - return (annealing_sched.init_t); - } - - auto& cluster_ctx = g_vpr_ctx.clustering(); - - /* Use to calculate the average of cost when swap is accepted. */ - int num_accepted = 0; - - /* Use double types to avoid round off. */ - double av = 0., sum_of_squares = 0.; - - /* Determines the block swap loop count. 
*/ - int move_lim = std::min(state->move_lim_max, - (int)cluster_ctx.clb_nlist.blocks().size()); - - bool manual_move_enabled = false; - - for (int i = 0; i < move_lim; i++) { -#ifndef NO_GRAPHICS - //Checks manual move flag for manual move feature - t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->show_graphics) { - manual_move_enabled = manual_move_is_selected(); - } -#endif /*NO_GRAPHICS*/ - - //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack - e_move_result swap_result = try_swap(state, costs, move_generator, - manual_move_generator, timing_info, pin_timing_invalidator, - blocks_affected, delay_model, criticalities, setup_slacks, - placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm, - REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, swap_stats, - placer_state, net_cost_handler, noc_cost_handler); - - - if (swap_result == ACCEPTED) { - num_accepted++; - av += costs->cost; - sum_of_squares += costs->cost * costs->cost; - swap_stats.num_swap_accepted++; - } else if (swap_result == ABORTED) { - swap_stats.num_swap_aborted++; - } else { - swap_stats.num_swap_rejected++; - } - } - - /* Take the average of the accepted swaps' cost values. */ - av = num_accepted > 0 ? (av / num_accepted) : 0.; - - /* Get the standard deviation. */ - double std_dev = get_std_dev(num_accepted, sum_of_squares, av); - - /* Print warning if not all swaps are accepted. */ - if (num_accepted != move_lim) { - VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n", - num_accepted, move_lim); - } - -#ifdef VERBOSE - /* Print stats related to finding the initital temp. */ - VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev); -#endif - - // Improved initial placement uses a fast SA for NoC routers and centroid placement - // for other blocks. 
The temperature is reduced to prevent SA from destroying the initial placement - float init_temp = std_dev / 64; - - return init_temp; -} - -/** - * @brief Pick some block and moves it to another spot. - * - * If the new location is empty, directly move the block. If the new location - * is occupied, switch the blocks. Due to the different sizes of the blocks, - * this block switching may occur for multiple times. It might also cause the - * current swap attempt to abort due to inability to find suitable locations - * for moved blocks. - * - * The move generator will record all the switched blocks in the variable - * `blocks_affected`. Afterwards, the move will be assessed by the chosen - * cost formulation. Currently, there are three ways to assess move cost, - * which are stored in the enum type `t_place_algorithm`. - * - * @return Whether the block swap is accepted, rejected or aborted. - */ -static e_move_result try_swap(const t_annealing_state* state, - t_placer_costs* costs, - MoveGenerator& move_generator, - ManualMoveGenerator& manual_move_generator, - SetupTimingInfo* timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - t_pl_blocks_to_be_moved& blocks_affected, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - MoveTypeStat& move_type_stat, - const t_place_algorithm& place_algorithm, - float timing_bb_factor, - bool manual_move_enabled, - t_swap_stats& swap_stats, - PlacerState& placer_state, - NetCostHandler& net_cost_handler, - std::optional& noc_cost_handler) { - /* Picks some block and moves it to another spot. If this spot is * - * occupied, switch the blocks. Assess the change in cost function. * - * rlim is the range limiter. * - * Returns whether the swap is accepted, rejected or aborted. * - * Passes back the new value of the cost functions. 
*/ - auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); - - float rlim_escape_fraction = placer_opts.rlim_escape_fraction; - float timing_tradeoff = placer_opts.timing_tradeoff; - - PlaceCritParams crit_params; - crit_params.crit_exponent = state->crit_exponent; - crit_params.crit_limit = placer_opts.place_crit_limit; - - // move type and block type chosen by the agent - t_propose_action proposed_action{e_move_type::UNIFORM, -1}; - - swap_stats.num_ts_called++; - - MoveOutcomeStats move_outcome_stats; - - /* I'm using negative values of proposed_net_cost as a flag, * - * so DO NOT use cost functions that can go negative. */ - - double delta_c = 0; //Change in cost due to this swap. - double bb_delta_c = 0; //Change in the bounding box (wiring) cost. - double timing_delta_c = 0; //Change in the timing cost (delay * criticality). - - // Determine whether we need to force swap two router blocks - bool router_block_move = false; - if (noc_opts.noc) { - router_block_move = check_for_router_swap(noc_opts.noc_swap_percentage); - } - - /* Allow some fraction of moves to not be restricted by rlim, */ - /* in the hopes of better escaping local minima. */ - float rlim; - if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) { - rlim = std::numeric_limits::infinity(); - } else { - rlim = state->rlim; - } - - e_create_move create_move_outcome = e_create_move::ABORT; - - //When manual move toggle button is active, the manual move window asks the user for input. - if (manual_move_enabled) { -#ifndef NO_GRAPHICS - create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected, - proposed_action.move_type, rlim, placer_opts, - criticalities); -#else //NO_GRAPHICS - //Cast to void to explicitly avoid warning. 
- (void)manual_move_generator; -#endif //NO_GRAPHICS - } else if (router_block_move) { - // generate a move where two random router blocks are swapped - create_move_outcome = propose_router_swap(blocks_affected, rlim, placer_state.blk_loc_registry()); - proposed_action.move_type = e_move_type::UNIFORM; - } else { - //Generate a new move (perturbation) used to explore the space of possible placements - create_move_outcome = move_generator.propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities); - } - - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - LOG_MOVE_STATS_PROPOSED(t, blocks_affected); - - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", - costs->cost, costs->bb_cost, costs->timing_cost); - - e_move_result move_outcome = e_move_result::ABORTED; - - if (create_move_outcome == e_create_move::ABORT) { - LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), "ABORTED", - "illegal move"); - - move_outcome = ABORTED; - - } else { - VTR_ASSERT(create_move_outcome == e_create_move::VALID); - - /* - * To make evaluating the move simpler (e.g. calculating changed bounding box), - * we first move the blocks to their new locations (apply the move to - * blk_loc_registry.block_locs) and then compute the change in cost. If the move - * is accepted, the inverse look-up in place_ctx.grid_blocks is updated - * (committing the move). If the move is rejected, the blocks are returned to - * their original positions (reverting blk_loc_registry.block_locs to its original state). 
- * - * Note that the inverse look-up place_ctx.grid_blocks is only updated after - * move acceptance is determined, so it should not be used when evaluating a move. - */ - - /* Update the block positions */ - blk_loc_registry.apply_move_blocks(blocks_affected); - - //Find all the nets affected by this swap and update the wiring costs. - //This cost value doesn't depend on the timing info. - // - //Also find all the pins affected by the swap, and calculates new connection - //delays and timing costs and store them in proposed_* data structures. - net_cost_handler.find_affected_nets_and_update_costs(delay_model, criticalities, blocks_affected, - bb_delta_c, timing_delta_c); - - //For setup slack analysis, we first do a timing analysis to get the newest - //slack values resulted from the proposed block moves. If the move turns out - //to be accepted, we keep the updated slack values and commit the block moves. - //If rejected, we reject the proposed block moves and revert this timing analysis. - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Invalidates timing of modified connections for incremental timing updates. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ - commit_td_cost(blocks_affected, placer_state); - - /* Update timing information. Since we are analyzing setup slacks, * - * we only update those values and keep the criticalities stale * - * so as not to interfere with the original timing driven algorithm. * - * - * Note: the timing info must be updated after applying block moves * - * and committing the timing driven delays and costs. * - * If we wish to revert this timing update due to move rejection, * - * we need to revert block moves and restore the timing values. 
*/ - criticalities->disable_update(); - setup_slacks->enable_update(); - update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator, placer_state); - - /* Get the setup slack analysis cost */ - //TODO: calculate a weighted average of the slack cost and wiring cost - delta_c = analyze_setup_slack_cost(setup_slacks, placer_state) * costs->timing_cost_norm; - } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { - /* Take delta_c as a combination of timing and wiring cost. In - * addition to `timing_tradeoff`, we normalize the cost values */ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " - "timing_delta_c %e, timing_cost_norm %e\n", - bb_delta_c, - costs->bb_cost_norm, - timing_tradeoff, - timing_delta_c, - costs->timing_cost_norm); - delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm - + timing_tradeoff * timing_delta_c * costs->timing_cost_norm; - } else { - VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", - bb_delta_c, - costs->bb_cost_norm); - delta_c = bb_delta_c * costs->bb_cost_norm; - } - - NocCostTerms noc_delta_c; // change in NoC cost - /* Update the NoC data structure and costs*/ - if (noc_opts.noc) { - VTR_ASSERT_SAFE(noc_cost_handler.has_value()); - noc_cost_handler->find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); - - // Include the NoC delta costs in the total cost change for this swap - delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts); - } - - /* 1 -> move accepted, 0 -> rejected. */ - move_outcome = assess_swap(delta_c, state->t); - - //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. 
-#ifndef NO_GRAPHICS - if (manual_move_enabled) { - move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome); - } -#endif //NO_GRAPHICS - - if (move_outcome == ACCEPTED) { - costs->cost += delta_c; - costs->bb_cost += bb_delta_c; - - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Update the timing driven cost as usual */ - costs->timing_cost += timing_delta_c; - - //Commit the setup slack information - //The timing delay and cost values should be committed already - commit_setup_slacks(setup_slacks, placer_state); - } - - if (place_algorithm == CRITICALITY_TIMING_PLACE) { - costs->timing_cost += timing_delta_c; - - /* Invalidates timing of modified connections for incremental * - * timing updates. These invalidations are accumulated for a * - * big timing update in the outer loop. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ - commit_td_cost(blocks_affected, placer_state); - } - - /* Update net cost functions and reset flags. */ - net_cost_handler.update_move_nets(); - - /* Update clb data structures since we kept the move. */ - blk_loc_registry.commit_move_blocks(blocks_affected); - - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - if (noc_opts.noc){ - noc_cost_handler->commit_noc_costs(); - *costs += noc_delta_c; - } - - //Highlights the new block when manual move is selected. -#ifndef NO_GRAPHICS - if (manual_move_enabled) { - manual_move_highlight_new_block_location(); - } -#endif //NO_GRAPHICS - - } else { - VTR_ASSERT_SAFE(move_outcome == REJECTED); - - /* Reset the net cost function flags first. 
*/ - net_cost_handler.reset_move_nets(); - - /* Restore the blk_loc_registry.block_locs data structures to their state before the move. */ - blk_loc_registry.revert_move_blocks(blocks_affected); - - if (place_algorithm == SLACK_TIMING_PLACE) { - /* Revert the timing delays and costs to pre-update values. */ - /* These routines must be called after reverting the block moves. */ - //TODO: make this process incremental - comp_td_connection_delays(delay_model, placer_state); - comp_td_costs(delay_model, *criticalities, placer_state, &costs->timing_cost); - - /* Re-invalidate the affected sink pins since the proposed * - * move is rejected, and the same blocks are reverted to * - * their original positions. */ - invalidate_affected_connections(blocks_affected, - pin_timing_invalidator, timing_info); - - /* Revert the timing update */ - update_timing_classes(crit_params, timing_info, criticalities, - setup_slacks, pin_timing_invalidator, placer_state); - - VTR_ASSERT_SAFE_MSG( - verify_connection_setup_slacks(setup_slacks, placer_state), - "The current setup slacks should be identical to the values before the try swap timing info update."); - } - - if (place_algorithm == CRITICALITY_TIMING_PLACE) { - /* Unstage the values stored in proposed_* data structures */ - revert_td_cost(blocks_affected, placer_state.mutable_timing()); - } - - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stat.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - /* Revert the traffic flow routes within the NoC*/ - if (noc_opts.noc) { - noc_cost_handler->revert_noc_traffic_flow_routes(blocks_affected); - } - } - - move_outcome_stats.delta_cost_norm = delta_c; - move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs->bb_cost_norm; - move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs->timing_cost_norm; - - move_outcome_stats.delta_bb_cost_abs = 
bb_delta_c; - move_outcome_stats.delta_timing_cost_abs = timing_delta_c; - - LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); - } - move_outcome_stats.outcome = move_outcome; - - // If we force a router block move then it was not proposed by the - // move generator, so we should not calculate the reward and update - // the move generators status since this outcome is not a direct - // consequence of the move generator - if (!router_block_move) { - move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor); - } - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# ifndef NO_GRAPHICS - stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c); -# endif -#endif - - /* Clear the data structure containing block move info */ - blocks_affected.clear_move_blocks(); - -#if 0 - // Check that each accepted swap yields a valid placement. This will - // greatly slow the placer, but can debug some issues. - check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); -#endif - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", - costs->cost, costs->bb_cost, costs->timing_cost); - return move_outcome; + return count; } static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index ec7ecb8982e..e09bb2d5dd6 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -8,6 +8,7 @@ #include "globals.h" #include "draw_global.h" #include "place_constraints.h" +#include "noc_place_utils.h" /** * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`. 
@@ -65,6 +66,25 @@ void t_placer_costs::update_norm_factors() { } } +double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { + double total_cost = 0.0; + + if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { + // in bounding box mode we only care about wirelength + total_cost = bb_cost * bb_cost_norm; + } else if (placer_opts.place_algorithm.is_timing_driven()) { + // in timing mode we include both wirelength and timing costs + total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); + } + + if (noc_opts.noc) { + // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion + total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts); + } + + return total_cost; +} + t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) { noc_cost_terms += noc_delta_cost; diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 49f4246dbe5..d76e738f70e 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -114,6 +114,18 @@ class t_placer_costs { */ void update_norm_factors(); + /** + * @brief Compute the total normalized cost for a given placement. This + * computation will vary depending on the placement modes. 
+ * + * @param costs The current placement cost components and their normalization + * factors + * @param placer_opts Determines the placement mode + * @param noc_opts Determines if placement includes the NoC + * @return double The computed total cost of the current placement + */ + double get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts); + /** * @brief Accumulates NoC cost difference terms * From 896810eb1dc9a67ea3dd1721af7c59094a9750a4 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 13:38:22 -0400 Subject: [PATCH 04/31] show annealing schedule in ShowSetup.cpp --- vpr/src/base/ShowSetup.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 68aa073759d..f0280669cd9 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -17,8 +17,7 @@ /******** Function Prototypes ********/ static void ShowPackerOpts(const t_packer_opts& PackerOpts); static void ShowNetlistOpts(const t_netlist_opts& NetlistOpts); -static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, - const t_annealing_sched& AnnealSched); +static void ShowPlacerOpts(const t_placer_opts& PlacerOpts); static void ShowAnalyticalPlacerOpts(const t_ap_opts& APOpts); static void ShowRouterOpts(const t_router_opts& RouterOpts); static void ShowAnalysisOpts(const t_analysis_opts& AnalysisOpts); @@ -56,7 +55,7 @@ void ShowSetup(const t_vpr_setup& vpr_setup) { ShowPackerOpts(vpr_setup.PackerOpts); } if (vpr_setup.PlacerOpts.doPlacement) { - ShowPlacerOpts(vpr_setup.PlacerOpts, vpr_setup.AnnealSched); + ShowPlacerOpts(vpr_setup.PlacerOpts); } if (vpr_setup.APOpts.doAP) { ShowAnalyticalPlacerOpts(vpr_setup.APOpts); @@ -498,8 +497,7 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { VTR_LOG("\n"); } -static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, - const t_annealing_sched& AnnealSched) { +static void ShowPlacerOpts(const t_placer_opts& 
PlacerOpts) { VTR_LOG("PlacerOpts.place_freq: "); switch (PlacerOpts.place_freq) { case PLACE_ONCE: @@ -609,7 +607,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlaceOpts.seed: %d\n", PlacerOpts.seed); - ShowAnnealSched(AnnealSched); + ShowAnnealSched(PlacerOpts.anneal_sched); } VTR_LOG("\n"); } From 8ffa91379b863f353c3d55060dae941cb9091877 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 13:54:33 -0400 Subject: [PATCH 05/31] added a constructor for PlacerTimingContext --- vpr/src/base/SetupVPR.cpp | 5 ++- vpr/src/base/SetupVPR.h | 1 - vpr/src/base/place_and_route.cpp | 8 ++--- vpr/src/base/place_and_route.h | 1 - vpr/src/base/vpr_api.cpp | 8 ++--- vpr/src/base/vpr_api.h | 1 - vpr/src/place/annealer.cpp | 52 ++++------------------------ vpr/src/place/annealer.h | 1 + vpr/src/place/place.cpp | 39 ++------------------- vpr/src/place/placer_state.cpp | 58 ++++++++++++++++++++++++++++++++ vpr/src/place/placer_state.h | 22 ++++++++++++ 11 files changed, 97 insertions(+), 99 deletions(-) create mode 100644 vpr/src/place/placer_state.cpp diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index d6315762786..38ac3c595c7 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -97,7 +97,6 @@ void SetupVPR(const t_options* options, t_packer_opts* packerOpts, t_placer_opts* placerOpts, t_ap_opts* apOpts, - t_annealing_sched* annealSched, t_router_opts* routerOpts, t_analysis_opts* analysisOpts, t_noc_opts* nocOpts, @@ -145,7 +144,7 @@ void SetupVPR(const t_options* options, SetupNetlistOpts(*options, *netlistOpts); SetupPlacerOpts(*options, placerOpts); - SetupAnnealSched(*options, annealSched); + SetupAnnealSched(*options, &placerOpts->anneal_sched); SetupRouterOpts(*options, routerOpts); SetupAnalysisOpts(*options, *analysisOpts); SetupPowerOpts(*options, powerOpts, arch); @@ -155,7 +154,7 @@ void SetupVPR(const t_options* options, //save the device layout, which is required to parse the 
architecture file arch->device_layout = options->device_layout; - if (readArchFile == true) { + if (readArchFile) { vtr::ScopedStartFinishTimer t("Loading Architecture Description"); switch (options->arch_format) { case e_arch_format::VTR: diff --git a/vpr/src/base/SetupVPR.h b/vpr/src/base/SetupVPR.h index 451fdc6567a..45bf510c18c 100644 --- a/vpr/src/base/SetupVPR.h +++ b/vpr/src/base/SetupVPR.h @@ -17,7 +17,6 @@ void SetupVPR(const t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index ab5cf31ca4f..60e2459a6ba 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -55,7 +55,6 @@ static float comp_width(t_chan* chan, float x, float separation); int binary_search_place_and_route(const Netlist<>& placement_net_list, const Netlist<>& router_net_list, const t_placer_opts& placer_opts_ref, - const t_annealing_sched& annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, @@ -183,7 +182,6 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, - annealing_sched, router_opts, analysis_opts, noc_opts, @@ -191,7 +189,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, det_routing_arch, segment_inf, arch->Directs, - false); + /*is_flat=*/false); } success = route(router_net_list, current, @@ -326,10 +324,10 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, break; if (placer_opts.place_freq == PLACE_ALWAYS) { placer_opts.place_chan_width = current; - try_place(placement_net_list, placer_opts, annealing_sched, router_opts, analysis_opts, noc_opts, + try_place(placement_net_list, placer_opts, 
router_opts, analysis_opts, noc_opts, arch->Chans, det_routing_arch, segment_inf, arch->Directs, - false); + /*is_flat=*/false); } success = route(router_net_list, diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h index b4735ed8af4..6f191c0ff9e 100644 --- a/vpr/src/base/place_and_route.h +++ b/vpr/src/base/place_and_route.h @@ -25,7 +25,6 @@ struct t_fmap_cell { int binary_search_place_and_route(const Netlist<>& placement_net_list, const Netlist<>& router_net_list, const t_placer_opts& placer_opts_ref, - const t_annealing_sched& annealing_sched, const t_router_opts& router_opts, const t_analysis_opts& analysis_opts, const t_noc_opts& noc_opts, diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 86cbdaabd80..16589cdf8bc 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -287,7 +287,6 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a &vpr_setup->PackerOpts, &vpr_setup->PlacerOpts, &vpr_setup->APOpts, - &vpr_setup->AnnealSched, &vpr_setup->RouterOpts, &vpr_setup->AnalysisOpts, &vpr_setup->NocOpts, @@ -830,7 +829,6 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch& try_place(net_list, vpr_setup.PlacerOpts, - vpr_setup.AnnealSched, vpr_setup.RouterOpts, vpr_setup.AnalysisOpts, vpr_setup.NocOpts, @@ -1058,7 +1056,6 @@ RouteStatus vpr_route_min_W(const Netlist<>& net_list, int min_W = binary_search_place_and_route((const Netlist<>&)g_vpr_ctx.clustering().clb_nlist, net_list, vpr_setup.PlacerOpts, - vpr_setup.AnnealSched, router_opts, vpr_setup.AnalysisOpts, vpr_setup.NocOpts, @@ -1290,8 +1287,9 @@ static void free_complex_block_types() { void free_circuit() { //Free new net structures auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { cluster_ctx.clb_nlist.remove_block(blk_id); + } cluster_ctx.clb_nlist = ClusteredNetlist(); 
} @@ -1369,7 +1367,6 @@ void vpr_setup_vpr(t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, @@ -1395,7 +1392,6 @@ void vpr_setup_vpr(t_options* Options, PackerOpts, PlacerOpts, APOpts, - AnnealSched, RouterOpts, AnalysisOpts, NocOpts, diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h index dca8f7441ad..93cf2d12cc1 100644 --- a/vpr/src/base/vpr_api.h +++ b/vpr/src/base/vpr_api.h @@ -179,7 +179,6 @@ void vpr_setup_vpr(t_options* Options, t_packer_opts* PackerOpts, t_placer_opts* PlacerOpts, t_ap_opts* APOpts, - t_annealing_sched* AnnealSched, t_router_opts* RouterOpts, t_analysis_opts* AnalysisOpts, t_noc_opts* NocOpts, diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 3203d79576f..204b7456c35 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -101,17 +101,6 @@ static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, NetPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); - -/** - * @brief Update the connection_timing_cost values from the temporary - * values for all connections that have/haven't changed. - * - * All the connections have already been gathered by blocks_affected.affected_pins - * after running the routine find_affected_nets_and_update_costs() in try_swap(). - */ -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state); - /** * @brief Check if the setup slack has gotten better or worse due to block swap. 
* @@ -147,30 +136,6 @@ static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& block } } -static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerState& placer_state) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& p_timing_ctx = placer_state.mutable_timing(); - auto& connection_delay = p_timing_ctx.connection_delay; - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& connection_timing_cost = p_timing_ctx.connection_timing_cost; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - //Go through all the sink pins affected - for (ClusterPinId pin_id : blocks_affected.affected_pins) { - ClusterNetId net_id = clb_nlist.pin_net(pin_id); - int ipin = clb_nlist.pin_net_index(pin_id); - - //Commit the timing delay and cost values - connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; - proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; - proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } -} - static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -188,8 +153,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, size_t ipin = clb_nlist.pin_net_index(clb_pin); original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); - proposed_setup_slacks.push_back( - setup_slacks->setup_slack(net_id, ipin)); + proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); } //Sort in ascending order, from the worse slack value to the best @@ -199,8 +163,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, //Check the first pair of slack values that are different //If found, return their 
difference for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { - float slack_diff = original_setup_slacks[idiff] - - proposed_setup_slacks[idiff]; + float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff]; if (slack_diff != 0) { return slack_diff; @@ -480,12 +443,11 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, /* calculate the number of moves in the quench that we should recompute timing after based on the value of * * the commandline option quench_recompute_divider */ - int quench_recompute_limit; if (placer_opts.quench_recompute_divider != 0) { - quench_recompute_limit = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); + quench_recompute_limit_ = static_cast(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider); } else { // don't do an quench recompute - quench_recompute_limit = first_move_lim + 1; + quench_recompute_limit_ = first_move_lim + 1; } // Get the first range limiter @@ -623,7 +585,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, } /* Allow some fraction of moves to not be restricted by rlim, - /* in the hopes of better escaping local minima. */ + * in the hopes of better escaping local minima. */ float rlim; if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) { rlim = std::numeric_limits::infinity(); @@ -704,7 +666,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Update the connection_timing_cost and connection_delay * * values from the temporary values. */ - commit_td_cost(blocks_affected_, placer_state_); + placer_state_.mutable_timing().commit_td_cost(blocks_affected_); /* Update timing information. 
Since we are analyzing setup slacks, * * we only update those values and keep the criticalities stale * @@ -787,7 +749,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Update the connection_timing_cost and connection_delay * * values from the temporary values. */ - commit_td_cost(blocks_affected_, placer_state_); + placer_state_.mutable_timing().commit_td_cost(blocks_affected_); } /* Update net cost functions and reset flags. */ diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 02b48967525..0c6b2e66ec3 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -245,6 +245,7 @@ class PlacementAnnealer { static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; int inner_recompute_limit_; + int quench_recompute_limit_; int moves_since_cost_recompute_; int tot_iter_; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f4ccaf083f1..9bab97519aa 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -82,8 +82,7 @@ static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; #endif /************** Types and defines local to place.c ***************************/ -constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); -constexpr float INVALID_COST = std::numeric_limits::quiet_NaN(); +constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE @@ -130,9 +129,6 @@ static int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_regis static int count_connections(); -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, - const PlacerState& placer_state); - static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const t_analysis_opts& analysis_opts, const SetupTimingInfo& timing_info, @@ -236,8 +232,7 @@ void try_place(const Netlist<>& net_list, int move_lim = (int)(placer_opts.anneal_sched.inner_num * 
pow(net_list.blocks().size(), 1.3333)); - PlacerState placer_state; - auto& place_move_ctx = placer_state.mutable_move(); + PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven()); auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); const auto& p_timing_ctx = placer_state.timing(); const auto& p_runtime_ctx = placer_state.runtime(); @@ -777,36 +772,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins); } - if (placer_opts.place_algorithm.is_timing_driven()) { - /* Allocate structures associated with timing driven placement */ - /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ - - auto& p_timing_ctx = placer_state.mutable_timing(); - - p_timing_ctx.connection_delay = make_net_pins_matrix((const Netlist<>&)cluster_ctx.clb_nlist, 0.f); - p_timing_ctx.proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); - - p_timing_ctx.connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); - - p_timing_ctx.connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); - p_timing_ctx.proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); - p_timing_ctx.net_timing_cost.resize(num_nets, 0.); - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - p_timing_ctx.connection_delay[net_id][ipin] = 0; - p_timing_ctx.proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - - p_timing_ctx.proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - continue; - - p_timing_ctx.connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } - } - } - auto& place_move_ctx = placer_state.mutable_move(); if (place_ctx.cube_bb) { diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp new file 
mode 100644 index 00000000000..ab9edd12836 --- /dev/null +++ b/vpr/src/place/placer_state.cpp @@ -0,0 +1,58 @@ + +#include "placer_state.h" + +#include "globals.h" +#include "move_transactions.h" + + +PlacerTimingContext::PlacerTimingContext(bool placement_is_timing_driven) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const size_t num_nets = cluster_ctx.clb_nlist.nets().size(); + + if (placement_is_timing_driven) { + connection_delay = make_net_pins_matrix((const Netlist<>&)cluster_ctx.clb_nlist, 0.f); + proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); + + connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); + + connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); + proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); + net_timing_cost.resize(num_nets, 0.); + + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { + connection_delay[net_id][ipin] = 0; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + + if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + continue; + } + + connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } + } + } +} + +void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + // Go through all the sink pins affected + for (ClusterPinId pin_id : blocks_affected.affected_pins) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + int ipin = clb_nlist.pin_net_index(pin_id); + + // Commit the timing delay and cost values + connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + connection_timing_cost[net_id][ipin] = 
proposed_connection_timing_cost[net_id][ipin]; + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } +} + +PlacerState::PlacerState(bool placement_is_timing_driven) + : timing_(placement_is_timing_driven) {} diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 344839a1bd5..9c99830c994 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -24,6 +24,23 @@ * use mutable_timing() to access it. For more, see PlacerTimingCosts. */ struct PlacerTimingContext : public Context { + PlacerTimingContext() = delete; + + /** + * @brief Allocate structures associated with timing driven placement + * @param placement_is_timing_driven Specifies whether the placement is timing driven. + */ + PlacerTimingContext(bool placement_is_timing_driven); + + /** + * @brief Update the connection_timing_cost values from the temporary + * values for all connections that have/haven't changed. + * + * All the connections have already been gathered by blocks_affected.affected_pins + * after running the routine find_affected_nets_and_update_costs() in try_swap(). + */ + void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); + /** * @brief Net connection delays based on the committed block positions. * @@ -74,6 +91,8 @@ struct PlacerTimingContext : public Context { * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1] */ vtr::vector net_timing_cost; + + static constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); }; /** @@ -143,6 +162,9 @@ struct PlacerMoveContext : public Context { * how to use this class due to similar implementation style. 
*/ class PlacerState : public Context { + public: + PlacerState(bool placement_is_timing_driven); + public: inline const PlacerTimingContext& timing() const { return timing_; } inline PlacerTimingContext& mutable_timing() { return timing_; } From 2ed4cac5cab92d1728ab91fd376b650bcf03bfbb Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 14:02:24 -0400 Subject: [PATCH 06/31] add a constructor for PlacerMoveContext --- vpr/src/place/place.cpp | 21 +-------------------- vpr/src/place/placer_state.cpp | 32 ++++++++++++++++++++++++++++++-- vpr/src/place/placer_state.h | 16 +++++++--------- 3 files changed, 38 insertions(+), 31 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 9bab97519aa..38a4a3508d8 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -232,7 +232,7 @@ void try_place(const Netlist<>& net_list, int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333)); - PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven()); + PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb); auto& blk_loc_registry = placer_state.mutable_blk_loc_registry(); const auto& p_timing_ctx = placer_state.timing(); const auto& p_runtime_ctx = placer_state.runtime(); @@ -763,8 +763,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac size_t num_nets = cluster_ctx.clb_nlist.nets().size(); - const int num_layers = device_ctx.grid.get_num_layers(); - init_placement_context(placer_state.mutable_blk_loc_registry(), directs); int max_pins_per_clb = 0; @@ -772,23 +770,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins); } - auto& place_move_ctx = placer_state.mutable_move(); - - if (place_ctx.cube_bb) { - place_move_ctx.bb_coords.resize(num_nets, t_bb()); - place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb()); - } else { 
- VTR_ASSERT_SAFE(!place_ctx.cube_bb); - place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector(num_layers, t_2D_bb())); - place_move_ctx.layer_bb_coords.resize(num_nets, std::vector(num_layers, t_2D_bb())); - } - - place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); - for (size_t flat_idx = 0; flat_idx < place_move_ctx.num_sink_pin_layer.size(); flat_idx++) { - auto& elem = place_move_ctx.num_sink_pin_layer.get(flat_idx); - elem = OPEN; - } - place_ctx.compressed_block_grids = create_compressed_block_grids(); if (noc_opts.noc) { diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp index ab9edd12836..57fe8735685 100644 --- a/vpr/src/place/placer_state.cpp +++ b/vpr/src/place/placer_state.cpp @@ -4,6 +4,33 @@ #include "globals.h" #include "move_transactions.h" +PlacerMoveContext::PlacerMoveContext(bool cube_bb) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + // allocate helper vectors that are used by many move generators + X_coord.resize(10, 0); + Y_coord.resize(10, 0); + layer_coord.resize(10, 0); + + const size_t num_nets = cluster_ctx.clb_nlist.nets().size(); + + const int num_layers = device_ctx.grid.get_num_layers(); + + if (cube_bb) { + bb_coords.resize(num_nets, t_bb()); + bb_num_on_edges.resize(num_nets, t_bb()); + } else { + layer_bb_num_on_edges.resize(num_nets, std::vector(num_layers, t_2D_bb())); + layer_bb_coords.resize(num_nets, std::vector(num_layers, t_2D_bb())); + } + + num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); + for (size_t flat_idx = 0; flat_idx < num_sink_pin_layer.size(); flat_idx++) { + auto& elem = num_sink_pin_layer.get(flat_idx); + elem = OPEN; + } +} PlacerTimingContext::PlacerTimingContext(bool placement_is_timing_driven) { const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -54,5 +81,6 @@ void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_a } } -PlacerState::PlacerState(bool 
placement_is_timing_driven) - : timing_(placement_is_timing_driven) {} +PlacerState::PlacerState(bool placement_is_timing_driven, bool cube_bb) + : timing_(placement_is_timing_driven) + , move_(cube_bb) {} diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 9c99830c994..b35e56dab9c 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -109,6 +109,10 @@ struct PlacerRuntimeContext : public Context { * @brief Placement Move generators data */ struct PlacerMoveContext : public Context { + public: + PlacerMoveContext() = delete; + explicit PlacerMoveContext(bool cube_bb); + public: // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) vtr::vector bb_num_on_edges; @@ -138,16 +142,10 @@ struct PlacerMoveContext : public Context { // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option) std::vector> highly_crit_pins; - - public: - PlacerMoveContext() { - // allocate helper vectors that are used by many move generators - X_coord.resize(10, 0); - Y_coord.resize(10, 0); - layer_coord.resize(10, 0); - } }; + + /** * @brief This object encapsulates VPR placer's state. 
* @@ -163,7 +161,7 @@ struct PlacerMoveContext : public Context { */ class PlacerState : public Context { public: - PlacerState(bool placement_is_timing_driven); + PlacerState(bool placement_is_timing_driven, bool cube_bb); public: inline const PlacerTimingContext& timing() const { return timing_; } From ecbcba969beb6b183862f39114a0942ee4bc802c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 14:15:33 -0400 Subject: [PATCH 07/31] added invalidate_affected_connections() to NetPinTimingInvalidator --- vpr/src/place/annealer.cpp | 31 +++--------------------- vpr/src/timing/NetPinTimingInvalidator.h | 19 +++++++++++++++ 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 204b7456c35..91da212ae67 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -88,19 +88,6 @@ #endif - -/** - * @brief Invalidates the connections affected by the specified block moves. - * - * All the connections recorded in blocks_affected.affected_pins have different - * values for `proposed_connection_delay` and `connection_delay`. - * - * Invalidate all the timing graph edges associated with these connections via - * the NetPinTimingInvalidator class. - */ -static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info); /** * @brief Check if the setup slack has gotten better or worse due to block swap. 
* @@ -124,18 +111,6 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, static e_move_result assess_swap(double delta_c, double t); -static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, - NetPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { - VTR_ASSERT_SAFE(timing_info); - VTR_ASSERT_SAFE(pin_tedges_invalidator); - - // Invalidate timing graph edges affected by the move - for (ClusterPinId pin : blocks_affected.affected_pins) { - pin_tedges_invalidator->invalidate_connection(pin, timing_info); - } -} - static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state) { const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -662,7 +637,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, //If rejected, we reject the proposed block moves and revert this timing analysis. if (place_algorithm == SLACK_TIMING_PLACE) { // Invalidates timing of modified connections for incremental timing updates. - invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); /* Update the connection_timing_cost and connection_delay * * values from the temporary values. */ @@ -745,7 +720,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Invalidates timing of modified connections for incremental * * timing updates. These invalidations are accumulated for a * * big timing update in the outer loop. */ - invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); /* Update the connection_timing_cost and connection_delay * * values from the temporary values. 
*/ @@ -792,7 +767,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Re-invalidate the affected sink pins since the proposed * move is rejected, and the same blocks are reverted to * their original positions. */ - invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_); + pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); // Revert the timing update update_timing_classes(crit_params, timing_info_, criticalities_, diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h index f452b95bd7a..754d118aef2 100644 --- a/vpr/src/timing/NetPinTimingInvalidator.h +++ b/vpr/src/timing/NetPinTimingInvalidator.h @@ -4,6 +4,7 @@ #include "tatum/TimingGraphFwd.hpp" #include "timing_info.h" #include "vtr_range.h" +#include "move_transactions.h" #include "vtr_vec_id_set.h" @@ -21,6 +22,24 @@ class NetPinTimingInvalidator { virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0; virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0; virtual void reset() = 0; + + /** + * @brief Invalidates the connections affected by the specified block moves. + * + * All the connections recorded in blocks_affected.affected_pins have different + * values for `proposed_connection_delay` and `connection_delay`. + * + * Invalidate all the timing graph edges associated with these connections via + * the NetPinTimingInvalidator class. 
+ */ + void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) { + VTR_ASSERT_SAFE(timing_info); + + // Invalidate timing graph edges affected by the move + for (ClusterPinId pin : blocks_affected.affected_pins) { + invalidate_connection(pin, timing_info); + } + } }; //Helper class for iterating through the timing edges associated with a particular From 34e0e909841f1a8afe52fcddba805b573f26847e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 14:16:06 -0400 Subject: [PATCH 08/31] added PlacerTimingContext::revert_td_cost() --- vpr/src/place/annealer.cpp | 27 +-------------------------- vpr/src/place/placer_state.cpp | 20 ++++++++++++++++++++ vpr/src/place/placer_state.h | 6 ++++++ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 91da212ae67..003f0ea3045 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -173,31 +173,6 @@ static e_move_result assess_swap(double delta_c, double t) { return REJECTED; } -//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on -//the move proposed in blocks_affected -static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected, - PlacerTimingContext& p_timing_ctx) { -#ifndef VTR_ASSERT_SAFE_ENABLED - (void)blocks_affected; - (void)p_timing_ctx; -#else - //Invalidate temp delay & timing cost values to match sanity checks in - //comp_td_connection_cost() - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay; - auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost; - - for (ClusterPinId pin : blocks_affected.affected_pins) { - ClusterNetId net = clb_nlist.pin_net(pin); - int ipin = clb_nlist.pin_net_index(pin); - proposed_connection_delay[net][ipin] = INVALID_DELAY; - 
proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; - } -#endif -} - /** * @brief Updates all the cost normalization factors during the outer * loop iteration of the placement. At each temperature change, these @@ -780,7 +755,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, if (place_algorithm == CRITICALITY_TIMING_PLACE) { // Un-stage the values stored in proposed_* data structures - revert_td_cost(blocks_affected_, placer_state_.mutable_timing()); + placer_state_.mutable_timing().revert_td_cost(blocks_affected_); } if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp index 57fe8735685..e83d74bbe2c 100644 --- a/vpr/src/place/placer_state.cpp +++ b/vpr/src/place/placer_state.cpp @@ -81,6 +81,26 @@ void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_a } } +void PlacerTimingContext::revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { +#ifndef VTR_ASSERT_SAFE_ENABLED + (void)blocks_affected; +#else + //Invalidate temp delay & timing cost values to match sanity checks in + //comp_td_connection_cost() + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + + for (ClusterPinId pin : blocks_affected.affected_pins) { + ClusterNetId net = clb_nlist.pin_net(pin); + int ipin = clb_nlist.pin_net_index(pin); + proposed_connection_delay[net][ipin] = INVALID_DELAY; + proposed_connection_timing_cost[net][ipin] = INVALID_DELAY; + } +#endif +} + PlacerState::PlacerState(bool placement_is_timing_driven, bool cube_bb) : timing_(placement_is_timing_driven) , move_(cube_bb) {} + diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index b35e56dab9c..c727ac181e5 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -41,6 +41,12 @@ struct PlacerTimingContext : public Context { */ void 
commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); + /** + * @brief Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost + * based on the move proposed in blocks_affected + */ + void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); + /** * @brief Net connection delays based on the committed block positions. * From 15540aef82b48d4129522c88becdabe47d782494 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 16:49:05 -0400 Subject: [PATCH 09/31] add select_move_generator() function --- vpr/src/place/RL_agent_util.cpp | 38 ++++++----------------- vpr/src/place/RL_agent_util.h | 23 +++++--------- vpr/src/place/annealer.cpp | 55 +++++++++------------------------ vpr/src/place/annealer.h | 3 +- vpr/src/place/place.cpp | 44 ++++++++------------------ vpr/src/place/place_util.cpp | 4 +++ vpr/src/place/place_util.h | 8 +++-- 7 files changed, 55 insertions(+), 120 deletions(-) diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp index e418a7db6ee..e080b335fe7 100644 --- a/vpr/src/place/RL_agent_util.cpp +++ b/vpr/src/place/RL_agent_util.cpp @@ -122,40 +122,20 @@ std::pair, std::unique_ptr> create return move_generators; } -void assign_current_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator) { +MoveGenerator& select_move_generator(std::unique_ptr& move_generator, + std::unique_ptr& move_generator2, + e_agent_state agent_state, + const t_placer_opts& placer_opts, + bool in_quench) { if (in_quench) { if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate) - current_move_generator = std::move(move_generator2); + return *move_generator2; else - current_move_generator = std::move(move_generator); + return *move_generator; } else { if (agent_state == 
e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate) - current_move_generator = std::move(move_generator); + return *move_generator; else - current_move_generator = std::move(move_generator2); - } -} - -void update_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator) { - if (in_quench) { - if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate) - move_generator2 = std::move(current_move_generator); - else - move_generator = std::move(current_move_generator); - } else { - if (agent_state == e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate) - move_generator = std::move(current_move_generator); - else - move_generator2 = std::move(current_move_generator); + return *move_generator2; } } \ No newline at end of file diff --git a/vpr/src/place/RL_agent_util.h b/vpr/src/place/RL_agent_util.h index afe8bf3b6cb..166ddccbf2a 100644 --- a/vpr/src/place/RL_agent_util.h +++ b/vpr/src/place/RL_agent_util.h @@ -31,22 +31,13 @@ std::pair, std::unique_ptr> create double noc_attraction_weight); /** - * @brief copy one of the available move_generators to be the current move_generator that would be used in the placement based on the placer_options and the agent state + * @brief Returns a reference to one of the available move generators to be the current move generator + * that would be used in the placement based on the placer_options and the agent state */ -void assign_current_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator); +MoveGenerator& select_move_generator(std::unique_ptr& move_generator, + std::unique_ptr& move_generator2, + e_agent_state agent_state, + const t_placer_opts& placer_opts, +
bool in_quench); -/** - * @brief move the updated current_move_generator to its original move_Generator structure based on the placer_options and the agent state - */ -void update_move_generator(std::unique_ptr& move_generator, - std::unique_ptr& move_generator2, - e_agent_state agent_state, - const t_placer_opts& placer_opts, - bool in_quench, - std::unique_ptr& current_move_generator); #endif diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 003f0ea3045..51508e65792 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -173,35 +173,6 @@ static e_move_result assess_swap(double delta_c, double t) { return REJECTED; } -/** - * @brief Updates all the cost normalization factors during the outer - * loop iteration of the placement. At each temperature change, these - * values are updated so that we can balance the tradeoff between the - * different placement cost components (timing, wirelength and NoC). - * Depending on the placement mode the corresponding normalization factors are - * updated. - * - * @param costs Contains the normalization factors which need to be updated - * @param placer_opts Determines the placement mode - * @param noc_opts Determines if placement includes the NoC - * @param noc_cost_handler Computes normalization factors for NoC-related cost terms - */ -static void update_placement_cost_normalization_factors(t_placer_costs* costs, - const t_placer_opts& placer_opts, - const t_noc_opts& noc_opts, - const std::optional& noc_cost_handler) { - /* Update the cost normalization factors */ - costs->update_norm_factors(); - - // update the noc normalization factors if the placement includes the NoC - if (noc_opts.noc) { - noc_cost_handler->update_noc_normalization_factors(*costs); - } - - // update the current total placement cost - costs->cost = costs->get_total_cost(placer_opts, noc_opts); -} - ///@brief Constructor: Initialize all annealing state variables and macros. 
t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, @@ -369,6 +340,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , move_stats_file_(nullptr, vtr::fclose) , outer_crit_iter_count_(1) , blocks_affected_(placer_state.block_locs().size()) + , quench_started_(false) { const auto& device_ctx = g_vpr_ctx.device(); @@ -388,8 +360,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, // don't do an inner recompute inner_recompute_limit_ = first_move_lim + 1; } - moves_since_cost_recompute_ = 0; - tot_iter_ = 0; /* calculate the number of moves in the quench that we should recompute timing after based on the value of * * the commandline option quench_recompute_divider */ @@ -400,6 +370,9 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, quench_recompute_limit_ = first_move_lim + 1; } + moves_since_cost_recompute_ = 0; + tot_iter_ = 0; + // Get the first range limiter placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); @@ -807,19 +780,15 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, } /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ -void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) { +void PlacementAnnealer::outer_loop_update_timing_info() { if (placer_opts_.place_algorithm.is_timing_driven()) { /* At each temperature change we update these values to be used * for normalizing the tradeoff between timing and wirelength (bb) */ - if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter - || placer_opts_.inner_loop_recompute_divider != 0) { + if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter || + placer_opts_.inner_loop_recompute_divider != 0) { #ifdef VERBOSE VTR_LOG("Outer loop recompute criticalities\n"); #endif - // Avoid division by zero - num_connections = 
std::max(num_connections, 1); - VTR_ASSERT(num_connections > 0); - PlaceCritParams crit_params; crit_params.crit_exponent = annealing_state_.crit_exponent; crit_params.crit_limit = placer_opts_.place_crit_limit; @@ -834,7 +803,10 @@ void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) { } // Update the cost normalization factors - update_placement_cost_normalization_factors(&costs_, placer_opts_, noc_opts_, noc_cost_handler_); + costs_.update_norm_factors(); + + // update the current total placement cost + costs_.cost = costs_.get_total_cost(placer_opts_, noc_opts_); } /* Function which contains the inner loop of the simulated annealing */ @@ -867,8 +839,9 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, * We do this only once in a while, since it is expensive. */ + const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_; // on last iteration don't recompute - if (inner_crit_iter_count >= inner_recompute_limit_ && inner_iter != annealing_state_.move_lim - 1) { + if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) { inner_crit_iter_count = 0; #ifdef VERBOSE @@ -934,6 +907,8 @@ bool PlacementAnnealer::outer_loop_update_state() { } void PlacementAnnealer::start_quench() { + quench_started_ = true; + // Freeze out: only accept solutions that improve placement. 
annealing_state_.t = 0; diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 0c6b2e66ec3..b932ed56d7e 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -174,7 +174,7 @@ class PlacementAnnealer { void placement_inner_loop(MoveGenerator& move_generator, float timing_bb_factor); - void outer_loop_update_timing_info(int num_connections); + void outer_loop_update_timing_info(); bool outer_loop_update_state(); @@ -248,6 +248,7 @@ class PlacementAnnealer { int quench_recompute_limit_; int moves_since_cost_recompute_; int tot_iter_; + bool quench_started_; private: ///@brief Find the starting temperature for the annealing loop. diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 38a4a3508d8..f9c51b28e37 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -185,9 +185,8 @@ void try_place(const Netlist<>& net_list, auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - int num_connections, outer_crit_iter_count; - t_placer_costs costs(placer_opts.place_algorithm); + t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc); tatum::TimingPathInfo critical_path; float sTNS = NAN; @@ -297,7 +296,7 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_algorithm.is_timing_driven()) { costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); - num_connections = count_connections(); + int num_connections = count_connections(); VTR_LOG("\n"); VTR_LOG("There are %d point to point connections in this circuit.\n", num_connections); @@ -359,8 +358,6 @@ void try_place(const Netlist<>& net_list, *timing_info, debug_tnode); } - outer_crit_iter_count = 1; - /* Initialize the normalization factors. 
Calling costs.update_norm_factors() * * here would fail the golden results of strong_sdc benchmark */ costs.timing_cost_norm = 1 / costs.timing_cost; @@ -375,10 +372,6 @@ void try_place(const Netlist<>& net_list, /* Timing cost and normalization factors are not used */ costs.timing_cost = INVALID_COST; costs.timing_cost_norm = INVALID_COST; - - /* Other initializations */ - outer_crit_iter_count = 0; - num_connections = 0; } if (noc_opts.noc) { @@ -467,8 +460,6 @@ void try_place(const Netlist<>& net_list, //RL agent state definition e_agent_state agent_state = e_agent_state::EARLY_IN_THE_ANNEAL; - std::unique_ptr current_move_generator; - //Define the timing bb weight factor for the agent's reward function float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT; @@ -488,7 +479,7 @@ void try_place(const Netlist<>& net_list, do { vtr::Timer temperature_timer; - annealer.outer_loop_update_timing_info(num_connections); + annealer.outer_loop_update_timing_info(); if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); @@ -503,18 +494,14 @@ void try_place(const Netlist<>& net_list, } } - //move the appropriate move_generator to be the current used move generator - assign_current_move_generator(move_generator, move_generator2, - agent_state, placer_opts, false, current_move_generator); + // select the appropriate move generator + MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2, + agent_state, placer_opts, false); // do a complete inner loop iteration - annealer.placement_inner_loop(*current_move_generator, + annealer.placement_inner_loop(current_move_generator, timing_bb_factor); - //move the update used move_generator to its original variable - update_move_generator(move_generator, move_generator2, agent_state, - placer_opts, false, current_move_generator); - print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), critical_path.delay(), sTNS, 
sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); @@ -530,8 +517,7 @@ void try_place(const Netlist<>& net_list, sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); - update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, - timing_info); + update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); //#ifdef VERBOSE // if (getEchoEnabled()) { @@ -550,19 +536,15 @@ void try_place(const Netlist<>& net_list, vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - annealer.outer_loop_update_timing_info(num_connections); + annealer.outer_loop_update_timing_info(); - //move the appropriate move_generator to be the current used move generator - assign_current_move_generator(move_generator, move_generator2, - agent_state, placer_opts, true, current_move_generator); + // select the appropriate move generator + MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2, + agent_state, placer_opts, true); /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ - annealer.placement_inner_loop(*current_move_generator, timing_bb_factor); - - // move the update used move_generator to its original variable - update_move_generator(move_generator, move_generator2, agent_state, - placer_opts, true, current_move_generator); + annealer.placement_inner_loop(current_move_generator, timing_bb_factor); if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index e09bb2d5dd6..aa65a15110a 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -64,6 +64,10 @@ void t_placer_costs::update_norm_factors() { VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); bb_cost_norm = 1 / bb_cost; //Updating the normalization 
factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost } + + if (noc_enabled) { + NocCostHandler::update_noc_normalization_factors(*this); + } } double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index d76e738f70e..2a816e01350 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -101,8 +101,9 @@ class t_placer_costs { NocCostTerms noc_cost_norm_factors; public: //Constructor - explicit t_placer_costs(t_place_algorithm algo) - : place_algorithm(algo) {} + explicit t_placer_costs(t_place_algorithm algo, bool noc) + : place_algorithm(algo) + , noc_enabled(noc) {} t_placer_costs() = default; public: //Mutator @@ -134,8 +135,9 @@ class t_placer_costs { t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost); private: - double MAX_INV_TIMING_COST = 1.e12; + static constexpr double MAX_INV_TIMING_COST = 1.e12; t_place_algorithm place_algorithm; + bool noc_enabled; }; /** From c005b70a1e76f5ffdb048ef0dc6582d61b01d008 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 17:02:53 -0400 Subject: [PATCH 10/31] enum class e_place_algorithm and e_place_bounding_box_mode --- vpr/src/base/ShowSetup.cpp | 6 +++--- vpr/src/base/read_options.cpp | 30 +++++++++++++++--------------- vpr/src/base/vpr_types.h | 6 +++--- vpr/src/place/annealer.cpp | 14 +++++++------- vpr/src/place/net_cost_handler.cpp | 2 +- vpr/src/place/place.cpp | 10 +++++----- vpr/src/place/place_util.cpp | 4 ++-- 7 files changed, 36 insertions(+), 36 deletions(-) diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index f0280669cd9..66170063d67 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -516,13 +516,13 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { || (PLACE_ALWAYS == PlacerOpts.place_freq)) { VTR_LOG("PlacerOpts.place_algorithm: "); 
switch (PlacerOpts.place_algorithm.get()) { - case BOUNDING_BOX_PLACE: + case e_place_algorithm::BOUNDING_BOX_PLACE: VTR_LOG("BOUNDING_BOX_PLACE\n"); break; - case CRITICALITY_TIMING_PLACE: + case e_place_algorithm::CRITICALITY_TIMING_PLACE: VTR_LOG("CRITICALITY_TIMING_PLACE\n"); break; - case SLACK_TIMING_PLACE: + case e_place_algorithm::SLACK_TIMING_PLACE: VTR_LOG("SLACK_TIMING_PLACE\n"); break; default: diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index eeb4bbfaee0..ce20940cda5 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -393,11 +393,11 @@ struct ParsePlaceAlgorithm { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "bounding_box") { - conv_value.set_value(BOUNDING_BOX_PLACE); + conv_value.set_value(e_place_algorithm::BOUNDING_BOX_PLACE); } else if (str == "criticality_timing") { - conv_value.set_value(CRITICALITY_TIMING_PLACE); + conv_value.set_value(e_place_algorithm::CRITICALITY_TIMING_PLACE); } else if (str == "slack_timing") { - conv_value.set_value(SLACK_TIMING_PLACE); + conv_value.set_value(e_place_algorithm::SLACK_TIMING_PLACE); } else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -415,12 +415,12 @@ struct ParsePlaceAlgorithm { ConvertedValue to_str(e_place_algorithm val) { ConvertedValue conv_value; - if (val == BOUNDING_BOX_PLACE) { + if (val == e_place_algorithm::BOUNDING_BOX_PLACE) { conv_value.set_value("bounding_box"); - } else if (val == CRITICALITY_TIMING_PLACE) { + } else if (val == e_place_algorithm::CRITICALITY_TIMING_PLACE) { conv_value.set_value("criticality_timing"); } else { - VTR_ASSERT(val == SLACK_TIMING_PLACE); + VTR_ASSERT(val == e_place_algorithm::SLACK_TIMING_PLACE); conv_value.set_value("slack_timing"); } return conv_value; @@ -435,11 +435,11 @@ struct ParsePlaceBoundingBox { ConvertedValue from_str(const 
std::string& str) { ConvertedValue conv_value; if (str == "auto_bb") { - conv_value.set_value(AUTO_BB); + conv_value.set_value(e_place_bounding_box_mode::AUTO_BB); } else if (str == "cube_bb") { - conv_value.set_value(CUBE_BB); + conv_value.set_value(e_place_bounding_box_mode::CUBE_BB); } else if (str == "per_layer_bb") { - conv_value.set_value(PER_LAYER_BB); + conv_value.set_value(e_place_bounding_box_mode::PER_LAYER_BB); } else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -450,12 +450,12 @@ struct ParsePlaceBoundingBox { ConvertedValue to_str(e_place_bounding_box_mode val) { ConvertedValue conv_value; - if (val == AUTO_BB) { + if (val == e_place_bounding_box_mode::AUTO_BB) { conv_value.set_value("auto_bb"); - } else if (val == CUBE_BB) { + } else if (val == e_place_bounding_box_mode::CUBE_BB) { conv_value.set_value("cube_bb"); } else { - VTR_ASSERT(val == PER_LAYER_BB); + VTR_ASSERT(val == e_place_bounding_box_mode::PER_LAYER_BB); conv_value.set_value("per_layer_bb"); } return conv_value; @@ -3089,9 +3089,9 @@ void set_conditional_defaults(t_options& args) { //Which placement algorithm to use? 
if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(CRITICALITY_TIMING_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { - args.PlaceAlgorithm.set(BOUNDING_BOX_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); } } @@ -3105,7 +3105,7 @@ void set_conditional_defaults(t_options& args) { // Check for correct options combinations // If you are running WLdriven placement, the RL reward function should be // either basic or nonPenalizing basic - if (args.RL_agent_placement && (args.PlaceAlgorithm == BOUNDING_BOX_PLACE || !args.timing_analysis)) { + if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") { VTR_LOG_WARN( "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n" diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index cb280ff36ec..9ea6f8d1f70 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -876,13 +876,13 @@ struct t_annealing_sched { * is used when there is no timing information available (wiring only). * SLACK_TIMING_PLACE is mainly feasible during placement quench. */ -enum e_place_algorithm { +enum class e_place_algorithm { BOUNDING_BOX_PLACE, CRITICALITY_TIMING_PLACE, SLACK_TIMING_PLACE }; -enum e_place_bounding_box_mode { +enum class e_place_bounding_box_mode { AUTO_BB, CUBE_BB, PER_LAYER_BB @@ -929,7 +929,7 @@ class t_place_algorithm { ///@brief Check if the algorithm belongs to the timing driven category. 
inline bool is_timing_driven() const { - return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE; + return algo == e_place_algorithm::CRITICALITY_TIMING_PLACE || algo == e_place_algorithm::SLACK_TIMING_PLACE; } ///@brief Accessor: returns the underlying e_place_algorithm enum value. diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 51508e65792..6aad4e512eb 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -583,7 +583,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, //slack values resulted from the proposed block moves. If the move turns out //to be accepted, we keep the updated slack values and commit the block moves. //If rejected, we reject the proposed block moves and revert this timing analysis. - if (place_algorithm == SLACK_TIMING_PLACE) { + if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { // Invalidates timing of modified connections for incremental timing updates. pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); @@ -607,7 +607,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Get the setup slack analysis cost */ //TODO: calculate a weighted average of the slack cost and wiring cost delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm; - } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { + } else if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { /* Take delta_c as a combination of timing and wiring cost. 
In * addition to `timing_tradeoff`, we normalize the cost values */ VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, @@ -621,7 +621,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; } else { - VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove bb_delta_c %e, bb_cost_norm %e\n", bb_delta_c, @@ -653,7 +653,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, costs_.cost += delta_c; costs_.bb_cost += bb_delta_c; - if (place_algorithm == SLACK_TIMING_PLACE) { + if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { // Update the timing driven cost as usual costs_.timing_cost += timing_delta_c; @@ -662,7 +662,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, commit_setup_slacks(setup_slacks_, placer_state_); } - if (place_algorithm == CRITICALITY_TIMING_PLACE) { + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { costs_.timing_cost += timing_delta_c; /* Invalidates timing of modified connections for incremental * @@ -705,7 +705,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // Restore the blk_loc_registry.block_locs data structures to their state before the move. blk_loc_registry.revert_move_blocks(blocks_affected_); - if (place_algorithm == SLACK_TIMING_PLACE) { + if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { /* Revert the timing delays and costs to pre-update values. */ /* These routines must be called after reverting the block moves. 
*/ //TODO: make this process incremental @@ -726,7 +726,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, "The current setup slacks should be identical to the values before the try swap timing info update."); } - if (place_algorithm == CRITICALITY_TIMING_PLACE) { + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { // Un-stage the values stored in proposed_* data structures placer_state_.mutable_timing().revert_td_cost(blocks_affected_); } diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 99ab6fa5ce2..a161fba3b3e 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1589,7 +1589,7 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m check_and_print_cost(new_timing_cost, costs.timing_cost, "timing_cost"); costs.timing_cost = new_timing_cost; } else { - VTR_ASSERT(placer_opts_.place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT(placer_opts_.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); costs.cost = new_bb_cost * costs.bb_cost_norm; } } diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f9c51b28e37..5d06a546cc5 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -363,7 +363,7 @@ void try_place(const Netlist<>& net_list, costs.timing_cost_norm = 1 / costs.timing_cost; costs.bb_cost_norm = 1 / costs.bb_cost; } else { - VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); /* Total cost is the same as wirelength cost normalized*/ costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL); @@ -710,7 +710,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, bool cube_bb; const int number_layers = g_vpr_ctx.device().grid.get_num_layers(); - if (place_bb_mode == AUTO_BB) { + if (place_bb_mode == e_place_bounding_box_mode::AUTO_BB) { // If the 
auto_bb is used, we analyze the RR graph to see whether is there any inter-layer connection that is not // originated from OPIN. If there is any, cube BB is chosen, otherwise, per-layer bb is chosen. if (number_layers > 1 && inter_layer_connections_limited_to_opin(rr_graph)) { @@ -718,12 +718,12 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode, } else { cube_bb = true; } - } else if (place_bb_mode == CUBE_BB) { + } else if (place_bb_mode == e_place_bounding_box_mode::CUBE_BB) { // The user has specifically asked for CUBE_BB cube_bb = true; } else { // The user has specifically asked for PER_LAYER_BB - VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB); + VTR_ASSERT_SAFE(place_bb_mode == e_place_bounding_box_mode::PER_LAYER_BB); cube_bb = false; } @@ -1143,4 +1143,4 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) { // update the graphics' reference to placement location variables get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry); #endif -} +} \ No newline at end of file diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index aa65a15110a..e3f3d9da567 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -61,7 +61,7 @@ void t_placer_costs::update_norm_factors() { //Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); } else { - VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE); + VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost } @@ -73,7 +73,7 @@ void t_placer_costs::update_norm_factors() { double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) { double total_cost = 0.0; - if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) { + if 
(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE) { // in bounding box mode we only care about wirelength total_cost = bb_cost * bb_cost_norm; } else if (placer_opts.place_algorithm.is_timing_driven()) { From 8777a4b317b0b32e92f917c6387b575d726cdac6 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Oct 2024 17:25:29 -0400 Subject: [PATCH 11/31] enum class e_agent_algorithm --- vpr/src/base/SetupVPR.cpp | 1 - vpr/src/base/read_options.cpp | 8 ++++---- vpr/src/base/vpr_types.h | 8 +------- vpr/src/pack/cluster.cpp | 2 -- vpr/src/place/RL_agent_util.cpp | 2 +- 5 files changed, 6 insertions(+), 15 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 38ac3c595c7..5b9adcaea2d 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -607,7 +607,6 @@ void SetupPackerOpts(const t_options& Options, //TODO: document? PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */ PackerOpts->auto_compute_inter_cluster_net_delay = true; - PackerOpts->packer_algorithm = PACK_GREEDY; /* DEFAULT */ PackerOpts->device_layout = Options.device_layout; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index ce20940cda5..658fb245ce6 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -470,9 +470,9 @@ struct ParsePlaceAgentAlgorithm { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "e_greedy") - conv_value.set_value(E_GREEDY); + conv_value.set_value(e_agent_algorithm::E_GREEDY); else if (str == "softmax") - conv_value.set_value(SOFTMAX); + conv_value.set_value(e_agent_algorithm::SOFTMAX); else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_agent_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; @@ -483,10 +483,10 @@ struct ParsePlaceAgentAlgorithm { ConvertedValue to_str(e_agent_algorithm val) { ConvertedValue conv_value; - if (val == E_GREEDY) + if (val == 
e_agent_algorithm::E_GREEDY) conv_value.set_value("e_greedy"); else { - VTR_ASSERT(val == SOFTMAX); + VTR_ASSERT(val == e_agent_algorithm::SOFTMAX); conv_value.set_value("softmax"); } return conv_value; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 9ea6f8d1f70..035f2d5abd0 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -789,11 +789,6 @@ enum e_stage_action { * * TODO: document each packing parameter */ -enum e_packer_algorithm { - PACK_GREEDY, - PACK_BRUTE_FORCE -}; - struct t_packer_opts { std::string circuit_file_name; std::string sdc_file_name; @@ -818,7 +813,6 @@ struct t_packer_opts { int transitive_fanout_threshold; int feasible_block_array_size; e_stage_action doPacking; - enum e_packer_algorithm packer_algorithm; std::string device_layout; e_timing_update_type timing_update_type; bool use_attraction_groups; @@ -952,7 +946,7 @@ enum class e_pad_loc_type { * Currently, the supported algorithms are: epsilon greedy and softmax * For more details, check simpleRL_move_generator.cpp */ -enum e_agent_algorithm { +enum class e_agent_algorithm { E_GREEDY, SOFTMAX }; diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp index 93683858f3f..0e9099f2ea2 100644 --- a/vpr/src/pack/cluster.cpp +++ b/vpr/src/pack/cluster.cpp @@ -97,8 +97,6 @@ std::map do_clustering(const t_packer_opts& pa /**************************************************************** * Initialization *****************************************************************/ - VTR_ASSERT(packer_opts.packer_algorithm == PACK_GREEDY); - t_cluster_progress_stats cluster_stats; //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis, diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp index e080b335fe7..1f0f45ae053 100644 --- a/vpr/src/place/RL_agent_util.cpp +++ b/vpr/src/place/RL_agent_util.cpp @@ -59,7 +59,7 @@ std::pair, std::unique_ptr> create 
second_state_avail_moves.push_back(e_move_type::NOC_ATTRACTION_CENTROID); } - if (placer_opts.place_agent_algorithm == E_GREEDY) { + if (placer_opts.place_agent_algorithm == e_agent_algorithm::E_GREEDY) { std::unique_ptr karmed_bandit_agent1, karmed_bandit_agent2; //agent's 1st state if (placer_opts.place_agent_space == e_agent_space::MOVE_BLOCK_TYPE) { From f146fd9b6bcc93a51610cba75acadc827314f969 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 20 Oct 2024 14:18:17 -0400 Subject: [PATCH 12/31] remove unused types from vpr_types.h --- vpr/src/base/vpr_types.h | 31 +++---------------------------- vpr/src/place/place.cpp | 3 +-- vpr/src/place/timing_place.cpp | 12 +++++++----- vpr/src/place/timing_place.h | 4 ++-- vpr/src/timing/timing_util.cpp | 4 ++-- vpr/src/timing/timing_util.h | 2 +- 6 files changed, 16 insertions(+), 40 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 035f2d5abd0..0daa84b5148 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -126,7 +126,7 @@ enum class e_router_lookahead { enum class e_route_bb_update { STATIC, /// { }; } // namespace std -struct t_place_region { - float capacity; /// sb; std::unique_ptr cb; short arch_wire_switch = 0; @@ -1575,16 +1568,6 @@ class t_chan_seg_details { */ typedef vtr::NdMatrix t_chan_details; -/** - * @brief A linked list of float pointers. - * - * Used for keeping track of which pathcosts in the router have been changed. 
- */ -struct t_linked_f_pointer { - t_linked_f_pointer* next; - float* fptr; -}; - constexpr bool is_pin(e_rr_type type) { return (type == IPIN || type == OPIN); } constexpr bool is_chan(e_rr_type type) { return (type == CHANX || type == CHANY); } constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == SINK); } @@ -1685,8 +1668,6 @@ struct t_non_configurable_rr_sets { std::set> edge_sets; }; -#define NO_PREVIOUS -1 - ///@brief Power estimation options struct t_power_opts { bool do_power; ///& net_list, placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin); placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin); - timing_info = make_setup_timing_info(placement_delay_calc, - placer_opts.timing_update_type); + timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 1e4985b7852..021bb6211fb 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -63,7 +63,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); // Routing for placement is not flat (at least for the time being) - float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), false); + float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); float new_crit = pow(clb_pin_crit, crit_params.crit_exponent); /* @@ -74,13 +74,15 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf */ if (!first_time_update_criticality) { if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) { - 
place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net)); + place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) { - place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), place_move_ctx.highly_crit_pins.end()); + place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), + place_move_ctx.highly_crit_pins.end()); } } else { - if (new_crit > crit_params.crit_limit) - place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net)); + if (new_crit > crit_params.crit_limit) { + place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); + } } /* The placer likes a great deal of contrast between criticalities. diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index 7ccf73c12f4..852c1aa6297 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -102,8 +102,8 @@ class PlacerCriticalities { public: //Lifetime PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); - PlacerCriticalities(const PlacerCriticalities& clb_nlist) = delete; - PlacerCriticalities& operator=(const PlacerCriticalities& clb_nlist) = delete; + PlacerCriticalities(const PlacerCriticalities&) = delete; + PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; public: //Accessors ///@brief Returns the criticality of the specified connection. 
diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 536667faa51..a210c0dbdcd 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -697,14 +697,14 @@ std::map count_clock_fanouts(const tatum::TimingGraph& */ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, - const ParentPinId& pin_id, + const ParentPinId pin_id, bool is_flat) { float pin_crit = 0.; if (is_flat) { pin_crit = timing_info.setup_pin_criticality(convert_to_atom_pin_id(pin_id)); } else { //There may be multiple atom netlist pins connected to this CLB pin - for (const auto atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) { + for (const AtomPinId atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) { //Take the maximum of the atom pin criticality as the CLB pin criticality pin_crit = std::max(pin_crit, timing_info.setup_pin_criticality(atom_pin)); } diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index 51fc0491c28..e4d45c84213 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -89,7 +89,7 @@ std::map count_clock_fanouts(const tatum::TimingGraph& //Return the criticality of a net's pin in the CLB netlist float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, - const ParentPinId& clb_pin, + const ParentPinId clb_pin, bool is_flat); //Return the setup slack of a net's pin in the CLB netlist From 7a64e46db81c3354c4bd5b8871003c2866591cf1 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 21 Oct 2024 16:22:31 -0400 Subject: [PATCH 13/31] converted logging macros in annealer.cpp to methods --- vpr/src/base/vpr_types.h | 9 +- vpr/src/draw/draw.cpp | 15 -- vpr/src/draw/draw_basic.cpp | 13 -- vpr/src/draw/draw_floorplanning.cpp | 3 - vpr/src/draw/draw_mux.cpp | 70 +++------ vpr/src/draw/draw_rr.cpp | 14 -- 
vpr/src/draw/draw_rr_edges.cpp | 17 --- vpr/src/draw/draw_searchbar.cpp | 24 +--- vpr/src/draw/draw_toggle_functions.cpp | 33 +---- vpr/src/draw/draw_triangle.cpp | 30 +--- vpr/src/place/annealer.cpp | 189 +++++++++++-------------- vpr/src/place/annealer.h | 5 + vpr/src/place/initial_placement.cpp | 9 +- vpr/src/place/move_utils.cpp | 32 ++--- vpr/src/place/move_utils.h | 2 - vpr/src/place/net_cost_handler.cpp | 2 +- vpr/src/place/net_cost_handler.h | 6 +- vpr/src/place/place.cpp | 42 +----- vpr/src/place/placer_breakpoint.cpp | 39 ++--- vpr/src/place/placer_breakpoint.h | 5 +- vpr/src/route/connection_router.cpp | 68 ++++----- vpr/src/route/route.cpp | 34 ++--- vpr/src/route/route_debug.cpp | 6 +- 23 files changed, 216 insertions(+), 451 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 0daa84b5148..3a4d89f0b19 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -86,6 +86,12 @@ enum class ScreenUpdatePriority { MAJOR = 1 }; +#ifdef VTR_ENABLE_DEBUG_LOGGING +constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = true; +#else +constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = false; +#endif + #define MAX_SHORT 32767 /* Values large enough to be way out of range for any data, but small enough @@ -1101,7 +1107,6 @@ struct t_placer_opts { float place_agent_gamma; float place_dm_rlim; e_agent_space place_agent_space; - //int place_timing_cost_func; std::string place_reward_fun; float place_crit_limit; int place_constraint_expand; @@ -1492,7 +1497,7 @@ struct t_seg_details { short arch_opin_between_dice_switch = 0; float Rmetal = 0; float Cmetal = 0; - bool twisted = 0; + bool twisted = false; enum Direction direction = Direction::NONE; int group_start = 0; int group_size = 0; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index 716647c7f36..c77ab624c5c 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -13,14 +13,11 @@ */ #include -#include #include #include #include -#include #include 
#include -#include #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" @@ -29,7 +26,6 @@ #include "vtr_color_map.h" #include "vtr_path.h" -#include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" @@ -37,15 +33,10 @@ #include "draw.h" #include "draw_basic.h" #include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_mux.h" #include "draw_searchbar.h" -#include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "atom_netlist.h" #include "tatum/report/TimingPathCollector.hpp" #include "hsl.h" #include "route_export.h" @@ -53,19 +44,13 @@ #include "save_graphics.h" #include "timing_info.h" #include "physical_types.h" -#include "route_common.h" -#include "breakpoint.h" #include "manual_moves.h" #include "draw_noc.h" #include "draw_floorplanning.h" #include "move_utils.h" #include "ui_setup.h" -#include "buttons.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. */ diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 64b3d49979f..43fc2b78b32 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -2,20 +2,15 @@ * that aren't RR nodes or muxes (they have their own file). * All functions in this file contain the prefix draw_. 
*/ #include -#include -#include #include #include #include #include -#include #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" #include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -26,21 +21,13 @@ #include "draw_rr.h" #include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" -#include "intra_logic_block.h" #include "move_utils.h" #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. */ # include diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp index 8e93d0ca7bd..9ba201987aa 100644 --- a/vpr/src/draw/draw_floorplanning.cpp +++ b/vpr/src/draw/draw_floorplanning.cpp @@ -23,9 +23,6 @@ #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. 
*/ diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp index 746af57d811..e741112b6ba 100644 --- a/vpr/src/draw/draw_mux.cpp +++ b/vpr/src/draw/draw_mux.cpp @@ -1,41 +1,15 @@ /*draw_mux.cpp contains all functions that draw muxes.*/ -#include -#include -#include -#include + #include -#include #include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" - -#include "vpr_utils.h" -#include "vpr_error.h" - -#include "globals.h" #include "draw_color.h" -#include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" + #include "draw_mux.h" #include "read_xml_arch_file.h" -#include "draw_global.h" - -#include "move_utils.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. 
*/ @@ -64,39 +38,31 @@ ezgl::rectangle draw_mux(ezgl::point2d origin, e_side orientation, float height, switch (orientation) { case TOP: //Clock-wise from bottom left - mux_polygon.push_back({origin.x - height / 2, origin.y - width / 2}); - mux_polygon.push_back( - {origin.x - (scale * height) / 2, origin.y + width / 2}); - mux_polygon.push_back( - {origin.x + (scale * height) / 2, origin.y + width / 2}); - mux_polygon.push_back({origin.x + height / 2, origin.y - width / 2}); + mux_polygon.emplace_back(origin.x - height / 2, origin.y - width / 2); + mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + height / 2, origin.y - width / 2); break; case BOTTOM: //Clock-wise from bottom left - mux_polygon.push_back( - {origin.x - (scale * height) / 2, origin.y - width / 2}); - mux_polygon.push_back({origin.x - height / 2, origin.y + width / 2}); - mux_polygon.push_back({origin.x + height / 2, origin.y + width / 2}); - mux_polygon.push_back( - {origin.x + (scale * height) / 2, origin.y - width / 2}); + mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y - width / 2); + mux_polygon.emplace_back(origin.x - height / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + height / 2, origin.y + width / 2); + mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y - width / 2); break; case LEFT: //Clock-wise from bottom left - mux_polygon.push_back( - {origin.x - width / 2, origin.y - (scale * height) / 2}); - mux_polygon.push_back( - {origin.x - width / 2, origin.y + (scale * height) / 2}); - mux_polygon.push_back({origin.x + width / 2, origin.y + height / 2}); - mux_polygon.push_back({origin.x + width / 2, origin.y - height / 2}); + mux_polygon.emplace_back(origin.x - width / 2, origin.y - (scale * height) / 2); + mux_polygon.emplace_back(origin.x - width / 2, origin.y + (scale * height) / 2); + 
mux_polygon.emplace_back(origin.x + width / 2, origin.y + height / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y - height / 2); break; case RIGHT: //Clock-wise from bottom left - mux_polygon.push_back({origin.x - width / 2, origin.y - height / 2}); - mux_polygon.push_back({origin.x - width / 2, origin.y + height / 2}); - mux_polygon.push_back( - {origin.x + width / 2, origin.y + (scale * height) / 2}); - mux_polygon.push_back( - {origin.x + width / 2, origin.y - (scale * height) / 2}); + mux_polygon.emplace_back(origin.x - width / 2, origin.y - height / 2); + mux_polygon.emplace_back(origin.x - width / 2, origin.y + height / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y + (scale * height) / 2); + mux_polygon.emplace_back(origin.x + width / 2, origin.y - (scale * height) / 2); break; default: diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index 02645f6baf5..abfbf0babe8 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -1,20 +1,13 @@ /*draw_rr.cpp contains all functions that relate to drawing routing resources.*/ #include -#include -#include #include #include -#include #include -#include #include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -25,19 +18,12 @@ #include "draw_rr.h" #include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" #include "draw_searchbar.h" #include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif - #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. 
*/ # include diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index 274e02e0fb3..c4e8cbe507b 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -1,19 +1,8 @@ /*draw_rr_edges.cpp contains all functions that draw lines between RR nodes.*/ -#include -#include -#include -#include #include -#include -#include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" #include "vpr_error.h" @@ -23,18 +12,12 @@ #include "draw.h" #include "draw_rr.h" #include "draw_rr_edges.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" #include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "draw_basic.h" -#include "move_utils.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. 
*/ diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index 00a1208bcba..834457e9263 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -1,42 +1,20 @@ /*draw_searchbar.cpp contains all functions related to searchbar actions.*/ #include -#include -#include -#include -#include -#include #include -#include #include "netlist_fwd.h" -#include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" -#include "vtr_color_map.h" -#include "vtr_path.h" #include "vpr_utils.h" -#include "vpr_error.h" #include "globals.h" #include "draw_color.h" #include "draw.h" #include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" #include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. */ @@ -261,7 +239,7 @@ std::set draw_expand_non_configurable_rr_nodes(RRNodeId from_node) { void deselect_all() { // Sets the color of all clbs, nets and rr_nodes to the default. 
- // as well as clearing the highlighed sub-block + // as well as clearing the highlighted sub-block t_draw_state* draw_state = get_draw_state_vars(); const auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp index 0f69b4c6087..9dab5955450 100644 --- a/vpr/src/draw/draw_toggle_functions.cpp +++ b/vpr/src/draw/draw_toggle_functions.cpp @@ -1,43 +1,19 @@ - -#include -#include #include -#include -#include -#include #include #include -#include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" -#include "vtr_color_map.h" -#include "vtr_path.h" - #include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" #include "draw_color.h" #include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" #include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" -#include "read_xml_arch_file.h" + #include "draw_global.h" #include "draw_basic.h" -#include "hsl.h" -#include "move_utils.h" -#include "intra_logic_block.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. 
*/ @@ -70,13 +46,10 @@ constexpr float EMPTY_BLOCK_LIGHTEN_FACTOR = 0.20; * @param app ezgl::application */ void toggle_nets_cbk(GtkComboBox* self, ezgl::application* app) { - std::cout << "Nets toggled" << std::endl; enum e_draw_nets new_state; t_draw_state* draw_state = get_draw_state_vars(); - std::cout << draw_state << std::endl; gchar* setting = gtk_combo_box_text_get_active_text( GTK_COMBO_BOX_TEXT(self)); - std::cout << setting << std::endl; // assign corresponding enum value to draw_state->show_nets if (strcmp(setting, "None") == 0) new_state = DRAW_NO_NETS; @@ -467,7 +440,7 @@ void select_layer_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/ // Only iterate through checkboxes with name "Layer ...", skip Cross Layer Connection if (std::string(name).find("Layer") != std::string::npos && std::string(name).find("Cross") == std::string::npos) { - // Change the the boolean of the draw_layer_display vector depending on checkbox + // Change the boolean of the draw_layer_display vector depending on checkbox if (state) { draw_state->draw_layer_display[index].visible = true; } else { @@ -492,7 +465,7 @@ void transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/ int index = 0; // Iterate over transparency layers - for (GList* iter = children; iter != NULL; iter = g_list_next(iter)) { + for (GList* iter = children; iter != nullptr; iter = g_list_next(iter)) { if (GTK_IS_SPIN_BUTTON(iter->data)) { GtkWidget* spin_button = GTK_WIDGET(iter->data); const gchar* name = gtk_widget_get_name(spin_button); diff --git a/vpr/src/draw/draw_triangle.cpp b/vpr/src/draw/draw_triangle.cpp index 370868efbbc..b37785b3ab1 100644 --- a/vpr/src/draw/draw_triangle.cpp +++ b/vpr/src/draw/draw_triangle.cpp @@ -1,40 +1,12 @@ -#include -#include -#include + #include -#include -#include -#include -#include #include "vtr_assert.h" -#include "vtr_ndoffsetmatrix.h" -#include "vtr_memory.h" -#include "vtr_log.h" #include "vtr_color_map.h" -#include "vtr_path.h" - 
-#include "vpr_utils.h" -#include "vpr_error.h" - -#include "globals.h" #include "draw_color.h" -#include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_toggle_functions.h" #include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" -#include "read_xml_arch_file.h" #include "draw_global.h" -#include "draw_basic.h" -#include "move_utils.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "move_utils.h" -#endif #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * * track CPU runtime. */ diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 6aad4e512eb..5876f468e6f 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -15,78 +15,7 @@ #include "NetPinTimingInvalidator.h" #include "place_timing_update.h" #include "read_place.h" - -#ifdef VTR_ENABLE_DEBUG_LOGGIING -# define LOG_MOVE_STATS_HEADER() \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "temp,from_blk,to_blk,from_type,to_type," \ - "blk_count," \ - "delta_cost,delta_bb_cost,delta_td_cost," \ - "outcome,reason\n"); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, affected_blocks) \ - do { \ - if (f_move_stats_file) { \ - auto& place_ctx = g_vpr_ctx.placement(); \ - auto& cluster_ctx = g_vpr_ctx.clustering(); \ - ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num; \ - \ - t_pl_loc to = affected_blocks.moved_blocks[0].new_loc; \ - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; \ - \ - t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \ - t_logical_block_type_ptr to_type = nullptr; \ - if (b_to) { \ - to_type = cluster_ctx.clb_nlist.block_type(b_to); \ - } \ - \ - fprintf(f_move_stats_file.get(), \ - "%g," \ - "%d,%d," \ - "%s,%s," \ - "%d,", \ - t, \ - int(b_from), int(b_to), \ - from_type->name, (to_type ? 
to_type->name : "EMPTY"), \ - affected_blocks.moved_blocks.size()); \ - } \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - if (f_move_stats_file) { \ - fprintf(f_move_stats_file.get(), \ - "%g,%g,%g," \ - "%s,%s\n", \ - delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason); \ - } \ - } while (false) - -#else - -# define LOG_MOVE_STATS_HEADER() \ - do { \ - fprintf(move_stats_file_.get(), \ - "VTR_ENABLE_DEBUG_LOGGING disabled " \ - "-- No move stats recorded\n"); \ - } while (false) - -# define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \ - do { \ - } while (false) - -# define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \ - outcome, reason) \ - do { \ - } while (false) - -#endif +#include "placer_breakpoint.h" /** * @brief Check if the setup slack has gotten better or worse due to block swap. @@ -456,11 +385,6 @@ float PlacementAnnealer::estimate_starting_temperature() { num_accepted, move_lim); } -#ifdef VERBOSE - /* Print stats related to finding the initital temp. */ - VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev); -#endif - // Improved initial placement uses a fast SA for NoC routers and centroid placement // for other blocks. 
The temperature is reduced to prevent SA from destroying the initial placement float init_temp = std_dev / 64; @@ -483,9 +407,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, float rlim_escape_fraction = placer_opts_.rlim_escape_fraction; float timing_tradeoff = placer_opts_.timing_tradeoff; - PlaceCritParams crit_params; - crit_params.crit_exponent = annealing_state_.crit_exponent; - crit_params.crit_limit = placer_opts_.place_crit_limit; + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; // move type and block type chosen by the agent t_propose_action proposed_action{e_move_type::UNIFORM, -1}; @@ -537,7 +460,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; } - LOG_MOVE_STATS_PROPOSED(t, blocks_affected_); + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) LOG_MOVE_STATS_PROPOSED(); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n", @@ -546,10 +470,12 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, e_move_result move_outcome = e_move_result::ABORTED; if (create_move_outcome == e_create_move::ABORT) { - LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), - std::numeric_limits::quiet_NaN(), "ABORTED", - "illegal move"); + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + LOG_MOVE_STATS_OUTCOME(std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN(), + std::numeric_limits::quiet_NaN(), "ABORTED", + "illegal move"); + } move_outcome = ABORTED; @@ -747,7 +673,9 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, move_outcome_stats.delta_bb_cost_abs = bb_delta_c; 
move_outcome_stats.delta_timing_cost_abs = timing_delta_c; - LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); + } } move_outcome_stats.outcome = move_outcome; @@ -759,20 +687,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor); } -#ifdef VTR_ENABLE_DEBUG_LOGGING -# ifndef NO_GRAPHICS - stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c); -# endif +#ifndef NO_GRAPHICS + stop_placement_and_check_breakpoints(blocks_affected_, move_outcome, delta_c, bb_delta_c, timing_delta_c); #endif + // Clear the data structure containing block move info blocks_affected_.clear_move_blocks(); -#if 0 - // Check that each accepted swap yields a valid placement. This will - // greatly slow the placer, but can debug some issues. 
- check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts); -#endif VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n", costs_.cost, costs_.bb_cost, costs_.timing_cost); @@ -786,12 +708,9 @@ void PlacementAnnealer::outer_loop_update_timing_info() { * for normalizing the tradeoff between timing and wirelength (bb) */ if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter || placer_opts_.inner_loop_recompute_divider != 0) { -#ifdef VERBOSE - VTR_LOG("Outer loop recompute criticalities\n"); -#endif - PlaceCritParams crit_params; - crit_params.crit_exponent = annealing_state_.crit_exponent; - crit_params.crit_limit = placer_opts_.place_crit_limit; + + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; //Update all timing related classes perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_, @@ -844,13 +763,9 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) { inner_crit_iter_count = 0; -#ifdef VERBOSE - VTR_LOG("Inner loop recompute criticalities\n"); -#endif - PlaceCritParams crit_params; - crit_params.crit_exponent = annealing_state_.crit_exponent; - crit_params.crit_limit = placer_opts_.place_crit_limit; + PlaceCritParams crit_params{annealing_state_.crit_exponent, + placer_opts_.place_crit_limit}; // Update all timing related classes perform_full_timing_update(crit_params, delay_model_, criticalities_, @@ -919,3 +834,63 @@ void PlacementAnnealer::start_quench() { std::tuple PlacementAnnealer::get_stats() const { return {swap_stats_, move_type_stats_, placer_stats_}; } + +void PlacementAnnealer::LOG_MOVE_STATS_HEADER() { + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "temp,from_blk,to_blk,from_type,to_type," + 
"blk_count," + "delta_cost,delta_bb_cost,delta_td_cost," + "outcome,reason\n"); + } + } else { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "VTR_ENABLE_DEBUG_LOGGING disabled " + "-- No move stats recorded\n"); + } + } +} + +void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& grid_blocks = placer_state_.grid_blocks(); + + if (move_stats_file_) { + + ClusterBlockId b_from = blocks_affected_.moved_blocks[0].block_num; + + + t_pl_loc to = blocks_affected_.moved_blocks[0].new_loc; + ClusterBlockId b_to = grid_blocks.block_at_location(to); + + t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); + t_logical_block_type_ptr to_type = nullptr; + if (b_to) { + to_type = cluster_ctx.clb_nlist.block_type(b_to); + } + + fprintf(move_stats_file_.get(), + "%g," + "%d,%d," + "%s,%s," + "%d,", + annealing_state_.t, + int(b_from), int(b_to), + from_type->name, (to_type ? to_type->name : "EMPTY"), + blocks_affected_.moved_blocks.size()); + } +} + +void PlacementAnnealer::LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, + const char* outcome, const char* reason) { + if (move_stats_file_) { + fprintf(move_stats_file_.get(), + "%g,%g,%g," + "%s,%s\n", + delta_cost, delta_bb_cost, delta_td_cost, + outcome, reason); + } +} \ No newline at end of file diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index b932ed56d7e..dc281e33a02 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -250,6 +250,11 @@ class PlacementAnnealer { int tot_iter_; bool quench_started_; + void LOG_MOVE_STATS_HEADER(); + void LOG_MOVE_STATS_PROPOSED(); + void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, + const char* outcome, const char* reason); + private: ///@brief Find the starting temperature for the annealing loop. 
float estimate_starting_temperature(); diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index bb76d0d6cc8..8ff45fa96dd 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -1038,11 +1038,10 @@ static void place_all_blocks(const t_placer_opts& placer_opts, auto blk_id_type = cluster_ctx.clb_nlist.block_type(blk_id); -#ifdef VTR_ENABLE_DEBUG_LOGGING - enable_placer_debug(placer_opts, blk_id); -#else - (void)placer_opts; -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + enable_placer_debug(placer_opts, blk_id); + } + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Popped Block %d\n", size_t(blk_id)); blocks_placed_since_heap_update++; diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 262a801c611..7b2d02ea965 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -12,10 +12,20 @@ #include "place_constraints.h" #include "placer_state.h" -//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. +//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. +// When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. 
//Note: The flag is only effective if compiled with VTR_ENABLE_DEBUG_LOGGING bool f_placer_breakpoint_reached = false; +//Accessor for f_placer_breakpoint_reached +bool placer_breakpoint_reached() { + return f_placer_breakpoint_reached; +} + +void set_placer_breakpoint_reached(bool flag) { + f_placer_breakpoint_reached = flag; +} + e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlockId b_from, t_pl_loc to, @@ -487,7 +497,6 @@ bool is_legal_swap_to_location(ClusterBlockId blk, return true; } -#ifdef VTR_ENABLE_DEBUG_LOGGING void enable_placer_debug(const t_placer_opts& placer_opts, ClusterBlockId blk_id) { if (!blk_id.is_valid()) { @@ -535,7 +544,6 @@ void enable_placer_debug(const t_placer_opts& placer_opts, if (active_blk_debug) f_placer_debug &= match_blk; if (active_net_debug) f_placer_debug &= match_net; } -#endif ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, @@ -564,11 +572,10 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, b_from = pick_from_block(logical_blk_type_index); } } -#ifdef VTR_ENABLE_DEBUG_LOGGING - enable_placer_debug(placer_opts, b_from); -#else - (void)placer_opts; -#endif + + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + enable_placer_debug(placer_opts, b_from); + } return b_from; } @@ -771,15 +778,6 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, return true; } -//Accessor for f_placer_breakpoint_reached -bool placer_breakpoint_reached() { - return f_placer_breakpoint_reached; -} - -void set_placer_breakpoint_reached(bool flag) { - f_placer_breakpoint_reached = flag; -} - bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_pl_loc& from_loc, const t_bb* limit_coords, diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 99151695dab..0ca993abe93 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -439,7 +439,6 @@ t_bb union_2d_bb(const std::vector& tbb_vec); 
std::pair union_2d_bb_incr(const std::vector& num_edge_vec, const std::vector& bb_vec); -#ifdef VTR_ENABLE_DEBUG_LOGGING /** * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets * connected to the block share the same ID as the net ID passed to the placer_debug_net parameter of the command line, @@ -450,6 +449,5 @@ std::pair union_2d_bb_incr(const std::vector& num_edge_vec, */ void enable_placer_debug(const t_placer_opts& placer_opts, ClusterBlockId blk_id); -#endif #endif diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index a161fba3b3e..d213e2827ab 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -104,12 +104,12 @@ static double wirelength_crossing_count(size_t fanout); NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, - size_t num_nets, bool cube_bb) : cube_bb_(cube_bb) , placer_state_(placer_state) , placer_opts_(placer_opts) { const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); // Either 3D BB or per layer BB data structure are used, not both. if (cube_bb_) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 65fab00afc2..12ded6db838 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -43,11 +43,11 @@ class NetCostHandler { * are affected by a move and data needed per net about where their terminals are in order to quickly (incrementally) update * their wirelength costs. These data structures are (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count, * and ts_nets_to_update. - * @param num_nets Number of nets in the netlist used by the placement engine (currently clustered netlist) + * @param placer_opts Contains some parameters that determine how the bounding box is computed. 
+ * @param placer_state Contains information about block locations and net bounding boxes. * @param cube_bb True if the 3D bounding box should be used, false otherwise. - * @param place_cost_exp It is an exponent to which you take the average inverse channel */ - NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, size_t num_nets, bool cube_bb); + NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb); /** * @brief Finds the bb cost from scratch. diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 2c07a2354f7..522d6877489 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1,9 +1,6 @@ #include #include #include -#include -#include -#include #include #include @@ -11,29 +8,21 @@ #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_util.h" -#include "vtr_random.h" -#include "vtr_geometry.h" #include "vtr_time.h" #include "vtr_math.h" -#include "vtr_ndmatrix.h" #include "vpr_types.h" #include "vpr_error.h" #include "vpr_utils.h" -#include "vpr_net_pins_matrix.h" #include "globals.h" #include "place.h" #include "annealer.h" #include "read_place.h" #include "draw.h" -#include "place_and_route.h" -#include "net_delay.h" -#include "timing_place_lookup.h" #include "timing_place.h" #include "read_xml_arch_file.h" #include "echo_files.h" -#include "place_macro.h" #include "histogram.h" #include "place_util.h" #include "analytic_placer.h" @@ -43,7 +32,6 @@ #include "move_transactions.h" #include "move_utils.h" #include "place_constraints.h" -#include "manual_moves.h" #include "buttons.h" #include "manual_move_generator.h" @@ -56,14 +44,11 @@ #include "tatum/echo_writer.hpp" #include "tatum/TimingReporter.hpp" -#include "placer_breakpoint.h" #include "RL_agent_util.h" #include "place_checkpoint.h" #include "clustered_netlist_utils.h" -#include "cluster_placement.h" - #include "noc_place_utils.h" #include "net_cost_handler.h" @@ -75,15 +60,6 @@ */ static constexpr float 
REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; -#ifdef VTR_ENABLE_DEBUG_LOGGING -# include "draw_types.h" -# include "draw_global.h" -# include "draw_color.h" -#endif - -/************** Types and defines local to place.c ***************************/ -constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); - /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE void print_clb_placement(const char* fname); @@ -369,6 +345,7 @@ void try_place(const Netlist<>& net_list, costs.bb_cost_norm = 1 / costs.bb_cost; /* Timing cost and normalization factors are not used */ + constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); costs.timing_cost = INVALID_COST; costs.timing_cost_norm = INVALID_COST; } @@ -673,10 +650,8 @@ void try_place(const Netlist<>& net_list, free_placement_structs(); - print_timing_stats("Placement Quench", post_quench_timing_stats, - pre_quench_timing_stats); - print_timing_stats("Placement Total ", timing_ctx.stats, - pre_place_timing_stats); + print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats); + print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats); VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", p_runtime_ctx.f_update_td_costs_connections_elapsed_sec, @@ -736,28 +711,19 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac const std::vector& directs, PlacerState& placer_state, std::optional& noc_cost_handler) { - const auto& device_ctx = g_vpr_ctx.device(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); place_ctx.lock_loc_vars(); - size_t num_nets = cluster_ctx.clb_nlist.nets().size(); - init_placement_context(placer_state.mutable_blk_loc_registry(), directs); - int max_pins_per_clb = 0; - for (const t_physical_tile_type& type : device_ctx.physical_tile_types) { - max_pins_per_clb = std::max(max_pins_per_clb, 
type.num_pins); - } - place_ctx.compressed_block_grids = create_compressed_block_grids(); if (noc_opts.noc) { noc_cost_handler.emplace(placer_state.block_locs()); } - return NetCostHandler{placer_opts, placer_state, num_nets, place_ctx.cube_bb}; + return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb}; } /* Frees the major structures needed by the placer (and not needed * diff --git a/vpr/src/place/placer_breakpoint.cpp b/vpr/src/place/placer_breakpoint.cpp index b576bc64f04..7b0f561f152 100644 --- a/vpr/src/place/placer_breakpoint.cpp +++ b/vpr/src/place/placer_breakpoint.cpp @@ -1,45 +1,52 @@ #include "placer_breakpoint.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING //map of the available move types and their corresponding type number std::map available_move_types = { {0, "Uniform"}}; -# ifndef NO_GRAPHICS +#ifndef NO_GRAPHICS //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected) { - get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.clear(); + BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state(); + + bp_state->blocks_affected_by_move.clear(); for (size_t i = 0; i < blocksAffected.moved_blocks.size(); i++) { //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. 
And this vector has to be of type int to be recognized in expr_eval class - get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num)); + bp_state->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num)); } } -void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c) { +void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, + double delta_c, double bb_delta_c, double timing_delta_c) { t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->list_of_breakpoints.size() != 0) { + BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state(); + + if (!draw_state->list_of_breakpoints.empty()) { //update current information transform_blocks_affected(blocks_affected); - get_bp_state_globals()->get_glob_breakpoint_state()->move_num++; - get_bp_state_globals()->get_glob_breakpoint_state()->from_block = size_t(blocks_affected.moved_blocks[0].block_num); + bp_state->move_num++; + bp_state->from_block = size_t(blocks_affected.moved_blocks[0].block_num); //check for breakpoints set_placer_breakpoint_reached(check_for_breakpoints(true)); // the passed flag is true as we are in the placer - if (placer_breakpoint_reached()) - breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), true); - } else + if (placer_breakpoint_reached()) { + breakpoint_info_window(bp_state->bp_description, *bp_state, true); + } + } else { set_placer_breakpoint_reached(false); + } if (placer_breakpoint_reached() && draw_state->show_graphics) { std::string msg = available_move_types[0]; - if (move_outcome == 0) + if (move_outcome == 0) { msg += vtr::string_fmt(", Rejected"); - else if (move_outcome == 1) + } else if 
(move_outcome == 1) { msg += vtr::string_fmt(", Accepted"); - else + } else { msg += vtr::string_fmt(", Aborted"); + } msg += vtr::string_fmt(", Delta_cost: %1.6f (bb_delta_cost= %1.5f , timing_delta_c= %6.1e)", delta_c, bb_delta_c, timing_delta_c); @@ -48,6 +55,4 @@ void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affect } } -# endif //NO_GRAPHICS - -#endif //VTR_ENABLE_DEBUG_LOGGING +#endif //NO_GRAPHICS diff --git a/vpr/src/place/placer_breakpoint.h b/vpr/src/place/placer_breakpoint.h index c01ef77450c..510b7071e0d 100644 --- a/vpr/src/place/placer_breakpoint.h +++ b/vpr/src/place/placer_breakpoint.h @@ -7,14 +7,13 @@ #include "breakpoint.h" #include "draw.h" -#ifdef VTR_ENABLE_DEBUG_LOGGING //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected); //checks the breakpoint and see whether one of them was reached and pause place,emt accordingly -void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c); +void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, + double delta_c, double bb_delta_c, double timing_delta_c); -#endif #endif diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 5409d5ec49f..1cdb4c8cfd5 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -11,17 +11,10 @@ static bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node); -#ifdef VTR_ENABLE_DEBUG_LOGGING static void update_router_stats(RouterStats* router_stats, bool is_push, - RRNodeId rr_node_id, + RRNodeId rr_node_i, const RRGraphView* rr_graph); -#else -static void update_router_stats(RouterStats* router_stats, - bool is_push, - RRNodeId /*rr_node_id*/, - const 
RRGraphView* /*rr_graph*/); -#endif /** return tuple */ template @@ -225,7 +218,7 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeI // cheapest t_heap in current route tree to be expanded on cheapest = heap_.get_heap_head(); update_router_stats(router_stats_, - false, + /*is_push=*/false, cheapest->index, rr_graph_); @@ -315,7 +308,7 @@ vtr::vector ConnectionRouter::timing_driven_find_all_sho // cheapest t_heap in current route tree to be expanded on t_heap* cheapest = heap_.get_heap_head(); update_router_stats(router_stats_, - false, + /*is_push=*/false, cheapest->index, rr_graph_); @@ -610,7 +603,7 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params& heap_.add_to_heap(next_ptr); update_router_stats(router_stats_, - true, + /*is_push=*/true, to_node, rr_graph_); @@ -925,13 +918,13 @@ void ConnectionRouter::add_route_tree_node_to_heap( } update_router_stats(router_stats_, - true, + /*is_push=*/true, inode, rr_graph_); -#ifdef VTR_ENABLE_DEBUG_LOGGING - router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++; -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++; + } } /* Expand bb by inode's extents and clip against net_bb */ @@ -1073,45 +1066,38 @@ static inline bool relevant_node_to_target(const RRGraphView* rr_graph, return false; } -#ifdef VTR_ENABLE_DEBUG_LOGGING static inline void update_router_stats(RouterStats* router_stats, bool is_push, RRNodeId rr_node_id, const RRGraphView* rr_graph) { -#else -static inline void update_router_stats(RouterStats* router_stats, - bool is_push, - RRNodeId /*rr_node_id*/, - const RRGraphView* /*rr_graph*/) { -#endif if (is_push) { router_stats->heap_pushes++; } else { router_stats->heap_pops++; } -#ifdef VTR_ENABLE_DEBUG_LOGGING - auto node_type = rr_graph->node_type(rr_node_id); - VTR_ASSERT(node_type != NUM_RR_TYPES); - - if (is_inter_cluster_node(*rr_graph, rr_node_id)) { - if (is_push) { - 
router_stats->inter_cluster_node_pushes++; - router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; - } else { - router_stats->inter_cluster_node_pops++; - router_stats->inter_cluster_node_type_cnt_pops[node_type]++; - } - } else { - if (is_push) { - router_stats->intra_cluster_node_pushes++; - router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + auto node_type = rr_graph->node_type(rr_node_id); + VTR_ASSERT(node_type != NUM_RR_TYPES); + + if (is_inter_cluster_node(*rr_graph, rr_node_id)) { + if (is_push) { + router_stats->inter_cluster_node_pushes++; + router_stats->inter_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->inter_cluster_node_pops++; + router_stats->inter_cluster_node_type_cnt_pops[node_type]++; + } } else { - router_stats->intra_cluster_node_pops++; - router_stats->intra_cluster_node_type_cnt_pops[node_type]++; + if (is_push) { + router_stats->intra_cluster_node_pushes++; + router_stats->intra_cluster_node_type_cnt_pushes[node_type]++; + } else { + router_stats->intra_cluster_node_pops++; + router_stats->intra_cluster_node_type_cnt_pops[node_type]++; + } } } -#endif } std::unique_ptr make_connection_router(e_heap_type heap_type, diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp index 24ee0b5a8cf..5cd8f3faee3 100644 --- a/vpr/src/route/route.cpp +++ b/vpr/src/route/route.cpp @@ -592,10 +592,12 @@ bool route(const Netlist<>& net_list, //If the routing fails, print the overused info print_overused_nodes_status(router_opts, overuse_info); -#ifdef VTR_ENABLE_DEBUG_LOGGING - if (f_router_debug) - print_invalid_routing_info(net_list, is_flat); -#endif + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + if (f_router_debug) { + print_invalid_routing_info(net_list, is_flat); + } + } + } if (router_opts.with_timing_analysis) { @@ -608,19 +610,19 @@ bool route(const Netlist<>& net_list, VTR_LOG( "Router Stats: total_nets_routed: %zu 
total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu ", router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops); -#ifdef VTR_ENABLE_DEBUG_LOGGING - VTR_LOG( - "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", - router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, - router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); - for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { - VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); - VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); - VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + VTR_LOG( + "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", + router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, + router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); + for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { + VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], 
router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + } } -#endif VTR_LOG("\n"); return success; diff --git a/vpr/src/route/route_debug.cpp b/vpr/src/route/route_debug.cpp index 022b8da8071..6745b67013a 100644 --- a/vpr/src/route/route_debug.cpp +++ b/vpr/src/route/route_debug.cpp @@ -24,7 +24,7 @@ void enable_router_debug( router->set_router_debug(f_router_debug); -#ifndef VTR_ENABLE_DEBUG_LOGGING - VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n"); -#endif + if constexpr (!VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { + VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n"); + } } From b240d7ff755417c6943ef5b14642df93eae921c1 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 4 Nov 2024 10:59:31 -0500 Subject: [PATCH 14/31] call c_str() for name member variable --- vpr/src/place/annealer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 5876f468e6f..b5e847eeb77 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -879,7 +879,7 @@ void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() { "%d,", annealing_state_.t, int(b_from), int(b_to), - from_type->name, (to_type ? to_type->name : "EMPTY"), + from_type->name.c_str(), (to_type ? 
to_type->name.c_str() : "EMPTY"), blocks_affected_.moved_blocks.size()); } } From e21e1321472ad83f37bcb55d82892721d381e588 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 6 Nov 2024 12:42:58 -0500 Subject: [PATCH 15/31] enum class e_move_result --- vpr/src/draw/manual_moves.cpp | 6 +++--- vpr/src/draw/manual_moves.h | 4 ++-- vpr/src/place/annealer.cpp | 24 ++++++++++++------------ vpr/src/place/move_generator.h | 2 +- vpr/src/place/move_utils.cpp | 19 +++++++++++++++++-- vpr/src/place/move_utils.h | 2 +- vpr/src/place/placer_breakpoint.cpp | 9 ++++----- 7 files changed, 40 insertions(+), 26 deletions(-) diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 0becc4917a9..9554d1491eb 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -254,16 +254,16 @@ void manual_move_cost_summary_dialog() { switch (result) { //If the user accepts the manual move case GTK_RESPONSE_ACCEPT: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = ACCEPTED; + draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ACCEPTED; application.update_message(msg); break; //If the user rejects the manual move case GTK_RESPONSE_REJECT: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = REJECTED; + draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::REJECTED; application.update_message("Manual move was rejected"); break; default: - draw_state->manual_moves_state.manual_move_info.user_move_outcome = ABORTED; + draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ABORTED; break; } diff --git a/vpr/src/draw/manual_moves.h b/vpr/src/draw/manual_moves.h index 7f78ff0e876..a5a90fb037f 100644 --- a/vpr/src/draw/manual_moves.h +++ b/vpr/src/draw/manual_moves.h @@ -56,8 +56,8 @@ struct ManualMovesInfo { double delta_bounding_box = 0; bool valid_input = true; t_pl_loc to_location; - e_move_result placer_move_outcome = 
ABORTED; - e_move_result user_move_outcome = ABORTED; + e_move_result placer_move_outcome = e_move_result::ABORTED; + e_move_result user_move_outcome = e_move_result::ABORTED; }; /** diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 29defbada23..a5740cfe20a 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -339,12 +339,12 @@ float PlacementAnnealer::estimate_starting_temperature() { e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm, REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled); - if (swap_result == ACCEPTED) { + if (swap_result == e_move_result::ACCEPTED) { num_accepted++; av += costs_.cost; sum_of_squares += costs_.cost * costs_.cost; swap_stats_.num_swap_accepted++; - } else if (swap_result == ABORTED) { + } else if (swap_result == e_move_result::ABORTED) { swap_stats_.num_swap_aborted++; } else { swap_stats_.num_swap_rejected++; @@ -455,7 +455,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, "illegal move"); } - move_outcome = ABORTED; + move_outcome = e_move_result::ABORTED; } else { VTR_ASSERT(create_move_outcome == e_create_move::VALID); @@ -553,7 +553,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, } #endif //NO_GRAPHICS - if (move_outcome == ACCEPTED) { + if (move_outcome == e_move_result::ACCEPTED) { costs_.cost += delta_c; costs_.bb_cost += bb_delta_c; @@ -601,7 +601,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, #endif //NO_GRAPHICS } else { - VTR_ASSERT_SAFE(move_outcome == REJECTED); + VTR_ASSERT_SAFE(move_outcome == e_move_result::REJECTED); // Reset the net cost function flags first. 
net_cost_handler_.reset_move_nets(); @@ -652,7 +652,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, move_outcome_stats.delta_timing_cost_abs = timing_delta_c; if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { - LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), ""); + LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome == e_move_result::ACCEPTED ? "ACCEPTED" : "REJECTED"), ""); } } move_outcome_stats.outcome = move_outcome; @@ -721,11 +721,11 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, timing_bb_factor, manual_move_enabled); - if (swap_result == ACCEPTED) { + if (swap_result == e_move_result::ACCEPTED) { // Move was accepted. Update statistics that are useful for the annealing schedule. placer_stats_.single_swap_update(costs_); swap_stats_.num_swap_accepted++; - } else if (swap_result == ABORTED) { + } else if (swap_result == e_move_result::ABORTED) { swap_stats_.num_swap_aborted++; } else { // swap_result == REJECTED swap_stats_.num_swap_rejected++; @@ -879,20 +879,20 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c); if (delta_c <= 0) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n"); - return ACCEPTED; + return e_move_result::ACCEPTED; } if (t == 0.) 
{ VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n"); - return REJECTED; + return e_move_result::REJECTED; } float fnum = rng_.frand(); float prob_fac = std::exp(-delta_c / t); if (prob_fac > fnum) { VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n"); - return ACCEPTED; + return e_move_result::ACCEPTED; } VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); - return REJECTED; + return e_move_result::REJECTED; } \ No newline at end of file diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 42e318cc0a4..172c04d34f5 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -17,7 +17,7 @@ struct MoveOutcomeStats { float delta_bb_cost_abs = std::numeric_limits::quiet_NaN(); float delta_timing_cost_abs = std::numeric_limits::quiet_NaN(); - e_move_result outcome = ABORTED; + e_move_result outcome = e_move_result::ABORTED; float elapsed_time = std::numeric_limits::quiet_NaN(); }; diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 0ddd7062ee0..4cf2086c277 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -1265,8 +1265,23 @@ bool intersect_range_limit_with_floorplan_constraints(ClusterBlockId b_from, } std::string e_move_result_to_string(e_move_result move_outcome) { - std::string move_result_to_string[] = {"Rejected", "Accepted", "Aborted"}; - return move_result_to_string[move_outcome]; + switch (move_outcome) { + case e_move_result::REJECTED: + return "Rejected"; + break; + + case e_move_result::ACCEPTED: + return "Accepted"; + break; + + case e_move_result::ABORTED: + return "Aborted"; + break; + + default: + return "Unsupported Move Outcome!"; + break; + } } int find_free_layer(t_logical_block_type_ptr logical_block, diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 295d5fc6f4e..e5555648866 100644 --- 
a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -19,7 +19,7 @@ constexpr size_t SMALL_NET = 4; /* This is for the placement swap routines. A swap attempt could be * * rejected, accepted or aborted (due to the limitations placed on the * * carry chain support at this point). */ -enum e_move_result { +enum class e_move_result { REJECTED, ACCEPTED, ABORTED diff --git a/vpr/src/place/placer_breakpoint.cpp b/vpr/src/place/placer_breakpoint.cpp index 7b0f561f152..a31a0add053 100644 --- a/vpr/src/place/placer_breakpoint.cpp +++ b/vpr/src/place/placer_breakpoint.cpp @@ -11,10 +11,9 @@ void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected) { BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state(); bp_state->blocks_affected_by_move.clear(); - for (size_t i = 0; i < blocksAffected.moved_blocks.size(); i++) { + for (const t_pl_moved_block& moved_block : blocksAffected.moved_blocks) { //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. 
And this vector has to be of type int to be recognized in expr_eval class - - bp_state->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num)); + bp_state->blocks_affected_by_move.push_back(size_t(moved_block.block_num)); } } @@ -40,9 +39,9 @@ void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affect if (placer_breakpoint_reached() && draw_state->show_graphics) { std::string msg = available_move_types[0]; - if (move_outcome == 0) { + if (move_outcome == e_move_result::REJECTED) { msg += vtr::string_fmt(", Rejected"); - } else if (move_outcome == 1) { + } else if (move_outcome == e_move_result::ACCEPTED) { msg += vtr::string_fmt(", Accepted"); } else { msg += vtr::string_fmt(", Aborted"); From a69ee82f7d16a2e40b99e7f4d6b35c53b7f0625b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 9 Nov 2024 17:55:16 -0500 Subject: [PATCH 16/31] add some comments and update golder results --- vpr/src/base/place_and_route.cpp | 4 +- vpr/src/place/annealer.cpp | 44 +++++++++---------- vpr/src/place/annealer.h | 16 ++++++- .../config/golden_results.txt | 2 +- .../config/golden_results.txt | 4 +- .../config/golden_results.txt | 2 +- .../config/golden_results.txt | 2 +- .../config/golden_results.txt | 2 +- .../config/golden_results.txt | 2 +- 9 files changed, 45 insertions(+), 33 deletions(-) diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index 6c448af6c7b..ba7e20ccd80 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -326,8 +326,8 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts, arch->Chans, det_routing_arch, segment_inf, - /*is_flat=*/arch->directs, - false); + arch->directs, + /*is_flat=*/false); } success = route(router_net_list, diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 
a5740cfe20a..44fd2e85f12 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -491,18 +491,18 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // Invalidates timing of modified connections for incremental timing updates. pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ + /* Update the connection_timing_cost and connection_delay + * values from the temporary values. */ placer_state_.mutable_timing().commit_td_cost(blocks_affected_); - /* Update timing information. Since we are analyzing setup slacks, * - * we only update those values and keep the criticalities stale * - * so as not to interfere with the original timing driven algorithm. * + /* Update timing information. Since we are analyzing setup slacks, + * we only update those values and keep the criticalities stale + * so as not to interfere with the original timing driven algorithm. * - * Note: the timing info must be updated after applying block moves * - * and committing the timing driven delays and costs. * - * If we wish to revert this timing update due to move rejection, * - * we need to revert block moves and restore the timing values. */ + * Note: the timing info must be updated after applying block moves + * and committing the timing driven delays and costs. + * If we wish to revert this timing update due to move rejection, + * we need to revert block moves and restore the timing values. */ criticalities_->disable_update(); setup_slacks_->enable_update(); update_timing_classes(crit_params, timing_info_, criticalities_, @@ -543,7 +543,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_); } - /* 1 -> move accepted, 0 -> rejected. */ + // 1 -> move accepted, 0 -> rejected. 
move_outcome = assess_swap_(delta_c, annealing_state_.t); //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. @@ -569,25 +569,27 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { costs_.timing_cost += timing_delta_c; - /* Invalidates timing of modified connections for incremental * - * timing updates. These invalidations are accumulated for a * - * big timing update in the outer loop. */ + /* Invalidates timing of modified connections for incremental + * timing updates. These invalidations are accumulated for a + * big timing update in the outer loop. */ pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); - /* Update the connection_timing_cost and connection_delay * - * values from the temporary values. */ + /* Update the connection_timing_cost and connection_delay + * values from the temporary values. */ placer_state_.mutable_timing().commit_td_cost(blocks_affected_); } - /* Update net cost functions and reset flags. */ + // Update net cost functions and reset flags. net_cost_handler_.update_move_nets(); - /* Update clb data structures since we kept the move. */ + // Update clb data structures since we kept the move. 
blk_loc_registry.commit_move_blocks(blocks_affected_); - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + // if the agent proposed the block type, then collect the block type stat + if (proposed_action.logical_blk_type_index != -1) { ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; } + if (noc_opts_.noc){ noc_cost_handler_->commit_noc_costs(); costs_ += noc_delta_c; @@ -787,6 +789,7 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, ++annealing_state_.num_temps; } + int PlacementAnnealer::get_total_iteration() const { return tot_iter_; } @@ -805,7 +808,7 @@ void PlacementAnnealer::start_quench() { // Freeze out: only accept solutions that improve placement. annealing_state_.t = 0; - //Revert the move limit to initial value. + // Revert the move limit to initial value. annealing_state_.move_lim = annealing_state_.move_lim_max; } @@ -814,7 +817,6 @@ std::tuple } void PlacementAnnealer::LOG_MOVE_STATS_HEADER() { - if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { if (move_stats_file_) { fprintf(move_stats_file_.get(), @@ -837,10 +839,8 @@ void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() { const auto& grid_blocks = placer_state_.grid_blocks(); if (move_stats_file_) { - ClusterBlockId b_from = blocks_affected_.moved_blocks[0].block_num; - t_pl_loc to = blocks_affected_.moved_blocks[0].new_loc; ClusterBlockId b_to = grid_blocks.block_at_location(to); diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 2252367ad04..730f01eec96 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -171,7 +171,7 @@ class PlacementAnnealer { NetPinTimingInvalidator* pin_timing_invalidator, int move_lim); - /* Function which contains the inner loop of the simulated annealing */ + ///@brief Contains the inner loop of the simulated annealing void placement_inner_loop(MoveGenerator& 
move_generator, float timing_bb_factor); @@ -200,12 +200,19 @@ class PlacementAnnealer { float timing_bb_factor, bool manual_move_enabled); + ///@brief Returns the total number iterations or attempted swaps int get_total_iteration() const; + ///@brief Returns a constant reference to the annealing state const t_annealing_state& get_annealing_state() const; std::tuple get_stats() const; + /** + * @brief Starts the quench stage in simulated annealing by + * setting the temperature to zero and reverting the move range limit + * to the initial value. + */ void start_quench(); private: @@ -233,7 +240,7 @@ class PlacementAnnealer { int outer_crit_iter_count_; t_annealing_state annealing_state_; - /// Swap statistics keep record of the number accepted/rejected/aborted swaps. + ///Swap statistics keep record of the number accepted/rejected/aborted swaps. t_swap_stats swap_stats_; MoveTypeStat move_type_stats_; t_placer_statistics placer_stats_; @@ -249,10 +256,15 @@ class PlacementAnnealer { */ static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; + ///Specifies how often timing information is recomputed when the annealer isn't in the quench stage int inner_recompute_limit_; + ///Specifies how often timing information is recomputed when the annealer is in the quench stage int quench_recompute_limit_; + ///Used to trigger a BB and NoC cost re-computation from scratch int moves_since_cost_recompute_; + ///Total number of iterations or attempted swaps int tot_iter_; + ///Indicates whether the annealer has entered into the quench stage bool quench_started_; void LOG_MOVE_STATS_HEADER(); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt index 28a1bb52736..c5e2acb803a 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params crit_path_delay_mcw clk_to_clk_cpd clk_to_clk2_cpd clk_to_input_cpd clk_to_output_cpd clk2_to_clk2_cpd clk2_to_clk_cpd clk2_to_input_cpd clk2_to_output_cpd input_to_input_cpd input_to_clk_cpd input_to_clk2_cpd input_to_output_cpd output_to_output_cpd output_to_clk_cpd output_to_clk2_cpd output_to_input_cpd clk_to_clk_setup_slack clk_to_clk2_setup_slack clk_to_input_setup_slack clk_to_output_setup_slack clk2_to_clk2_setup_slack clk2_to_clk_setup_slack clk2_to_input_setup_slack clk2_to_output_setup_slack input_to_input_setup_slack input_to_clk_setup_slack input_to_clk2_setup_slack input_to_output_setup_slack output_to_output_setup_slack output_to_clk_setup_slack output_to_clk2_setup_slack output_to_input_setup_slack clk_to_clk_hold_slack clk_to_clk2_hold_slack clk_to_input_hold_slack clk_to_output_hold_slack clk2_to_clk2_hold_slack clk2_to_clk_hold_slack clk2_to_input_hold_slack clk2_to_output_hold_slack input_to_input_hold_slack input_to_clk_hold_slack input_to_clk2_hold_slack input_to_output_hold_slack output_to_output_hold_slack output_to_clk_hold_slack output_to_clk2_hold_slack output_to_input_hold_slack -k6_frac_N10_mem32K_40nm_clk_buf.xml multiclock_buf.blif common 1.48876 0.545 -1 -1 -1 0.545 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.293 -1 -1 -1 0.293 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_frac_N10_mem32K_40nm_clk_buf.xml multiclock_buf.blif common 1.6599674 0.545 -1 -1 -1 0.545 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.293 -1 -1 -1 0.293 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt index e7a944100ab..70910d2d59a 100644 --- 
a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt @@ -1,8 +1,8 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time 
crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time num_global_nets num_routed_nets timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_ideal_--route_chan_width_60 0.30 vpr 57.61 MiB -1 -1 0.06 19388 1 0.02 -1 -1 33516 -1 -1 1 2 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 58988 2 1 3 4 1 3 4 3 3 9 -1 auto 19.1 MiB 0.00 4 9 6 3 0 57.6 MiB 0.00 0.00 0.55447 -0.91031 -0.55447 0.55447 0.00 1.4209e-05 1.0635e-05 0.000112608 8.885e-05 -1 2 1 18000 18000 14049.7 1561.07 0.00 0.00111531 0.00103596 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 timing/k6_N10_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.30 vpr 57.69 MiB -1 -1 0.06 19244 1 0.02 -1 -1 33536 -1 -1 1 2 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 59076 2 1 3 4 1 3 4 3 3 9 -1 auto 19.2 MiB 0.00 6 9 5 2 2 57.7 MiB 0.00 0.00 0.48631 -0.91031 -0.48631 0.48631 0.00 1.4475e-05 1.0195e-05 0.000102982 7.9111e-05 -1 4 1 18000 18000 15707.9 1745.32 0.00 0.00110914 0.00104203 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 -timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 26.57 parmys 203.92 MiB -1 -1 21.33 208816 2 1.49 -1 -1 61188 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 61088 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 22 
75566 54444 2848 18274 59.7 MiB 0.07 0.00 1.49664 -15.129 -1.49664 1.49664 0.00 0.000225009 0.000209684 0.0166386 0.0154931 -1 38 6 3.042e+06 2.79e+06 863192. 3836.41 0.01 0.0221087 0.0205962 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 -timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 26.99 parmys 204.15 MiB -1 -1 21.52 209052 2 1.49 -1 -1 60656 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 60972 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 25 77716 55619 3345 18752 59.5 MiB 0.13 0.00 1.47823 -14.9031 -1.47823 1.47823 0.00 0.000388878 0.000358886 0.0289108 0.0266306 -1 38 3 3.042e+06 2.79e+06 892591. 3967.07 0.01 0.0351201 0.0324031 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 26.57 parmys 203.92 MiB -1 -1 21.33 208816 2 1.49 -1 -1 61188 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 61088 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 22 75566 54444 2848 18274 59.7 MiB 0.07 0.00 1.49664 -15.129 -1.49664 1.49664 0.00 0.000225009 0.000209684 0.0166386 0.0154931 -1 57 6 3.042e+06 2.79e+06 863192. 
3836.41 0.01 0.0221087 0.0205962 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 154 9 +timing/k6_N10_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_route_--route_chan_width_60 26.99 parmys 204.15 MiB -1 -1 21.52 209052 2 1.49 -1 -1 60656 -1 -1 155 5 -1 -1 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 60972 5 156 191 347 1 163 316 15 15 225 clb auto 21.3 MiB 0.03 25 77716 55619 3345 18752 59.5 MiB 0.13 0.00 1.47823 -14.9031 -1.47823 1.47823 0.00 0.000388878 0.000358886 0.0289108 0.0266306 -1 57 3 3.042e+06 2.79e+06 892591. 3967.07 0.01 0.0351201 0.0324031 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 153 10 timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_ideal_--route_chan_width_60 0.35 vpr 63.08 MiB -1 -1 0.08 19324 1 0.02 -1 -1 33472 -1 -1 1 2 0 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64592 2 1 3 4 1 3 4 3 3 9 -1 auto 24.5 MiB 0.00 4 9 6 2 1 63.1 MiB 0.00 0.00 0.55247 -0.90831 -0.55247 0.55247 0.00 1.3129e-05 9.703e-06 0.000103951 8.1123e-05 -1 2 2 53894 53894 12370.0 1374.45 0.00 0.00116445 0.00109439 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 timing/k6_N10_mem32K_40nm.xml microbenchmarks/d_flip_flop.v common_--clock_modeling_route_--route_chan_width_60 0.35 vpr 62.96 MiB -1 -1 0.08 19876 1 0.02 -1 -1 33484 -1 -1 1 2 0 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 64468 2 1 3 4 1 3 4 3 3 9 -1 auto 24.3 MiB 0.00 6 9 5 2 2 63.0 MiB 0.00 0.00 0.48631 -0.90831 -0.48631 0.48631 0.00 1.5477e-05 
1.1104e-05 0.000110622 8.6576e-05 -1 8 1 53894 53894 14028.3 1558.70 0.00 0.00113491 0.00106717 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0 3 timing/k6_N10_mem32K_40nm.xml verilog/mkPktMerge.v common_--clock_modeling_ideal_--route_chan_width_60 6.10 vpr 71.24 MiB -1 -1 1.09 28164 2 0.15 -1 -1 37372 -1 -1 32 311 15 0 success 84e0337 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2024-08-22T23:40:08 gh-actions-runner-vtr-auto-spawned3 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 72952 311 156 972 1128 1 953 514 28 28 784 memory auto 33.0 MiB 0.48 8979 193966 70726 114124 9116 71.2 MiB 1.31 0.03 4.11528 -4394.91 -4.11528 4.11528 0.00 0.00488787 0.00418834 0.465058 0.395185 -1 13380 12 4.25198e+07 9.94461e+06 2.96205e+06 3778.13 0.38 0.643724 0.557601 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 15 938 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt index 5d73f4813a6..1263c9d0fca 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap 
rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time k6_frac_N10_40nm.xml test_eblif.eblif common 0.12 vpr 60.11 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 3 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64 2024-09-24T03:42:01 fv-az1118-845 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61552 3 1 5 6 1 4 5 3 3 9 -1 auto 21.6 MiB 0.00 9 12 1 9 2 60.1 MiB 0.00 0.00 0.52647 -0.88231 -0.52647 0.52647 0.00 1.0349e-05 7.103e-06 8.7732e-05 6.8547e-05 20 10 1 53894 53894 4880.82 542.314 0.00 0.00111687 0.00105846 379 725 -1 6 1 3 3 36 25 0.605178 0.605178 -1.1507 -0.605178 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00107362 0.00104552 - k6_frac_N10_40nm.xml 
conn_order.eblif common 0.12 vpr 59.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64 2024-09-24T03:42:01 fv-az1118-845 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61420 2 1 4 5 1 3 4 3 3 9 -1 auto 21.6 MiB 0.00 6 9 2 3 4 60.0 MiB 0.00 0.00 0.69084 -1.21731 -0.69084 0.69084 0.00 1.4366e-05 1.0429e-05 0.000128779 0.000106057 20 9 1 53894 53894 4880.82 542.314 0.00 0.00110538 0.00104614 379 725 -1 5 1 2 2 25 19 0.940178 0.940178 -1.48482 -0.940178 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00106677 0.00104008 + k6_frac_N10_40nm.xml conn_order.eblif common 0.12 vpr 59.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64 2024-09-24T03:42:01 fv-az1118-845 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61420 2 1 4 5 1 3 4 3 3 9 -1 auto 21.6 MiB 0.00 6 9 2 3 4 60.0 MiB 0.00 0.00 0.69084 -1.21731 -0.69084 0.69084 0.00 1.4366e-05 1.0429e-05 0.000128779 0.000106057 20 9 1 53894 53894 4880.82 542.314 0.00 0.00110538 0.00104614 379 725 -1 3 1 2 2 25 19 1.6923204 1.6923204 -2.22723 -1.6923204 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00106677 0.00104008 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt index 28a1bb52736..ce46084d4cd 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params crit_path_delay_mcw clk_to_clk_cpd clk_to_clk2_cpd clk_to_input_cpd clk_to_output_cpd clk2_to_clk2_cpd clk2_to_clk_cpd clk2_to_input_cpd clk2_to_output_cpd input_to_input_cpd input_to_clk_cpd input_to_clk2_cpd 
input_to_output_cpd output_to_output_cpd output_to_clk_cpd output_to_clk2_cpd output_to_input_cpd clk_to_clk_setup_slack clk_to_clk2_setup_slack clk_to_input_setup_slack clk_to_output_setup_slack clk2_to_clk2_setup_slack clk2_to_clk_setup_slack clk2_to_input_setup_slack clk2_to_output_setup_slack input_to_input_setup_slack input_to_clk_setup_slack input_to_clk2_setup_slack input_to_output_setup_slack output_to_output_setup_slack output_to_clk_setup_slack output_to_clk2_setup_slack output_to_input_setup_slack clk_to_clk_hold_slack clk_to_clk2_hold_slack clk_to_input_hold_slack clk_to_output_hold_slack clk2_to_clk2_hold_slack clk2_to_clk_hold_slack clk2_to_input_hold_slack clk2_to_output_hold_slack input_to_input_hold_slack input_to_clk_hold_slack input_to_clk2_hold_slack input_to_output_hold_slack output_to_output_hold_slack output_to_clk_hold_slack output_to_clk2_hold_slack output_to_input_hold_slack -k6_frac_N10_mem32K_40nm_clk_buf.xml multiclock_buf.blif common 1.48876 0.545 -1 -1 -1 0.545 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.293 -1 -1 -1 0.293 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +k6_frac_N10_mem32K_40nm_clk_buf.xml multiclock_buf.blif common 1.65996 0.545 -1 -1 -1 0.545 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.293 -1 -1 -1 0.293 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt index f922c82810e..24a98bd463d 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt @@ -1,3 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth 
abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time k6_frac_N10_40nm.xml test_eblif.eblif common 0.12 vpr 59.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 3 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on 
Linux-6.8.0-1014-azure x86_64 2024-09-24T03:47:29 fv-az775-518 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61424 3 1 5 6 1 4 5 3 3 9 -1 auto 21.5 MiB 0.00 9 12 1 9 2 60.0 MiB 0.00 0.00 0.52647 -0.88231 -0.52647 0.52647 0.00 1.035e-05 7.094e-06 8.8915e-05 6.9209e-05 20 10 1 53894 53894 4880.82 542.314 0.00 0.0011057 0.00104635 379 725 -1 6 1 3 3 36 25 0.605178 0.605178 -1.1507 -0.605178 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00105006 0.00102258 - k6_frac_N10_40nm.xml conn_order.eblif common 0.12 vpr 59.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64 2024-09-24T03:47:29 fv-az775-518 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61424 2 1 4 5 1 3 4 3 3 9 -1 auto 21.5 MiB 0.00 6 9 2 3 4 60.0 MiB 0.00 0.00 0.69084 -1.21731 -0.69084 0.69084 0.00 1.0129e-05 6.963e-06 0.000104936 8.568e-05 20 9 1 53894 53894 4880.82 542.314 0.00 0.00111784 0.00105927 379 725 -1 5 1 2 2 25 19 0.940178 0.940178 -1.48482 -0.940178 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00106769 0.00103834 + k6_frac_N10_40nm.xml conn_order.eblif common 0.12 vpr 59.98 MiB -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 1 2 -1 -1 success e1c7cb1 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64 2024-09-24T03:47:29 fv-az775-518 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 61424 2 1 4 5 1 3 4 3 3 9 -1 auto 21.5 MiB 0.00 6 9 2 3 4 60.0 MiB 0.00 0.00 0.69084 -1.21731 -0.69084 0.69084 0.00 1.0129e-05 6.963e-06 0.000104936 8.568e-05 20 9 1 53894 53894 4880.82 542.314 0.00 0.00111784 0.00105927 379 725 -1 15 1 2 2 25 19 1.701722 1.701722 -2.22723 -1.701722 0 0 6579.40 731.044 0.00 0.00 0.00 -1 -1 0.00 0.00106769 0.00103834 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt 
index e6e7c8778e5..3813343ed35 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt @@ -1,7 +1,7 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total 
crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time x_gaussian_y_uniform.xml stereovision3.v common 1.42 vpr 65.81 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 37836 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67388 11 30 262 292 2 110 54 7 7 49 clb auto 27.0 MiB 0.12 431 2298 449 1774 75 65.8 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000483914 0.000384949 0.0147089 0.012636 -1 -1 -1 -1 12 326 3 1.07788e+06 700622 -1 -1 0.20 0.0742174 0.0638404 2680 3516 -1 316 3 175 255 10988 5508 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.0145719 0.0139138 x_uniform_y_gaussian.xml stereovision3.v common 1.44 vpr 65.54 MiB 0.05 9856 -1 -1 4 0.17 -1 -1 37820 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67108 11 30 262 292 2 110 54 7 7 49 clb auto 26.9 MiB 0.11 392 1890 346 1476 68 65.5 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000458868 0.000376323 0.0123402 0.0106294 -1 -1 -1 -1 12 287 5 1.07788e+06 700622 -1 -1 0.21 0.0867101 0.074128 2680 3516 -1 268 3 167 248 10043 4782 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.015461 0.0147632 - x_gaussian_y_gaussian.xml stereovision3.v common 1.50 vpr 65.58 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 37476 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67156 
11 30 262 292 2 110 54 7 7 49 clb auto 26.9 MiB 0.12 398 2196 430 1697 69 65.6 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000468656 0.000387965 0.0139473 0.0119918 -1 -1 -1 -1 12 284 8 1.07788e+06 700622 -1 -1 0.28 0.0788417 0.0678402 2680 3516 -1 273 3 184 266 11521 5744 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.0151497 0.0144591 + x_gaussian_y_gaussian.xml stereovision3.v common 1.50 vpr 65.58 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 37476 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67156 11 30 262 292 2 110 54 7 7 49 clb auto 26.9 MiB 0.12 398 2196 430 1697 69 65.6 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000468656 0.000387965 0.0139473 0.0119918 -1 -1 -1 -1 16 284 8 1.07788e+06 700622 -1 -1 0.28 0.0788417 0.0678402 2680 3516 -1 273 3 184 266 11521 5744 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.0151497 0.0144591 x_delta_y_uniform.xml stereovision3.v common 1.67 vpr 65.78 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 40712 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67356 11 30 262 292 2 110 54 7 7 49 clb auto 27.0 MiB 0.11 474 1992 348 1574 70 65.8 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000450631 0.000369149 0.0127092 0.0109666 -1 -1 -1 -1 48 367 4 1.07788e+06 700622 -1 -1 0.46 0.187113 0.157611 2680 3516 -1 363 2 162 240 11458 5656 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.0141159 0.0135524 x_delta_y_delta.xml stereovision3.v common 1.41 vpr 65.68 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 38292 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 
/home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67260 11 30 262 292 2 110 54 7 7 49 clb auto 26.9 MiB 0.12 411 2094 373 1653 68 65.7 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000478875 0.000382715 0.0140865 0.0122714 -1 -1 -1 -1 48 306 4 1.07788e+06 700622 -1 -1 0.20 0.107373 0.0919185 2680 3516 -1 300 3 176 263 11898 5867 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.014938 0.0142467 x_uniform_y_delta.xml stereovision3.v common 1.47 vpr 65.57 MiB 0.05 9984 -1 -1 4 0.17 -1 -1 37488 -1 -1 13 11 0 0 success 30aea82 Release IPO VTR_ASSERT_LEVEL=3 GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64 2024-10-28T23:46:21 fv-az1380-902 /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing 67144 11 30 262 292 2 110 54 7 7 49 clb auto 26.9 MiB 0.11 405 2196 394 1718 84 65.6 MiB 0.03 0.00 1.91988 -135.359 -1.91988 1.85222 0.01 0.000448588 0.000370342 0.0136716 0.0117962 -1 -1 -1 -1 58 286 2 1.07788e+06 700622 -1 -1 0.28 0.112457 0.0956247 2680 3516 -1 286 2 161 239 8848 4226 1.91988 1.85222 -135.359 -1.91988 0 0 -1 -1 0.00 0.03 0.00 -1 -1 0.00 0.0140539 0.0134498 From 11074f3c707e24e46a124a419ec2abdaf86c9ba3 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 11 Nov 2024 12:31:02 -0500 Subject: [PATCH 17/31] types and pass by reference in TimingGraph.cpp --- .../libtatum/libtatum/tatum/TimingGraph.cpp | 16 ++++----- .../libtatum/libtatum/tatum/TimingGraph.hpp | 36 +++++++++++-------- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp index 33392a6e8d1..3b861d054c6 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp @@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) { EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) { //We require that the source/sink node 
must already be in the graph, - // so we can update them with thier edge references + // so we can update them with their edge references TATUM_ASSERT(valid_node_id(src_node)); TATUM_ASSERT(valid_node_id(sink_node)); @@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N EdgeId edge_id = EdgeId(edge_ids_.size()); edge_ids_.push_back(edge_id); - //Create the edgge + //Create the edge edge_types_.push_back(type); edge_src_nodes_.push_back(src_node); edge_sink_nodes_.push_back(sink_node); @@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() { levelize(); validate(); - return {node_id_map, edge_id_map}; + return {std::move(node_id_map), std::move(edge_id_map)}; } void TimingGraph::levelize() { @@ -474,7 +474,7 @@ GraphIdMaps TimingGraph::optimize_layout() { levelize(); - return {node_id_map, edge_id_map}; + return {std::move(node_id_map), std::move(edge_id_map)}; } tatum::util::linear_map TimingGraph::optimize_edge_layout() const { @@ -483,7 +483,7 @@ tatum::util::linear_map TimingGraph::optimize_edge_layout() const //Determine the edges driven by each level of the graph std::vector> edge_levels; for(LevelId level_id : levels()) { - edge_levels.push_back(std::vector()); + edge_levels.emplace_back(); for(auto node_id : level_nodes(level_id)) { //We walk the nodes according to the input-edge order. 
@@ -874,7 +874,7 @@ std::vector> identify_combinational_loops(const TimingGraph& } std::vector find_transitively_connected_nodes(const TimingGraph& tg, - const std::vector through_nodes, + const std::vector& through_nodes, size_t max_depth) { std::vector nodes; @@ -890,7 +890,7 @@ std::vector find_transitively_connected_nodes(const TimingGraph& tg, } std::vector find_transitive_fanin_nodes(const TimingGraph& tg, - const std::vector sinks, + const std::vector& sinks, size_t max_depth) { std::vector nodes; @@ -905,7 +905,7 @@ std::vector find_transitive_fanin_nodes(const TimingGraph& tg, } std::vector find_transitive_fanout_nodes(const TimingGraph& tg, - const std::vector sources, + const std::vector& sources, size_t max_depth) { std::vector nodes; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp index 72a05cad9da..f4cd54ad8d9 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp @@ -11,8 +11,8 @@ * store all edges as bi-directional edges. * * NOTE: We store only the static connectivity and node information in the 'TimingGraph' class. - * Other dynamic information (edge delays, node arrival/required times) is stored seperately. - * This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only + * Other dynamic information (edge delays, node arrival/required times) is stored separately. + * This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only * require read-only access to the timing graph. * * Accessing Graph Data @@ -28,9 +28,9 @@ * rather than the more typical "Array of Structs (AoS)" data layout. * * By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous - * memory. Using an AoS layout the various fields accross nodes would *not* be contiguous + * memory. 
Using an AoS layout the various fields across nodes would *not* be contiguous * (although the different fields within each object (e.g. a TimingNode class) would be contiguous. - * Since we typically perform operations on particular fields accross nodes the SoA layout performs + * Since we typically perform operations on particular fields across nodes the SoA layout performs * better (and enables memory ordering optimizations). The edges are also stored in a SOA format. * * The SoA layout also motivates the ID based approach, which allows direct indexing into the required @@ -48,11 +48,12 @@ * and ensures that each cache line pulled into the cache will (likely) be accessed multiple times * before being evicted. * - * Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout() - * and optimize_node_layout() member functions. In the future (particularily if incremental modification + * Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout() + * and optimize_node_layout() member functions. In the future (particularly if incremental modification * support is added), it may be a good idea apply these modifications automatically as needed. * */ +#include #include #include #include @@ -149,7 +150,7 @@ class TimingGraph { ///\pre The graph must be levelized. ///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top level design inputs pins) - ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network) + ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. 
FF Q pins are SOURCE's but have incoming edges from the clock network) ///\see levelize() node_range primary_inputs() const { TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized"); @@ -282,7 +283,7 @@ class TimingGraph { //Node data tatum::util::linear_map node_ids_; //The node IDs in the graph tatum::util::linear_map node_types_; //Type of node - tatum::util::linear_map> node_in_edges_; //Incomiing edge IDs for node + tatum::util::linear_map> node_in_edges_; //Incoming edge IDs for node tatum::util::linear_map> node_out_edges_; //Out going edge IDs for node tatum::util::linear_map node_levels_; //Out going edge IDs for node @@ -293,12 +294,12 @@ class TimingGraph { tatum::util::linear_map edge_src_nodes_; //Source node for each edge tatum::util::linear_map edges_disabled_; - //Auxilary graph-level info, filled in by levelize() + //Auxiliary graph-level info, filled in by levelize() tatum::util::linear_map level_ids_; //The level IDs in the graph tatum::util::linear_map> level_nodes_; //Nodes in each level std::vector primary_inputs_; //Primary input nodes of the timing graph. std::vector logical_outputs_; //Logical output nodes of the timing graph. 
- bool is_levelized_ = false; //Inidcates if the current levelization is valid + bool is_levelized_ = false; //Indicates if the current levelization is valid bool allow_dangling_combinational_nodes_ = false; @@ -310,26 +311,31 @@ std::vector> identify_combinational_loops(const TimingGraph& //Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes //up to max_depth (default infinite) hops away std::vector find_transitively_connected_nodes(const TimingGraph& tg, - const std::vector through_nodes, + const std::vector& through_nodes, size_t max_depth=std::numeric_limits::max()); //Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away std::vector find_transitive_fanin_nodes(const TimingGraph& tg, - const std::vector sinks, + const std::vector& sinks, size_t max_depth=std::numeric_limits::max()); //Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away std::vector find_transitive_fanout_nodes(const TimingGraph& tg, - const std::vector sources, + const std::vector& sources, size_t max_depth=std::numeric_limits::max()); EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge); //Mappings from old to new IDs struct GraphIdMaps { - GraphIdMaps(tatum::util::linear_map node_map, - tatum::util::linear_map edge_map) + GraphIdMaps(const tatum::util::linear_map& node_map, + const tatum::util::linear_map& edge_map) : node_id_map(node_map), edge_id_map(edge_map) {} + + GraphIdMaps(tatum::util::linear_map&& node_map, + tatum::util::linear_map&& edge_map) + : node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {} + tatum::util::linear_map node_id_map; tatum::util::linear_map edge_id_map; }; From 4b3f7956b4de2991ea4e82bf7cf1ea30b20f8a37 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 12:11:35 -0500 Subject: [PATCH 18/31] fixed a few typos --- .../libtatum/tatum/analyzer_factory.hpp | 6 
+++--- .../tatum/delay_calc/FixedDelayCalculator.hpp | 2 +- .../tatum/graph_visitors/GraphVisitor.hpp | 2 +- vpr/src/base/atom_lookup.cpp | 2 +- vpr/src/base/atom_lookup_fwd.h | 2 +- vpr/src/base/atom_netlist_utils.cpp | 13 +++++++------ vpr/src/timing/timing_graph_builder.cpp | 18 +++++++++--------- 7 files changed, 23 insertions(+), 22 deletions(-) diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp index 9ac444bc61f..db34f59a049 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp @@ -18,7 +18,7 @@ namespace tatum { * This file defines the AnalyzerFactory class used to construct timing analyzers. * * We assume that the user has already defined the timing graph, constraints and - * thier own delay calculator: + * their own delay calculator: * * TimingGraph timing_graph; * TimingConstraints timing_constraints; @@ -33,7 +33,7 @@ namespace tatum { * timing_constraints, * delay_calculator); * - * We can similarily generate analyzers for other types of analysis, for instance Hold: + * We can similarly generate analyzers for other types of analysis, for instance Hold: * * auto hold_analyzer = AnalyzerFactory::make(timing_graph, * timing_constraints, @@ -45,7 +45,7 @@ namespace tatum { * timing_constraints, * delay_calculator); * - * The AnalzyerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class: + * The AnalyzerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class: * * SetupAnalysis => SetupTimingAnalyzer * HoldAnalysis => HoldTimingAnalyzer diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp index bfa1f0fa037..9d0a86ec217 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp +++ 
b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp @@ -8,7 +8,7 @@ namespace tatum { /** - * An exmaple DelayCalculator implementation which takes + * An example DelayCalculator implementation which takes * a vector of fixed pre-calculated edge delays * * \see DelayCalculator diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp index 2672560d155..be9680d20b5 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp @@ -21,7 +21,7 @@ class GraphVisitor { virtual void do_reset_node_arrival_tags_from_origin(const NodeId node_id, const NodeId origin) = 0; virtual void do_reset_node_required_tags_from_origin(const NodeId node_id, const NodeId origin) = 0; - //Returns true if the specified source/sink is unconstrainted + //Returns true if the specified source/sink is unconstrained virtual bool do_arrival_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0; virtual bool do_required_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0; diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp index f771b8af154..eb597ff8abd 100644 --- a/vpr/src/base/atom_lookup.cpp +++ b/vpr/src/base/atom_lookup.cpp @@ -173,7 +173,7 @@ AtomLookup::pin_tnode_range AtomLookup::atom_pin_tnodes(BlockTnode block_tnode_t } void AtomLookup::set_atom_pin_tnode(const AtomPinId pin, const tatum::NodeId node, BlockTnode block_tnode_type) { - //A pin always expands to an external tnode (i.e. it's external connectivity in the netlist) + //A pin always expands to an external tnode (i.e. its external connectivity in the netlist) //but some pins may expand to an additional tnode (i.e. 
to SOURCE/SINK to cover internal sequential paths within a block) if (block_tnode_type == BlockTnode::EXTERNAL) { atom_pin_tnode_external_[pin] = node; diff --git a/vpr/src/base/atom_lookup_fwd.h b/vpr/src/base/atom_lookup_fwd.h index 02ab349cb94..1adb2e68bb0 100644 --- a/vpr/src/base/atom_lookup_fwd.h +++ b/vpr/src/base/atom_lookup_fwd.h @@ -5,7 +5,7 @@ class AtomLookup; enum class BlockTnode { INTERNAL, /// find_netlist_physical_clock_nets(const AtomNetlist& netlist) //clock generators // //Since we don't have good information about what pins are clock generators we build a lookup as we go - for (auto blk_id : netlist.blocks()) { + for (AtomBlockId blk_id : netlist.blocks()) { if (!blk_id) continue; + // Ignore I/O blocks AtomBlockType type = netlist.block_type(blk_id); if (type != AtomBlockType::BLOCK) continue; @@ -1352,7 +1353,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) const t_model* model = netlist.block_model(blk_id); VTR_ASSERT(model); if (clock_gen_ports.find(model) == clock_gen_ports.end()) { - //First time we've seen this model, intialize it + //First time we've seen this model, initialize it clock_gen_ports[model] = {}; //Look at all the ports to find clock generators @@ -1366,7 +1367,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) } //Look for connected input clocks - for (auto pin_id : netlist.block_clock_pins(blk_id)) { + for (AtomPinId pin_id : netlist.block_clock_pins(blk_id)) { if (!pin_id) continue; AtomNetId clk_net_id = netlist.pin_net(pin_id); @@ -1402,7 +1403,7 @@ std::set find_netlist_physical_clock_nets(const AtomNetlist& netlist) ///@brief Finds all logical clock drivers in the netlist (by back-tracing through logic) std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlist) { - auto clock_nets = find_netlist_physical_clock_nets(netlist); + std::set clock_nets = find_netlist_physical_clock_nets(netlist); //We now have a set of nets which drive clock pins // @@ -1415,7 
+1416,7 @@ std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlis prev_clock_nets = clock_nets; clock_nets.clear(); - for (auto clk_net : prev_clock_nets) { + for (AtomNetId clk_net : prev_clock_nets) { AtomPinId driver_pin = netlist.net_driver(clk_net); AtomPortId driver_port = netlist.pin_port(driver_pin); AtomBlockId driver_blk = netlist.port_block(driver_port); @@ -1467,7 +1468,7 @@ std::set find_netlist_logical_clock_drivers(const AtomNetlist& netlis //Extract the net drivers std::set clock_drivers; - for (auto net : clock_nets) { + for (AtomNetId net : clock_nets) { AtomPinId driver = netlist.net_driver(net); if (netlist.pin_is_constant(driver)) { diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp index c0462429648..49534b9d380 100644 --- a/vpr/src/timing/timing_graph_builder.cpp +++ b/vpr/src/timing/timing_graph_builder.cpp @@ -295,7 +295,7 @@ void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) { } //Walk through the netlist nets adding the edges representing each net to - //the timiing graph. This connects the timing graph nodes of each netlist + //the timing graph. This connects the timing graph nodes of each netlist //block together. for (AtomNetId net : netlist_.nets()) { add_net_to_timing_graph(net); @@ -359,13 +359,13 @@ void TimingGraphBuilder::add_io_to_timing_graph(const AtomBlockId blk) { //Creates the timing graph nodes and internal edges for a netlist block void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) { /* - * How the code builds the primtive timing sub-graph + * How the code builds the primitive timing sub-graph * ------------------------------------------------- * - * The code below builds the timing sub-graph corresponding corresponding to the + * The code below builds the timing sub-graph corresponding to the * current netlist primitive/block. 
This is accomplished by walking through * the primitive's input, clock and output pins and creating the corresponding - * tnodes (note that if internal sequentail paths exist within the primitive + * tnodes (note that if internal sequential paths exist within the primitive * this also creates the appropriate internal tnodes). * * Once all nodes have been created the edges are added between them according @@ -374,12 +374,12 @@ void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) { * Note that to minimize the size of the timing graph we only create tnodes and * edges where they actually exist within the netlist. This means we do not create * tnodes or tedges to/from pins which are disconnected in the netlist (even if - * they exist in the archtiecture). + * they exist in the architecture). * * * Clock Generators * ---------------- - * An additional wrinkle in the above process is the presense of clock generators, + * An additional wrinkle in the above process is the presence of clock generators, * such as PLLs, which may define new clocks at their output (in contrast with a * primary input which is always a SOURCE type tnode). * @@ -545,7 +545,7 @@ void TimingGraphBuilder::create_block_internal_clock_timing_edges(const AtomBloc AtomPinId clk_pin = netlist_.port_pin(clk_port, 0); VTR_ASSERT(clk_pin); - //Convert the pin to it's tnode + //Convert the pin to its tnode NodeId clk_tnode = netlist_lookup_.atom_pin_tnode(clk_pin); VTR_ASSERT(clk_tnode); @@ -605,7 +605,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock //to OPIN), the end of a timing path (i.e. IPIN to SINK), or an internal timing path //(i.e. SOURCE to SINK). 
// - //Note that the creation of these edges is driven by the 'combinationl_sink_ports' specified + //Note that the creation of these edges is driven by the 'combinational_sink_ports' specified //in the architecture primitive model for (AtomPinId src_pin : netlist_.block_input_pins(blk)) { //Note that we have already created all the relevant nodes, and appropriately labelled them as @@ -615,7 +615,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock if (!src_tnode) continue; - auto src_type = tg_->node_type(src_tnode); + NodeType src_type = tg_->node_type(src_tnode); //Look-up the combinationally connected sink ports name on the port model AtomPortId src_port = netlist_.pin_port(src_pin); From 4a9423d3ae58ee321d4b2565a1570719706806d0 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 12:29:18 -0500 Subject: [PATCH 19/31] get rid of DUSTY_SCHED --- vpr/src/base/SetupVPR.cpp | 29 +------------ vpr/src/base/ShowSetup.cpp | 17 ++------ vpr/src/base/read_options.cpp | 44 ++------------------ vpr/src/base/read_options.h | 5 --- vpr/src/base/vpr_types.h | 13 ------ vpr/src/place/annealer.cpp | 76 +++++++++-------------------------- vpr/src/place/annealer.h | 15 ++----- 7 files changed, 31 insertions(+), 168 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 43d7605a1fd..d6f1b69efad 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -394,7 +394,7 @@ static void SetupSwitches(const t_arch& Arch, device_ctx.delayless_switch_idx = RoutingArch->delayless_switch; //Warn about non-zero Cout values for the ipin switch, since these values have no effect. - //VPR do not model the R/C's of block internal routing connectsion. + //VPR do not model the R/C's of block internal routing connection. 
// //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) if (device_ctx.arch_switch_inf[RoutingArch->wire_to_arch_ipin_switch].Cout != 0.) { @@ -530,31 +530,6 @@ static void SetupAnnealSched(const t_options& Options, VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n"); } - AnnealSched->alpha_min = Options.PlaceAlphaMin; - if (AnnealSched->alpha_min >= 1 || AnnealSched->alpha_min <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_min must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->alpha_max = Options.PlaceAlphaMax; - if (AnnealSched->alpha_max >= 1 || AnnealSched->alpha_max <= AnnealSched->alpha_min) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_max must be between alpha_min and 1 exclusive.\n"); - } - - AnnealSched->alpha_decay = Options.PlaceAlphaDecay; - if (AnnealSched->alpha_decay >= 1 || AnnealSched->alpha_decay <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_decay must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->success_min = Options.PlaceSuccessMin; - if (AnnealSched->success_min >= 1 || AnnealSched->success_min <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_min must be between 0 and 1 exclusive.\n"); - } - - AnnealSched->success_target = Options.PlaceSuccessTarget; - if (AnnealSched->success_target >= 1 || AnnealSched->success_target <= 0) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_target must be between 0 and 1 exclusive.\n"); - } - AnnealSched->type = Options.anneal_sched_type; } @@ -782,7 +757,7 @@ static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts) } static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_ipin_switch, int& wire_to_arch_ipin_switch_between_dice) { - for (auto cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) { + for (int cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); 
cb_switch_name_index++) { int ipin_cblock_switch_index = UNDEFINED; for (int iswitch = 0; iswitch < (int)Arch.switches.size(); ++iswitch) { if (Arch.switches[iswitch].name == Arch.ipin_cblock_switch_name[cb_switch_name_index]) { diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 9e6f1ed87eb..66d1da27d0a 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -3,7 +3,6 @@ #include "vtr_assert.h" #include "vtr_log.h" -#include "vtr_memory.h" #include "vpr_types.h" #include "vpr_error.h" @@ -126,7 +125,6 @@ ClusteredNetlistStats::ClusteredNetlistStats() { auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); - int j; L_num_p_inputs = 0; L_num_p_outputs = 0; num_blocks_type = std::vector(device_ctx.logical_block_types.size(), 0); @@ -135,12 +133,12 @@ ClusteredNetlistStats::ClusteredNetlistStats() { logical_block_types = device_ctx.logical_block_types; /* Count I/O input and output pads */ - for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) { auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); auto physical_tile = pick_physical_type(logical_block); num_blocks_type[logical_block->index]++; if (is_io_type(physical_tile)) { - for (j = 0; j < logical_block->pb_type->num_pins; j++) { + for (int j = 0; j < logical_block->pb_type->num_pins; j++) { int physical_pin = get_physical_pin(physical_tile, logical_block, j); if (cluster_ctx.clb_nlist.block_net(blk_id, j) != ClusterNetId::INVALID()) { @@ -178,7 +176,7 @@ void ClusteredNetlistStats::write(OutputFormat fmt, std::ostream& output) const void writeClusteredNetlistStats(const std::string& block_usage_filename) { const auto stats = ClusteredNetlistStats(); - // Print out the human readable version to stdout + // Print out the human-readable version to stdout stats.write(ClusteredNetlistStats::OutputFormat::HumanReadable, std::cout); @@ -212,9 +210,6 @@ static void 
ShowAnnealSched(const t_annealing_sched& AnnealSched) { case e_sched_type::USER_SCHED: VTR_LOG("USER_SCHED\n"); break; - case e_sched_type::DUSTY_SCHED: - VTR_LOG("DUSTY_SCHED\n"); - break; default: VTR_LOG_ERROR("Unknown annealing schedule\n"); } @@ -225,12 +220,6 @@ static void ShowAnnealSched(const t_annealing_sched& AnnealSched) { VTR_LOG("AnnealSched.init_t: %f\n", AnnealSched.init_t); VTR_LOG("AnnealSched.alpha_t: %f\n", AnnealSched.alpha_t); VTR_LOG("AnnealSched.exit_t: %f\n", AnnealSched.exit_t); - } else if (e_sched_type::DUSTY_SCHED == AnnealSched.type) { - VTR_LOG("AnnealSched.alpha_min: %f\n", AnnealSched.alpha_min); - VTR_LOG("AnnealSched.alpha_max: %f\n", AnnealSched.alpha_max); - VTR_LOG("AnnealSched.alpha_decay: %f\n", AnnealSched.alpha_decay); - VTR_LOG("AnnealSched.success_min: %f\n", AnnealSched.success_min); - VTR_LOG("AnnealSched.success_target: %f\n", AnnealSched.success_target); } } diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index ec45d2e764f..1641e255b89 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1957,36 +1957,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("0.8") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlphaMin, "--alpha_min") - .help( - "For placement using Dusty's annealing schedule. Minimum (starting) value of alpha.") - .default_value("0.2") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceAlphaMax, "--alpha_max") - .help( - "For placement using Dusty's annealing schedule. Maximum (stopping) value of alpha.") - .default_value("0.9") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceAlphaDecay, "--alpha_decay") - .help( - "For placement using Dusty's annealing schedule. 
The value that alpha is scaled by after reset.") - .default_value("0.7") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceSuccessMin, "--anneal_success_min") - .help( - "For placement using Dusty's annealing schedule. Minimum success ratio when annealing before resetting the temperature to maintain the target success ratio.") - .default_value("0.1") - .show_in(argparse::ShowIn::HELP_ONLY); - - place_grp.add_argument(args.PlaceSuccessTarget, "--anneal_success_target") - .help( - "For placement using Dusty's annealing schedule. Target success ratio when annealing.") - .default_value("0.25") - .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.pad_loc_type, "--fix_pins") .help( "Fixes I/O pad locations randomly during placement. Valid options:\n" @@ -2996,7 +2966,7 @@ void set_conditional_defaults(t_options& args) { * Filenames */ - //We may have recieved the full circuit filepath in the circuit name, + //We may have received the full circuit filepath in the circuit name, //remove the extension and any leading path elements VTR_ASSERT(args.CircuitName.provenance() == Provenance::SPECIFIED); auto name_ext = vtr::split_ext(args.CircuitName); @@ -3132,15 +3102,9 @@ void set_conditional_defaults(t_options& args) { } //Which schedule? 
- if (args.PlaceAlphaMin.provenance() == Provenance::SPECIFIED // Any of these flags select Dusty's schedule - || args.PlaceAlphaMax.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaDecay.provenance() == Provenance::SPECIFIED - || args.PlaceSuccessMin.provenance() == Provenance::SPECIFIED - || args.PlaceSuccessTarget.provenance() == Provenance::SPECIFIED) { - args.anneal_sched_type.set(e_sched_type::DUSTY_SCHED, Provenance::INFERRED); - } else if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule - || args.PlaceExitT.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { + if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule + || args.PlaceExitT.provenance() == Provenance::SPECIFIED + || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index ca7f30d196f..b43e3734de1 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -119,11 +119,6 @@ struct t_options { argparse::ArgValue PlaceInitT; argparse::ArgValue PlaceExitT; argparse::ArgValue PlaceAlphaT; - argparse::ArgValue PlaceAlphaMin; - argparse::ArgValue PlaceAlphaMax; - argparse::ArgValue PlaceAlphaDecay; - argparse::ArgValue PlaceSuccessMin; - argparse::ArgValue PlaceSuccessTarget; argparse::ArgValue anneal_sched_type; argparse::ArgValue PlaceAlgorithm; argparse::ArgValue PlaceQuenchAlgorithm; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index af85be64a32..c18a9fe3665 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -466,7 +466,6 @@ constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3; /* Timing data structures end */ enum 
class e_sched_type { AUTO_SCHED, - DUSTY_SCHED, USER_SCHED }; /* Annealing schedule */ @@ -832,18 +831,6 @@ struct t_annealing_sched { float init_t; float alpha_t; float exit_t; - - /* Parameters for DUSTY_SCHED * - * The alpha ranges from alpha_min to alpha_max, decaying each * - * iteration by `alpha_decay`. * - * `restart_filter` is the low-pass coefficient (EWMA) for updating * - * the new starting temperature for each alpha. * - * Give up after `wait` alphas. */ - float alpha_min; - float alpha_max; - float alpha_decay; - float success_min; - float success_target; }; /****************************************************************** diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 44fd2e85f12..04e2367fd31 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -79,25 +79,17 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, } ///@brief Constructor: Initialize all annealing state variables and macros. -t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, +t_annealing_state::t_annealing_state(float first_t, float first_rlim, int first_move_lim, float first_crit_exponent) { num_temps = 0; - alpha = annealing_sched.alpha_min; + alpha = 1.f; t = first_t; - restart_t = first_t; rlim = first_rlim; move_lim_max = first_move_lim; crit_exponent = first_crit_exponent; - - /* Determine the current move_lim based on the schedule type */ - if (annealing_sched.type == e_sched_type::DUSTY_SCHED) { - move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target)); - } else { - move_lim = move_lim_max; - } + move_lim = move_lim_max; /* Store this inverse value for speed when updating crit_exponent. 
*/ INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); @@ -132,52 +124,23 @@ bool t_annealing_state::outer_loop_update(float success_rate, auto& cluster_ctx = g_vpr_ctx.clustering(); float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); - if (placer_opts.anneal_sched.type == e_sched_type::DUSTY_SCHED) { - // May get nan if there are no nets - bool restart_temp = t < t_exit || std::isnan(t_exit); - /* If the success rate or the temperature is * - * too low, reset the temperature and alpha. */ - if (success_rate < placer_opts.anneal_sched.success_min || restart_temp) { - // Only exit anneal when alpha gets too large. - if (alpha > placer_opts.anneal_sched.alpha_max) { - return false; - } - - // Take a half step from the restart temperature. - t = restart_t / sqrt(alpha); - // Update alpha. - alpha = 1.0 - ((1.0 - alpha) * placer_opts.anneal_sched.alpha_decay); - } else { - /* If the success rate is promising, next time * - * reset t to the current annealing temperature. */ - if (success_rate > placer_opts.anneal_sched.success_target) { - restart_t = t; - } - // Update t. - t *= alpha; - } - - // Update move lim. - update_move_lim(placer_opts.anneal_sched.success_target, success_rate); + VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); + // Automatically adjust alpha according to success rate. + if (success_rate > 0.96) { + alpha = 0.5; + } else if (success_rate > 0.8) { + alpha = 0.9; + } else if (success_rate > 0.15 || rlim > 1.) { + alpha = 0.95; } else { - VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); - // Automatically adjust alpha according to success rate. - if (success_rate > 0.96) { - alpha = 0.5; - } else if (success_rate > 0.8) { - alpha = 0.9; - } else if (success_rate > 0.15 || rlim > 1.) { - alpha = 0.95; - } else { - alpha = 0.8; - } - // Update temp. - t *= alpha; - // Must be duplicated to retain previous behavior. 
- if (t < t_exit || std::isnan(t_exit)) { - return false; - } + alpha = 0.8; + } + // Update temp. + t *= alpha; + // Must be duplicated to retain previous behavior. + if (t < t_exit || std::isnan(t_exit)) { + return false; } // Update the range limiter. @@ -283,8 +246,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, // Get the first range limiter placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - annealing_state_ = t_annealing_state(placer_opts_.anneal_sched, - EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved + annealing_state_ = t_annealing_state(EPSILON, // Set the temperature low to ensure that initial placement quality will be preserved placer_state_.move().first_rlim, first_move_lim, first_crit_exponent); diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 730f01eec96..bf3fb62692e 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -42,23 +42,18 @@ struct t_swap_stats { * Public members: * @param t * Temperature for simulated annealing. - * @param restart_t - * Temperature used after restart due to minimum success ratio. - * Currently only used and updated by DUSTY_SCHED. * @param alpha * Temperature decays factor (multiplied each outer loop iteration). * @param num_temps * The count of how many temperature iterations have passed. - * * @param rlim * Range limit for block swaps. - * Currently only updated by DUSTY_SCHED and AUTO_SCHED. + * Currently only updated by AUTO_SCHED. * @param crit_exponent * Used by timing-driven placement to "sharpen" the timing criticality. - * Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED. + * Depends on rlim. Currently only updated by AUTO_SCHED. * @param move_lim * Current block move limit. - * Currently only updated by DUSTY_SCHED. * @param move_lim_max * Maximum block move limit. 
* @@ -80,7 +75,6 @@ struct t_swap_stats { class t_annealing_state { public: float t; - float restart_t; float alpha; int num_temps; @@ -96,8 +90,7 @@ class t_annealing_state { public: //Constructor t_annealing_state() = default; - t_annealing_state(const t_annealing_sched& annealing_sched, - float first_t, + t_annealing_state(float first_t, float first_rlim, int first_move_lim, float first_crit_exponent); @@ -108,8 +101,6 @@ class t_annealing_state { * * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. - * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. - * See doc/src/vpr/dusty_sa.rst for more details. * * @return True->continues the annealing. False->exits the annealing. */ From f1aaf528bad5fe68c98ae181ef501850e128abe0 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 12:41:33 -0500 Subject: [PATCH 20/31] change the order of if statement so that e_place_algorithm::CRITICALITY_TIMING_PLACE is checked first --- vpr/src/place/annealer.cpp | 63 ++++++++++++++++------------- vpr/src/place/annealer.h | 4 +- vpr/src/route/connection_router.cpp | 2 +- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 04e2367fd31..4bba30fb103 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -445,11 +445,34 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_, bb_delta_c, timing_delta_c); - //For setup slack analysis, we first do a timing analysis to get the newest - //slack values resulted from the proposed block moves. If the move turns out - //to be accepted, we keep the updated slack values and commit the block moves. 
- //If rejected, we reject the proposed block moves and revert this timing analysis. - if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { + /* Take delta_c as a combination of timing and wiring cost. In + * addition to `timing_tradeoff`, we normalize the cost values. + * CRITICALITY_TIMING_PLACE algorithm works with somewhat stale + * timing information to save CPU time. + */ + VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, + "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " + "timing_delta_c %e, timing_cost_norm %e\n", + bb_delta_c, + costs_.bb_cost_norm, + timing_tradeoff, + timing_delta_c, + costs_.timing_cost_norm); + delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + /* For setup slack analysis, we first do a timing analysis to get the newest + * slack values resulted from the proposed block moves. If the move turns out + * to be accepted, we keep the updated slack values and commit the block moves. + * If rejected, we reject the proposed block moves and revert this timing analysis. + * + * It should be noted that when SLACK_TIMING_PLACE algorithm is used, proposed moves + * are evaluated with up-to-date timing information, which is more expensive but more + * accurate. + */ + // Invalidates timing of modified connections for incremental timing updates. 
pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_); @@ -473,19 +496,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Get the setup slack analysis cost */ //TODO: calculate a weighted average of the slack cost and wiring cost delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm; - } else if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { - /* Take delta_c as a combination of timing and wiring cost. In - * addition to `timing_tradeoff`, we normalize the cost values */ - VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, - "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, " - "timing_delta_c %e, timing_cost_norm %e\n", - bb_delta_c, - costs_.bb_cost_norm, - timing_tradeoff, - timing_delta_c, - costs_.timing_cost_norm); - delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm - + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; } else { VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, @@ -519,15 +529,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, costs_.cost += delta_c; costs_.bb_cost += bb_delta_c; - if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { - // Update the timing driven cost as usual - costs_.timing_cost += timing_delta_c; - - // Commit the setup slack information - // The timing delay and cost values should be committed already - commit_setup_slacks(setup_slacks_, placer_state_); - } - if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { costs_.timing_cost += timing_delta_c; @@ -539,6 +540,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, /* Update the connection_timing_cost and connection_delay * values from the temporary values. 
*/ placer_state_.mutable_timing().commit_td_cost(blocks_affected_); + + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + // Update the timing driven cost as usual + costs_.timing_cost += timing_delta_c; + + // Commit the setup slack information + // The timing delay and cost values should be committed already + commit_setup_slacks(setup_slacks_, placer_state_); } // Update net cost functions and reset flags. diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index bf3fb62692e..1197181f44e 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -191,7 +191,7 @@ class PlacementAnnealer { float timing_bb_factor, bool manual_move_enabled); - ///@brief Returns the total number iterations or attempted swaps + ///@brief Returns the total number iterations (attempted swaps). int get_total_iteration() const; ///@brief Returns a constant reference to the annealing state @@ -253,7 +253,7 @@ class PlacementAnnealer { int quench_recompute_limit_; ///Used to trigger a BB and NoC cost re-computation from scratch int moves_since_cost_recompute_; - ///Total number of iterations or attempted swaps + ///Total number of iterations (attempted swaps). 
int tot_iter_; ///Indicates whether the annealer has entered into the quench stage bool quench_started_; diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 6f2b5d2ebe6..7fd0f0d1680 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -10,7 +10,7 @@ static bool relevant_node_to_target(const RRGraphView* rr_graph, static void update_router_stats(RouterStats* router_stats, bool is_push, - RRNodeId rr_node_i, + RRNodeId rr_node_id, const RRGraphView* rr_graph); /** return tuple */ From e6e02acd20c10ed8172d49c0d0b6a97edb6762ad Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 12:58:51 -0500 Subject: [PATCH 21/31] don't resize PlacerMoveContext.??_coord in the constructor --- vpr/src/place/annealer.cpp | 63 ++++++++++++++++------------------ vpr/src/place/placer_state.cpp | 7 +--- vpr/src/place/placer_state.h | 2 +- 3 files changed, 31 insertions(+), 41 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 4bba30fb103..4fae47d7544 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -344,8 +344,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, */ auto& blk_loc_registry = placer_state_.mutable_blk_loc_registry(); - float rlim_escape_fraction = placer_opts_.rlim_escape_fraction; - float timing_tradeoff = placer_opts_.timing_tradeoff; + // increment the call counter + swap_stats_.num_ts_called++; PlaceCritParams crit_params{annealing_state_.crit_exponent, placer_opts_.place_crit_limit}; @@ -353,27 +353,19 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // move type and block type chosen by the agent t_propose_action proposed_action{e_move_type::UNIFORM, -1}; - swap_stats_.num_ts_called++; - MoveOutcomeStats move_outcome_stats; /* I'm using negative values of proposed_net_cost as a flag, * so DO NOT use cost functions that can go negative. 
*/ - double delta_c = 0; //Change in cost due to this swap. double bb_delta_c = 0; //Change in the bounding box (wiring) cost. double timing_delta_c = 0; //Change in the timing cost (delay * criticality). - // Determine whether we need to force swap two router blocks - bool router_block_move = false; - if (noc_opts_.noc) { - router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_); - } /* Allow some fraction of moves to not be restricted by rlim, * in the hopes of better escaping local minima. */ float rlim; - if (rlim_escape_fraction > 0. && rng_.frand() < rlim_escape_fraction) { + if (placer_opts_.rlim_escape_fraction > 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) { rlim = std::numeric_limits::infinity(); } else { rlim = annealing_state_.rlim; @@ -381,6 +373,12 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, e_create_move create_move_outcome = e_create_move::ABORT; + // Determine whether we need to force swap two NoC router blocks + bool router_block_move = false; + if (noc_opts_.noc) { + router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_); + } + //When manual move toggle button is active, the manual move window asks the user for input. if (manual_move_enabled) { #ifndef NO_GRAPHICS @@ -422,30 +420,28 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, } else { VTR_ASSERT(create_move_outcome == e_create_move::VALID); - /* - * To make evaluating the move simpler (e.g. calculating changed bounding box), + /* To make evaluating the move simpler (e.g. calculating changed bounding box), * we first move the blocks to their new locations (apply the move to * blk_loc_registry.block_locs) and then compute the change in cost. If the move - * is accepted, the inverse look-up in place_ctx.grid_blocks is updated + * is accepted, the inverse look-up in blk_loc_registry.grid_blocks is updated * (committing the move). 
If the move is rejected, the blocks are returned to * their original positions (reverting blk_loc_registry.block_locs to its original state). * - * Note that the inverse look-up place_ctx.grid_blocks is only updated after + * Note that the inverse look-up blk_loc_registry.grid_blocks is only updated after * move acceptance is determined, so it should not be used when evaluating a move. */ - /* Update the block positions */ + // Update the block positions blk_loc_registry.apply_move_blocks(blocks_affected_); - //Find all the nets affected by this swap and update the wiring costs. - //This cost value doesn't depend on the timing info. - // - //Also find all the pins affected by the swap, and calculates new connection - //delays and timing costs and store them in proposed_* data structures. + /* Find all the nets affected by this swap and update the wiring costs. + * This cost value doesn't depend on the timing info. + * Also find all the pins affected by the swap, and calculates new connection + * delays and timing costs and store them in proposed_* data structures. + */ net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_, bb_delta_c, timing_delta_c); - if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { /* Take delta_c as a combination of timing and wiring cost. In * addition to `timing_tradeoff`, we normalize the cost values. 
@@ -457,11 +453,11 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, "timing_delta_c %e, timing_cost_norm %e\n", bb_delta_c, costs_.bb_cost_norm, - timing_tradeoff, + placer_opts_.timing_tradeoff, timing_delta_c, costs_.timing_cost_norm); - delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm - + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; + delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm; } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { /* For setup slack analysis, we first do a timing analysis to get the newest * slack values resulted from the proposed block moves. If the move turns out @@ -515,7 +511,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_); } - // 1 -> move accepted, 0 -> rejected. + // determine whether the move is accepted or rejected move_outcome = assess_swap_(delta_c, annealing_state_.t); //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move. @@ -582,9 +578,13 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // Restore the blk_loc_registry.block_locs data structures to their state before the move. blk_loc_registry.revert_move_blocks(blocks_affected_); - if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { - /* Revert the timing delays and costs to pre-update values. */ - /* These routines must be called after reverting the block moves. 
*/ + if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { + // Un-stage the values stored in proposed_* data structures + placer_state_.mutable_timing().revert_td_cost(blocks_affected_); + } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { + /* Revert the timing delays and costs to pre-update values. + * These routines must be called after reverting the block moves. + */ //TODO: make this process incremental comp_td_connection_delays(delay_model_, placer_state_); comp_td_costs(delay_model_, *criticalities_, placer_state_, &costs_.timing_cost); @@ -603,11 +603,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, "The current setup slacks should be identical to the values before the try swap timing info update."); } - if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) { - // Un-stage the values stored in proposed_* data structures - placer_state_.mutable_timing().revert_td_cost(blocks_affected_); - } - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; } diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp index e83d74bbe2c..998c097b4ca 100644 --- a/vpr/src/place/placer_state.cpp +++ b/vpr/src/place/placer_state.cpp @@ -8,11 +8,6 @@ PlacerMoveContext::PlacerMoveContext(bool cube_bb) { const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); - // allocate helper vectors that are used by many move generators - X_coord.resize(10, 0); - Y_coord.resize(10, 0); - layer_coord.resize(10, 0); - const size_t num_nets = cluster_ctx.clb_nlist.nets().size(); const int num_layers = device_ctx.grid.get_num_layers(); @@ -27,7 +22,7 @@ PlacerMoveContext::PlacerMoveContext(bool cube_bb) { num_sink_pin_layer.resize({num_nets, size_t(num_layers)}); for (size_t flat_idx = 
0; flat_idx < num_sink_pin_layer.size(); flat_idx++) { - auto& elem = num_sink_pin_layer.get(flat_idx); + int& elem = num_sink_pin_layer.get(flat_idx); elem = OPEN; } } diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index c727ac181e5..8f3b966a56d 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -138,7 +138,7 @@ struct PlacerMoveContext : public Context { // The first range limit calculated by the annealer float first_rlim; - // Scratch vectors that are used by different directed moves for temporary calculations (allocated here to save runtime) + // Scratch vectors that are used by different directed moves for temporary calculations // These vectors will grow up with the net size as it is mostly used to save coords of the net pins or net bb edges // Given that placement moves involve operations on each coordinate independently, we chose to // utilize a Struct of Arrays (SoA) rather than an Array of Struct (AoS). From f43d22d4bf27f0a80daec6dd22f6599d0a12bd5b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 13:07:17 -0500 Subject: [PATCH 22/31] add incr_blk_type_moves() and incr_accept_reject() methods --- vpr/src/place/annealer.cpp | 16 ++++------------ vpr/src/place/move_generator.h | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 4fae47d7544..6106f88e0e1 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -395,9 +395,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_); } - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } + 
move_type_stats_.incr_blk_type_moves(proposed_action); if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) LOG_MOVE_STATS_PROPOSED(); @@ -552,11 +550,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // Update clb data structures since we kept the move. blk_loc_registry.commit_move_blocks(blocks_affected_); - // if the agent proposed the block type, then collect the block type stat - if (proposed_action.logical_blk_type_index != -1) { - ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - if (noc_opts_.noc){ noc_cost_handler_->commit_noc_costs(); costs_ += noc_delta_c; @@ -603,15 +596,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, "The current setup slacks should be identical to the values before the try swap timing info update."); } - if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat - ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; - } - /* Revert the traffic flow routes within the NoC*/ + // Revert the traffic flow routes within the NoC if (noc_opts_.noc) { noc_cost_handler_->revert_noc_traffic_flow_routes(blocks_affected_); } } + move_type_stats_.incr_accept_reject(proposed_action, move_outcome)'' + move_outcome_stats.delta_cost_norm = delta_c; move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs_.bb_cost_norm; move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs_.timing_cost_norm; diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h index 172c04d34f5..e39493e16c6 100644 --- a/vpr/src/place/move_generator.h +++ b/vpr/src/place/move_generator.h @@ -38,6 +38,27 @@ struct MoveTypeStat { * @brief Prints placement perturbation distribution by block and move type. 
*/ void print_placement_move_types_stats() const; + + inline void incr_blk_type_moves(const t_propose_action& proposed_action) { + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } + + inline void incr_accept_reject(const t_propose_action& proposed_action, + e_move_result move_result) { + if (move_result == e_move_result::ACCEPTED) { + // if the agent proposed the block type, then collect the block type stat + if (proposed_action.logical_blk_type_index != -1) { + ++accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } else { + VTR_ASSERT_SAFE(move_result == e_move_result::REJECTED); + if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat + ++rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type]; + } + } + } }; /** From 64dcd5bee131188f2886d4477c7c6f03d231e90a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 13:17:17 -0500 Subject: [PATCH 23/31] don't include time.h as we no longer call clock() --- vpr/src/draw/draw.cpp | 11 ----------- vpr/src/draw/draw_basic.cpp | 10 ---------- vpr/src/draw/draw_floorplanning.cpp | 22 +--------------------- vpr/src/draw/draw_mux.cpp | 11 ----------- vpr/src/draw/draw_rr.cpp | 10 ---------- vpr/src/draw/draw_rr_edges.cpp | 11 ----------- vpr/src/draw/draw_searchbar.cpp | 10 ---------- vpr/src/draw/draw_toggle_functions.cpp | 11 ----------- vpr/src/draw/draw_triangle.cpp | 10 ---------- vpr/src/draw/search_bar.cpp | 24 ++++++------------------ 10 files changed, 7 insertions(+), 123 deletions(-) diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index c77ab624c5c..546bc8b55f3 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -51,17 +51,6 @@ #include "move_utils.h" #include 
"ui_setup.h" - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 43fc2b78b32..99058b0975c 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -28,16 +28,6 @@ #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp index 9ba201987aa..9e56cfda5ac 100644 --- a/vpr/src/draw/draw_floorplanning.cpp +++ b/vpr/src/draw/draw_floorplanning.cpp @@ -1,39 +1,19 @@ #include -#include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" -#include "atom_netlist.h" + #include "draw_floorplanning.h" #include "user_place_constraints.h" #include "draw_color.h" #include "draw.h" -#include "draw_rr.h" -#include "draw_rr_edges.h" -#include "draw_basic.h" -#include "draw_toggle_functions.h" -#include "draw_triangle.h" -#include "draw_searchbar.h" -#include "draw_mux.h" #include "read_xml_arch_file.h" #include "draw_global.h" #include "intra_logic_block.h" -#include "move_utils.h" #include "route_export.h" #include "tatum/report/TimingPathCollector.hpp" - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp index e741112b6ba..e83fde50296 100644 --- a/vpr/src/draw/draw_mux.cpp +++ b/vpr/src/draw/draw_mux.cpp @@ -10,17 +10,6 @@ #include "draw_mux.h" #include "read_xml_arch_file.h" - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. 
The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index abfbf0babe8..e3c2467cd9e 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -24,16 +24,6 @@ #include "read_xml_arch_file.h" #include "draw_global.h" -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index c4e8cbe507b..0815be661a6 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -18,17 +18,6 @@ #include "draw_global.h" #include "draw_basic.h" - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index 834457e9263..a90583f42ce 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -16,16 +16,6 @@ #include "draw_global.h" #include "intra_logic_block.h" -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp index 9dab5955450..968808c2906 100644 --- a/vpr/src/draw/draw_toggle_functions.cpp +++ b/vpr/src/draw/draw_toggle_functions.cpp @@ -14,17 +14,6 @@ #include "draw_global.h" #include "draw_basic.h" - -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/draw_triangle.cpp b/vpr/src/draw/draw_triangle.cpp index b37785b3ab1..82b5bd45376 100644 --- a/vpr/src/draw/draw_triangle.cpp +++ b/vpr/src/draw/draw_triangle.cpp @@ -8,16 +8,6 @@ #include "draw_global.h" -#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -#else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. */ -# include -#endif - #ifndef NO_GRAPHICS //To process key presses we need the X11 keysym definitions, diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index b557b27d5ca..5e78934841a 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -42,16 +42,6 @@ # include "route_export.h" # include "search_bar.h" -# ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will * - * track CPU runtime. */ -# include -# else /* For X11. The clock() function in time.h will not output correct time difference * - * for X11, because the graphics is processed by the Xserver rather than local CPU, * - * which means tracking CPU time will not be the same as the actual wall clock time. * - * Thus, so use gettimeofday() in sys/time.h to track actual calendar time. 
*/ -# include -# endif - //To process key presses we need the X11 keysym definitions, //which are unavailable when building with MINGW # if defined(X11) && !defined(__MINGW32__) @@ -76,7 +66,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { std::stringstream ss(user_input); auto search_type = get_search_type(app); - if (search_type == "") + if (search_type.empty()) return; // reset @@ -119,7 +109,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { * * If the block does not exist in the atom netlist, we will check the CLB netlist to see if * they searched for a cluster block*/ - std::string block_name = ""; + std::string block_name; ss >> block_name; AtomBlockId atom_blk_id = atom_ctx.nlist.find_block(block_name); @@ -159,7 +149,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { else if (search_type == "Net Name") { //in this case, all nets (clb and non-clb) are contained in the atom netlist //So we only need to search this one - std::string net_name = ""; + std::string net_name; ss >> net_name; AtomNetId atom_net_id = atom_ctx.nlist.find_net(net_name); @@ -376,8 +366,6 @@ void warning_dialog_box(const char* message) { "response", G_CALLBACK(gtk_widget_destroy), dialog); - - return; } /** @@ -411,7 +399,7 @@ void search_type_changed(GtkComboBox* self, ezgl::application* app) { } else if (searchType == "Net Name") { gtk_entry_completion_set_model(completion, netNames); } else { //setting to null if option does not require auto-complete - gtk_entry_completion_set_model(completion, NULL); + gtk_entry_completion_set_model(completion, nullptr); gtk_entry_set_completion(searchBar, nullptr); } } @@ -506,10 +494,10 @@ void enable_autocomplete(ezgl::application* app) { auto draw_state = get_draw_state_vars(); std::string searchType = get_search_type(app); - if (searchType == "") + if (searchType.empty()) return; //Checking to make sure that we are on a mode that uses auto-complete - if 
(gtk_entry_completion_get_model(completion) == NULL) { + if (gtk_entry_completion_get_model(completion) == nullptr) { std::cout << "NO MODEL SELECTED" << std::endl; return; } From b3c7e72844ef1497ef63093f76bf323094534219 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 15:04:05 -0500 Subject: [PATCH 24/31] move the ownership of move generators to annealer --- vpr/src/place/annealer.cpp | 47 +++++++++++++++++++++++-------------- vpr/src/place/annealer.h | 48 ++++++++++++++++++++++++-------------- vpr/src/place/place.cpp | 38 ++++-------------------------- 3 files changed, 64 insertions(+), 69 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index cc726283aa0..415374a61bc 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -16,6 +16,7 @@ #include "place_timing_update.h" #include "read_place.h" #include "placer_breakpoint.h" +#include "RL_agent_util.h" /** * @brief Check if the setup slack has gotten better or worse due to block swap. @@ -31,7 +32,7 @@ * If no slack values have changed, then return an arbitrary positive number. A * move resulting in no change in the slack values should probably be unnecessary. * - * The sorting is need to prevent in the unlikely circumstances that a bad slack + * The sorting is needed to prevent in the unlikely circumstance that a bad slack * value suddenly got very good due to the block move, while a good slack value * got very bad, perhaps even worse than the original worse slack value. 
*/ @@ -183,8 +184,8 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, std::optional& noc_cost_handler, const t_noc_opts& noc_opts, vtr::RngContainer& rng, - MoveGenerator& move_generator_1, - MoveGenerator& move_generator_2, + std::unique_ptr&& move_generator_1, + std::unique_ptr&& move_generator_2, ManualMoveGenerator& manual_move_generator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, @@ -199,9 +200,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , noc_cost_handler_(noc_cost_handler) , noc_opts_(noc_opts) , rng_(rng) - , move_generator_1_(move_generator_1) - , move_generator_2_(move_generator_2) + , move_generator_1_(std::move(move_generator_1)) + , move_generator_2_(std::move(move_generator_2)) , manual_move_generator_(manual_move_generator) + , agent_state_(e_agent_state::EARLY_IN_THE_ANNEAL) , delay_model_(delay_model) , criticalities_(criticalities) , setup_slacks_(setup_slacks) @@ -294,12 +296,8 @@ float PlacementAnnealer::estimate_starting_temperature() { } #endif /*NO_GRAPHICS*/ - // TODO: remove this - constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; - // Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack - e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm, - REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled); + e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled); if (swap_result == e_move_result::ACCEPTED) { num_accepted++; @@ -334,7 +332,6 @@ float PlacementAnnealer::estimate_starting_temperature() { e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, const t_place_algorithm& place_algorithm, - float timing_bb_factor, bool manual_move_enabled) { /* Picks some block and moves it to another spot. If this spot is * occupied, switch the blocks. Assess the change in cost function. 
@@ -622,7 +619,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, // the move generators status since this outcome is not a direct // consequence of the move generator if (!router_block_move) { - move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor); + move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, REWARD_BB_TIMING_RELATIVE_WEIGHT); } #ifndef NO_GRAPHICS @@ -666,9 +663,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() { costs_.cost = costs_.get_total_cost(placer_opts_, noc_opts_); } -/* Function which contains the inner loop of the simulated annealing */ -void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, - float timing_bb_factor) { +void PlacementAnnealer::placement_inner_loop() { // How many times have we dumped placement to a file this temperature? int inner_placement_save_count = 0; @@ -676,10 +671,12 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, bool manual_move_enabled = false; + MoveGenerator& move_generator = select_move_generator(move_generator_1_, move_generator_2_, agent_state_, + placer_opts_, quench_started_); + // Inner loop begins for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) { - e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, - timing_bb_factor, manual_move_enabled); + e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled); if (swap_result == e_move_result::ACCEPTED) { // Move was accepted. Update statistics that are useful for the annealing schedule. @@ -743,6 +740,18 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator, // Calculate the success_rate and std_dev of the costs. 
placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim); + // update the RL agent's state + if (!quench_started_) { + if (placer_opts_.place_algorithm.is_timing_driven() && + placer_opts_.place_agent_multistate && + agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) { + if (annealing_state_.alpha < 0.85 && annealing_state_.alpha > 0.6) { + agent_state_ = e_agent_state::LATE_IN_THE_ANNEAL; + VTR_LOG("Agent's 2nd state: \n"); + } + } + } + tot_iter_ += annealing_state_.move_lim; ++annealing_state_.num_temps; } @@ -752,6 +761,10 @@ int PlacementAnnealer::get_total_iteration() const { return tot_iter_; } +e_agent_state PlacementAnnealer::get_agent_state() const { + return agent_state_; +} + const t_annealing_state& PlacementAnnealer::get_annealing_state() const { return annealing_state_; } diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 1197181f44e..3a84a05756d 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -12,6 +12,7 @@ class PlacerState; class t_placer_costs; struct t_placer_opts; +enum class e_agent_state; class NocCostHandler; class ManualMoveGenerator; @@ -152,8 +153,8 @@ class PlacementAnnealer { std::optional& noc_cost_handler, const t_noc_opts& noc_opts, vtr::RngContainer& rng, - MoveGenerator& move_generator_1, - MoveGenerator& move_generator_2, + std::unique_ptr&& move_generator_1, + std::unique_ptr&& move_generator_2, ManualMoveGenerator& manual_move_generator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, @@ -162,9 +163,8 @@ class PlacementAnnealer { NetPinTimingInvalidator* pin_timing_invalidator, int move_lim); - ///@brief Contains the inner loop of the simulated annealing - void placement_inner_loop(MoveGenerator& move_generator, - float timing_bb_factor); + /// @brief Contains the inner loop of the simulated annealing + void placement_inner_loop(); void outer_loop_update_timing_info(); @@ -188,13 +188,15 @@ class PlacementAnnealer { */ e_move_result 
try_swap(MoveGenerator& move_generator, const t_place_algorithm& place_algorithm, - float timing_bb_factor, bool manual_move_enabled); - ///@brief Returns the total number iterations (attempted swaps). + /// @brief Returns the total number iterations (attempted swaps). int get_total_iteration() const; - ///@brief Returns a constant reference to the annealing state + /// @brief Return the RL agent's state + e_agent_state get_agent_state() const; + + /// @brief Returns a constant reference to the annealing state const t_annealing_state& get_annealing_state() const; std::tuple get_stats() const; @@ -209,7 +211,7 @@ class PlacementAnnealer { private: e_move_result assess_swap_(double delta_c, double t); - public: + private: const t_placer_opts& placer_opts_; PlacerState& placer_state_; t_placer_costs& costs_; @@ -218,9 +220,11 @@ class PlacementAnnealer { const t_noc_opts& noc_opts_; vtr::RngContainer& rng_; - MoveGenerator& move_generator_1_; - MoveGenerator& move_generator_2_; + std::unique_ptr move_generator_1_; + std::unique_ptr move_generator_2_; ManualMoveGenerator& manual_move_generator_; + /// RL agent state definition + e_agent_state agent_state_; const PlaceDelayModel* delay_model_; PlacerCriticalities* criticalities_; @@ -231,7 +235,7 @@ class PlacementAnnealer { int outer_crit_iter_count_; t_annealing_state annealing_state_; - ///Swap statistics keep record of the number accepted/rejected/aborted swaps. + /// Swap statistics keep record of the number accepted/rejected/aborted swaps. 
t_swap_stats swap_stats_; MoveTypeStat move_type_stats_; t_placer_statistics placer_stats_; @@ -247,15 +251,16 @@ class PlacementAnnealer { */ static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000; - ///Specifies how often timing information is recomputed when the annealer isn't in the quench stage + /// Specifies how often (after how many swaps) timing information is recomputed + /// when the annealer isn't in the quench stage int inner_recompute_limit_; - ///Specifies how often timing information is recomputed when the annealer is in the quench stage + /// Specifies how often timing information is recomputed when the annealer is in the quench stage int quench_recompute_limit_; - ///Used to trigger a BB and NoC cost re-computation from scratch + /// Used to trigger a BB and NoC cost re-computation from scratch int moves_since_cost_recompute_; - ///Total number of iterations (attempted swaps). + /// Total number of iterations (attempted swaps). int tot_iter_; - ///Indicates whether the annealer has entered into the quench stage + /// Indicates whether the annealer has entered into the quench stage bool quench_started_; void LOG_MOVE_STATS_HEADER(); @@ -263,7 +268,14 @@ class PlacementAnnealer { void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, const char* outcome, const char* reason); + /** + * @brief Defines the RL agent's reward function factor constant. This factor controls the weight of bb cost + * compared to the timing cost in the agent's reward function. The reward is calculated as + * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) + */ + static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; + private: - ///@brief Find the starting temperature for the annealing loop. + /// @brief Find the starting temperature for the annealing loop. 
float estimate_starting_temperature(); }; \ No newline at end of file diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 50a568ac0db..1a3616232de 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -34,7 +34,6 @@ #include "place_timing_update.h" #include "move_transactions.h" #include "move_utils.h" -#include "place_constraints.h" #include "buttons.h" #include "manual_move_generator.h" @@ -57,11 +56,6 @@ #include "net_cost_handler.h" #include "placer_state.h" -/* define the RL agent's reward function factor constant. This factor controls the weight of bb cost * - * compared to the timing cost in the agent's reward function. The reward is calculated as * - * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) - */ -static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; /********************* Static subroutines local to place.c *******************/ #ifdef VERBOSE @@ -432,14 +426,8 @@ void try_place(const Netlist<>& net_list, } #endif /* ENABLE_ANALYTIC_PLACE */ - //RL agent state definition - e_agent_state agent_state = e_agent_state::EARLY_IN_THE_ANNEAL; - - //Define the timing bb weight factor for the agent's reward function - float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT; - PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, - noc_opts, rng, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(), + noc_opts, rng, std::move(move_generator), std::move(move_generator2), manual_move_generator, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); const t_annealing_state& annealing_state = annealer.get_annealing_state(); @@ -462,34 +450,20 @@ void try_place(const Netlist<>& net_list, sWNS = timing_info->setup_worst_negative_slack(); // see if we should save the current placement solution as a checkpoint - if 
(placer_opts.place_checkpointing && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) { + if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) { save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(), placement_checkpoint, timing_info, costs, critical_path.delay()); } } - // select the appropriate move generator - MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2, - agent_state, placer_opts, false); - // do a complete inner loop iteration - annealer.placement_inner_loop(current_move_generator, - timing_bb_factor); + annealer.placement_inner_loop(); print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(), critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(), noc_opts.noc, costs.noc_cost_terms); - if (placer_opts.place_algorithm.is_timing_driven() - && placer_opts.place_agent_multistate - && agent_state == e_agent_state::EARLY_IN_THE_ANNEAL) { - if (annealing_state.alpha < 0.85 && annealing_state.alpha > 0.6) { - agent_state = e_agent_state::LATE_IN_THE_ANNEAL; - VTR_LOG("Agent's 2nd state: \n"); - } - } - sprintf(msg, "Cost: %g BB Cost %g TD Cost %g Temperature: %g", costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t); update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info); @@ -513,13 +487,9 @@ void try_place(const Netlist<>& net_list, annealer.outer_loop_update_timing_info(); - // select the appropriate move generator - MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2, - agent_state, placer_opts, true); - /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ - annealer.placement_inner_loop(current_move_generator, timing_bb_factor); + annealer.placement_inner_loop(); if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); From 
346446874e732b68226abb5e5f6cb12b1ff24667 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 15:09:53 -0500 Subject: [PATCH 25/31] construct manual move generator in annealer's constructor --- vpr/src/place/annealer.cpp | 7 +++---- vpr/src/place/annealer.h | 6 ++---- vpr/src/place/place.cpp | 6 +----- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 415374a61bc..3d686895812 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -163,10 +163,10 @@ void t_annealing_state::update_rlim(float success_rate) { } void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { - /* If rlim == FINAL_RLIM, then scale == 0. */ + // If rlim == FINAL_RLIM, then scale == 0. float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; - /* Apply the scaling factor on crit_exponent. */ + // Apply the scaling factor on crit_exponent. crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; } @@ -186,7 +186,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, vtr::RngContainer& rng, std::unique_ptr&& move_generator_1, std::unique_ptr&& move_generator_2, - ManualMoveGenerator& manual_move_generator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, @@ -202,7 +201,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , rng_(rng) , move_generator_1_(std::move(move_generator_1)) , move_generator_2_(std::move(move_generator_2)) - , manual_move_generator_(manual_move_generator) + , manual_move_generator_(placer_state, rng) , agent_state_(e_agent_state::EARLY_IN_THE_ANNEAL) , delay_model_(delay_model) , criticalities_(criticalities) diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 3a84a05756d..4131a1ebbe2 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ 
-5,6 +5,7 @@ #include "move_generator.h" // movestats #include "net_cost_handler.h" +#include "manual_move_generator.h" #include #include @@ -15,7 +16,6 @@ struct t_placer_opts; enum class e_agent_state; class NocCostHandler; -class ManualMoveGenerator; class NetPinTimingInvalidator; /** @@ -155,7 +155,6 @@ class PlacementAnnealer { vtr::RngContainer& rng, std::unique_ptr&& move_generator_1, std::unique_ptr&& move_generator_2, - ManualMoveGenerator& manual_move_generator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, @@ -222,7 +221,7 @@ class PlacementAnnealer { std::unique_ptr move_generator_1_; std::unique_ptr move_generator_2_; - ManualMoveGenerator& manual_move_generator_; + ManualMoveGenerator manual_move_generator_; /// RL agent state definition e_agent_state agent_state_; @@ -243,7 +242,6 @@ class PlacementAnnealer { t_pl_blocks_to_be_moved blocks_affected_; private: - /** * @brief The maximum number of swap attempts before invoking the * once-in-a-while placement legality check as well as floating point diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 1a3616232de..dc907a751ea 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -36,8 +36,6 @@ #include "move_utils.h" #include "buttons.h" -#include "manual_move_generator.h" - #include "PlacementDelayCalculator.h" #include "VprTimingGraphResolver.h" #include "timing_util.h" @@ -216,8 +214,6 @@ void try_place(const Netlist<>& net_list, } #endif - ManualMoveGenerator manual_move_generator(placer_state, rng); - vtr::ScopedStartFinishTimer timer("Placement"); if (noc_opts.noc) { @@ -427,7 +423,7 @@ void try_place(const Netlist<>& net_list, #endif /* ENABLE_ANALYTIC_PLACE */ PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler, - noc_opts, rng, std::move(move_generator), std::move(move_generator2), manual_move_generator, place_delay_model.get(), + noc_opts, rng, 
std::move(move_generator), std::move(move_generator2), place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim); const t_annealing_state& annealing_state = annealer.get_annealing_state(); From f471b40f9590aaecce1b3ba5c0980ff4e4a1a94a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 15:49:26 -0500 Subject: [PATCH 26/31] add some comments --- libs/libarchfpga/src/device_grid.h | 4 ++ vpr/src/place/annealer.cpp | 1 - vpr/src/place/annealer.h | 64 ++++++++++++++++++++---------- vpr/src/place/place.cpp | 4 +- 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h index a82dd043da5..8e1332559ed 100644 --- a/libs/libarchfpga/src/device_grid.h +++ b/libs/libarchfpga/src/device_grid.h @@ -38,6 +38,10 @@ class DeviceGrid { size_t width() const { return grid_.dim_size(1); } ///@brief Return the height of the grid at the specified layer size_t height() const { return grid_.dim_size(2); } + ///@brief Return the grid dimensions in (# of layers, width, height) format + std::tuple dim_sizes() const { + return {grid_.dim_size(0), grid_.dim_size(1), grid_.dim_size(2)}; + } ///@brief Return the size of the flattened grid on the given layer inline size_t grid_size() const { diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 3d686895812..e3aab3316ee 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -635,7 +635,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, return move_outcome; } -/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ void PlacementAnnealer::outer_loop_update_timing_info() { if (placer_opts_.place_algorithm.is_timing_driven()) { /* At each temperature change we update these values to be used diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 
4131a1ebbe2..039ecfb652f 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -37,9 +37,6 @@ struct t_swap_stats { * loop iteration. It stores various important variables that need to * be accessed during the placement inner loop. * - * Private variables are not given accessor functions. They serve as - * macros originally defined in place.cpp as global scope variables. - * * Public members: * @param t * Temperature for simulated annealing. @@ -134,13 +131,6 @@ class t_annealing_state { * factor is calculated and applied linearly. */ inline void update_crit_exponent(const t_placer_opts& placer_opts); - - /** - * @brief Update the move limit based on the success rate. - * - * The value is bounded between 1 and move_lim_max. - */ - inline void update_move_lim(float success_target, float success_rate); }; @@ -162,11 +152,23 @@ class PlacementAnnealer { NetPinTimingInvalidator* pin_timing_invalidator, int move_lim); - /// @brief Contains the inner loop of the simulated annealing + /** + * @brief Contains the inner loop of the simulated annealing that performs + * a certain number of swaps with a single temperature + */ void placement_inner_loop(); + /** + * @brief Updates the setup slacks and criticalities before the inner loop + * of the annealing/quench. It also updates normalization factors for different + * placement cost terms. + */ void outer_loop_update_timing_info(); + /** + * @brief Update the annealing state according to the annealing schedule selected. + * @return True->continues the annealing. False->exits the annealing. + */ bool outer_loop_update_state(); /** @@ -189,6 +191,13 @@ class PlacementAnnealer { const t_place_algorithm& place_algorithm, bool manual_move_enabled); + /** + * @brief Starts the quench stage in simulated annealing by + * setting the temperature to zero and reverting the move range limit + * to the initial value. + */ + void start_quench(); + /// @brief Returns the total number iterations (attempted swaps). 
int get_total_iteration() const; @@ -198,31 +207,45 @@ class PlacementAnnealer { /// @brief Returns a constant reference to the annealing state const t_annealing_state& get_annealing_state() const; + /// @brief Returns constant references to different statistics objects std::tuple get_stats() const; + private: /** - * @brief Starts the quench stage in simulated annealing by - * setting the temperature to zero and reverting the move range limit - * to the initial value. + * @brief Determines whether a move should be accepted or not. + * Moves with negative delta cost are always accepted, but + * moves that increase the total cost are accepted with a + * probability that diminishes as the temperature decreases. + * @param delta_c The cost difference if the move is accepted. + * @param t The annealer's temperature. + * @return Whether the move is accepted or not. */ - void start_quench(); - - private: e_move_result assess_swap_(double delta_c, double t); + /// @brief Find the starting temperature for the annealing loop. 
+ float estimate_starting_temperature(); + private: const t_placer_opts& placer_opts_; PlacerState& placer_state_; + /// Stores different placement cost terms t_placer_costs& costs_; + /// Computes bounding box for each cluster net NetCostHandler& net_cost_handler_; + /// Computes NoC-related cost terms when NoC optimization are enabled std::optional& noc_cost_handler_; + /// Contains weighting factors for NoC-related cost terms const t_noc_opts& noc_opts_; + /// Random number generator for selecting random blocks and random locations vtr::RngContainer& rng_; + /// The move generator used in the first state of RL agent and initial temperature computation std::unique_ptr move_generator_1_; + /// The move generator used in the second state of RL agent std::unique_ptr move_generator_2_; + /// Handles manual swaps proposed by the user through graphical user interface ManualMoveGenerator manual_move_generator_; - /// RL agent state definition + /// RL agent state e_agent_state agent_state_; const PlaceDelayModel* delay_model_; @@ -239,6 +262,7 @@ class PlacementAnnealer { MoveTypeStat move_type_stats_; t_placer_statistics placer_stats_; + /// Keep record of moved blocks and affected pins in a swap t_pl_blocks_to_be_moved blocks_affected_; private: @@ -272,8 +296,4 @@ class PlacementAnnealer { * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost) */ static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4; - - private: - /// @brief Find the starting temperature for the annealing loop. 
- float estimate_starting_temperature(); }; \ No newline at end of file diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index dc907a751ea..9af234a13f8 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -800,8 +800,8 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, const PlacementDelayCalculator& delay_calc, bool is_flat, const BlkLocRegistry& blk_loc_registry) { - auto& timing_ctx = g_vpr_ctx.timing(); - auto& atom_ctx = g_vpr_ctx.atom(); + const auto& timing_ctx = g_vpr_ctx.timing(); + const auto& atom_ctx = g_vpr_ctx.atom(); VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph, delay_calc, is_flat, blk_loc_registry); From 216893b2ae3535128d2aa25ec57f96ab85e22949 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 13 Nov 2024 17:59:04 -0500 Subject: [PATCH 27/31] fix compilation error by remove definition for t_annealing_state::update_move_lim() --- vpr/src/place/annealer.cpp | 7 ------- vpr/src/timing/PostClusterDelayCalculator.tpp | 7 ++----- vpr/src/timing/clb_delay_calc.inl | 2 +- vpr/src/timing/timing_info.h | 2 +- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index e3aab3316ee..56f419477e2 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -10,7 +10,6 @@ #include "place_util.h" #include "placer_state.h" #include "move_utils.h" -#include "manual_move_generator.h" #include "noc_place_utils.h" #include "NetPinTimingInvalidator.h" #include "place_timing_update.h" @@ -171,12 +170,6 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { + placer_opts.td_place_exp_first; } -void t_annealing_state::update_move_lim(float success_target, float success_rate) { - move_lim = move_lim_max * (success_target / success_rate); - move_lim = std::min(move_lim, move_lim_max); - move_lim = std::max(move_lim, 1); -} - 
PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, PlacerState& placer_state, t_placer_costs& costs, diff --git a/vpr/src/timing/PostClusterDelayCalculator.tpp b/vpr/src/timing/PostClusterDelayCalculator.tpp index 7f6cda39022..9c989cec03f 100644 --- a/vpr/src/timing/PostClusterDelayCalculator.tpp +++ b/vpr/src/timing/PostClusterDelayCalculator.tpp @@ -247,12 +247,9 @@ inline tatum::Time PostClusterDelayCalculator::atom_net_delay(const tatum::Timin set_cached_pins(edge_id, delay_type, (ParentPinId&)atom_src_pin, (ParentPinId&)atom_sink_pin); } else { - ClusterBlockId clb_src_block; - ClusterBlockId clb_sink_block; - - clb_src_block = netlist_lookup_.atom_clb(atom_src_block); + ClusterBlockId clb_src_block = netlist_lookup_.atom_clb(atom_src_block); VTR_ASSERT(clb_src_block != ClusterBlockId::INVALID()); - clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block); + ClusterBlockId clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block); VTR_ASSERT(clb_sink_block != ClusterBlockId::INVALID()); const t_pb_graph_pin* src_gpin = netlist_lookup_.atom_pin_pb_graph_pin(atom_src_pin); diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl index 67524a82f7b..acfbc92a189 100644 --- a/vpr/src/timing/clb_delay_calc.inl +++ b/vpr/src/timing/clb_delay_calc.inl @@ -22,7 +22,7 @@ inline float ClbDelayCalc::internal_src_to_internal_sink_delay(const ClusterBloc } inline float ClbDelayCalc::trace_delay(ClusterBlockId clb, int src_pb_route_id, int sink_pb_route_id, DelayType delay_type) const { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); VTR_ASSERT(src_pb_route_id < cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins); VTR_ASSERT(sink_pb_route_id < cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins); diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 323ac7efbb6..14d3b08f939 100644 --- a/vpr/src/timing/timing_info.h +++ 
b/vpr/src/timing/timing_info.h @@ -122,7 +122,7 @@ class HoldTimingInfo : public virtual TimingInfo { //Generic interface which provides both setup and hold related timing information // //This is useful for algorithms which require access to both setup and hold timing -//information (e.g. simulatneously optimizing setup and hold) +//information (e.g. simultaneously optimizing setup and hold) // //This class supports both the SetupTimingInfo and HoldTimingInfo interfaces and //can be used in place of them in any algorithm requiring setup or hold related From c413d222e653e0ba396089c3e5f3374658de1a82 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 16 Nov 2024 20:22:33 -0500 Subject: [PATCH 28/31] add comment for PlacementAnnealer class make a few methods private --- vpr/src/place/annealer.cpp | 18 +++++------ vpr/src/place/annealer.h | 64 +++++++++++++++++++++++++------------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 56f419477e2..4696fc37b85 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -217,7 +217,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched); - if (placer_opts.inner_loop_recompute_divider != 0) { inner_recompute_limit_ = static_cast(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider); } else { @@ -258,10 +257,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0); // Update the starting temperature for placement annealing to a more appropriate value - annealing_state_.t = estimate_starting_temperature(); + annealing_state_.t = estimate_starting_temperature_(); } -float PlacementAnnealer::estimate_starting_temperature() { +float 
PlacementAnnealer::estimate_starting_temperature_() { if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) { return placer_opts_.anneal_sched.init_t; } @@ -289,7 +288,7 @@ float PlacementAnnealer::estimate_starting_temperature() { #endif /*NO_GRAPHICS*/ // Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack - e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled); + e_move_result swap_result = try_swap_(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled); if (swap_result == e_move_result::ACCEPTED) { num_accepted++; @@ -322,9 +321,9 @@ float PlacementAnnealer::estimate_starting_temperature() { return init_temp; } -e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator, - const t_place_algorithm& place_algorithm, - bool manual_move_enabled) { +e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + bool manual_move_enabled) { /* Picks some block and moves it to another spot. If this spot is * occupied, switch the blocks. Assess the change in cost function. * rlim is the range limiter. 
@@ -638,7 +637,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() { PlaceCritParams crit_params{annealing_state_.crit_exponent, placer_opts_.place_crit_limit}; - //Update all timing related classes + // Update all timing related classes perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_, pin_timing_invalidator_, timing_info_, &costs_, placer_state_); @@ -667,7 +666,7 @@ void PlacementAnnealer::placement_inner_loop() { // Inner loop begins for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) { - e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled); + e_move_result swap_result = try_swap_(move_generator, placer_opts_.place_algorithm, manual_move_enabled); if (swap_result == e_move_result::ACCEPTED) { // Move was accepted. Update statistics that are useful for the annealing schedule. @@ -683,7 +682,6 @@ void PlacementAnnealer::placement_inner_loop() { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ - const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_; // on last iteration don't recompute if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) { diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 039ecfb652f..eec0e23106e 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -133,7 +133,26 @@ class t_annealing_state { inline void update_crit_exponent(const t_placer_opts& placer_opts); }; - +/** + * @class PlacementAnnealer + * @brief Implements a simulated annealing optimizer that minimizes the placement cost + * by swapping clustered blocks. It always accepts swaps that reduce the placement cost, + * but accepts the swaps that increase the cost with a diminishing probability. 
+ * + * @details Swaps are performed in a two nested loops. The inner loop is implemented in + * placement_inner_loop() method. Each iteration of the inner loop performs a single swap, + * and all swaps performed in each iteration of the other loop are evaluated using the same + * temperature. + * + * The user is expected to call outer_loop_update_timing_info() before calling + * placement_inner_loop(). Then, outer_loop_update_state() should be called to + * determine whether another iteration of the outer loop is required. + * If outer_loop_update_state() returns false, start_quench() can be called to + * set the temperate to zero so that the annealer behaves greedily. Then, + * outer_loop_update_timing_info() and placement_inner_loop() can be called + * to run the quench stage. + * + */ class PlacementAnnealer { public: PlacementAnnealer(const t_placer_opts& placer_opts, @@ -171,26 +190,6 @@ class PlacementAnnealer { */ bool outer_loop_update_state(); - /** - * @brief Pick some block and moves it to another spot. - * - * If the new location is empty, directly move the block. If the new location - * is occupied, switch the blocks. Due to the different sizes of the blocks, - * this block switching may occur for multiple times. It might also cause the - * current swap attempt to abort due to inability to find suitable locations - * for moved blocks. - * - * The move generator will record all the switched blocks in the variable - * `blocks_affected`. Afterwards, the move will be assessed by the chosen - * cost formulation. Currently, there are three ways to assess move cost, - * which are stored in the enum type `t_place_algorithm`. - * - * @return Whether the block swap is accepted, rejected or aborted. 
- */ - e_move_result try_swap(MoveGenerator& move_generator, - const t_place_algorithm& place_algorithm, - bool manual_move_enabled); - /** * @brief Starts the quench stage in simulated annealing by * setting the temperature to zero and reverting the move range limit @@ -211,6 +210,27 @@ class PlacementAnnealer { std::tuple get_stats() const; private: + + /** + * @brief Pick some block and moves it to another spot. + * + * If the new location is empty, directly move the block. If the new location + * is occupied, switch the blocks. Due to the different sizes of the blocks, + * this block switching may occur for multiple times. It might also cause the + * current swap attempt to abort due to inability to find suitable locations + * for moved blocks. + * + * The move generator will record all the switched blocks in the variable + * `blocks_affected`. Afterwards, the move will be assessed by the chosen + * cost formulation. Currently, there are three ways to assess move cost, + * which are stored in the enum type `t_place_algorithm`. + * + * @return Whether the block swap is accepted, rejected or aborted. + */ + e_move_result try_swap_(MoveGenerator& move_generator, + const t_place_algorithm& place_algorithm, + bool manual_move_enabled); + /** * @brief Determines whether a move should be accepted or not. * Moves with negative delta cost are always accepted, but @@ -223,7 +243,7 @@ class PlacementAnnealer { e_move_result assess_swap_(double delta_c, double t); /// @brief Find the starting temperature for the annealing loop. 
- float estimate_starting_temperature(); + float estimate_starting_temperature_(); private: const t_placer_opts& placer_opts_; From f7c239f8b82a2b1ba7e53bc8e28643ccdc06c74d Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 17 Nov 2024 12:48:51 -0500 Subject: [PATCH 29/31] apply Alex's comments --- .../libtatum/libtatum/tatum/TimingGraph.cpp | 9 ++-- vpr/src/place/annealer.h | 41 ++++++++++++------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp index 3b861d054c6..d67e7c7afae 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp @@ -481,14 +481,13 @@ tatum::util::linear_map TimingGraph::optimize_edge_layout() const //Make all edges in a level be contiguous in memory //Determine the edges driven by each level of the graph - std::vector> edge_levels; + std::vector> edge_levels(levels().size()); for(LevelId level_id : levels()) { - edge_levels.emplace_back(); - for(auto node_id : level_nodes(level_id)) { + for(NodeId node_id : level_nodes(level_id)) { //We walk the nodes according to the input-edge order. 
//This is the same order used by the arrival-time traversal (which is responsible - //for most of the analyzer run-time), so matching it's order exactly results in + //for most of the analyzer run-time), so matching its order exactly results in //better cache locality for(EdgeId edge_id : node_in_edges(node_id)) { @@ -498,7 +497,7 @@ tatum::util::linear_map TimingGraph::optimize_edge_layout() const } } - //Maps from from original to new edge id, used to update node to edge refs + //Maps from original to new edge id, used to update node to edge refs tatum::util::linear_map orig_to_new_edge_id(edges().size()); //Determine the new order diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index eec0e23106e..a0a7ef3aa9b 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -135,23 +135,36 @@ class t_annealing_state { /** * @class PlacementAnnealer - * @brief Implements a simulated annealing optimizer that minimizes the placement cost - * by swapping clustered blocks. It always accepts swaps that reduce the placement cost, - * but accepts the swaps that increase the cost with a diminishing probability. + * @brief Simulated annealing optimizer for minimizing placement cost via block swaps. * - * @details Swaps are performed in a two nested loops. The inner loop is implemented in - * placement_inner_loop() method. Each iteration of the inner loop performs a single swap, - * and all swaps performed in each iteration of the other loop are evaluated using the same - * temperature. + * @details This class implements simulated annealing to optimize placement cost by swapping clustered blocks. + * Swaps that reduce the cost are always accepted, while those that increase the cost are accepted + * with a diminishing probability. * - * The user is expected to call outer_loop_update_timing_info() before calling - * placement_inner_loop(). 
Then, outer_loop_update_state() should be called to - * determine whether another iteration of the outer loop is required. - * If outer_loop_update_state() returns false, start_quench() can be called to - * set the temperate to zero so that the annealer behaves greedily. Then, - * outer_loop_update_timing_info() and placement_inner_loop() can be called - * to run the quench stage. + * The annealing process consists of two nested loops: + * - The **inner loop** (implemented in `placement_inner_loop()`) performs individual swaps, all evaluated at a fixed temperature. + * - The **outer loop** adjusts the temperature and determines whether further iterations are needed. * + * Usage workflow: + * 1. Call `outer_loop_update_timing_info()` to update timing information. + * 2. Execute `placement_inner_loop()` for swap evaluations. + * 3. Call `outer_loop_update_state()` to check if more outer loop iterations are needed. + * 4. Optionally, use `start_quench()` to set the temperature to zero for a greedy optimization (quenching stage), + * then repeat steps 1 and 2. 
+ * + * Usage example: + * ************************************** + * PlacementAnnealer annealer(...); + * + * do { + * annealer.outer_loop_update_timing_info(); + * annealer.placement_inner_loop(); + * } while (annealer.outer_loop_update_state()); + * + * annealer.start_quench(); + * annealer.outer_loop_update_timing_info(); + * annealer.placement_inner_loop(); + * ************************************** */ class PlacementAnnealer { public: From 834e5891d07151ae07a650c59569c0c0a67917d5 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 17 Nov 2024 12:57:25 -0500 Subject: [PATCH 30/31] add separators to annelaer.cpp --- vpr/src/place/annealer.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 4696fc37b85..f0d2fc50e83 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -17,6 +17,10 @@ #include "placer_breakpoint.h" #include "RL_agent_util.h" +/**************************************************************************/ +/*************** Static Function Declarations *****************************/ +/**************************************************************************/ + /** * @brief Check if the setup slack has gotten better or worse due to block swap. 
* @@ -38,6 +42,9 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state); +/*************************************************************************/ +/*************** Static Function Definitions *****************************/ +/*************************************************************************/ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, const PlacerState& placer_state) { @@ -78,6 +85,10 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks, return 1; } +/**************************************************************************************/ +/*************** Member Function Definitions for t_annealing_state ********************/ +/**************************************************************************************/ + ///@brief Constructor: Initialize all annealing state variables and macros. t_annealing_state::t_annealing_state(float first_t, float first_rlim, @@ -170,6 +181,10 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { + placer_opts.td_place_exp_first; } +/**************************************************************************************/ +/*************** Member Function Definitions for PlacementAnnealer ********************/ +/**************************************************************************************/ + PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, PlacerState& placer_state, t_placer_costs& costs, From 8c0fdfc220879a44ad1495d09bbedec41df6247e Mon Sep 17 00:00:00 2001 From: soheil Date: Mon, 18 Nov 2024 00:55:04 -0500 Subject: [PATCH 31/31] add get_move_abortion_logger() to PlacementAnnealer --- vpr/src/place/annealer.cpp | 6 +++++- vpr/src/place/annealer.h | 7 +++++++ vpr/src/place/place.cpp | 4 +--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index f0d2fc50e83..1d8836956ab 100644 
--- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -791,6 +791,10 @@ std::tuple return {swap_stats_, move_type_stats_, placer_stats_}; } +const MoveAbortionLogger& PlacementAnnealer::get_move_abortion_logger() const { + return blocks_affected_.move_abortion_logger; +} + void PlacementAnnealer::LOG_MOVE_STATS_HEADER() { if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { if (move_stats_file_) { @@ -870,4 +874,4 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) { } VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n"); return e_move_result::REJECTED; -} \ No newline at end of file +} diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index a0a7ef3aa9b..fd9b0dbd928 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -222,6 +222,13 @@ class PlacementAnnealer { /// @brief Returns constant references to different statistics objects std::tuple get_stats() const; + /** + * @brief Returns MoveAbortionLogger to report how many moves + * were aborted for each reason. + * @return A constant reference to a MoveAbortionLogger object. 
+ */ + const MoveAbortionLogger& get_move_abortion_logger() const; + private: /** diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 9af234a13f8..69617b278a2 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -169,8 +169,6 @@ void try_place(const Netlist<>& net_list, std::unique_ptr placer_criticalities; std::unique_ptr pin_timing_invalidator; - t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size()); - if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_delay_model(net_list, @@ -552,7 +550,7 @@ void try_place(const Netlist<>& net_list, //Some stats VTR_LOG("\n"); VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called); - blocks_affected.move_abortion_logger.report_aborted_moves(); + annealer.get_move_abortion_logger().report_aborted_moves(); if (placer_opts.place_algorithm.is_timing_driven()) { //Final timing estimate