From b41cda3eb66219006e0ef8cbf3087237a23bb941 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 15 Oct 2024 14:46:53 -0400
Subject: [PATCH 01/31] add annealer.cpp/.h

---
 vpr/src/base/ShowSetup.cpp    |  10 +--
 vpr/src/base/read_options.cpp |   6 +-
 vpr/src/base/read_options.h   |   2 +-
 vpr/src/base/vpr_types.h      |   4 +-
 vpr/src/place/annealer.cpp    | 145 ++++++++++++++++++++++++++++++++++
 vpr/src/place/annealer.h      | 135 +++++++++++++++++++++++++++++++
 vpr/src/place/place.cpp       |   6 +-
 vpr/src/place/place_util.cpp  | 140 +-------------------------------
 vpr/src/place/place_util.h    | 124 -----------------------------
 9 files changed, 295 insertions(+), 277 deletions(-)
 create mode 100644 vpr/src/place/annealer.cpp
 create mode 100644 vpr/src/place/annealer.h
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index f288be8f865..68aa073759d 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -207,13 +207,13 @@ void writeClusteredNetlistStats(const std::string& block_usage_filename) {
 static void ShowAnnealSched(const t_annealing_sched& AnnealSched) {
     VTR_LOG("AnnealSched.type: ");
     switch (AnnealSched.type) {
-        case AUTO_SCHED:
+        case e_sched_type::AUTO_SCHED:
             VTR_LOG("AUTO_SCHED\n");
             break;
-        case USER_SCHED:
+        case e_sched_type::USER_SCHED:
             VTR_LOG("USER_SCHED\n");
             break;
-        case DUSTY_SCHED:
+        case e_sched_type::DUSTY_SCHED:
             VTR_LOG("DUSTY_SCHED\n");
             break;
         default:
@@ -222,11 +222,11 @@ static void ShowAnnealSched(const t_annealing_sched& AnnealSched) {
 
     VTR_LOG("AnnealSched.inner_num: %f\n", AnnealSched.inner_num);
 
-    if (USER_SCHED == AnnealSched.type) {
+    if (e_sched_type::USER_SCHED == AnnealSched.type) {
         VTR_LOG("AnnealSched.init_t: %f\n", AnnealSched.init_t);
         VTR_LOG("AnnealSched.alpha_t: %f\n", AnnealSched.alpha_t);
         VTR_LOG("AnnealSched.exit_t: %f\n", AnnealSched.exit_t);
-    } else if (DUSTY_SCHED == AnnealSched.type) {
+    } else if (e_sched_type::DUSTY_SCHED == AnnealSched.type) {
         VTR_LOG("AnnealSched.alpha_min: %f\n", AnnealSched.alpha_min);
         VTR_LOG("AnnealSched.alpha_max: %f\n", AnnealSched.alpha_max);
         VTR_LOG("AnnealSched.alpha_decay: %f\n", AnnealSched.alpha_decay);
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index fa7084a9b07..eeb4bbfaee0 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -3141,13 +3141,13 @@ void set_conditional_defaults(t_options& args) {
         || args.PlaceAlphaDecay.provenance() == Provenance::SPECIFIED
         || args.PlaceSuccessMin.provenance() == Provenance::SPECIFIED
         || args.PlaceSuccessTarget.provenance() == Provenance::SPECIFIED) {
-        args.anneal_sched_type.set(DUSTY_SCHED, Provenance::INFERRED);
+        args.anneal_sched_type.set(e_sched_type::DUSTY_SCHED, Provenance::INFERRED);
     } else if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
                || args.PlaceExitT.provenance() == Provenance::SPECIFIED
                || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) {
-        args.anneal_sched_type.set(USER_SCHED, Provenance::INFERRED);
+        args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED);
     } else {
-        args.anneal_sched_type.set(AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule
+        args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule
     }
 
     /*
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index c07762350d5..673694fd80c 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -124,7 +124,7 @@ struct t_options {
     argparse::ArgValue<float> PlaceAlphaDecay;
     argparse::ArgValue<float> PlaceSuccessMin;
     argparse::ArgValue<float> PlaceSuccessTarget;
-    argparse::ArgValue<sched_type> anneal_sched_type;
+    argparse::ArgValue<e_sched_type> anneal_sched_type;
     argparse::ArgValue<e_place_algorithm> PlaceAlgorithm;
     argparse::ArgValue<e_place_algorithm> PlaceQuenchAlgorithm;
     argparse::ArgValue<e_pad_loc_type> pad_loc_type;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 98f17e898a1..84432ed7181 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -460,7 +460,7 @@ constexpr int NUM_PL_MOVE_TYPES = 7;
 constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3;
 
 /* Timing data structures end */
-enum sched_type {
+enum class e_sched_type {
     AUTO_SCHED,
     DUSTY_SCHED,
     USER_SCHED
@@ -836,7 +836,7 @@ struct t_packer_opts {
  * the obvious meanings.
  */
 struct t_annealing_sched {
-    enum sched_type type;
+    e_sched_type type;
     float inner_num;
     float init_t;
     float alpha_t;
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
new file mode 100644
index 00000000000..b820acdaf92
--- /dev/null
+++ b/vpr/src/place/annealer.cpp
@@ -0,0 +1,145 @@
+
+#include "annealer.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "globals.h"
+#include "draw_global.h"
+#include "vpr_types.h"
+#include "place_util.h"
+
+///@brief Constructor: seeds the annealing state from the schedule and the caller-supplied first_* values, then derives the cached range-limit constants.
+t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
+                                     float first_t,
+                                     float first_rlim,
+                                     int first_move_lim,
+                                     float first_crit_exponent) {
+    num_temps = 0;
+    alpha = annealing_sched.alpha_min; // starting value; outer_loop_update() rewrites it for DUSTY_SCHED and AUTO_SCHED
+    t = first_t;
+    restart_t = first_t; // restarts fall back to the first temperature until outer_loop_update() records a newer one
+    rlim = first_rlim;
+    move_lim_max = first_move_lim;
+    crit_exponent = first_crit_exponent;
+
+    /* DUSTY_SCHED starts at move_lim_max * success_target (at least 1); every other schedule starts at move_lim_max. */
+    if (annealing_sched.type == e_sched_type::DUSTY_SCHED) {
+        move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target));
+    } else {
+        move_lim = move_lim_max;
+    }
+
+    /* Cache the inverse used by update_crit_exponent(). NOTE(review): not finite when first_rlim == FINAL_RLIM -- confirm callers never pass that. */
+    INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM);
+
+    /* The range limit is capped at the larger of (grid width - 1) and (grid height - 1). */
+    const auto& grid = g_vpr_ctx.device().grid;
+    UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1);
+}
+
+bool t_annealing_state::outer_loop_update(float success_rate,
+                                          const t_placer_costs& costs,
+                                          const t_placer_opts& placer_opts,
+                                          const t_annealing_sched& annealing_sched) { // returns true to keep annealing, false to stop
+#ifndef NO_GRAPHICS
+    t_draw_state* draw_state = get_draw_state_vars();
+    if (!draw_state->list_of_breakpoints.empty()) {
+        /* Breakpoints are registered: bump the temperature counter kept in the global breakpoint state. */
+        get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
+    }
+#endif
+
+    if (annealing_sched.type == e_sched_type::USER_SCHED) {
+        /* Update t with user specified alpha. */
+        t *= annealing_sched.alpha_t;
+
+        /* Despite its name, exit_anneal is the *continue* flag: true while t is still at or above exit_t. */
+        bool exit_anneal = t >= annealing_sched.exit_t;
+
+        return exit_anneal;
+    }
+
+    /* Exit temperature for the automatic schedules: 0.005 * cost / number of nets in the clustered netlist. */
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+
+    if (annealing_sched.type == e_sched_type::DUSTY_SCHED) {
+        /* t_exit may be NaN if there are no nets; that case is treated like a too-low temperature. */
+        bool restart_temp = t < t_exit || std::isnan(t_exit);
+
+        /* If the success rate or the temperature is *
+         * too low, reset the temperature and alpha. */
+        if (success_rate < annealing_sched.success_min || restart_temp) {
+            /* A restart is due, but alpha is already above alpha_max: stop annealing instead. */
+            if (alpha > annealing_sched.alpha_max) {
+                return false;
+            }
+            /* Take a half step from the restart temperature: t = restart_t / sqrt(alpha). */
+            t = restart_t / sqrt(alpha);
+            /* Scale the remaining gap (1 - alpha) by alpha_decay. */
+            alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay);
+        } else {
+            /* If the success rate is promising, next time   *
+             * reset t to the current annealing temperature. */
+            if (success_rate > annealing_sched.success_target) {
+                restart_t = t;
+            }
+            /* Scale t by the current alpha. */
+            t *= alpha;
+        }
+
+        /* Re-derive move_lim from the ratio success_target / success_rate. */
+        update_move_lim(annealing_sched.success_target, success_rate);
+    } else {
+        VTR_ASSERT_SAFE(annealing_sched.type == e_sched_type::AUTO_SCHED);
+        /* Pick alpha from fixed success-rate bands. */
+        if (success_rate > 0.96) {
+            alpha = 0.5;
+        } else if (success_rate > 0.8) {
+            alpha = 0.9;
+        } else if (success_rate > 0.15 || rlim > 1.) {
+            alpha = 0.95;
+        } else {
+            alpha = 0.8; // success_rate <= 0.15 and rlim <= 1
+        }
+        /* Scale t by the alpha just chosen. */
+        t *= alpha;
+        /* Same t_exit / NaN test as DUSTY_SCHED's restart_temp, but here it ends the anneal. Must be duplicated to retain previous behavior. */
+        if (t < t_exit || std::isnan(t_exit)) {
+            return false;
+        }
+    }
+
+    /* Update the range limiter (DUSTY_SCHED and AUTO_SCHED only; USER_SCHED returned above). */
+    update_rlim(success_rate);
+
+    /* If using timing driven algorithm, update the crit_exponent. */
+    if (placer_opts.place_algorithm.is_timing_driven()) {
+        update_crit_exponent(placer_opts);
+    }
+
+    /* Continues the annealing. */
+    return true;
+}
+
+void t_annealing_state::update_rlim(float success_rate) { // rescales rlim by the success rate, then clamps it to [FINAL_RLIM, UPPER_RLIM]
+    rlim *= (1. - 0.44 + success_rate); // factor is exactly 1 at success_rate == 0.44, above 1 for higher rates, below 1 for lower
+    rlim = std::min(rlim, UPPER_RLIM);  // never above the cap set in the constructor
+    rlim = std::max(rlim, FINAL_RLIM);  // never below the final range limit
+}
+
+void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
+    /* scale == 0 while rlim equals the constructor's first_rlim, and scale == 1 once rlim == FINAL_RLIM. */
+    float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
+
+    /* Linear interpolation: scale 0 gives td_place_exp_first, scale 1 gives td_place_exp_last. */
+    crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
+                    + placer_opts.td_place_exp_first;
+}
+
+void t_annealing_state::update_move_lim(float success_target, float success_rate) {
+    /* move_lim = move_lim_max * success_target / success_rate, kept at or below move_lim_max and at or above 1. The cap is applied in float: a zero or tiny success_rate yields inf/NaN or a value beyond int range, and narrowing that to int is undefined. */
+    const float scaled_lim = move_lim_max * (success_target / success_rate);
+    move_lim = std::max(1, (scaled_lim < move_lim_max) ? static_cast<int>(scaled_lim) : move_lim_max); // NaN fails the '<' test, so it also maps to move_lim_max
+}
\ No newline at end of file
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
new file mode 100644
index 00000000000..0c046bcdf5f
--- /dev/null
+++ b/vpr/src/place/annealer.h
@@ -0,0 +1,135 @@
+
+#pragma once
+
+class t_placer_costs;
+struct t_placer_opts;
+struct t_annealing_sched;
+
+/**
+ * @brief Stores variables that are used by the annealing process.
+ *
+ * This structure is updated by outer_loop_update() on each outer
+ * loop iteration. It stores various important variables that need to
+ * be accessed during the placement inner loop.
+ *
+ * Private variables are not given accessor functions. They serve as
+ * macros originally defined in place.cpp as global scope variables.
+ *
+ * Public members:
+ *   @param t
+ *              Temperature for simulated annealing.
+ *   @param restart_t
+ *              Temperature used after restart due to minimum success ratio.
+ *              Currently only used and updated by DUSTY_SCHED.
+ *   @param alpha
+ *              Temperature decay factor (multiplied each outer loop iteration).
+ *   @param num_temps
+ *              The count of how many temperature iterations have passed.
+ *
+ *   @param rlim
+ *              Range limit for block swaps.
+ *              Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *   @param crit_exponent
+ *              Used by timing-driven placement to "sharpen" the timing criticality.
+ *              Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *   @param move_lim
+ *              Current block move limit.
+ *              Currently only updated by DUSTY_SCHED.
+ *   @param move_lim_max
+ *              Maximum block move limit.
+ *
+ * Private members:
+ *   @param UPPER_RLIM
+ *              The upper limit for the range limiter value.
+ *   @param FINAL_RLIM
+ *              The final rlim (range limit) is 1, which is the smallest value that
+ *              can still make progress, since an rlim of 0 wouldn't allow any swaps.
+ *   @param INVERSE_DELTA_RLIM
+ *              Used to update crit_exponent. See update_crit_exponent() for more.
+ *
+ * Mutators:
+ *   @param outer_loop_update()
+ *              Update the annealing state variables in the placement outer loop.
+ *   @param update_rlim(), update_crit_exponent(), update_move_lim()
+ *              Inline subroutines used by the main routine outer_loop_update().
+ */
+class t_annealing_state {
+  public:
+    float t;
+    float restart_t;
+    float alpha;
+    int num_temps;
+
+    float rlim;
+    float crit_exponent;
+    int move_lim;
+    int move_lim_max;
+
+  private:
+    float UPPER_RLIM; // set by the constructor from the device grid dimensions
+    float FINAL_RLIM = 1.;
+    float INVERSE_DELTA_RLIM; // set by the constructor to 1 / (first_rlim - FINAL_RLIM)
+
+  public: //Constructor
+    t_annealing_state() = delete; // the state can only be built through the seeding constructor below
+    t_annealing_state(const t_annealing_sched& annealing_sched,
+                      float first_t,
+                      float first_rlim,
+                      int first_move_lim,
+                      float first_crit_exponent);
+
+  public: //Mutator
+    /**
+     * @brief Update the annealing state according to the annealing schedule selected.
+     *
+     *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
+     *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
+     *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
+     *                See doc/src/vpr/dusty_sa.rst for more details.
+     *
+     * @return True->continues the annealing. False->exits the annealing.
+     */
+    bool outer_loop_update(float success_rate,
+                           const t_placer_costs& costs,
+                           const t_placer_opts& placer_opts,
+                           const t_annealing_sched& annealing_sched);
+
+  private: //Mutator
+    /**
+     * @brief Update the range limiter to keep acceptance prob. near 0.44.
+     *
+     * Use a floating point rlim to allow gradual transitions at low temps.
+     * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
+     */
+    inline void update_rlim(float success_rate);
+
+    /**
+     * @brief Update the criticality exponent.
+     *
+     * When rlim shrinks towards the FINAL_RLIM value (indicating
+     * that we are fine-tuning a more optimized placement), we can
+     * focus more on a smaller number of critical connections.
+     * To achieve this, we make the crit_exponent sharper, so that
+     * critical connections would become more critical than before.
+     *
+     * We calculate how close rlim is to its final value comparing
+     * to its initial value. Then, we apply the same scaling factor
+     * on the crit_exponent so that it lands on the suitable value
+     * between td_place_exp_first and td_place_exp_last. The scaling
+     * factor is calculated and applied linearly.
+     */
+    inline void update_crit_exponent(const t_placer_opts& placer_opts);
+
+    /**
+     * @brief Update the move limit based on the success rate.
+     *
+     * The value is bounded between 1 and move_lim_max.
+     */
+    inline void update_move_lim(float success_target, float success_rate);
+};
+
+class PlacementAnnealer { // so far only a holder for the annealing state: no constructors or methods are declared
+
+  private:
+    t_annealing_state annealing_state_; // NOTE(review): t_annealing_state() is deleted, so this class cannot be instantiated until it gets a constructor that initializes this member
+};
\ No newline at end of file
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index aaa5620af50..90b566fb753 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -24,6 +24,7 @@
 
 #include "globals.h"
 #include "place.h"
+#include "annealer.h"
 #include "read_place.h"
 #include "draw.h"
 #include "place_and_route.h"
@@ -688,8 +689,7 @@ void try_place(const Netlist<>& net_list,
                             EPSILON,    // Set the temperature low to ensure that initial placement quality will be preserved
                             first_rlim,
                             first_move_lim,
-                            first_crit_exponent,
-                            device_ctx.grid.get_num_layers());
+                            first_crit_exponent);
 
     /* Update the starting temperature for placement annealing to a more appropriate value */
     state.t = starting_t(&state, &costs, annealing_sched,
@@ -1173,7 +1173,7 @@ static float starting_t(const t_annealing_state* state,
                         PlacerState& placer_state,
                         NetCostHandler& net_cost_handler,
                         std::optional<NocCostHandler>& noc_cost_handler) {
-    if (annealing_sched.type == USER_SCHED) {
+    if (annealing_sched.type == e_sched_type::USER_SCHED) {
         return (annealing_sched.init_t);
     }
 
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index ce24914b7f2..ec7ecb8982e 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -71,38 +71,6 @@ t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
     return *this;
 }
 
-///@brief Constructor: Initialize all annealing state variables and macros.
-t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
-                                     float first_t,
-                                     float first_rlim,
-                                     int first_move_lim,
-                                     float first_crit_exponent,
-                                     int num_laters) {
-    num_temps = 0;
-    alpha = annealing_sched.alpha_min;
-    t = first_t;
-    restart_t = first_t;
-    rlim = first_rlim;
-    move_lim_max = first_move_lim;
-    crit_exponent = first_crit_exponent;
-
-    /* Determine the current move_lim based on the schedule type */
-    if (annealing_sched.type == DUSTY_SCHED) {
-        move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target));
-    } else {
-        move_lim = move_lim_max;
-    }
-
-    NUM_LAYERS = num_laters;
-
-    /* Store this inverse value for speed when updating crit_exponent. */
-    INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM);
-
-    /* The range limit cannot exceed the largest grid size. */
-    auto& grid = g_vpr_ctx.device().grid;
-    UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1);
-}
-
 int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -126,112 +94,6 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch
     return move_lim;
 }
 
-bool t_annealing_state::outer_loop_update(float success_rate,
-                                          const t_placer_costs& costs,
-                                          const t_placer_opts& placer_opts,
-                                          const t_annealing_sched& annealing_sched) {
-#ifndef NO_GRAPHICS
-    t_draw_state* draw_state = get_draw_state_vars();
-    if (!draw_state->list_of_breakpoints.empty()) {
-        /* Update temperature in the current information variable. */
-        get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
-    }
-#endif
-
-    if (annealing_sched.type == USER_SCHED) {
-        /* Update t with user specified alpha. */
-        t *= annealing_sched.alpha_t;
-
-        /* Check if the exit criterion is met. */
-        bool exit_anneal = t >= annealing_sched.exit_t;
-
-        return exit_anneal;
-    }
-
-    /* Automatically determine exit temperature. */
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
-
-    if (annealing_sched.type == DUSTY_SCHED) {
-        /* May get nan if there are no nets */
-        bool restart_temp = t < t_exit || std::isnan(t_exit);
-
-        /* If the success rate or the temperature is *
-         * too low, reset the temperature and alpha. */
-        if (success_rate < annealing_sched.success_min || restart_temp) {
-            /* Only exit anneal when alpha gets too large. */
-            if (alpha > annealing_sched.alpha_max) {
-                return false;
-            }
-            /* Take a half step from the restart temperature. */
-            t = restart_t / sqrt(alpha);
-            /* Update alpha. */
-            alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay);
-        } else {
-            /* If the success rate is promising, next time   *
-             * reset t to the current annealing temperature. */
-            if (success_rate > annealing_sched.success_target) {
-                restart_t = t;
-            }
-            /* Update t. */
-            t *= alpha;
-        }
-
-        /* Update move lim. */
-        update_move_lim(annealing_sched.success_target, success_rate);
-    } else {
-        VTR_ASSERT_SAFE(annealing_sched.type == AUTO_SCHED);
-        /* Automatically adjust alpha according to success rate. */
-        if (success_rate > 0.96) {
-            alpha = 0.5;
-        } else if (success_rate > 0.8) {
-            alpha = 0.9;
-        } else if (success_rate > 0.15 || rlim > 1.) {
-            alpha = 0.95;
-        } else {
-            alpha = 0.8;
-        }
-        /* Update temp. */
-        t *= alpha;
-        /* Must be duplicated to retain previous behavior. */
-        if (t < t_exit || std::isnan(t_exit)) {
-            return false;
-        }
-    }
-
-    /* Update the range limiter. */
-    update_rlim(success_rate);
-
-    /* If using timing driven algorithm, update the crit_exponent. */
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        update_crit_exponent(placer_opts);
-    }
-
-    /* Continues the annealing. */
-    return true;
-}
-
-void t_annealing_state::update_rlim(float success_rate) {
-    rlim *= (1. - 0.44 + success_rate);
-    rlim = std::min(rlim, UPPER_RLIM);
-    rlim = std::max(rlim, FINAL_RLIM);
-}
-
-void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
-    /* If rlim == FINAL_RLIM, then scale == 0. */
-    float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
-
-    /* Apply the scaling factor on crit_exponent. */
-    crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
-                    + placer_opts.td_place_exp_first;
-}
-
-void t_annealing_state::update_move_lim(float success_target, float success_rate) {
-    move_lim = move_lim_max * (success_target / success_rate);
-    move_lim = std::min(move_lim, move_lim_max);
-    move_lim = std::max(move_lim, 1);
-}
-
 ///@brief Clear all data fields.
 void t_placer_statistics::reset() {
     av_cost = 0.;
@@ -390,7 +252,7 @@ bool macro_can_be_placed(const t_pl_macro& pl_macro,
         }
     }
 
-    return (mac_can_be_placed);
+    return mac_can_be_placed;
 }
 
 NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest)
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 60d4a86b1c5..49f4246dbe5 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -126,130 +126,6 @@ class t_placer_costs {
     t_place_algorithm place_algorithm;
 };
 
-/**
- * @brief Stores variables that are used by the annealing process.
- *
- * This structure is updated by update_annealing_state() on each outer
- * loop iteration. It stores various important variables that need to
- * be accessed during the placement inner loop.
- *
- * Private variables are not given accessor functions. They serve as
- * macros originally defined in place.cpp as global scope variables.
- *
- * Public members:
- *   @param t
- *              Temperature for simulated annealing.
- *   @param restart_t
- *              Temperature used after restart due to minimum success ratio.
- *              Currently only used and updated by DUSTY_SCHED.
- *   @param alpha
- *              Temperature decays factor (multiplied each outer loop iteration).
- *   @param num_temps
- *              The count of how many temperature iterations have passed.
- *
- *   @param rlim
- *              Range limit for block swaps.
- *              Currently only updated by DUSTY_SCHED and AUTO_SCHED.
- *   @param crit_exponent
- *              Used by timing-driven placement to "sharpen" the timing criticality.
- *              Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED.
- *   @param move_lim
- *              Current block move limit.
- *              Currently only updated by DUSTY_SCHED.
- *   @param move_lim_max
- *              Maximum block move limit.
- *
- * Private members:
- *   @param UPPER_RLIM
- *              The upper limit for the range limiter value.
- *   @param FINAL_RLIM
- *              The final rlim (range limit) is 1, which is the smallest value that
- *              can still make progress, since an rlim of 0 wouldn't allow any swaps.
- *   @param INVERSE_DELTA_RLIM
- *              Used to update crit_exponent. See update_rlim() for more.
- *
- * Mutators:
- *   @param outer_loop_update()
- *              Update the annealing state variables in the placement outer loop.
- *   @param update_rlim(), update_crit_exponent(), update_move_lim()
- *              Inline subroutines used by the main routine outer_loop_update().
- */
-class t_annealing_state {
-  public:
-    float t;
-    float restart_t;
-    float alpha;
-    int num_temps;
-
-    float rlim;
-    float crit_exponent;
-    int move_lim;
-    int move_lim_max;
-
-  private:
-    float UPPER_RLIM;
-    float FINAL_RLIM = 1.;
-    float INVERSE_DELTA_RLIM;
-    int NUM_LAYERS = 1;
-
-  public: //Constructor
-    t_annealing_state(const t_annealing_sched& annealing_sched,
-                      float first_t,
-                      float first_rlim,
-                      int first_move_lim,
-                      float first_crit_exponent,
-                      int num_layers);
-
-  public: //Mutator
-    /**
-     * @brief Update the annealing state according to the annealing schedule selected.
-     *
-     *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
-     *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
-     *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
-     *                See doc/src/vpr/dusty_sa.rst for more details.
-     *
-     * @return True->continues the annealing. False->exits the annealing.
-     */
-    bool outer_loop_update(float success_rate,
-                           const t_placer_costs& costs,
-                           const t_placer_opts& placer_opts,
-                           const t_annealing_sched& annealing_sched);
-
-  private: //Mutator
-    /**
-     * @brief Update the range limiter to keep acceptance prob. near 0.44.
-     *
-     * Use a floating point rlim to allow gradual transitions at low temps.
-     * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
-     */
-    inline void update_rlim(float success_rate);
-
-    /**
-     * @brief Update the criticality exponent.
-     *
-     * When rlim shrinks towards the FINAL_RLIM value (indicating
-     * that we are fine-tuning a more optimized placement), we can
-     * focus more on a smaller number of critical connections.
-     * To achieve this, we make the crit_exponent sharper, so that
-     * critical connections would become more critical than before.
-     *
-     * We calculate how close rlim is to its final value comparing
-     * to its initial value. Then, we apply the same scaling factor
-     * on the crit_exponent so that it lands on the suitable value
-     * between td_place_exp_first and td_place_exp_last. The scaling
-     * factor is calculated and applied linearly.
-     */
-    inline void update_crit_exponent(const t_placer_opts& placer_opts);
-
-    /**
-     * @brief Update the move limit based on the success rate.
-     *
-     * The value is bounded between 1 and move_lim_max.
-     */
-    inline void update_move_lim(float success_target, float success_rate);
-};
-
 /**
  * @brief Stores statistics produced by a single annealing iteration.
  *

From 32a2726602c4471372a80c3a84dfc22891e84bf7 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 15 Oct 2024 19:33:47 -0400
Subject: [PATCH 02/31] commit before I go home.

---
 vpr/src/base/vpr_types.h     |   2 +-
 vpr/src/place/annealer.cpp   | 911 +++++++++++++++++++++++++++++++++++
 vpr/src/place/annealer.h     | 104 +++-
 vpr/src/place/move_utils.h   |  12 -
 vpr/src/place/place.cpp      | 462 +-----------------
 vpr/src/place/place.h        |   1 -
 vpr/src/place/placer_state.h |   8 +
 7 files changed, 1032 insertions(+), 468 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 84432ed7181..cb280ff36ec 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1066,6 +1066,7 @@ enum class e_move_type;
 struct t_placer_opts {
     t_place_algorithm place_algorithm;
     t_place_algorithm place_quench_algorithm;
+    t_annealing_sched anneal_sched;  ///<Placement option annealing schedule
     float timing_tradeoff;
     float place_cost_exp;
     int place_chan_width;
@@ -1733,7 +1734,6 @@ struct t_vpr_setup {
     t_packer_opts PackerOpts;       ///<Options for packer
     t_placer_opts PlacerOpts;       ///<Options for placer
     t_ap_opts APOpts;               ///<Options for analytical placer
-    t_annealing_sched AnnealSched;  ///<Placement option annealing schedule
     t_router_opts RouterOpts;       ///<router options
     t_analysis_opts AnalysisOpts;   ///<Analysis options
     t_noc_opts NocOpts;             ///<Options for the NoC
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index b820acdaf92..0fdcd9dbcb9 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -8,6 +8,314 @@
 #include "draw_global.h"
 #include "vpr_types.h"
 #include "place_util.h"
+#include "placer_state.h"
+#include "move_utils.h"
+#include "manual_move_generator.h"
+#include "noc_place_utils.h"
+#include "NetPinTimingInvalidator.h"
+#include "place_timing_update.h"
+
+#ifdef VTR_ENABLE_DEBUG_LOGGIING // NOTE(review): guard is spelled "LOGGIING", but the fallback message below names VTR_ENABLE_DEBUG_LOGGING -- confirm which spelling the build actually defines
+#    define LOG_MOVE_STATS_HEADER()                               \
+        do {                                                      \
+            if (f_move_stats_file) {                              \
+                fprintf(f_move_stats_file.get(),                  \
+                        "temp,from_blk,to_blk,from_type,to_type," \
+                        "blk_count,"                              \
+                        "delta_cost,delta_bb_cost,delta_td_cost," \
+                        "outcome,reason\n");                      \
+            }                                                     \
+        } while (false)
+
+#    define LOG_MOVE_STATS_PROPOSED(t, affected_blocks)                                        \
+        do {                                                                                   \
+            if (f_move_stats_file) {                                                           \
+                auto& place_ctx = g_vpr_ctx.placement();                                       \
+                auto& cluster_ctx = g_vpr_ctx.clustering();                                    \
+                ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num;             \
+                                                                                               \
+                t_pl_loc to = affected_blocks.moved_blocks[0].new_loc;                         \
+                ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile];   \
+                                                                                               \
+                t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \
+                t_logical_block_type_ptr to_type = nullptr;                                    \
+                if (b_to) {                                                                    \
+                    to_type = cluster_ctx.clb_nlist.block_type(b_to);                          \
+                }                                                                              \
+                                                                                               \
+                fprintf(f_move_stats_file.get(),                                               \
+                        "%g,"                                                                  \
+                        "%d,%d,"                                                               \
+                        "%s,%s,"                                                               \
+                        "%d,",                                                                 \
+                        t,                                                                     \
+                        int(b_from), int(b_to),                                                \
+                        from_type->name, (to_type ? to_type->name : "EMPTY"),                  \
+                        affected_blocks.moved_blocks.size());                                  \
+            }                                                                                  \
+        } while (false)
+
+#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
+                                   outcome, reason)                          \
+        do {                                                                 \
+            if (f_move_stats_file) {                                         \
+                fprintf(f_move_stats_file.get(),                             \
+                        "%g,%g,%g,"                                          \
+                        "%s,%s\n",                                           \
+                        delta_cost, delta_bb_cost, delta_td_cost,            \
+                        outcome, reason);                                    \
+            }                                                                \
+        } while (false)
+
+#else // Fallback: the header macro writes a one-line notice to move_stats_file_ and the other two macros expand to empty statements. NOTE(review): the branch above refers to f_move_stats_file instead -- confirm that name is declared before enabling it
+
+#    define LOG_MOVE_STATS_HEADER()                      \
+        do {                                             \
+            fprintf(move_stats_file_.get(),             \
+                    "VTR_ENABLE_DEBUG_LOGGING disabled " \
+                    "-- No move stats recorded\n");      \
+        } while (false)
+
+#    define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \
+        do {                                            \
+        } while (false)
+
+#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
+                                   outcome, reason)                          \
+        do {                                                                 \
+        } while (false)
+
+#endif // move-stats logging macros
+
+
+/**
+ * @brief Invalidates the connections affected by the specified block moves.
+ *
+ * All the connections recorded in blocks_affected.affected_pins have different
+ * values for `proposed_connection_delay` and `connection_delay`.
+ *
+ * Invalidate all the timing graph edges associated with these connections via
+ * the NetPinTimingInvalidator class.
+ */
+static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
+                                            NetPinTimingInvalidator* pin_tedges_invalidator,
+                                            TimingInfo* timing_info);
+
+/**
+ * @brief Update the connection_timing_cost values from the temporary
+ *        values for all connections that have/haven't changed.
+ *
+ * All the connections have already been gathered by blocks_affected.affected_pins
+ * after running the routine find_affected_nets_and_update_costs() in try_swap().
+ */
+static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
+                           PlacerState& placer_state);
+
+/**
+ * @brief Check if the setup slack has gotten better or worse due to block swap.
+ *
+ * Get all the modified slack values via the PlacerSetupSlacks class, and compare
+ * them with the original values at these connections. Sort them and compare them
+ * one by one, and return the difference of the first different pair.
+ *
+ * If the new slack value is larger (better), then return a negative value so that
+ * the move will be accepted. If the new slack value is smaller (worse), return a
+ * positive value so that the move will be rejected.
+ *
+ * If no slack values have changed, then return an arbitrary positive number. A
+ * move resulting in no change in the slack values is probably unnecessary.
+ *
+ * The sorting is needed to guard against the unlikely circumstance that a bad
+ * slack value suddenly got very good due to the block move, while a good slack
+ * value got very bad, perhaps even worse than the original worst slack value.
+ */
+static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
+                                      const PlacerState& placer_state);
+
+static e_move_result assess_swap(double delta_c, double t); // decides ACCEPTED/REJECTED from the cost change delta_c and the temperature t
+
+static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
+                                            NetPinTimingInvalidator* pin_tedges_invalidator,
+                                            TimingInfo* timing_info) {
+    VTR_ASSERT_SAFE(timing_info); // forwarded to invalidate_connection() below
+    VTR_ASSERT_SAFE(pin_tedges_invalidator); // dereferenced in the loop below
+
+    // Hand every pin recorded in blocks_affected.affected_pins to the invalidator
+    for (ClusterPinId pin : blocks_affected.affected_pins) {
+        pin_tedges_invalidator->invalidate_connection(pin, timing_info);
+    }
+}
+
+static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
+                           PlacerState& placer_state) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    auto& p_timing_ctx = placer_state.mutable_timing();
+    auto& connection_delay = p_timing_ctx.connection_delay;
+    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
+    auto& connection_timing_cost = p_timing_ctx.connection_timing_cost;
+    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
+
+    //Walk every pin recorded in blocks_affected.affected_pins
+    for (ClusterPinId pin_id : blocks_affected.affected_pins) {
+        ClusterNetId net_id = clb_nlist.pin_net(pin_id);
+        int ipin = clb_nlist.pin_net_index(pin_id);
+
+        //Copy each proposed value into its committed slot, then reset the proposed slot to INVALID_DELAY
+        connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin];
+        proposed_connection_delay[net_id][ipin] = INVALID_DELAY;
+        connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin];
+        proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; // the same INVALID_DELAY marker is used for the cost entry
+    }
+}
+
+static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
+                                      const PlacerState& placer_state) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    const auto& p_timing_ctx = placer_state.timing();
+    const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack;
+
+    //Collect, for every pin whose setup slack was modified, the stored (original) and the new (proposed) slack
+    std::vector<float> original_setup_slacks, proposed_setup_slacks;
+
+    auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack();
+    for (ClusterPinId clb_pin : clb_pins_modified) {
+        ClusterNetId net_id = clb_nlist.pin_net(clb_pin);
+        size_t ipin = clb_nlist.pin_net_index(clb_pin);
+
+        original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]);
+        proposed_setup_slacks.push_back(
+            setup_slacks->setup_slack(net_id, ipin));
+    }
+
+    //Sort both lists in ascending order, from the worst slack value to the best
+    std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end());
+    std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end());
+
+    //Compare the sorted lists position by position (both have the same length)
+    //and return the difference of the first pair that is not exactly equal
+    for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) {
+        float slack_diff = original_setup_slacks[idiff]
+                           - proposed_setup_slacks[idiff]; // positive when the proposed slack is smaller than the original
+
+        if (slack_diff != 0) {
+            return slack_diff;
+        }
+    }
+
+    //If all slack values are identical (or no modified slack values),
+    //reject this move by returning an arbitrary positive number as cost.
+    return 1;
+}
+
+static e_move_result assess_swap(double delta_c, double t) {
+    /* Returns ACCEPTED or REJECTED for a move with cost change delta_c at temperature t. */
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c);
+    if (delta_c <= 0) { // a move that does not increase the cost is always taken
+        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c <= 0)\n");
+        return ACCEPTED;
+    }
+
+    if (t == 0.) { // cost went up and t is zero: reject before dividing by t below
+        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n");
+        return REJECTED;
+    }
+
+    float fnum = vtr::frand(); // random draw compared against the acceptance factor
+    float prob_fac = std::exp(-delta_c / t); // acceptance factor exp(-delta_c / t) for a cost-increasing move
+    if (prob_fac > fnum) {
+        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n");
+        return ACCEPTED;
+    }
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n");
+    return REJECTED;
+}
+
+//Resets the proposed_connection_delay and proposed_connection_timing_cost entries of every pin in
+//blocks_affected.affected_pins to INVALID_DELAY; compiles to a no-op unless VTR_ASSERT_SAFE_ENABLED is defined
+static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
+                           PlacerTimingContext& p_timing_ctx) {
+#ifndef VTR_ASSERT_SAFE_ENABLED
+    (void)blocks_affected; // parameters are unused in this configuration
+    (void)p_timing_ctx;
+#else
+    //Invalidate temp delay & timing cost values to match sanity checks in
+    //comp_td_connection_cost()
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
+    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
+
+    for (ClusterPinId pin : blocks_affected.affected_pins) {
+        ClusterNetId net = clb_nlist.pin_net(pin);
+        int ipin = clb_nlist.pin_net_index(pin);
+        proposed_connection_delay[net][ipin] = INVALID_DELAY;
+        proposed_connection_timing_cost[net][ipin] = INVALID_DELAY;
+    }
+#endif
+}
+
+/**
+ * @brief Compute the total normalized cost for a given placement. This
+ * computation will vary depending on the placement modes.
+ *
+ * @param costs The current placement cost components and their normalization
+ * factors (only read here)
+ * @param placer_opts Determines the placement mode
+ * @param noc_opts Determines if placement includes the NoC
+ * @return double The computed total cost of the current placement
+ */
+static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
+    double total_cost = 0.0; // stays 0 if neither placement-algorithm branch below matches
+
+    if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
+        // in bounding box mode we only care about wirelength
+        total_cost = costs->bb_cost * costs->bb_cost_norm;
+    } else if (placer_opts.place_algorithm.is_timing_driven()) {
+        // in timing mode we blend normalized wirelength and timing costs, weighted by timing_tradeoff
+        total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm);
+    }
+
+    if (noc_opts.noc) {
+        // in noc mode the result of calculate_noc_cost() is added on top of the cost above
+        total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts);
+    }
+
+    return total_cost;
+}
+
+/**
+ * @brief Updates all the cost normalization factors during the outer
+ * loop iteration of the placement. At each temperature change, these
+ * values are updated so that we can balance the tradeoff between the
+ * different placement cost components (timing, wirelength and NoC).
+ * Depending on the placement mode the corresponding normalization factors are
+ * updated. The total cost stored in costs->cost is recomputed afterwards.
+ *
+ * @param costs Contains the normalization factors which need to be updated
+ * @param placer_opts Determines the placement mode
+ * @param noc_opts Determines if placement includes the NoC
+ * @param noc_cost_handler Computes normalization factors for NoC-related cost terms
+ */
+static void update_placement_cost_normalization_factors(t_placer_costs* costs,
+                                                        const t_placer_opts& placer_opts,
+                                                        const t_noc_opts& noc_opts,
+                                                        const std::optional<NocCostHandler>& noc_cost_handler) {
+    /* Update the cost normalization factors */
+    costs->update_norm_factors();
+
+    // update the noc normalization factors if the placement includes the NoC
+    if (noc_opts.noc) {
+        noc_cost_handler->update_noc_normalization_factors(*costs); // NOTE(review): optional is dereferenced without a has_value() check -- assumes it is engaged whenever noc_opts.noc is set
+    }
+
+    // update the current total placement cost
+    costs->cost = get_total_cost(costs, placer_opts, noc_opts);
+}
 
 ///@brief Constructor: Initialize all annealing state variables and macros.
 t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
@@ -142,4 +450,607 @@ void t_annealing_state::update_move_lim(float success_target, float success_rate
     move_lim = move_lim_max * (success_target / success_rate);
     move_lim = std::min(move_lim, move_lim_max);
     move_lim = std::max(move_lim, 1);
+}
+
+PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
+                                     PlacerState& placer_state,
+                                     t_placer_costs& costs,
+                                     NetCostHandler& net_cost_handler,
+                                     std::optional<NocCostHandler>& noc_cost_handler,
+                                     const t_noc_opts& noc_opts,
+                                     MoveGenerator& move_generator_1,
+                                     MoveGenerator& move_generator_2,
+                                     ManualMoveGenerator& manual_move_generator,
+                                     const PlaceDelayModel* delay_model,
+                                     PlacerCriticalities* criticalities,
+                                     PlacerSetupSlacks* setup_slacks,
+                                     SetupTimingInfo* timing_info,
+                                     NetPinTimingInvalidator* pin_timing_invalidator,
+                                     int move_lim)
+    : placer_opts_(placer_opts)
+    , placer_state_(placer_state)
+    , costs_(costs)
+    , net_cost_handler_(net_cost_handler)
+    , noc_cost_handler_(noc_cost_handler)
+    , noc_opts_(noc_opts)
+    , move_generator_1_(move_generator_1)
+    , move_generator_2_(move_generator_2)
+    , manual_move_generator_(manual_move_generator)
+    , delay_model_(delay_model)
+    , criticalities_(criticalities)
+    , setup_slacks_(setup_slacks)
+    , timing_info_(timing_info)
+    , pin_timing_invalidator_(pin_timing_invalidator)
+    , move_stats_file_(nullptr, vtr::fclose)
+    , outer_crit_iter_count_(1)
+    , blocks_affected_(placer_state.block_locs().size())
+{ // Builds the initial annealing state, opens the optional move-stats file, sizes the move-type counters, then estimates the starting temperature
+    const auto& device_ctx = g_vpr_ctx.device();
+
+    float first_crit_exponent;
+    if (placer_opts.place_algorithm.is_timing_driven()) {
+        first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
+    } else {
+        first_crit_exponent = 0.f;
+    }
+
+    int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched);
+
+    int inner_recompute_limit; // NOTE(review): computed below but not read again in this constructor -- confirm it is meant to be stored
+    if (placer_opts.inner_loop_recompute_divider != 0) {
+        inner_recompute_limit = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
+    } else {
+        // don't do an inner recompute: the limit is placed just past first_move_lim
+        inner_recompute_limit = first_move_lim + 1;
+    }
+
+    /* calculate the number of moves in the quench that we should recompute timing after based on the value of *
+     * the commandline option quench_recompute_divider                                                         */
+    int quench_recompute_limit; // NOTE(review): derived from the move_lim argument (fallback uses first_move_lim) and not read again in this constructor -- confirm
+    if (placer_opts.quench_recompute_divider != 0) {
+        quench_recompute_limit = static_cast<int>(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
+    } else {
+        /* don't do a quench recompute */
+        quench_recompute_limit = first_move_lim + 1;
+    }
+
+    // Get the first range limiter: the larger of (grid width - 1) and (grid height - 1)
+    placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
+
+    annealing_state_ = t_annealing_state(placer_opts_.anneal_sched,
+                                         EPSILON,    // Set the temperature low to ensure that initial placement quality will be preserved
+                                         placer_state_.move().first_rlim,
+                                         first_move_lim,
+                                         first_crit_exponent);
+
+    if (!placer_opts.move_stats_file.empty()) { // a move-stats file name was given: open it for writing and emit the header
+        move_stats_file_ = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
+            vtr::fopen(placer_opts.move_stats_file.c_str(), "w"),
+            vtr::fclose);
+        LOG_MOVE_STATS_HEADER();
+    }
+
+    //size the move type statistics tables to (logical block types) x (automatic move types), zero-filled
+    move_type_stats_.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
+    move_type_stats_.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
+    move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
+
+    // Replace the EPSILON placeholder temperature with the estimated starting temperature
+    annealing_state_.t = estimate_starting_temperature();
+}
+
+float PlacementAnnealer::estimate_starting_temperature() {
+    if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) { // user schedule: return the configured init_t unchanged
+        return placer_opts_.anneal_sched.init_t;
+    }
+
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    // Number of accepted trial swaps; divisor for the average cost below.
+    int num_accepted = 0;
+
+    // Use double types to avoid round off.
+    double av = 0., sum_of_squares = 0.;
+
+    // Trial swap count: at most move_lim_max, and at most one per clustered block.
+    int move_lim = std::min(annealing_state_.move_lim_max, (int)cluster_ctx.clb_nlist.blocks().size());
+
+    bool manual_move_enabled = false;
+
+    for (int i = 0; i < move_lim; i++) {
+#ifndef NO_GRAPHICS
+        // Checks manual move flag for manual move feature
+        t_draw_state* draw_state = get_draw_state_vars();
+        if (draw_state->show_graphics) {
+            manual_move_enabled = manual_move_is_selected();
+        }
+#endif /*NO_GRAPHICS*/
+
+        // TODO: remove this
+        constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
+
+        // Will not deploy setup slack analysis, so omit crit_exponent and setup_slack
+        e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm,
+                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled);
+
+        if (swap_result == ACCEPTED) {
+            num_accepted++;
+            av += costs_.cost;
+            sum_of_squares += costs_.cost * costs_.cost;
+            swap_stats_.num_swap_accepted++;
+        } else if (swap_result == ABORTED) {
+            swap_stats_.num_swap_aborted++;
+        } else {
+            swap_stats_.num_swap_rejected++;
+        }
+    }
+
+    // Take the average of the accepted swaps' cost values (0 when nothing was accepted).
+    av = num_accepted > 0 ? (av / num_accepted) : 0.;
+
+    // Get the standard deviation.
+    double std_dev = get_std_dev(num_accepted, sum_of_squares, av);
+
+    // Print warning if not all swaps are accepted.
+    if (num_accepted != move_lim) {
+        VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n",
+                     num_accepted, move_lim);
+    }
+
+#ifdef VERBOSE
+    /* Print stats related to finding the initial temp; the value logged matches the one returned below. */
+    VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, std_dev / 64);
+#endif
+
+    // Improved initial placement uses a fast SA for NoC routers and centroid placement
+    // for other blocks. The temperature is reduced to prevent SA from destroying the initial placement
+    float init_temp = std_dev / 64;
+
+    return init_temp;
+}
+
+
+/**
+ * @brief Pick some block and moves it to another spot.
+ *
+ * If the new location is empty, directly move the block. If the new location
+ * is occupied, switch the blocks. Due to the different sizes of the blocks,
+ * this block switching may occur multiple times. It might also cause the
+ * current swap attempt to abort due to inability to find suitable locations
+ * for moved blocks.
+ *
+ * The move generator will record all the switched blocks in the variable
+ * `blocks_affected`. Afterwards, the move will be assessed by the chosen
+ * cost formulation. Currently, there are three ways to assess move cost,
+ * which are stored in the enum type `t_place_algorithm`.
+ *
+ * @return Whether the block swap is accepted, rejected or aborted.
+ */
+e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
+                                          const t_place_algorithm& place_algorithm,
+                                          float timing_bb_factor,
+                                          bool manual_move_enabled) {
+    /* Picks some block and moves it to another spot.  If this spot is
+     * occupied, switch the blocks.  Assess the change in cost function.
+     * rlim is the range limiter.
+     * Returns whether the swap is accepted, rejected or aborted.
+     * Passes back the new value of the cost functions.
+     */
+    auto& blk_loc_registry = placer_state_.mutable_blk_loc_registry();
+
+    float rlim_escape_fraction = placer_opts_.rlim_escape_fraction;
+    float timing_tradeoff = placer_opts_.timing_tradeoff;
+
+    PlaceCritParams crit_params;
+    crit_params.crit_exponent = annealing_state_.crit_exponent;
+    crit_params.crit_limit = placer_opts_.place_crit_limit;
+
+    // move type and block type chosen by the agent
+    t_propose_action proposed_action{e_move_type::UNIFORM, -1};
+
+    swap_stats_.num_ts_called++;
+
+    MoveOutcomeStats move_outcome_stats;
+
+    /* I'm using negative values of proposed_net_cost as a flag,
+     * so DO NOT use cost functions that can go negative. */
+
+    double delta_c = 0;        //Change in cost due to this swap.
+    double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
+    double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
+
+    // Determine whether we need to force swap two router blocks
+    bool router_block_move = false;
+    if (noc_opts_.noc) {
+        router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage);
+    }
+
+    /* Allow some fraction of moves to not be restricted by rlim,
+     * in the hopes of better escaping local minima. */
+    float rlim;
+    if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) {
+        rlim = std::numeric_limits<float>::infinity();
+    } else {
+        rlim = annealing_state_.rlim;
+    }
+
+    e_create_move create_move_outcome = e_create_move::ABORT;
+
+    //When manual move toggle button is active, the manual move window asks the user for input.
+    if (manual_move_enabled) {
+#ifndef NO_GRAPHICS
+        create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_,
+                                                              proposed_action.move_type, rlim, placer_opts_,
+                                                              criticalities_);
+#endif //NO_GRAPHICS
+    } else if (router_block_move) {
+        // generate a move where two random router blocks are swapped
+        create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry);
+        proposed_action.move_type = e_move_type::UNIFORM;
+    } else {
+        //Generate a new move (perturbation) used to explore the space of possible placements
+        create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_);
+    }
+
+    if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
+        ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
+    }
+    LOG_MOVE_STATS_PROPOSED(t, blocks_affected_);
+
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
+                   "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n",
+                   costs_.cost, costs_.bb_cost, costs_.timing_cost);
+
+    e_move_result move_outcome = e_move_result::ABORTED;
+
+    if (create_move_outcome == e_create_move::ABORT) {
+        LOG_MOVE_STATS_OUTCOME(std::numeric_limits<float>::quiet_NaN(),
+                               std::numeric_limits<float>::quiet_NaN(),
+                               std::numeric_limits<float>::quiet_NaN(), "ABORTED",
+                               "illegal move");
+
+        move_outcome = ABORTED;
+
+    } else {
+        VTR_ASSERT(create_move_outcome == e_create_move::VALID);
+
+        /*
+         * To make evaluating the move simpler (e.g. calculating changed bounding box),
+         * we first move the blocks to their new locations (apply the move to
+         * blk_loc_registry.block_locs) and then compute the change in cost. If the move
+         * is accepted, the inverse look-up in place_ctx.grid_blocks is updated
+         * (committing the move). If the move is rejected, the blocks are returned to
+         * their original positions (reverting blk_loc_registry.block_locs to its original state).
+         *
+         * Note that the inverse look-up place_ctx.grid_blocks is only updated after
+         * move acceptance is determined, so it should not be used when evaluating a move.
+         */
+
+        /* Update the block positions */
+        blk_loc_registry.apply_move_blocks(blocks_affected_);
+
+        //Find all the nets affected by this swap and update the wiring costs.
+        //This cost value doesn't depend on the timing info.
+        //
+        //Also find all the pins affected by the swap, and calculates new connection
+        //delays and timing costs and store them in proposed_* data structures.
+        net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_,
+                                                              bb_delta_c, timing_delta_c);
+
+        //For setup slack analysis, we first do a timing analysis to get the newest
+        //slack values resulted from the proposed block moves. If the move turns out
+        //to be accepted, we keep the updated slack values and commit the block moves.
+        //If rejected, we reject the proposed block moves and revert this timing analysis.
+        if (place_algorithm == SLACK_TIMING_PLACE) {
+            // Invalidates timing of modified connections for incremental timing updates.
+            invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+
+            /* Update the connection_timing_cost and connection_delay *
+             * values from the temporary values.                      */
+            commit_td_cost(blocks_affected_, placer_state_);
+
+            /* Update timing information. Since we are analyzing setup slacks,   *
+             * we only update those values and keep the criticalities stale      *
+             * so as not to interfere with the original timing driven algorithm. *
+             *
+             * Note: the timing info must be updated after applying block moves  *
+             * and committing the timing driven delays and costs.                *
+             * If we wish to revert this timing update due to move rejection,    *
+             * we need to revert block moves and restore the timing values.      */
+            criticalities_->disable_update();
+            setup_slacks_->enable_update();
+            update_timing_classes(crit_params, timing_info_, criticalities_,
+                                  setup_slacks_, pin_timing_invalidator_, placer_state_);
+
+            /* Get the setup slack analysis cost */
+            //TODO: calculate a weighted average of the slack cost and wiring cost
+            delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm;
+        } else if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+            /* Take delta_c as a combination of timing and wiring cost. In
+             * addition to `timing_tradeoff`, we normalize the cost values */
+            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
+                           "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, "
+                           "timing_delta_c %e, timing_cost_norm %e\n",
+                           bb_delta_c,
+                           costs_.bb_cost_norm,
+                           timing_tradeoff,
+                           timing_delta_c,
+                           costs_.timing_cost_norm);
+            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
+                      + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
+        } else {
+            VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
+            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
+                           "\t\tMove bb_delta_c %e, bb_cost_norm %e\n",
+                           bb_delta_c,
+                           costs_.bb_cost_norm);
+            delta_c = bb_delta_c * costs_.bb_cost_norm;
+        }
+
+        NocCostTerms noc_delta_c; // change in NoC cost
+        /* Update the NoC data structure and costs*/
+        if (noc_opts_.noc) {
+            VTR_ASSERT_SAFE(noc_cost_handler_.has_value());
+            noc_cost_handler_->find_affected_noc_routers_and_update_noc_costs(blocks_affected_, noc_delta_c);
+
+            // Include the NoC delta costs in the total cost change for this swap
+            delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_);
+        }
+
+        /* 1 -> move accepted, 0 -> rejected. */
+        move_outcome = assess_swap(delta_c, annealing_state_.t);
+
+        //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move.
+#ifndef NO_GRAPHICS
+        if (manual_move_enabled) {
+            move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome);
+        }
+#endif //NO_GRAPHICS
+
+        if (move_outcome == ACCEPTED) {
+            costs_.cost += delta_c;
+            costs_.bb_cost += bb_delta_c;
+
+            if (place_algorithm == SLACK_TIMING_PLACE) {
+                // Update the timing driven cost as usual
+                costs_.timing_cost += timing_delta_c;
+
+                // Commit the setup slack information
+                // The timing delay and cost values should be committed already
+                commit_setup_slacks(setup_slacks_, placer_state_);
+            }
+
+            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+                costs_.timing_cost += timing_delta_c;
+
+                /* Invalidates timing of modified connections for incremental *
+                 * timing updates. These invalidations are accumulated for a  *
+                 * big timing update in the outer loop.                       */
+                invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+
+                /* Update the connection_timing_cost and connection_delay *
+                 * values from the temporary values.                      */
+                commit_td_cost(blocks_affected_, placer_state_);
+            }
+
+            /* Update net cost functions and reset flags. */
+            net_cost_handler_.update_move_nets();
+
+            /* Update clb data structures since we kept the move. */
+            blk_loc_registry.commit_move_blocks(blocks_affected_);
+
+            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
+                ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
+            }
+            if (noc_opts_.noc){
+                noc_cost_handler_->commit_noc_costs();
+                costs_ += noc_delta_c;
+            }
+
+            //Highlights the new block when manual move is selected.
+#ifndef NO_GRAPHICS
+            if (manual_move_enabled) {
+                manual_move_highlight_new_block_location();
+            }
+#endif //NO_GRAPHICS
+
+        } else {
+            VTR_ASSERT_SAFE(move_outcome == REJECTED);
+
+            // Reset the net cost function flags first.
+            net_cost_handler_.reset_move_nets();
+
+            // Restore the blk_loc_registry.block_locs data structures to their state before the move.
+            blk_loc_registry.revert_move_blocks(blocks_affected_);
+
+            if (place_algorithm == SLACK_TIMING_PLACE) {
+                /* Revert the timing delays and costs to pre-update values.       */
+                /* These routines must be called after reverting the block moves. */
+                //TODO: make this process incremental
+                comp_td_connection_delays(delay_model_, placer_state_);
+                comp_td_costs(delay_model_, *criticalities_, placer_state_, &costs_.timing_cost);
+
+                /* Re-invalidate the affected sink pins since the proposed
+                 * move is rejected, and the same blocks are reverted to
+                 * their original positions. */
+                invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+
+                // Revert the timing update
+                update_timing_classes(crit_params, timing_info_, criticalities_,
+                                      setup_slacks_, pin_timing_invalidator_, placer_state_);
+
+                VTR_ASSERT_SAFE_MSG(
+                    verify_connection_setup_slacks(setup_slacks_, placer_state_),
+                    "The current setup slacks should be identical to the values before the try swap timing info update.");
+            }
+
+            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+                // Un-stage the values stored in proposed_* data structures
+                revert_td_cost(blocks_affected_, placer_state_.mutable_timing());
+            }
+
+            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
+                ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
+            }
+            /* Revert the traffic flow routes within the NoC*/
+            if (noc_opts_.noc) {
+                noc_cost_handler_->revert_noc_traffic_flow_routes(blocks_affected_);
+            }
+        }
+
+        move_outcome_stats.delta_cost_norm = delta_c;
+        move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs_.bb_cost_norm;
+        move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs_.timing_cost_norm;
+
+        move_outcome_stats.delta_bb_cost_abs = bb_delta_c;
+        move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
+
+        LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), "");
+    }
+    move_outcome_stats.outcome = move_outcome;
+
+    // If we force a router block move then it was not proposed by the
+    // move generator, so we should not calculate the reward and update
+    // the move generators status since this outcome is not a direct
+    // consequence of the move generator
+    if (!router_block_move) {
+        move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor);
+    }
+
+#ifdef VTR_ENABLE_DEBUG_LOGGING
+#    ifndef NO_GRAPHICS
+    stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c);
+#    endif
+#endif
+
+    // Clear the data structure containing block move info
+    blocks_affected_.clear_move_blocks();
+
+#if 0
+    // Check that each accepted swap yields a valid placement. This will
+    // greatly slow the placer, but can debug some issues.
+    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
+#endif
+    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
+                   "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n",
+                   costs_.cost, costs_.bb_cost, costs_.timing_cost);
+    return move_outcome;
+}
+
+/* Outer-loop hook: if timing-driven, may run a full timing update (setup slacks/criticalities); always refreshes the cost normalization factors */
+void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) {
+    if (placer_opts_.place_algorithm.is_timing_driven()) {
+        /* Runs when outer_crit_iter_count_ reaches recompute_crit_iter, or on every call if
+         * inner_loop_recompute_divider != 0; refreshes values used to normalize timing vs. wirelength (bb) */
+        if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter
+            || placer_opts_.inner_loop_recompute_divider != 0) {
+#ifdef VERBOSE
+            VTR_LOG("Outer loop recompute criticalities\n");
+#endif
+            // Clamp to >= 1 to avoid division by zero. NOTE(review): num_connections is not read again in this function - confirm the clamp is still needed
+            num_connections = std::max(num_connections, 1);
+            VTR_ASSERT(num_connections > 0);
+
+            PlaceCritParams crit_params;
+            crit_params.crit_exponent = annealing_state_.crit_exponent;
+            crit_params.crit_limit = placer_opts_.place_crit_limit;
+
+            // Update all timing related classes using the current annealing crit_exponent
+            perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_,
+                                       pin_timing_invalidator_, timing_info_, &costs_, placer_state_);
+
+            outer_crit_iter_count_ = 0;
+        }
+        outer_crit_iter_count_++;
+    }
+
+    // Update the cost normalization factors (done for every placement algorithm, timing-driven or not)
+    update_placement_cost_normalization_factors(&costs_, placer_opts_, noc_opts_, noc_cost_handler_);
+}
+
+/* Inner loop of the simulated annealing: attempts state->move_lim swaps. NOTE(review): unqualified definition whose parameter list differs from the PlacementAnnealer::placement_inner_loop declaration in annealer.h; the body uses state, placer_opts, costs, swap_stats, etc. that are not parameters here - reconcile */
+void placement_inner_loop(int inner_recompute_limit,
+                         t_placer_statistics* stats,
+
+                                 int* moves_since_cost_recompute,
+                                 PlacerSetupSlacks* setup_slacks,
+                                 MoveGenerator& move_generator,
+                                 float timing_bb_factor
+                                 ) {
+    // How many times have we dumped placement to a file this temperature?
+    int inner_placement_save_count = 0;
+
+    stats->reset();
+
+    bool manual_move_enabled = false;
+
+    // Inner loop begins; inner_crit_iter_count counts moves since the last in-loop timing update
+    for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) {
+        e_move_result swap_result = try_swap(move_generator,
+                                             placer_opts, noc_opts, move_type_stat, place_algorithm,
+                                             timing_bb_factor, manual_move_enabled);
+
+        if (swap_result == ACCEPTED) {
+            /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
+            stats->single_swap_update(*costs);
+            swap_stats.num_swap_accepted++;
+        } else if (swap_result == ABORTED) {
+            swap_stats.num_swap_aborted++;
+        } else { // swap_result == REJECTED
+            swap_stats.num_swap_rejected++;
+        }
+
+        if (place_algorithm.is_timing_driven()) {
+            /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
+             * We do this only once in a while, since it is expensive.
+             */
+            if (inner_crit_iter_count >= inner_recompute_limit
+                && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */
+
+                inner_crit_iter_count = 0;
+#ifdef VERBOSE
+                VTR_LOG("Inner loop recompute criticalities\n");
+#endif
+
+                PlaceCritParams crit_params;
+                crit_params.crit_exponent = state->crit_exponent;
+                crit_params.crit_limit = placer_opts.place_crit_limit;
+
+                //Update all timing related classes
+                perform_full_timing_update(crit_params, delay_model, criticalities,
+                                           setup_slacks, pin_timing_invalidator,
+                                           timing_info, costs, placer_state);
+            }
+            inner_crit_iter_count++;
+        }
+
+        /* Lines below prevent too much round-off error from accumulating
+         * in the cost over many iterations (due to incremental updates).
+         * This round-off can lead to error checks failing because the cost
+         * is different from what you get when you recompute from scratch.
+         */
+        ++(*moves_since_cost_recompute);
+        if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {
+            net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs);
+
+            if (noc_cost_handler.has_value()) {
+                noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs);
+            }
+
+            *moves_since_cost_recompute = 0;
+        }
+
+        if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0
+            && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) { // NOTE(review): divisor is 0 if placement_saves_per_temperature > move_lim (assuming integer division) - confirm
+            std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
+                                                   state->num_temps + 1, inner_placement_save_count);
+            VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n",
+                    inner_iter, state->move_lim, filename.c_str());
+            print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs());
+            ++inner_placement_save_count;
+        }
+    }
+
+    /* Calculate the success_rate and std_dev of the costs. */
+    stats->calc_iteration_stats(*costs, state->move_lim);
 }
\ No newline at end of file
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 0c046bcdf5f..b19ef8f5968 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -1,9 +1,32 @@
 
 #pragma once
 
+#include "vpr_types.h"
+
+#include "move_generator.h" // movestats
+#include "net_cost_handler.h"
+
+#include <optional>
+
+class PlacerState;
 class t_placer_costs;
 struct t_placer_opts;
-struct t_annealing_sched;
+
+class NocCostHandler;
+class ManualMoveGenerator;
+class NetPinTimingInvalidator;
+
+/**
+ * These variables keep track of the number of swaps
+ * rejected, accepted or aborted. The total number of swap attempts
+ * is the sum of these three counters.
+ */
+struct t_swap_stats {
+    int num_swap_rejected = 0;
+    int num_swap_accepted = 0;
+    int num_swap_aborted = 0;
+    int num_ts_called = 0;
+};
 
 /**
  * @brief Stores variables that are used by the annealing process.
@@ -71,7 +94,7 @@ class t_annealing_state {
     float INVERSE_DELTA_RLIM;
 
   public: //Constructor
-    t_annealing_state() = delete;
+    t_annealing_state() = default;
     t_annealing_state(const t_annealing_sched& annealing_sched,
                       float first_t,
                       float first_rlim,
@@ -128,8 +151,83 @@ class t_annealing_state {
     inline void update_move_lim(float success_target, float success_rate);
 };
 
+
 class PlacementAnnealer {
+  public:
+    PlacementAnnealer(const t_placer_opts& placer_opts,
+                      PlacerState& placer_state,
+                      t_placer_costs& costs,
+                      NetCostHandler& net_cost_handler,
+                      std::optional<NocCostHandler>& noc_cost_handler,
+                      const t_noc_opts& noc_opts,
+                      MoveGenerator& move_generator_1,
+                      MoveGenerator& move_generator_2,
+                      ManualMoveGenerator& manual_move_generator,
+                      const PlaceDelayModel* delay_model,
+                      PlacerCriticalities* criticalities,
+                      PlacerSetupSlacks* setup_slacks,
+                      SetupTimingInfo* timing_info,
+                      NetPinTimingInvalidator* pin_timing_invalidator,
+                      int move_lim);
+
+    void placement_inner_loop(const t_annealing_state* state, // NOTE(review): annealer.cpp currently defines an unqualified function with a shorter parameter list - reconcile
+                              const t_placer_opts& placer_opts,
+                              const t_noc_opts& noc_opts,
+                              int inner_recompute_limit,
+                              t_placer_statistics* stats,
+                              t_placer_costs* costs,
+                              int* moves_since_cost_recompute,
+                              NetPinTimingInvalidator* pin_timing_invalidator,
+                              const PlaceDelayModel* delay_model,
+                              PlacerCriticalities* criticalities,
+                              PlacerSetupSlacks* setup_slacks,
+                              MoveGenerator& move_generator,
+                              ManualMoveGenerator& manual_move_generator,
+                              t_pl_blocks_to_be_moved& blocks_affected,
+                              SetupTimingInfo* timing_info,
+                              const t_place_algorithm& place_algorithm,
+                              MoveTypeStat& move_type_stat,
+                              float timing_bb_factor,
+                              t_swap_stats& swap_stats,
+                              PlacerState& placer_state,
+                              NetCostHandler& net_cost_handler,
+                              std::optional<NocCostHandler>& noc_cost_handler);
+
+    void outer_loop_update_timing_info(int num_connections);
+
+    e_move_result try_swap(MoveGenerator& move_generator,
+                           const t_place_algorithm& place_algorithm,
+                           float timing_bb_factor,
+                           bool manual_move_enabled);
+
+  public:
+    const t_placer_opts& placer_opts_;
+    PlacerState& placer_state_;
+    t_placer_costs& costs_;
+    NetCostHandler& net_cost_handler_;
+    std::optional<NocCostHandler>& noc_cost_handler_;
+    const t_noc_opts& noc_opts_;
+
+    MoveGenerator& move_generator_1_;
+    MoveGenerator& move_generator_2_;
+    ManualMoveGenerator& manual_move_generator_;
+
+    const PlaceDelayModel* delay_model_;
+    PlacerCriticalities* criticalities_;
+    PlacerSetupSlacks* setup_slacks_;
+    SetupTimingInfo* timing_info_;
+    NetPinTimingInvalidator* pin_timing_invalidator_;
+    std::unique_ptr<FILE, decltype(&vtr::fclose)> move_stats_file_;
+    int outer_crit_iter_count_; // zeroed on each full timing update in outer_loop_update_timing_info(), then incremented on every timing-driven call
 
-  private:
     t_annealing_state annealing_state_;
+    /// Swap statistics keep a record of the number of accepted/rejected/aborted swaps.
+    t_swap_stats swap_stats_;
+    MoveTypeStat move_type_stats_;
+
+    t_pl_blocks_to_be_moved blocks_affected_;
+
+
+  private:
+    float estimate_starting_temperature();
 };
\ No newline at end of file
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 0c221f89c4a..99151695dab 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -91,18 +91,6 @@ struct t_range_limiters {
     float dm_rlim;
 };
 
-/**
- * These variables keep track of the number of swaps
- * rejected, accepted or aborted. The total number of swap attempts
- * is the sum of the three number.
- */
-struct t_swap_stats {
-    int num_swap_rejected = 0;
-    int num_swap_accepted = 0;
-    int num_swap_aborted = 0;
-    int num_ts_called = 0;
-};
-
 e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected,
                           ClusterBlockId b_from,
                           t_pl_loc to,
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 90b566fb753..33fc09fa342 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -90,84 +90,6 @@ static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
 constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
 constexpr float INVALID_COST = std::numeric_limits<double>::quiet_NaN();
 
-/********************** Variables local to place.c ***************************/
-
-
-std::unique_ptr<FILE, decltype(&vtr::fclose)> f_move_stats_file(nullptr,
-                                                                vtr::fclose);
-
-#ifdef VTR_ENABLE_DEBUG_LOGGIING
-#    define LOG_MOVE_STATS_HEADER()                               \
-        do {                                                      \
-            if (f_move_stats_file) {                              \
-                fprintf(f_move_stats_file.get(),                  \
-                        "temp,from_blk,to_blk,from_type,to_type," \
-                        "blk_count,"                              \
-                        "delta_cost,delta_bb_cost,delta_td_cost," \
-                        "outcome,reason\n");                      \
-            }                                                     \
-        } while (false)
-
-#    define LOG_MOVE_STATS_PROPOSED(t, affected_blocks)                                        \
-        do {                                                                                   \
-            if (f_move_stats_file) {                                                           \
-                auto& place_ctx = g_vpr_ctx.placement();                                       \
-                auto& cluster_ctx = g_vpr_ctx.clustering();                                    \
-                ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num;             \
-                                                                                               \
-                t_pl_loc to = affected_blocks.moved_blocks[0].new_loc;                         \
-                ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile];   \
-                                                                                               \
-                t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \
-                t_logical_block_type_ptr to_type = nullptr;                                    \
-                if (b_to) {                                                                    \
-                    to_type = cluster_ctx.clb_nlist.block_type(b_to);                          \
-                }                                                                              \
-                                                                                               \
-                fprintf(f_move_stats_file.get(),                                               \
-                        "%g,"                                                                  \
-                        "%d,%d,"                                                               \
-                        "%s,%s,"                                                               \
-                        "%d,",                                                                 \
-                        t,                                                                     \
-                        int(b_from), int(b_to),                                                \
-                        from_type->name, (to_type ? to_type->name : "EMPTY"),                  \
-                        affected_blocks.moved_blocks.size());                                  \
-            }                                                                                  \
-        } while (false)
-
-#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
-                                   outcome, reason)                          \
-        do {                                                                 \
-            if (f_move_stats_file) {                                         \
-                fprintf(f_move_stats_file.get(),                             \
-                        "%g,%g,%g,"                                          \
-                        "%s,%s\n",                                           \
-                        delta_cost, delta_bb_cost, delta_td_cost,            \
-                        outcome, reason);                                    \
-            }                                                                \
-        } while (false)
-
-#else
-
-#    define LOG_MOVE_STATS_HEADER()                      \
-        do {                                             \
-            fprintf(f_move_stats_file.get(),             \
-                    "VTR_ENABLE_DEBUG_LOGGING disabled " \
-                    "-- No move stats recorded\n");      \
-        } while (false)
-
-#    define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \
-        do {                                            \
-        } while (false)
-
-#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
-                                   outcome, reason)                          \
-        do {                                                                 \
-        } while (false)
-
-#endif
-
 /********************* Static subroutines local to place.c *******************/
 #ifdef VERBOSE
 void print_clb_placement(const char* fname);
@@ -254,43 +176,9 @@ static float starting_t(const t_annealing_state* state,
 
 static int count_connections();
 
-static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerState& placer_state);
-
-static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerTimingContext& p_timing_ctx);
-
-static void invalidate_affected_connections(
-    const t_pl_blocks_to_be_moved& blocks_affected,
-    NetPinTimingInvalidator* pin_tedges_invalidator,
-    TimingInfo* timing_info);
-
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state);
 
-static e_move_result assess_swap(double delta_c, double t);
-
-static void update_placement_cost_normalization_factors(t_placer_costs* costs,
-                                                        const t_placer_opts& placer_opts,
-                                                        const t_noc_opts& noc_opts,
-                                                        const std::optional<NocCostHandler>& noc_cost_handler);
-
-static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
-
-static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
-                                          const t_noc_opts& noc_opts,
-                                          t_placer_costs* costs,
-                                          int num_connections,
-                                          float crit_exponent,
-                                          int* outer_crit_iter_count,
-                                          const PlaceDelayModel* delay_model,
-                                          PlacerCriticalities* criticalities,
-                                          PlacerSetupSlacks* setup_slacks,
-                                          NetPinTimingInvalidator* pin_timing_invalidator,
-                                          SetupTimingInfo* timing_info,
-                                          PlacerState& placer_state,
-                                          const std::optional<NocCostHandler>& noc_cost_handler);
-
 static void placement_inner_loop(const t_annealing_state* state,
                                  const t_placer_opts& placer_opts,
                                  const t_noc_opts& noc_opts,
@@ -346,7 +234,6 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry);
 /*****************************************************************************/
 void try_place(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
-               t_annealing_sched annealing_sched,
                const t_router_opts& router_opts,
                const t_analysis_opts& analysis_opts,
                const t_noc_opts& noc_opts,
@@ -372,8 +259,6 @@ void try_place(const Netlist<>& net_list,
     auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
     int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count;
-    float first_crit_exponent;
-
 
     t_placer_costs costs(placer_opts.place_algorithm);
 
@@ -395,9 +280,6 @@ void try_place(const Netlist<>& net_list,
 
     t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size());
 
-    // Swap statistics keep record of the number accepted/rejected/aborted swaps.
-    t_swap_stats swap_stats;
-
     if (placer_opts.place_algorithm.is_timing_driven()) {
         /*do this before the initial placement to avoid messing up the initial placement */
         place_delay_model = alloc_lookups_and_delay_model(net_list,
@@ -422,7 +304,7 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-    int move_lim = (int)(annealing_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
+    int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
 
     PlacerState placer_state;
     auto& place_move_ctx = placer_state.mutable_move();
@@ -490,8 +372,6 @@ void try_place(const Netlist<>& net_list,
     if (placer_opts.place_algorithm.is_timing_driven()) {
         costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL);
 
-        first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
-
         num_connections = count_connections();
         VTR_LOG("\n");
         VTR_LOG("There are %d point to point connections in this circuit.\n",
@@ -530,7 +410,7 @@ void try_place(const Netlist<>& net_list,
 
         //First time compute timing and costs, compute from scratch
         PlaceCritParams crit_params;
-        crit_params.crit_exponent = first_crit_exponent;
+        crit_params.crit_exponent = placer_opts.td_place_exp_first;
         crit_params.crit_limit = placer_opts.place_crit_limit;
 
         initialize_timing_info(crit_params, place_delay_model.get(), placer_criticalities.get(),
@@ -574,7 +454,6 @@ void try_place(const Netlist<>& net_list,
         /* Other initializations */
         outer_crit_iter_count = 0;
         num_connections = 0;
-        first_crit_exponent = 0;
     }
 
     if (noc_opts.noc) {
@@ -650,62 +529,6 @@ void try_place(const Netlist<>& net_list,
         print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
     }
 
-    int first_move_lim = get_initial_move_lim(placer_opts, annealing_sched);
-
-    int inner_recompute_limit;
-    if (placer_opts.inner_loop_recompute_divider != 0) {
-        inner_recompute_limit = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
-    } else {
-        /*don't do an inner recompute */
-        inner_recompute_limit = first_move_lim + 1;
-    }
-
-    /* calculate the number of moves in the quench that we should recompute timing after based on the value of *
-     * the commandline option quench_recompute_divider                                                         */
-    int quench_recompute_limit;
-    if (placer_opts.quench_recompute_divider != 0) {
-        quench_recompute_limit = static_cast<int>(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
-    } else {
-        /*don't do an quench recompute */
-        quench_recompute_limit = first_move_lim + 1;
-    }
-
-    //allocate helper vectors that are used by many move generators
-    place_move_ctx.X_coord.resize(10, 0);
-    place_move_ctx.Y_coord.resize(10, 0);
-    place_move_ctx.layer_coord.resize(10, 0);
-
-    //allocate move type statistics vectors
-    MoveTypeStat move_type_stat;
-    move_type_stat.blk_type_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
-    move_type_stat.accepted_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
-    move_type_stat.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
-
-    /* Get the first range limiter */
-    float first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
-    place_move_ctx.first_rlim = first_rlim;
-
-    t_annealing_state state(annealing_sched,
-                            EPSILON,    // Set the temperature low to ensure that initial placement quality will be preserved
-                            first_rlim,
-                            first_move_lim,
-                            first_crit_exponent);
-
-    /* Update the starting temperature for placement annealing to a more appropriate value */
-    state.t = starting_t(&state, &costs, annealing_sched,
-                         place_delay_model.get(), placer_criticalities.get(),
-                         placer_setup_slacks.get(), timing_info.get(), *move_generator,
-                         manual_move_generator, pin_timing_invalidator.get(),
-                         blocks_affected, placer_opts, noc_opts, move_type_stat,
-                         swap_stats, placer_state, net_cost_handler, noc_cost_handler);
-
-    if (!placer_opts.move_stats_file.empty()) {
-        f_move_stats_file = std::unique_ptr<FILE, decltype(&vtr::fclose)>(
-            vtr::fopen(placer_opts.move_stats_file.c_str(), "w"),
-            vtr::fclose);
-        LOG_MOVE_STATS_HEADER();
-    }
-
     tot_iter = 0;
     moves_since_cost_recompute = 0;
 
@@ -727,6 +550,10 @@ void try_place(const Netlist<>& net_list,
     //Define the timing bb weight factor for the agent's reward function
     float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT;
 
+    PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
+                               noc_opts, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(),
+                               placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim);
+
     if (!skip_anneal) {
         //Table header
         VTR_LOG("\n");
@@ -736,21 +563,15 @@ void try_place(const Netlist<>& net_list,
         do {
             vtr::Timer temperature_timer;
 
-            outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections,
-                                          state.crit_exponent, &outer_crit_iter_count,
-                                          place_delay_model.get(), placer_criticalities.get(),
-                                          placer_setup_slacks.get(), pin_timing_invalidator.get(),
-                                          timing_info.get(), placer_state, noc_cost_handler);
+            annealer.outer_loop_update_timing_info(num_connections);
 
             if (placer_opts.place_algorithm.is_timing_driven()) {
                 critical_path = timing_info->least_slack_critical_path();
                 sTNS = timing_info->setup_total_negative_slack();
                 sWNS = timing_info->setup_worst_negative_slack();
 
-                //see if we should save the current placement solution as a checkpoint
-
-                if (placer_opts.place_checkpointing
-                    && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) {
+                // see if we should save the current placement solution as a checkpoint
+                if (placer_opts.place_checkpointing && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) {
                     save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(),
                                                         placement_checkpoint,
                                                         timing_info, costs, critical_path.delay());
@@ -761,7 +582,7 @@ void try_place(const Netlist<>& net_list,
             assign_current_move_generator(move_generator, move_generator2,
                                           agent_state, placer_opts, false, current_move_generator);
 
-            //do a complete inner loop iteration
+            // do a complete inner loop iteration
             placement_inner_loop(&state, placer_opts, noc_opts,
                                  inner_recompute_limit,
                                  &stats, &costs, &moves_since_cost_recompute,
@@ -818,11 +639,7 @@ void try_place(const Netlist<>& net_list,
 
         vtr::ScopedFinishTimer temperature_timer("Placement Quench");
 
-        outer_loop_update_timing_info(placer_opts, noc_opts, &costs, num_connections,
-                                      state.crit_exponent, &outer_crit_iter_count,
-                                      place_delay_model.get(), placer_criticalities.get(),
-                                      placer_setup_slacks.get(), pin_timing_invalidator.get(),
-                                      timing_info.get(), placer_state, noc_cost_handler);
+        annealer.outer_loop_update_timing_info(num_connections);
 
         //move the appropriate move_generator to be the current used move generator
         assign_current_move_generator(move_generator, move_generator2,
@@ -991,47 +808,7 @@ void try_place(const Netlist<>& net_list,
     copy_locs_to_global_state(blk_loc_registry);
 }
 
-/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */
-static void outer_loop_update_timing_info(const t_placer_opts& placer_opts,
-                                          const t_noc_opts& noc_opts,
-                                          t_placer_costs* costs,
-                                          int num_connections,
-                                          float crit_exponent,
-                                          int* outer_crit_iter_count,
-                                          const PlaceDelayModel* delay_model,
-                                          PlacerCriticalities* criticalities,
-                                          PlacerSetupSlacks* setup_slacks,
-                                          NetPinTimingInvalidator* pin_timing_invalidator,
-                                          SetupTimingInfo* timing_info,
-                                          PlacerState& placer_state,
-                                          const std::optional<NocCostHandler>& noc_cost_handler) {
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        /*at each temperature change we update these values to be used     */
-        /*for normalizing the tradeoff between timing and wirelength (bb)  */
-        if (*outer_crit_iter_count >= placer_opts.recompute_crit_iter
-            || placer_opts.inner_loop_recompute_divider != 0) {
-#ifdef VERBOSE
-            VTR_LOG("Outer loop recompute criticalities\n");
-#endif
-            num_connections = std::max(num_connections, 1); //Avoid division by zero
-            VTR_ASSERT(num_connections > 0);
-
-            PlaceCritParams crit_params;
-            crit_params.crit_exponent = crit_exponent;
-            crit_params.crit_limit = placer_opts.place_crit_limit;
 
-            //Update all timing related classes
-            perform_full_timing_update(crit_params, delay_model, criticalities, setup_slacks,
-                                       pin_timing_invalidator, timing_info, costs, placer_state);
-
-            *outer_crit_iter_count = 0;
-        }
-        (*outer_crit_iter_count)++;
-    }
-
-    /* Update the cost normalization factors */
-    update_placement_cost_normalization_factors(costs, placer_opts, noc_opts, noc_cost_handler);
-}
 
 /* Function which contains the inner loop of the simulated annealing */
 static void placement_inner_loop(const t_annealing_state* state,
@@ -1618,223 +1395,6 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     return cube_bb;
 }
 
-/**
- * @brief Updates all the cost normalization factors during the outer
- * loop iteration of the placement. At each temperature change, these
- * values are updated so that we can balance the tradeoff between the
- * different placement cost components (timing, wirelength and NoC).
- * Depending on the placement mode the corresponding normalization factors are
- * updated.
- *
- * @param costs Contains the normalization factors which need to be updated
- * @param placer_opts Determines the placement mode
- * @param noc_opts Determines if placement includes the NoC
- * @param noc_cost_handler Computes normalization factors for NoC-related cost terms
- */
-static void update_placement_cost_normalization_factors(t_placer_costs* costs,
-                                                        const t_placer_opts& placer_opts,
-                                                        const t_noc_opts& noc_opts,
-                                                        const std::optional<NocCostHandler>& noc_cost_handler) {
-    /* Update the cost normalization factors */
-    costs->update_norm_factors();
-
-    // update the noc normalization factors if the placement includes the NoC
-    if (noc_opts.noc) {
-        noc_cost_handler->update_noc_normalization_factors(*costs);
-    }
-
-    // update the current total placement cost
-    costs->cost = get_total_cost(costs, placer_opts, noc_opts);
-}
-
-/**
- * @brief Compute the total normalized cost for a given placement. This
- * computation will vary depending on the placement modes.
- *
- * @param costs The current placement cost components and their normalization
- * factors
- * @param placer_opts Determines the placement mode
- * @param noc_opts Determines if placement includes the NoC
- * @return double The computed total cost of the current placement
- */
-static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
-    double total_cost = 0.0;
-
-    if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
-        // in bounding box mode we only care about wirelength
-        total_cost = costs->bb_cost * costs->bb_cost_norm;
-    } else if (placer_opts.place_algorithm.is_timing_driven()) {
-        // in timing mode we include both wirelength and timing costs
-        total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm);
-    }
-
-    if (noc_opts.noc) {
-        // in noc mode we include noc aggregate bandwidth and noc latency
-        total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts);
-    }
-
-    return total_cost;
-}
-
-/**
- * @brief Check if the setup slack has gotten better or worse due to block swap.
- *
- * Get all the modified slack values via the PlacerSetupSlacks class, and compare
- * then with the original values at these connections. Sort them and compare them
- * one by one, and return the difference of the first different pair.
- *
- * If the new slack value is larger(better), than return a negative value so that
- * the move will be accepted. If the new slack value is smaller(worse), return a
- * positive value so that the move will be rejected.
- *
- * If no slack values have changed, then return an arbitrary positive number. A
- * move resulting in no change in the slack values should probably be unnecessary.
- *
- * The sorting is need to prevent in the unlikely circumstances that a bad slack
- * value suddenly got very good due to the block move, while a good slack value
- * got very bad, perhaps even worse than the original worse slack value.
- */
-static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
-                                      const PlacerState& placer_state) {
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
-    const auto& clb_nlist = cluster_ctx.clb_nlist;
-
-    const auto& p_timing_ctx = placer_state.timing();
-    const auto& connection_setup_slack = p_timing_ctx.connection_setup_slack;
-
-    //Find the original/proposed setup slacks of pins with modified values
-    std::vector<float> original_setup_slacks, proposed_setup_slacks;
-
-    auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack();
-    for (ClusterPinId clb_pin : clb_pins_modified) {
-        ClusterNetId net_id = clb_nlist.pin_net(clb_pin);
-        size_t ipin = clb_nlist.pin_net_index(clb_pin);
-
-        original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]);
-        proposed_setup_slacks.push_back(
-            setup_slacks->setup_slack(net_id, ipin));
-    }
-
-    //Sort in ascending order, from the worse slack value to the best
-    std::stable_sort(original_setup_slacks.begin(), original_setup_slacks.end());
-    std::stable_sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end());
-
-    //Check the first pair of slack values that are different
-    //If found, return their difference
-    for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) {
-        float slack_diff = original_setup_slacks[idiff]
-                           - proposed_setup_slacks[idiff];
-
-        if (slack_diff != 0) {
-            return slack_diff;
-        }
-    }
-
-    //If all slack values are identical (or no modified slack values),
-    //reject this move by returning an arbitrary positive number as cost.
-    return 1;
-}
-
-static e_move_result assess_swap(double delta_c, double t) {
-    /* Returns: 1 -> move accepted, 0 -> rejected. */
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c);
-    if (delta_c <= 0) {
-        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n");
-        return ACCEPTED;
-    }
-
-    if (t == 0.) {
-        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n");
-        return REJECTED;
-    }
-
-    float fnum = vtr::frand();
-    float prob_fac = std::exp(-delta_c / t);
-    if (prob_fac > fnum) {
-        VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n");
-        return ACCEPTED;
-    }
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n");
-    return REJECTED;
-}
-
-/**
- * @brief Update the connection_timing_cost values from the temporary
- *        values for all connections that have/haven't changed.
- *
- * All the connections have already been gathered by blocks_affected.affected_pins
- * after running the routine find_affected_nets_and_update_costs() in try_swap().
- */
-static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerState& placer_state) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& clb_nlist = cluster_ctx.clb_nlist;
-
-    auto& p_timing_ctx = placer_state.mutable_timing();
-    auto& connection_delay = p_timing_ctx.connection_delay;
-    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
-    auto& connection_timing_cost = p_timing_ctx.connection_timing_cost;
-    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
-
-    //Go through all the sink pins affected
-    for (ClusterPinId pin_id : blocks_affected.affected_pins) {
-        ClusterNetId net_id = clb_nlist.pin_net(pin_id);
-        int ipin = clb_nlist.pin_net_index(pin_id);
-
-        //Commit the timing delay and cost values
-        connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin];
-        proposed_connection_delay[net_id][ipin] = INVALID_DELAY;
-        connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin];
-        proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY;
-    }
-}
-
-//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on
-//the move proposed in blocks_affected
-static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerTimingContext& p_timing_ctx) {
-#ifndef VTR_ASSERT_SAFE_ENABLED
-    (void)blocks_affected;
-    (void)p_timing_ctx;
-#else
-    //Invalidate temp delay & timing cost values to match sanity checks in
-    //comp_td_connection_cost()
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& clb_nlist = cluster_ctx.clb_nlist;
-
-    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
-    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
-
-    for (ClusterPinId pin : blocks_affected.affected_pins) {
-        ClusterNetId net = clb_nlist.pin_net(pin);
-        int ipin = clb_nlist.pin_net_index(pin);
-        proposed_connection_delay[net][ipin] = INVALID_DELAY;
-        proposed_connection_timing_cost[net][ipin] = INVALID_DELAY;
-    }
-#endif
-}
-
-/**
- * @brief Invalidates the connections affected by the specified block moves.
- *
- * All the connections recorded in blocks_affected.affected_pins have different
- * values for `proposed_connection_delay` and `connection_delay`.
- *
- * Invalidate all the timing graph edges associated with these connections via
- * the NetPinTimingInvalidator class.
- */
-static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
-                                            NetPinTimingInvalidator* pin_tedges_invalidator,
-                                            TimingInfo* timing_info) {
-    VTR_ASSERT_SAFE(timing_info);
-    VTR_ASSERT_SAFE(pin_tedges_invalidator);
-
-    /* Invalidate timing graph edges affected by the move */
-    for (ClusterPinId pin : blocks_affected.affected_pins) {
-        pin_tedges_invalidator->invalidate_connection(pin, timing_info);
-    }
-}
-
 /* Allocates the major structures needed only by the placer, primarily for *
  * computing costs quickly and such.                                       */
 static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& placer_opts,
diff --git a/vpr/src/place/place.h b/vpr/src/place/place.h
index 138c6cdd05d..210663823a8 100644
--- a/vpr/src/place/place.h
+++ b/vpr/src/place/place.h
@@ -5,7 +5,6 @@
 
 void try_place(const Netlist<>& net_list,
                const t_placer_opts& placer_opts,
-               t_annealing_sched annealing_sched,
                const t_router_opts& router_opts,
                const t_analysis_opts& analysis_opts,
                const t_noc_opts& noc_opts,
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index 97941f639b1..344839a1bd5 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -119,6 +119,14 @@ struct PlacerMoveContext : public Context {
 
     // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option)
     std::vector<std::pair<ClusterNetId, int>> highly_crit_pins;
+
+  public:
+    PlacerMoveContext() {
+        // allocate helper vectors that are used by many move generators
+        X_coord.resize(10, 0);
+        Y_coord.resize(10, 0);
+        layer_coord.resize(10, 0);
+    }
 };
 
 /**

From 5db48fc81a176dd73548e1e88b2d36424e559beb Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 18 Oct 2024 17:02:23 -0400
Subject: [PATCH 03/31] solve compilation errors

---
 vpr/src/place/annealer.cpp       | 216 +++++-----
 vpr/src/place/annealer.h         |  69 ++--
 vpr/src/place/move_generator.cpp |   2 +-
 vpr/src/place/move_generator.h   |   2 +-
 vpr/src/place/place.cpp          | 690 ++-----------------------------
 vpr/src/place/place_util.cpp     |  20 +
 vpr/src/place/place_util.h       |  12 +
 7 files changed, 200 insertions(+), 811 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 0fdcd9dbcb9..3203d79576f 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -14,6 +14,7 @@
 #include "noc_place_utils.h"
 #include "NetPinTimingInvalidator.h"
 #include "place_timing_update.h"
+#include "read_place.h"
 
 #ifdef VTR_ENABLE_DEBUG_LOGGIING
 #    define LOG_MOVE_STATS_HEADER()                               \
@@ -259,35 +260,6 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
 #endif
 }
 
-/**
- * @brief Compute the total normalized cost for a given placement. This
- * computation will vary depending on the placement modes.
- *
- * @param costs The current placement cost components and their normalization
- * factors
- * @param placer_opts Determines the placement mode
- * @param noc_opts Determines if placement includes the NoC
- * @return double The computed total cost of the current placement
- */
-static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
-    double total_cost = 0.0;
-
-    if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
-        // in bounding box mode we only care about wirelength
-        total_cost = costs->bb_cost * costs->bb_cost_norm;
-    } else if (placer_opts.place_algorithm.is_timing_driven()) {
-        // in timing mode we include both wirelength and timing costs
-        total_cost = (1 - placer_opts.timing_tradeoff) * (costs->bb_cost * costs->bb_cost_norm) + (placer_opts.timing_tradeoff) * (costs->timing_cost * costs->timing_cost_norm);
-    }
-
-    if (noc_opts.noc) {
-        // in noc mode we include noc aggregate bandwidth and noc latency
-        total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts);
-    }
-
-    return total_cost;
-}
-
 /**
  * @brief Updates all the cost normalization factors during the outer
  * loop iteration of the placement. At each temperature change, these
@@ -314,7 +286,7 @@ static void update_placement_cost_normalization_factors(t_placer_costs* costs,
     }
 
     // update the current total placement cost
-    costs->cost = get_total_cost(costs, placer_opts, noc_opts);
+    costs->cost = costs->get_total_cost(placer_opts, noc_opts);
 }
 
 ///@brief Constructor: Initialize all annealing state variables and macros.
@@ -348,60 +320,60 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
 
 bool t_annealing_state::outer_loop_update(float success_rate,
                                           const t_placer_costs& costs,
-                                          const t_placer_opts& placer_opts,
-                                          const t_annealing_sched& annealing_sched) {
+                                          const t_placer_opts& placer_opts) {
 #ifndef NO_GRAPHICS
     t_draw_state* draw_state = get_draw_state_vars();
     if (!draw_state->list_of_breakpoints.empty()) {
-        /* Update temperature in the current information variable. */
+        // Update temperature in the current information variable.
         get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++;
     }
 #endif
 
-    if (annealing_sched.type == e_sched_type::USER_SCHED) {
-        /* Update t with user specified alpha. */
-        t *= annealing_sched.alpha_t;
+    if (placer_opts.anneal_sched.type == e_sched_type::USER_SCHED) {
+        // Update t with user specified alpha.
+        t *= placer_opts.anneal_sched.alpha_t;
 
-        /* Check if the exit criterion is met. */
-        bool exit_anneal = t >= annealing_sched.exit_t;
+        // Check if the exit criterion is met.
+        bool exit_anneal = t >= placer_opts.anneal_sched.exit_t;
 
         return exit_anneal;
     }
 
-    /* Automatically determine exit temperature. */
+    // Automatically determine exit temperature.
     auto& cluster_ctx = g_vpr_ctx.clustering();
     float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
 
-    if (annealing_sched.type == e_sched_type::DUSTY_SCHED) {
-        /* May get nan if there are no nets */
+    if (placer_opts.anneal_sched.type == e_sched_type::DUSTY_SCHED) {
+        // May get nan if there are no nets
         bool restart_temp = t < t_exit || std::isnan(t_exit);
 
         /* If the success rate or the temperature is *
          * too low, reset the temperature and alpha. */
-        if (success_rate < annealing_sched.success_min || restart_temp) {
-            /* Only exit anneal when alpha gets too large. */
-            if (alpha > annealing_sched.alpha_max) {
+        if (success_rate < placer_opts.anneal_sched.success_min || restart_temp) {
+            // Only exit anneal when alpha gets too large.
+            if (alpha > placer_opts.anneal_sched.alpha_max) {
                 return false;
             }
-            /* Take a half step from the restart temperature. */
+
+            // Take a half step from the restart temperature.
             t = restart_t / sqrt(alpha);
-            /* Update alpha. */
-            alpha = 1.0 - ((1.0 - alpha) * annealing_sched.alpha_decay);
+            // Update alpha.
+            alpha = 1.0 - ((1.0 - alpha) * placer_opts.anneal_sched.alpha_decay);
         } else {
             /* If the success rate is promising, next time   *
              * reset t to the current annealing temperature. */
-            if (success_rate > annealing_sched.success_target) {
+            if (success_rate > placer_opts.anneal_sched.success_target) {
                 restart_t = t;
             }
-            /* Update t. */
+            // Update t.
             t *= alpha;
         }
 
-        /* Update move lim. */
-        update_move_lim(annealing_sched.success_target, success_rate);
+        // Update move lim.
+        update_move_lim(placer_opts.anneal_sched.success_target, success_rate);
     } else {
-        VTR_ASSERT_SAFE(annealing_sched.type == e_sched_type::AUTO_SCHED);
-        /* Automatically adjust alpha according to success rate. */
+        VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
+        // Automatically adjust alpha according to success rate.
         if (success_rate > 0.96) {
             alpha = 0.5;
         } else if (success_rate > 0.8) {
@@ -411,23 +383,23 @@ bool t_annealing_state::outer_loop_update(float success_rate,
         } else {
             alpha = 0.8;
         }
-        /* Update temp. */
+        // Update temp.
         t *= alpha;
-        /* Must be duplicated to retain previous behavior. */
+        // Must be duplicated to retain previous behavior.
         if (t < t_exit || std::isnan(t_exit)) {
             return false;
         }
     }
 
-    /* Update the range limiter. */
+    // Update the range limiter.
     update_rlim(success_rate);
 
-    /* If using timing driven algorithm, update the crit_exponent. */
+    // If using timing driven algorithm, update the crit_exponent.
     if (placer_opts.place_algorithm.is_timing_driven()) {
         update_crit_exponent(placer_opts);
     }
 
-    /* Continues the annealing. */
+    // Continues the annealing.
     return true;
 }
 
@@ -496,13 +468,15 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
 
     int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched);
 
-    int inner_recompute_limit;
+
     if (placer_opts.inner_loop_recompute_divider != 0) {
-        inner_recompute_limit = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
+        inner_recompute_limit_ = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
     } else {
         // don't do an inner recompute
-        inner_recompute_limit = first_move_lim + 1;
+        inner_recompute_limit_ = first_move_lim + 1;
     }
+    moves_since_cost_recompute_ = 0;
+    tot_iter_ = 0;
 
     /* calculate the number of moves in the quench that we should recompute timing after based on the value of *
      * the commandline option quench_recompute_divider                                                         */
@@ -510,7 +484,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     if (placer_opts.quench_recompute_divider != 0) {
         quench_recompute_limit = static_cast<int>(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
     } else {
-        /*don't do an quench recompute */
+        // don't do an quench recompute
         quench_recompute_limit = first_move_lim + 1;
     }
 
@@ -609,23 +583,6 @@ float PlacementAnnealer::estimate_starting_temperature() {
     return init_temp;
 }
 
-
-/**
- * @brief Pick some block and moves it to another spot.
- *
- * If the new location is empty, directly move the block. If the new location
- * is occupied, switch the blocks. Due to the different sizes of the blocks,
- * this block switching may occur for multiple times. It might also cause the
- * current swap attempt to abort due to inability to find suitable locations
- * for moved blocks.
- *
- * The move generator will record all the switched blocks in the variable
- * `blocks_affected`. Afterwards, the move will be assessed by the chosen
- * cost formulation. Currently, there are three ways to assess move cost,
- * which are stored in the enum type `t_place_algorithm`.
- *
- * @return Whether the block swap is accepted, rejected or aborted.
- */
 e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                                           const t_place_algorithm& place_algorithm,
                                           float timing_bb_factor,
@@ -969,43 +926,37 @@ void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) {
 }
 
 /* Function which contains the inner loop of the simulated annealing */
-void placement_inner_loop(int inner_recompute_limit,
-                         t_placer_statistics* stats,
-
-                                 int* moves_since_cost_recompute,
-                                 PlacerSetupSlacks* setup_slacks,
-                                 MoveGenerator& move_generator,
-                                 float timing_bb_factor
-                                 ) {
+void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
+                                             float timing_bb_factor) {
     // How many times have we dumped placement to a file this temperature?
     int inner_placement_save_count = 0;
 
-    stats->reset();
+    placer_stats_.reset();
 
     bool manual_move_enabled = false;
 
     // Inner loop begins
-    for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(move_generator,
-                                             placer_opts, noc_opts, move_type_stat, place_algorithm,
+    for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) {
+        e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm,
                                              timing_bb_factor, manual_move_enabled);
 
         if (swap_result == ACCEPTED) {
-            /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
-            stats->single_swap_update(*costs);
-            swap_stats.num_swap_accepted++;
+            // Move was accepted.  Update statistics that are useful for the annealing schedule.
+            placer_stats_.single_swap_update(costs_);
+            swap_stats_.num_swap_accepted++;
         } else if (swap_result == ABORTED) {
-            swap_stats.num_swap_aborted++;
+            swap_stats_.num_swap_aborted++;
         } else { // swap_result == REJECTED
-            swap_stats.num_swap_rejected++;
+            swap_stats_.num_swap_rejected++;
         }
 
-        if (place_algorithm.is_timing_driven()) {
+        if (placer_opts_.place_algorithm.is_timing_driven()) {
             /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
              * We do this only once in a while, since it is expensive.
              */
-            if (inner_crit_iter_count >= inner_recompute_limit
-                && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */
+
+            // on last iteration don't recompute
+            if (inner_crit_iter_count >= inner_recompute_limit_ && inner_iter != annealing_state_.move_lim - 1) {
 
                 inner_crit_iter_count = 0;
 #ifdef VERBOSE
@@ -1013,13 +964,13 @@ void placement_inner_loop(int inner_recompute_limit,
 #endif
 
                 PlaceCritParams crit_params;
-                crit_params.crit_exponent = state->crit_exponent;
-                crit_params.crit_limit = placer_opts.place_crit_limit;
+                crit_params.crit_exponent = annealing_state_.crit_exponent;
+                crit_params.crit_limit = placer_opts_.place_crit_limit;
 
-                //Update all timing related classes
-                perform_full_timing_update(crit_params, delay_model, criticalities,
-                                           setup_slacks, pin_timing_invalidator,
-                                           timing_info, costs, placer_state);
+                // Update all timing related classes
+                perform_full_timing_update(crit_params, delay_model_, criticalities_,
+                                           setup_slacks_, pin_timing_invalidator_,
+                                           timing_info_, &costs_, placer_state_);
             }
             inner_crit_iter_count++;
         }
@@ -1029,28 +980,55 @@ void placement_inner_loop(int inner_recompute_limit,
          * This round-off can lead to error checks failing because the cost
          * is different from what you get when you recompute from scratch.
          */
-        ++(*moves_since_cost_recompute);
-        if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {
-            net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs);
+        moves_since_cost_recompute_++;
+        if (moves_since_cost_recompute_ > MAX_MOVES_BEFORE_RECOMPUTE) {
+            net_cost_handler_.recompute_costs_from_scratch(delay_model_, criticalities_, costs_);
 
-            if (noc_cost_handler.has_value()) {
-                noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs);
+            if (noc_cost_handler_.has_value()) {
+                noc_cost_handler_->recompute_costs_from_scratch(noc_opts_, costs_);
             }
 
-            *moves_since_cost_recompute = 0;
+            moves_since_cost_recompute_ = 0;
         }
 
-        if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0
-            && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) {
+        if (placer_opts_.placement_saves_per_temperature >= 1 && inner_iter > 0
+            && (inner_iter + 1) % (annealing_state_.move_lim / placer_opts_.placement_saves_per_temperature) == 0) {
             std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
-                                                   state->num_temps + 1, inner_placement_save_count);
+                                                   annealing_state_.num_temps + 1, inner_placement_save_count);
             VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n",
-                    inner_iter, state->move_lim, filename.c_str());
-            print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs());
+                    inner_iter, annealing_state_.move_lim, filename.c_str());
+            print_place(nullptr, nullptr, filename.c_str(), placer_state_.block_locs());
             ++inner_placement_save_count;
         }
     }
 
-    /* Calculate the success_rate and std_dev of the costs. */
-    stats->calc_iteration_stats(*costs, state->move_lim);
-}
\ No newline at end of file
+    // Calculate the success_rate and std_dev of the costs.
+    placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim);
+
+    tot_iter_ += annealing_state_.move_lim;
+    ++annealing_state_.num_temps;
+}
+
+int PlacementAnnealer::get_total_iteration() const {
+    return tot_iter_;
+}
+
+const t_annealing_state& PlacementAnnealer::get_annealing_state() const {
+    return annealing_state_;
+}
+
+bool PlacementAnnealer::outer_loop_update_state() {
+    return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_);
+}
+
+void PlacementAnnealer::start_quench() {
+    // Freeze out: only accept solutions that improve placement.
+    annealing_state_.t = 0;
+
+    // Revert the move limit to its initial value.
+    annealing_state_.move_lim = annealing_state_.move_lim_max;
+}
+
+std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> PlacementAnnealer::get_stats() const {
+    return {swap_stats_, move_type_stats_, placer_stats_};
+}
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index b19ef8f5968..02b48967525 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -7,6 +7,7 @@
 #include "net_cost_handler.h"
 
 #include <optional>
+#include <tuple>
 
 class PlacerState;
 class t_placer_costs;
@@ -114,8 +115,7 @@ class t_annealing_state {
      */
     bool outer_loop_update(float success_rate,
                            const t_placer_costs& costs,
-                           const t_placer_opts& placer_opts,
-                           const t_annealing_sched& annealing_sched);
+                           const t_placer_opts& placer_opts);
 
   private: //Mutator
     /**
@@ -170,36 +170,43 @@ class PlacementAnnealer {
                       NetPinTimingInvalidator* pin_timing_invalidator,
                       int move_lim);
 
-    void placement_inner_loop(const t_annealing_state* state,
-                              const t_placer_opts& placer_opts,
-                              const t_noc_opts& noc_opts,
-                              int inner_recompute_limit,
-                              t_placer_statistics* stats,
-                              t_placer_costs* costs,
-                              int* moves_since_cost_recompute,
-                              NetPinTimingInvalidator* pin_timing_invalidator,
-                              const PlaceDelayModel* delay_model,
-                              PlacerCriticalities* criticalities,
-                              PlacerSetupSlacks* setup_slacks,
-                              MoveGenerator& move_generator,
-                              ManualMoveGenerator& manual_move_generator,
-                              t_pl_blocks_to_be_moved& blocks_affected,
-                              SetupTimingInfo* timing_info,
-                              const t_place_algorithm& place_algorithm,
-                              MoveTypeStat& move_type_stat,
-                              float timing_bb_factor,
-                              t_swap_stats& swap_stats,
-                              PlacerState& placer_state,
-                              NetCostHandler& net_cost_handler,
-                              std::optional<NocCostHandler>& noc_cost_handler);
+    /* Function which contains the inner loop of the simulated annealing */
+    void placement_inner_loop(MoveGenerator& move_generator,
+                              float timing_bb_factor);
 
     void outer_loop_update_timing_info(int num_connections);
 
+    bool outer_loop_update_state();
+
+    /**
+     * @brief Picks some block and moves it to another spot.
+     *
+     * If the new location is empty, directly move the block. If the new location
+     * is occupied, switch the blocks. Due to the different sizes of the blocks,
+     * this block switching may occur multiple times. It might also cause the
+     * current swap attempt to abort due to inability to find suitable locations
+     * for moved blocks.
+     *
+     * The move generator will record all the switched blocks in the variable
+     * `blocks_affected_`. Afterwards, the move will be assessed by the chosen
+     * cost formulation. Currently, there are three ways to assess move cost,
+     * which are stored in the enum type `t_place_algorithm`.
+     *
+     * @return Whether the block swap is accepted, rejected or aborted.
+     */
     e_move_result try_swap(MoveGenerator& move_generator,
                            const t_place_algorithm& place_algorithm,
                            float timing_bb_factor,
                            bool manual_move_enabled);
 
+    int get_total_iteration() const;
+
+    const t_annealing_state& get_annealing_state() const;
+
+    std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
+
+    void start_quench();
+
   public:
     const t_placer_opts& placer_opts_;
     PlacerState& placer_state_;
@@ -224,10 +231,24 @@ class PlacementAnnealer {
     /// Swap statistics keep record of the number accepted/rejected/aborted swaps.
     t_swap_stats swap_stats_;
     MoveTypeStat move_type_stats_;
+    t_placer_statistics placer_stats_;
 
     t_pl_blocks_to_be_moved blocks_affected_;
 
+  private:
+
+    /**
+     * @brief The maximum number of swap attempts before invoking the
+     * once-in-a-while placement legality check as well as floating point
+     * variables round-offs check.
+     */
+    static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
+
+    int inner_recompute_limit_;
+    int moves_since_cost_recompute_;
+    int tot_iter_;
 
   private:
+    ///@brief Find the starting temperature for the annealing loop.
     float estimate_starting_temperature();
 };
\ No newline at end of file
diff --git a/vpr/src/place/move_generator.cpp b/vpr/src/place/move_generator.cpp
index 2d1bcfbc64a..59795a63cc6 100644
--- a/vpr/src/place/move_generator.cpp
+++ b/vpr/src/place/move_generator.cpp
@@ -51,7 +51,7 @@ void MoveGenerator::calculate_reward_and_process_outcome(const MoveOutcomeStats&
     }
 }
 
-void MoveTypeStat::print_placement_move_types_stats() {
+void MoveTypeStat::print_placement_move_types_stats() const {
     VTR_LOG("\n\nPlacement perturbation distribution by block and move type: \n");
 
     VTR_LOG(
diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h
index 0c83bb9d5eb..14f76a5b136 100644
--- a/vpr/src/place/move_generator.h
+++ b/vpr/src/place/move_generator.h
@@ -37,7 +37,7 @@ struct MoveTypeStat {
     /**
      * @brief Prints placement perturbation distribution by block and move type.
      */
-    void print_placement_move_types_stats();
+    void print_placement_move_types_stats() const;
 };
 
 /**
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 33fc09fa342..f4ccaf083f1 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -82,11 +82,6 @@ static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
 #endif
 
 /************** Types and defines local to place.c ***************************/
-/* This defines the maximum number of swap attempts before invoking the   *
- * once-in-a-while placement legality check as well as floating point     *
- * variables round-offs check.                                            */
-static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
-
 constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
 constexpr float INVALID_COST = std::numeric_limits<double>::quiet_NaN();
 
@@ -112,28 +107,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac
 
 static void free_placement_structs();
 
-static e_move_result try_swap(const t_annealing_state* state,
-                              t_placer_costs* costs,
-                              MoveGenerator& move_generator,
-                              ManualMoveGenerator& manual_move_generator,
-                              SetupTimingInfo* timing_info,
-                              NetPinTimingInvalidator* pin_timing_invalidator,
-                              t_pl_blocks_to_be_moved& blocks_affected,
-                              const PlaceDelayModel* delay_model,
-                              PlacerCriticalities* criticalities,
-                              PlacerSetupSlacks* setup_slacks,
-                              const t_placer_opts& placer_opts,
-                              const t_noc_opts& noc_opts,
-                              MoveTypeStat& move_type_stat,
-                              const t_place_algorithm& place_algorithm,
-                              float timing_bb_factor,
-                              bool manual_move_enabled,
-                              t_swap_stats& swap_stats,
-                              PlacerState& placer_state,
-                              NetCostHandler& net_cost_handler,
-                              std::optional<NocCostHandler>& noc_cost_handler);
-
-
 static void check_place(const t_placer_costs& costs,
                         const PlaceDelayModel* delay_model,
                         const PlacerCriticalities* criticalities,
@@ -155,53 +128,11 @@ static int check_placement_consistency(const BlkLocRegistry& blk_loc_registry);
 static int check_block_placement_consistency(const BlkLocRegistry& blk_loc_registry);
 static int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_registry);
 
-static float starting_t(const t_annealing_state* state,
-                        t_placer_costs* costs,
-                        t_annealing_sched annealing_sched,
-                        const PlaceDelayModel* delay_model,
-                        PlacerCriticalities* criticalities,
-                        PlacerSetupSlacks* setup_slacks,
-                        SetupTimingInfo* timing_info,
-                        MoveGenerator& move_generator,
-                        ManualMoveGenerator& manual_move_generator,
-                        NetPinTimingInvalidator* pin_timing_invalidator,
-                        t_pl_blocks_to_be_moved& blocks_affected,
-                        const t_placer_opts& placer_opts,
-                        const t_noc_opts& noc_opts,
-                        MoveTypeStat& move_type_stat,
-                        t_swap_stats& swap_stats,
-                        PlacerState& placer_state,
-                        NetCostHandler& net_cost_handler,
-                        std::optional<NocCostHandler>& noc_cost_handler);
-
 static int count_connections();
 
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state);
 
-static void placement_inner_loop(const t_annealing_state* state,
-                                 const t_placer_opts& placer_opts,
-                                 const t_noc_opts& noc_opts,
-                                 int inner_recompute_limit,
-                                 t_placer_statistics* stats,
-                                 t_placer_costs* costs,
-                                 int* moves_since_cost_recompute,
-                                 NetPinTimingInvalidator* pin_timing_invalidator,
-                                 const PlaceDelayModel* delay_model,
-                                 PlacerCriticalities* criticalities,
-                                 PlacerSetupSlacks* setup_slacks,
-                                 MoveGenerator& move_generator,
-                                 ManualMoveGenerator& manual_move_generator,
-                                 t_pl_blocks_to_be_moved& blocks_affected,
-                                 SetupTimingInfo* timing_info,
-                                 const t_place_algorithm& place_algorithm,
-                                 MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor,
-                                 t_swap_stats& swap_stats,
-                                 PlacerState& placer_state,
-                                 NetCostHandler& net_cost_handler,
-                                 std::optional<NocCostHandler>& noc_cost_handler);
-
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const t_analysis_opts& analysis_opts,
                                                const SetupTimingInfo& timing_info,
@@ -258,7 +189,7 @@ void try_place(const Netlist<>& net_list,
 
     auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
-    int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count;
+    int num_connections, outer_crit_iter_count;
 
     t_placer_costs costs(placer_opts.place_algorithm);
 
@@ -267,7 +198,6 @@ void try_place(const Netlist<>& net_list,
     float sWNS = NAN;
 
     char msg[vtr::bufsize];
-    t_placer_statistics stats;
 
     t_placement_checkpoint placement_checkpoint;
 
@@ -469,7 +399,7 @@ void try_place(const Netlist<>& net_list,
     }
 
     // set the starting total placement cost
-    costs.cost = get_total_cost(&costs, placer_opts, noc_opts);
+    costs.cost = costs.get_total_cost(placer_opts, noc_opts);
 
     //Sanity check that initial placement is legal
     check_place(costs,
@@ -529,9 +459,6 @@ void try_place(const Netlist<>& net_list,
         print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
     }
 
-    tot_iter = 0;
-    moves_since_cost_recompute = 0;
-
     bool skip_anneal = false;
 
 #ifdef ENABLE_ANALYTIC_PLACE
@@ -554,6 +481,9 @@ void try_place(const Netlist<>& net_list,
                                noc_opts, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(),
                                placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim);
 
+    const t_annealing_state& annealing_state = annealer.get_annealing_state();
+    const auto& [swap_stats, move_type_stats, placer_stats] = annealer.get_stats();
+
     if (!skip_anneal) {
         //Table header
         VTR_LOG("\n");
@@ -583,40 +513,28 @@ void try_place(const Netlist<>& net_list,
                                           agent_state, placer_opts, false, current_move_generator);
 
             // do a complete inner loop iteration
-            placement_inner_loop(&state, placer_opts, noc_opts,
-                                 inner_recompute_limit,
-                                 &stats, &costs, &moves_since_cost_recompute,
-                                 pin_timing_invalidator.get(), place_delay_model.get(),
-                                 placer_criticalities.get(), placer_setup_slacks.get(),
-                                 *current_move_generator, manual_move_generator,
-                                 blocks_affected, timing_info.get(),
-                                 placer_opts.place_algorithm, move_type_stat,
-                                 timing_bb_factor, swap_stats, placer_state,
-                                 net_cost_handler, noc_cost_handler);
-
+            annealer.placement_inner_loop(*current_move_generator,
+                                          timing_bb_factor);
 
             //move the update used move_generator to its original variable
             update_move_generator(move_generator, move_generator2, agent_state,
                                   placer_opts, false, current_move_generator);
 
-            tot_iter += state.move_lim;
-            ++state.num_temps;
-
-            print_place_status(state, stats, temperature_timer.elapsed_sec(),
-                               critical_path.delay(), sTNS, sWNS, tot_iter,
+            print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
+                               critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
                                noc_opts.noc, costs.noc_cost_terms);
 
             if (placer_opts.place_algorithm.is_timing_driven()
                 && placer_opts.place_agent_multistate
                 && agent_state == e_agent_state::EARLY_IN_THE_ANNEAL) {
-                if (state.alpha < 0.85 && state.alpha > 0.6) {
+                if (annealing_state.alpha < 0.85 && annealing_state.alpha > 0.6) {
                     agent_state = e_agent_state::LATE_IN_THE_ANNEAL;
                     VTR_LOG("Agent's 2nd state: \n");
                 }
             }
 
             sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
-                    costs.cost, costs.bb_cost, costs.timing_cost, state.t);
+                    costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t);
             update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT,
                           timing_info);
 
@@ -625,14 +543,12 @@ void try_place(const Netlist<>& net_list,
             //                print_clb_placement("first_iteration_clb_placement.echo");
             //            }
             //#endif
-        } while (state.outer_loop_update(stats.success_rate, costs, placer_opts,
-                                         annealing_sched));
+        } while (annealer.outer_loop_update_state());
         /* Outer loop of the simulated annealing ends */
     } //skip_anneal ends
 
-    /* Start Quench */
-    state.t = 0;                         //Freeze out: only accept solutions that improve placement.
-    state.move_lim = state.move_lim_max; //Revert the move limit to initial value.
+    // Start Quench
+    annealer.start_quench();
 
     auto pre_quench_timing_stats = timing_ctx.stats;
     { /* Quench */
@@ -647,40 +563,27 @@ void try_place(const Netlist<>& net_list,
 
         /* Run inner loop again with temperature = 0 so as to accept only swaps
          * which reduce the cost of the placement */
-        placement_inner_loop(&state, placer_opts, noc_opts,
-                             quench_recompute_limit,
-                             &stats, &costs, &moves_since_cost_recompute,
-                             pin_timing_invalidator.get(), place_delay_model.get(),
-                             placer_criticalities.get(), placer_setup_slacks.get(),
-                             *current_move_generator, manual_move_generator,
-                             blocks_affected, timing_info.get(),
-                             placer_opts.place_quench_algorithm, move_type_stat,
-                             timing_bb_factor, swap_stats, placer_state,
-                             net_cost_handler, noc_cost_handler);
-
-
-        //move the update used move_generator to its original variable
+        annealer.placement_inner_loop(*current_move_generator, timing_bb_factor);
+
+        // move the update used move_generator to its original variable
         update_move_generator(move_generator, move_generator2, agent_state,
                               placer_opts, true, current_move_generator);
 
-        tot_iter += state.move_lim;
-        ++state.num_temps;
-
         if (placer_opts.place_quench_algorithm.is_timing_driven()) {
             critical_path = timing_info->least_slack_critical_path();
             sTNS = timing_info->setup_total_negative_slack();
             sWNS = timing_info->setup_worst_negative_slack();
         }
 
-        print_place_status(state, stats, temperature_timer.elapsed_sec(),
-                           critical_path.delay(), sTNS, sWNS, tot_iter,
+        print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
+                           critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
                            noc_opts.noc, costs.noc_cost_terms);
     }
     auto post_quench_timing_stats = timing_ctx.stats;
 
     //Final timing analysis
     PlaceCritParams crit_params;
-    crit_params.crit_exponent = state.crit_exponent;
+    crit_params.crit_exponent = annealing_state.crit_exponent;
     crit_params.crit_limit = placer_opts.place_crit_limit;
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -700,7 +603,7 @@ void try_place(const Netlist<>& net_list,
 
     if (placer_opts.placement_saves_per_temperature >= 1) {
         std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
-                                               state.num_temps + 1, 0);
+                                               annealing_state.num_temps + 1, 0);
         VTR_LOG("Saving final placement to file: %s\n", filename.c_str());
         print_place(nullptr, nullptr, filename.c_str(), blk_loc_registry.block_locs());
     }
@@ -784,9 +687,9 @@ void try_place(const Netlist<>& net_list,
     // Print out swap statistics
     print_resources_utilization(blk_loc_registry);
 
-    print_placement_swaps_stats(state, swap_stats);
+    print_placement_swaps_stats(annealing_state, swap_stats);
 
-    move_type_stat.print_placement_move_types_stats();
+    move_type_stats.print_placement_move_types_stats();
 
     if (noc_opts.noc) {
         write_noc_placement_file(noc_opts.noc_placement_file_name, blk_loc_registry.block_locs());
@@ -808,112 +711,6 @@ void try_place(const Netlist<>& net_list,
     copy_locs_to_global_state(blk_loc_registry);
 }
 
-
-
-/* Function which contains the inner loop of the simulated annealing */
-static void placement_inner_loop(const t_annealing_state* state,
-                                 const t_placer_opts& placer_opts,
-                                 const t_noc_opts& noc_opts,
-                                 int inner_recompute_limit,
-                                 t_placer_statistics* stats,
-                                 t_placer_costs* costs,
-                                 int* moves_since_cost_recompute,
-                                 NetPinTimingInvalidator* pin_timing_invalidator,
-                                 const PlaceDelayModel* delay_model,
-                                 PlacerCriticalities* criticalities,
-                                 PlacerSetupSlacks* setup_slacks,
-                                 MoveGenerator& move_generator,
-                                 ManualMoveGenerator& manual_move_generator,
-                                 t_pl_blocks_to_be_moved& blocks_affected,
-                                 SetupTimingInfo* timing_info,
-                                 const t_place_algorithm& place_algorithm,
-                                 MoveTypeStat& move_type_stat,
-                                 float timing_bb_factor,
-                                 t_swap_stats& swap_stats,
-                                 PlacerState& placer_state,
-                                 NetCostHandler& net_cost_handler,
-                                 std::optional<NocCostHandler>& noc_cost_handler) {
-    //How many times have we dumped placement to a file this temperature?
-    int inner_placement_save_count = 0;
-
-    stats->reset();
-
-    bool manual_move_enabled = false;
-
-    /* Inner loop begins */
-    for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < state->move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(state, costs, move_generator,
-                                             manual_move_generator, timing_info, pin_timing_invalidator,
-                                             blocks_affected, delay_model, criticalities, setup_slacks,
-                                             placer_opts, noc_opts, move_type_stat, place_algorithm,
-                                             timing_bb_factor, manual_move_enabled, swap_stats,
-                                             placer_state, net_cost_handler, noc_cost_handler);
-
-        if (swap_result == ACCEPTED) {
-            /* Move was accepted.  Update statistics that are useful for the annealing schedule. */
-            stats->single_swap_update(*costs);
-            swap_stats.num_swap_accepted++;
-        } else if (swap_result == ABORTED) {
-            swap_stats.num_swap_aborted++;
-        } else { // swap_result == REJECTED
-            swap_stats.num_swap_rejected++;
-        }
-
-        if (place_algorithm.is_timing_driven()) {
-            /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
-             * We do this only once in a while, since it is expensive.
-             */
-            if (inner_crit_iter_count >= inner_recompute_limit
-                && inner_iter != state->move_lim - 1) { /*on last iteration don't recompute */
-
-                inner_crit_iter_count = 0;
-#ifdef VERBOSE
-                VTR_LOG("Inner loop recompute criticalities\n");
-#endif
-
-                PlaceCritParams crit_params;
-                crit_params.crit_exponent = state->crit_exponent;
-                crit_params.crit_limit = placer_opts.place_crit_limit;
-
-                //Update all timing related classes
-                perform_full_timing_update(crit_params, delay_model, criticalities,
-                                           setup_slacks, pin_timing_invalidator,
-                                           timing_info, costs, placer_state);
-            }
-            inner_crit_iter_count++;
-        }
-
-        /* Lines below prevent too much round-off error from accumulating
-         * in the cost over many iterations (due to incremental updates).
-         * This round-off can lead to error checks failing because the cost
-         * is different from what you get when you recompute from scratch.
-         */
-        ++(*moves_since_cost_recompute);
-        if (*moves_since_cost_recompute > MAX_MOVES_BEFORE_RECOMPUTE) {
-            net_cost_handler.recompute_costs_from_scratch(delay_model, criticalities, *costs);
-
-            if (noc_cost_handler.has_value()) {
-                noc_cost_handler->recompute_costs_from_scratch(noc_opts, *costs);
-            }
-
-            *moves_since_cost_recompute = 0;
-        }
-
-        if (placer_opts.placement_saves_per_temperature >= 1 && inner_iter > 0
-            && (inner_iter + 1) % (state->move_lim / placer_opts.placement_saves_per_temperature) == 0) {
-            std::string filename = vtr::string_fmt("placement_%03d_%03d.place",
-                                                   state->num_temps + 1, inner_placement_save_count);
-            VTR_LOG("Saving placement to file at temperature move %d / %d: %s\n",
-                    inner_iter, state->move_lim, filename.c_str());
-            print_place(nullptr, nullptr, filename.c_str(), placer_state.block_locs());
-            ++inner_placement_save_count;
-        }
-    }
-
-    /* Calculate the success_rate and std_dev of the costs. */
-    stats->calc_iteration_stats(*costs, state->move_lim);
-}
-
 /*only count non-global connections */
 static int count_connections() {
     auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -928,446 +725,7 @@ static int count_connections() {
         count += cluster_ctx.clb_nlist.net_sinks(net_id).size();
     }
 
-    return (count);
-}
-
-///@brief Find the starting temperature for the annealing loop.
-static float starting_t(const t_annealing_state* state,
-                        t_placer_costs* costs,
-                        t_annealing_sched annealing_sched,
-                        const PlaceDelayModel* delay_model,
-                        PlacerCriticalities* criticalities,
-                        PlacerSetupSlacks* setup_slacks,
-                        SetupTimingInfo* timing_info,
-                        MoveGenerator& move_generator,
-                        ManualMoveGenerator& manual_move_generator,
-                        NetPinTimingInvalidator* pin_timing_invalidator,
-                        t_pl_blocks_to_be_moved& blocks_affected,
-                        const t_placer_opts& placer_opts,
-                        const t_noc_opts& noc_opts,
-                        MoveTypeStat& move_type_stat,
-                        t_swap_stats& swap_stats,
-                        PlacerState& placer_state,
-                        NetCostHandler& net_cost_handler,
-                        std::optional<NocCostHandler>& noc_cost_handler) {
-    if (annealing_sched.type == e_sched_type::USER_SCHED) {
-        return (annealing_sched.init_t);
-    }
-
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    /* Use to calculate the average of cost when swap is accepted. */
-    int num_accepted = 0;
-
-    /* Use double types to avoid round off. */
-    double av = 0., sum_of_squares = 0.;
-
-    /* Determines the block swap loop count. */
-    int move_lim = std::min(state->move_lim_max,
-                            (int)cluster_ctx.clb_nlist.blocks().size());
-
-    bool manual_move_enabled = false;
-
-    for (int i = 0; i < move_lim; i++) {
-#ifndef NO_GRAPHICS
-        //Checks manual move flag for manual move feature
-        t_draw_state* draw_state = get_draw_state_vars();
-        if (draw_state->show_graphics) {
-            manual_move_enabled = manual_move_is_selected();
-        }
-#endif /*NO_GRAPHICS*/
-
-        //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack
-        e_move_result swap_result = try_swap(state, costs, move_generator,
-                                             manual_move_generator, timing_info, pin_timing_invalidator,
-                                             blocks_affected, delay_model, criticalities, setup_slacks,
-                                             placer_opts, noc_opts, move_type_stat, placer_opts.place_algorithm,
-                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled, swap_stats,
-                                             placer_state, net_cost_handler, noc_cost_handler);
-
-
-        if (swap_result == ACCEPTED) {
-            num_accepted++;
-            av += costs->cost;
-            sum_of_squares += costs->cost * costs->cost;
-            swap_stats.num_swap_accepted++;
-        } else if (swap_result == ABORTED) {
-            swap_stats.num_swap_aborted++;
-        } else {
-            swap_stats.num_swap_rejected++;
-        }
-    }
-
-    /* Take the average of the accepted swaps' cost values. */
-    av = num_accepted > 0 ? (av / num_accepted) : 0.;
-
-    /* Get the standard deviation. */
-    double std_dev = get_std_dev(num_accepted, sum_of_squares, av);
-
-    /* Print warning if not all swaps are accepted. */
-    if (num_accepted != move_lim) {
-        VTR_LOG_WARN("Starting t: %d of %d configurations accepted.\n",
-                     num_accepted, move_lim);
-    }
-
-#ifdef VERBOSE
-    /* Print stats related to finding the initital temp. */
-    VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);
-#endif
-
-    // Improved initial placement uses a fast SA for NoC routers and centroid placement
-    // for other blocks. The temperature is reduced to prevent SA from destroying the initial placement
-    float init_temp = std_dev / 64;
-
-    return init_temp;
-}
-
-/**
- * @brief Pick some block and moves it to another spot.
- *
- * If the new location is empty, directly move the block. If the new location
- * is occupied, switch the blocks. Due to the different sizes of the blocks,
- * this block switching may occur for multiple times. It might also cause the
- * current swap attempt to abort due to inability to find suitable locations
- * for moved blocks.
- *
- * The move generator will record all the switched blocks in the variable
- * `blocks_affected`. Afterwards, the move will be assessed by the chosen
- * cost formulation. Currently, there are three ways to assess move cost,
- * which are stored in the enum type `t_place_algorithm`.
- *
- * @return Whether the block swap is accepted, rejected or aborted.
- */
-static e_move_result try_swap(const t_annealing_state* state,
-                              t_placer_costs* costs,
-                              MoveGenerator& move_generator,
-                              ManualMoveGenerator& manual_move_generator,
-                              SetupTimingInfo* timing_info,
-                              NetPinTimingInvalidator* pin_timing_invalidator,
-                              t_pl_blocks_to_be_moved& blocks_affected,
-                              const PlaceDelayModel* delay_model,
-                              PlacerCriticalities* criticalities,
-                              PlacerSetupSlacks* setup_slacks,
-                              const t_placer_opts& placer_opts,
-                              const t_noc_opts& noc_opts,
-                              MoveTypeStat& move_type_stat,
-                              const t_place_algorithm& place_algorithm,
-                              float timing_bb_factor,
-                              bool manual_move_enabled,
-                              t_swap_stats& swap_stats,
-                              PlacerState& placer_state,
-                              NetCostHandler& net_cost_handler,
-                              std::optional<NocCostHandler>& noc_cost_handler) {
-    /* Picks some block and moves it to another spot.  If this spot is   *
-     * occupied, switch the blocks.  Assess the change in cost function. *
-     * rlim is the range limiter.                                        *
-     * Returns whether the swap is accepted, rejected or aborted.        *
-     * Passes back the new value of the cost functions.                  */
-    auto& blk_loc_registry = placer_state.mutable_blk_loc_registry();
-
-    float rlim_escape_fraction = placer_opts.rlim_escape_fraction;
-    float timing_tradeoff = placer_opts.timing_tradeoff;
-
-    PlaceCritParams crit_params;
-    crit_params.crit_exponent = state->crit_exponent;
-    crit_params.crit_limit = placer_opts.place_crit_limit;
-
-    // move type and block type chosen by the agent
-    t_propose_action proposed_action{e_move_type::UNIFORM, -1};
-
-    swap_stats.num_ts_called++;
-
-    MoveOutcomeStats move_outcome_stats;
-
-    /* I'm using negative values of proposed_net_cost as a flag, *
-     * so DO NOT use cost functions that can go negative.        */
-
-    double delta_c = 0;        //Change in cost due to this swap.
-    double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
-    double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
-
-    // Determine whether we need to force swap two router blocks
-    bool router_block_move = false;
-    if (noc_opts.noc) {
-        router_block_move = check_for_router_swap(noc_opts.noc_swap_percentage);
-    }
-
-    /* Allow some fraction of moves to not be restricted by rlim, */
-    /* in the hopes of better escaping local minima.              */
-    float rlim;
-    if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) {
-        rlim = std::numeric_limits<float>::infinity();
-    } else {
-        rlim = state->rlim;
-    }
-
-    e_create_move create_move_outcome = e_create_move::ABORT;
-
-    //When manual move toggle button is active, the manual move window asks the user for input.
-    if (manual_move_enabled) {
-#ifndef NO_GRAPHICS
-        create_move_outcome = manual_move_display_and_propose(manual_move_generator, blocks_affected,
-                                                              proposed_action.move_type, rlim, placer_opts,
-                                                              criticalities);
-#else  //NO_GRAPHICS
-       //Cast to void to explicitly avoid warning.
-        (void)manual_move_generator;
-#endif //NO_GRAPHICS
-    } else if (router_block_move) {
-        // generate a move where two random router blocks are swapped
-        create_move_outcome = propose_router_swap(blocks_affected, rlim, placer_state.blk_loc_registry());
-        proposed_action.move_type = e_move_type::UNIFORM;
-    } else {
-        //Generate a new move (perturbation) used to explore the space of possible placements
-        create_move_outcome = move_generator.propose_move(blocks_affected, proposed_action, rlim, placer_opts, criticalities);
-    }
-
-    if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
-        ++move_type_stat.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-    }
-    LOG_MOVE_STATS_PROPOSED(t, blocks_affected);
-
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                   "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n",
-                   costs->cost, costs->bb_cost, costs->timing_cost);
-
-    e_move_result move_outcome = e_move_result::ABORTED;
-
-    if (create_move_outcome == e_create_move::ABORT) {
-        LOG_MOVE_STATS_OUTCOME(std::numeric_limits<float>::quiet_NaN(),
-                               std::numeric_limits<float>::quiet_NaN(),
-                               std::numeric_limits<float>::quiet_NaN(), "ABORTED",
-                               "illegal move");
-
-        move_outcome = ABORTED;
-
-    } else {
-        VTR_ASSERT(create_move_outcome == e_create_move::VALID);
-
-        /*
-         * To make evaluating the move simpler (e.g. calculating changed bounding box),
-         * we first move the blocks to their new locations (apply the move to
-         * blk_loc_registry.block_locs) and then compute the change in cost. If the move
-         * is accepted, the inverse look-up in place_ctx.grid_blocks is updated
-         * (committing the move). If the move is rejected, the blocks are returned to
-         * their original positions (reverting blk_loc_registry.block_locs to its original state).
-         *
-         * Note that the inverse look-up place_ctx.grid_blocks is only updated after
-         * move acceptance is determined, so it should not be used when evaluating a move.
-         */
-
-        /* Update the block positions */
-        blk_loc_registry.apply_move_blocks(blocks_affected);
-
-        //Find all the nets affected by this swap and update the wiring costs.
-        //This cost value doesn't depend on the timing info.
-        //
-        //Also find all the pins affected by the swap, and calculates new connection
-        //delays and timing costs and store them in proposed_* data structures.
-        net_cost_handler.find_affected_nets_and_update_costs(delay_model, criticalities, blocks_affected,
-                                                             bb_delta_c, timing_delta_c);
-
-        //For setup slack analysis, we first do a timing analysis to get the newest
-        //slack values resulted from the proposed block moves. If the move turns out
-        //to be accepted, we keep the updated slack values and commit the block moves.
-        //If rejected, we reject the proposed block moves and revert this timing analysis.
-        if (place_algorithm == SLACK_TIMING_PLACE) {
-            /* Invalidates timing of modified connections for incremental timing updates. */
-            invalidate_affected_connections(blocks_affected,
-                                            pin_timing_invalidator, timing_info);
-
-            /* Update the connection_timing_cost and connection_delay *
-             * values from the temporary values.                      */
-            commit_td_cost(blocks_affected, placer_state);
-
-            /* Update timing information. Since we are analyzing setup slacks,   *
-             * we only update those values and keep the criticalities stale      *
-             * so as not to interfere with the original timing driven algorithm. *
-             *
-             * Note: the timing info must be updated after applying block moves  *
-             * and committing the timing driven delays and costs.                *
-             * If we wish to revert this timing update due to move rejection,    *
-             * we need to revert block moves and restore the timing values.      */
-            criticalities->disable_update();
-            setup_slacks->enable_update();
-            update_timing_classes(crit_params, timing_info, criticalities,
-                                  setup_slacks, pin_timing_invalidator, placer_state);
-
-            /* Get the setup slack analysis cost */
-            //TODO: calculate a weighted average of the slack cost and wiring cost
-            delta_c = analyze_setup_slack_cost(setup_slacks, placer_state) * costs->timing_cost_norm;
-        } else if (place_algorithm == CRITICALITY_TIMING_PLACE) {
-            /* Take delta_c as a combination of timing and wiring cost. In
-             * addition to `timing_tradeoff`, we normalize the cost values */
-            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                           "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, "
-                           "timing_delta_c %e, timing_cost_norm %e\n",
-                           bb_delta_c,
-                           costs->bb_cost_norm,
-                           timing_tradeoff,
-                           timing_delta_c,
-                           costs->timing_cost_norm);
-            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs->bb_cost_norm
-                      + timing_tradeoff * timing_delta_c * costs->timing_cost_norm;
-        } else {
-            VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
-            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                           "\t\tMove bb_delta_c %e, bb_cost_norm %e\n",
-                           bb_delta_c,
-                           costs->bb_cost_norm);
-            delta_c = bb_delta_c * costs->bb_cost_norm;
-        }
-
-        NocCostTerms noc_delta_c; // change in NoC cost
-        /* Update the NoC data structure and costs*/
-        if (noc_opts.noc) {
-            VTR_ASSERT_SAFE(noc_cost_handler.has_value());
-            noc_cost_handler->find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c);
-
-            // Include the NoC delta costs in the total cost change for this swap
-            delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts);
-        }
-
-        /* 1 -> move accepted, 0 -> rejected. */
-        move_outcome = assess_swap(delta_c, state->t);
-
-        //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move.
-#ifndef NO_GRAPHICS
-        if (manual_move_enabled) {
-            move_outcome = pl_do_manual_move(delta_c, timing_delta_c, bb_delta_c, move_outcome);
-        }
-#endif //NO_GRAPHICS
-
-        if (move_outcome == ACCEPTED) {
-            costs->cost += delta_c;
-            costs->bb_cost += bb_delta_c;
-
-            if (place_algorithm == SLACK_TIMING_PLACE) {
-                /* Update the timing driven cost as usual */
-                costs->timing_cost += timing_delta_c;
-
-                //Commit the setup slack information
-                //The timing delay and cost values should be committed already
-                commit_setup_slacks(setup_slacks, placer_state);
-            }
-
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
-                costs->timing_cost += timing_delta_c;
-
-                /* Invalidates timing of modified connections for incremental *
-                 * timing updates. These invalidations are accumulated for a  *
-                 * big timing update in the outer loop.                       */
-                invalidate_affected_connections(blocks_affected,
-                                                pin_timing_invalidator, timing_info);
-
-                /* Update the connection_timing_cost and connection_delay *
-                 * values from the temporary values.                      */
-                commit_td_cost(blocks_affected, placer_state);
-            }
-
-            /* Update net cost functions and reset flags. */
-            net_cost_handler.update_move_nets();
-
-            /* Update clb data structures since we kept the move. */
-            blk_loc_registry.commit_move_blocks(blocks_affected);
-
-            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
-                ++move_type_stat.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-            }
-            if (noc_opts.noc){
-                noc_cost_handler->commit_noc_costs();
-                *costs += noc_delta_c;
-            }
-
-            //Highlights the new block when manual move is selected.
-#ifndef NO_GRAPHICS
-            if (manual_move_enabled) {
-                manual_move_highlight_new_block_location();
-            }
-#endif //NO_GRAPHICS
-
-        } else {
-            VTR_ASSERT_SAFE(move_outcome == REJECTED);
-
-            /* Reset the net cost function flags first. */
-            net_cost_handler.reset_move_nets();
-
-            /* Restore the blk_loc_registry.block_locs data structures to their state before the move. */
-            blk_loc_registry.revert_move_blocks(blocks_affected);
-
-            if (place_algorithm == SLACK_TIMING_PLACE) {
-                /* Revert the timing delays and costs to pre-update values.       */
-                /* These routines must be called after reverting the block moves. */
-                //TODO: make this process incremental
-                comp_td_connection_delays(delay_model, placer_state);
-                comp_td_costs(delay_model, *criticalities, placer_state, &costs->timing_cost);
-
-                /* Re-invalidate the affected sink pins since the proposed *
-                 * move is rejected, and the same blocks are reverted to   *
-                 * their original positions.                               */
-                invalidate_affected_connections(blocks_affected,
-                                                pin_timing_invalidator, timing_info);
-
-                /* Revert the timing update */
-                update_timing_classes(crit_params, timing_info, criticalities,
-                                      setup_slacks, pin_timing_invalidator, placer_state);
-
-                VTR_ASSERT_SAFE_MSG(
-                    verify_connection_setup_slacks(setup_slacks, placer_state),
-                    "The current setup slacks should be identical to the values before the try swap timing info update.");
-            }
-
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
-                /* Unstage the values stored in proposed_* data structures */
-                revert_td_cost(blocks_affected, placer_state.mutable_timing());
-            }
-
-            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
-                ++move_type_stat.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-            }
-            /* Revert the traffic flow routes within the NoC*/
-            if (noc_opts.noc) {
-                noc_cost_handler->revert_noc_traffic_flow_routes(blocks_affected);
-            }
-        }
-
-        move_outcome_stats.delta_cost_norm = delta_c;
-        move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs->bb_cost_norm;
-        move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs->timing_cost_norm;
-
-        move_outcome_stats.delta_bb_cost_abs = bb_delta_c;
-        move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
-
-        LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), "");
-    }
-    move_outcome_stats.outcome = move_outcome;
-
-    // If we force a router block move then it was not proposed by the
-    // move generator, so we should not calculate the reward and update
-    // the move generators status since this outcome is not a direct
-    // consequence of the move generator
-    if (!router_block_move) {
-        move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor);
-    }
-
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    ifndef NO_GRAPHICS
-    stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c);
-#    endif
-#endif
-
-    /* Clear the data structure containing block move info */
-    blocks_affected.clear_move_blocks();
-
-#if 0
-    // Check that each accepted swap yields a valid placement. This will
-    // greatly slow the placer, but can debug some issues.
-    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
-#endif
-    VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                   "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n",
-                   costs->cost, costs->bb_cost, costs->timing_cost);
-    return move_outcome;
+    return count;
 }
 
 static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index ec7ecb8982e..e09bb2d5dd6 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -8,6 +8,7 @@
 #include "globals.h"
 #include "draw_global.h"
 #include "place_constraints.h"
+#include "noc_place_utils.h"
 
 /**
  * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
@@ -65,6 +66,25 @@ void t_placer_costs::update_norm_factors() {
     }
 }
 
+double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
+    double total_cost = 0.0;
+
+    if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
+        // in bounding box mode we only care about wirelength
+        total_cost = bb_cost * bb_cost_norm;
+    } else if (placer_opts.place_algorithm.is_timing_driven()) {
+        // in timing mode we include both wirelength and timing costs
+        total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
+    }
+
+    if (noc_opts.noc) {
+        // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion
+        total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts);
+    }
+
+    return total_cost;
+}
+
 t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
     noc_cost_terms += noc_delta_cost;
 
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 49f4246dbe5..d76e738f70e 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -114,6 +114,18 @@ class t_placer_costs {
      */
     void update_norm_factors();
 
+    /**
+     * @brief Compute the total normalized cost for a given placement. This
+     * computation will vary depending on the placement modes.
+     *
+     * The cost components and their normalization factors are read from this
+     * object's own members.
+     * @param placer_opts Determines the placement mode
+     * @param noc_opts Determines if placement includes the NoC
+     * @return double The computed total cost of the current placement
+     */
+    double get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts);
+
     /**
      * @brief Accumulates NoC cost difference terms
      *

From 896810eb1dc9a67ea3dd1721af7c59094a9750a4 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 13:38:22 -0400
Subject: [PATCH 04/31] show annealing schedule in ShowSetup.cpp

---
 vpr/src/base/ShowSetup.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index 68aa073759d..f0280669cd9 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -17,8 +17,7 @@
 /******** Function Prototypes ********/
 static void ShowPackerOpts(const t_packer_opts& PackerOpts);
 static void ShowNetlistOpts(const t_netlist_opts& NetlistOpts);
-static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
-                           const t_annealing_sched& AnnealSched);
+static void ShowPlacerOpts(const t_placer_opts& PlacerOpts);
 static void ShowAnalyticalPlacerOpts(const t_ap_opts& APOpts);
 static void ShowRouterOpts(const t_router_opts& RouterOpts);
 static void ShowAnalysisOpts(const t_analysis_opts& AnalysisOpts);
@@ -56,7 +55,7 @@ void ShowSetup(const t_vpr_setup& vpr_setup) {
         ShowPackerOpts(vpr_setup.PackerOpts);
     }
     if (vpr_setup.PlacerOpts.doPlacement) {
-        ShowPlacerOpts(vpr_setup.PlacerOpts, vpr_setup.AnnealSched);
+        ShowPlacerOpts(vpr_setup.PlacerOpts);
     }
     if (vpr_setup.APOpts.doAP) {
         ShowAnalyticalPlacerOpts(vpr_setup.APOpts);
@@ -498,8 +497,7 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
     VTR_LOG("\n");
 }
 
-static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
-                           const t_annealing_sched& AnnealSched) {
+static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) {
     VTR_LOG("PlacerOpts.place_freq: ");
     switch (PlacerOpts.place_freq) {
         case PLACE_ONCE:
@@ -609,7 +607,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts,
 
         VTR_LOG("PlaceOpts.seed: %d\n", PlacerOpts.seed);
 
-        ShowAnnealSched(AnnealSched);
+        ShowAnnealSched(PlacerOpts.anneal_sched);
     }
     VTR_LOG("\n");
 }

From 8ffa91379b863f353c3d55060dae941cb9091877 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 13:54:33 -0400
Subject: [PATCH 05/31] added a constructor for PlacerTimingContext

---
 vpr/src/base/SetupVPR.cpp        |  5 ++-
 vpr/src/base/SetupVPR.h          |  1 -
 vpr/src/base/place_and_route.cpp |  8 ++---
 vpr/src/base/place_and_route.h   |  1 -
 vpr/src/base/vpr_api.cpp         |  8 ++---
 vpr/src/base/vpr_api.h           |  1 -
 vpr/src/place/annealer.cpp       | 52 ++++------------------------
 vpr/src/place/annealer.h         |  1 +
 vpr/src/place/place.cpp          | 39 ++-------------------
 vpr/src/place/placer_state.cpp   | 58 ++++++++++++++++++++++++++++++++
 vpr/src/place/placer_state.h     | 22 ++++++++++++
 11 files changed, 97 insertions(+), 99 deletions(-)
 create mode 100644 vpr/src/place/placer_state.cpp

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index d6315762786..38ac3c595c7 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -97,7 +97,6 @@ void SetupVPR(const t_options* options,
               t_packer_opts* packerOpts,
               t_placer_opts* placerOpts,
               t_ap_opts* apOpts,
-              t_annealing_sched* annealSched,
               t_router_opts* routerOpts,
               t_analysis_opts* analysisOpts,
               t_noc_opts* nocOpts,
@@ -145,7 +144,7 @@ void SetupVPR(const t_options* options,
 
     SetupNetlistOpts(*options, *netlistOpts);
     SetupPlacerOpts(*options, placerOpts);
-    SetupAnnealSched(*options, annealSched);
+    SetupAnnealSched(*options, &placerOpts->anneal_sched);
     SetupRouterOpts(*options, routerOpts);
     SetupAnalysisOpts(*options, *analysisOpts);
     SetupPowerOpts(*options, powerOpts, arch);
@@ -155,7 +154,7 @@ void SetupVPR(const t_options* options,
     //save the device layout, which is required to parse the architecture file
     arch->device_layout = options->device_layout;
 
-    if (readArchFile == true) {
+    if (readArchFile) {
         vtr::ScopedStartFinishTimer t("Loading Architecture Description");
         switch (options->arch_format) {
             case e_arch_format::VTR:
diff --git a/vpr/src/base/SetupVPR.h b/vpr/src/base/SetupVPR.h
index 451fdc6567a..45bf510c18c 100644
--- a/vpr/src/base/SetupVPR.h
+++ b/vpr/src/base/SetupVPR.h
@@ -17,7 +17,6 @@ void SetupVPR(const t_options* Options,
               t_packer_opts* PackerOpts,
               t_placer_opts* PlacerOpts,
               t_ap_opts* APOpts,
-              t_annealing_sched* AnnealSched,
               t_router_opts* RouterOpts,
               t_analysis_opts* AnalysisOpts,
               t_noc_opts* NocOpts,
diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index ab5cf31ca4f..60e2459a6ba 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -55,7 +55,6 @@ static float comp_width(t_chan* chan, float x, float separation);
 int binary_search_place_and_route(const Netlist<>& placement_net_list,
                                   const Netlist<>& router_net_list,
                                   const t_placer_opts& placer_opts_ref,
-                                  const t_annealing_sched& annealing_sched,
                                   const t_router_opts& router_opts,
                                   const t_analysis_opts& analysis_opts,
                                   const t_noc_opts& noc_opts,
@@ -183,7 +182,6 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
             placer_opts.place_chan_width = current;
             try_place(placement_net_list,
                       placer_opts,
-                      annealing_sched,
                       router_opts,
                       analysis_opts,
                       noc_opts,
@@ -191,7 +189,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                       det_routing_arch,
                       segment_inf,
                       arch->Directs,
-                      false);
+                      /*is_flat=*/false);
         }
         success = route(router_net_list,
                         current,
@@ -326,10 +324,10 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                 break;
             if (placer_opts.place_freq == PLACE_ALWAYS) {
                 placer_opts.place_chan_width = current;
-                try_place(placement_net_list, placer_opts, annealing_sched, router_opts, analysis_opts, noc_opts,
+                try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts,
                           arch->Chans, det_routing_arch, segment_inf,
                           arch->Directs,
-                          false);
+                          /*is_flat=*/false);
             }
 
             success = route(router_net_list,
diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h
index b4735ed8af4..6f191c0ff9e 100644
--- a/vpr/src/base/place_and_route.h
+++ b/vpr/src/base/place_and_route.h
@@ -25,7 +25,6 @@ struct t_fmap_cell {
 int binary_search_place_and_route(const Netlist<>& placement_net_list,
                                   const Netlist<>& router_net_list,
                                   const t_placer_opts& placer_opts_ref,
-                                  const t_annealing_sched& annealing_sched,
                                   const t_router_opts& router_opts,
                                   const t_analysis_opts& analysis_opts,
                                   const t_noc_opts& noc_opts,
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 86cbdaabd80..16589cdf8bc 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -287,7 +287,6 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a
              &vpr_setup->PackerOpts,
              &vpr_setup->PlacerOpts,
              &vpr_setup->APOpts,
-             &vpr_setup->AnnealSched,
              &vpr_setup->RouterOpts,
              &vpr_setup->AnalysisOpts,
              &vpr_setup->NocOpts,
@@ -830,7 +829,6 @@ void vpr_place(const Netlist<>& net_list, t_vpr_setup& vpr_setup, const t_arch&
 
     try_place(net_list,
               vpr_setup.PlacerOpts,
-              vpr_setup.AnnealSched,
               vpr_setup.RouterOpts,
               vpr_setup.AnalysisOpts,
               vpr_setup.NocOpts,
@@ -1058,7 +1056,6 @@ RouteStatus vpr_route_min_W(const Netlist<>& net_list,
     int min_W = binary_search_place_and_route((const Netlist<>&)g_vpr_ctx.clustering().clb_nlist,
                                               net_list,
                                               vpr_setup.PlacerOpts,
-                                              vpr_setup.AnnealSched,
                                               router_opts,
                                               vpr_setup.AnalysisOpts,
                                               vpr_setup.NocOpts,
@@ -1290,8 +1287,9 @@ static void free_complex_block_types() {
 void free_circuit() {
     //Free new net structures
     auto& cluster_ctx = g_vpr_ctx.mutable_clustering();
-    for (auto blk_id : cluster_ctx.clb_nlist.blocks())
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
         cluster_ctx.clb_nlist.remove_block(blk_id);
+    }
 
     cluster_ctx.clb_nlist = ClusteredNetlist();
 }
@@ -1369,7 +1367,6 @@ void vpr_setup_vpr(t_options* Options,
                    t_packer_opts* PackerOpts,
                    t_placer_opts* PlacerOpts,
                    t_ap_opts* APOpts,
-                   t_annealing_sched* AnnealSched,
                    t_router_opts* RouterOpts,
                    t_analysis_opts* AnalysisOpts,
                    t_noc_opts* NocOpts,
@@ -1395,7 +1392,6 @@ void vpr_setup_vpr(t_options* Options,
              PackerOpts,
              PlacerOpts,
              APOpts,
-             AnnealSched,
              RouterOpts,
              AnalysisOpts,
              NocOpts,
diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h
index dca8f7441ad..93cf2d12cc1 100644
--- a/vpr/src/base/vpr_api.h
+++ b/vpr/src/base/vpr_api.h
@@ -179,7 +179,6 @@ void vpr_setup_vpr(t_options* Options,
                    t_packer_opts* PackerOpts,
                    t_placer_opts* PlacerOpts,
                    t_ap_opts* APOpts,
-                   t_annealing_sched* AnnealSched,
                    t_router_opts* RouterOpts,
                    t_analysis_opts* AnalysisOpts,
                    t_noc_opts* NocOpts,
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 3203d79576f..204b7456c35 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -101,17 +101,6 @@
 static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
                                             NetPinTimingInvalidator* pin_tedges_invalidator,
                                             TimingInfo* timing_info);
-
-/**
- * @brief Update the connection_timing_cost values from the temporary
- *        values for all connections that have/haven't changed.
- *
- * All the connections have already been gathered by blocks_affected.affected_pins
- * after running the routine find_affected_nets_and_update_costs() in try_swap().
- */
-static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerState& placer_state);
-
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
  *
@@ -147,30 +136,6 @@ static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& block
     }
 }
 
-static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerState& placer_state) {
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
-    const auto& clb_nlist = cluster_ctx.clb_nlist;
-
-    auto& p_timing_ctx = placer_state.mutable_timing();
-    auto& connection_delay = p_timing_ctx.connection_delay;
-    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
-    auto& connection_timing_cost = p_timing_ctx.connection_timing_cost;
-    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
-
-    //Go through all the sink pins affected
-    for (ClusterPinId pin_id : blocks_affected.affected_pins) {
-        ClusterNetId net_id = clb_nlist.pin_net(pin_id);
-        int ipin = clb_nlist.pin_net_index(pin_id);
-
-        //Commit the timing delay and cost values
-        connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin];
-        proposed_connection_delay[net_id][ipin] = INVALID_DELAY;
-        connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin];
-        proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY;
-    }
-}
-
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -188,8 +153,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
         size_t ipin = clb_nlist.pin_net_index(clb_pin);
 
         original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]);
-        proposed_setup_slacks.push_back(
-            setup_slacks->setup_slack(net_id, ipin));
+        proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin));
     }
 
     //Sort in ascending order, from the worse slack value to the best
@@ -199,8 +163,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
     //Check the first pair of slack values that are different
     //If found, return their difference
     for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) {
-        float slack_diff = original_setup_slacks[idiff]
-                           - proposed_setup_slacks[idiff];
+        float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff];
 
         if (slack_diff != 0) {
             return slack_diff;
@@ -480,12 +443,11 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
 
     /* calculate the number of moves in the quench that we should recompute timing after based on the value of *
      * the commandline option quench_recompute_divider                                                         */
-    int quench_recompute_limit;
     if (placer_opts.quench_recompute_divider != 0) {
-        quench_recompute_limit = static_cast<int>(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
+        quench_recompute_limit_ = static_cast<int>(0.5 + (float)move_lim / (float)placer_opts.quench_recompute_divider);
     } else {
         // don't do an quench recompute
-        quench_recompute_limit = first_move_lim + 1;
+        quench_recompute_limit_ = first_move_lim + 1;
     }
 
     // Get the first range limiter
@@ -623,7 +585,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     }
 
     /* Allow some fraction of moves to not be restricted by rlim,
-    /* in the hopes of better escaping local minima. */
+     * in the hopes of better escaping local minima. */
     float rlim;
     if (rlim_escape_fraction > 0. && vtr::frand() < rlim_escape_fraction) {
         rlim = std::numeric_limits<float>::infinity();
@@ -704,7 +666,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 
             /* Update the connection_timing_cost and connection_delay *
              * values from the temporary values.                      */
-            commit_td_cost(blocks_affected_, placer_state_);
+            placer_state_.mutable_timing().commit_td_cost(blocks_affected_);
 
             /* Update timing information. Since we are analyzing setup slacks,   *
              * we only update those values and keep the criticalities stale      *
@@ -787,7 +749,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 
                 /* Update the connection_timing_cost and connection_delay *
                  * values from the temporary values.                      */
-                commit_td_cost(blocks_affected_, placer_state_);
+                placer_state_.mutable_timing().commit_td_cost(blocks_affected_);
             }
 
             /* Update net cost functions and reset flags. */
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 02b48967525..0c6b2e66ec3 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -245,6 +245,7 @@ class PlacementAnnealer {
     static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
 
     int inner_recompute_limit_;
+    int quench_recompute_limit_;
     int moves_since_cost_recompute_;
     int tot_iter_;
 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f4ccaf083f1..9bab97519aa 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -82,8 +82,7 @@ static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
 #endif
 
 /************** Types and defines local to place.c ***************************/
-constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
-constexpr float INVALID_COST = std::numeric_limits<double>::quiet_NaN();
+constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
 
 /********************* Static subroutines local to place.c *******************/
 #ifdef VERBOSE
@@ -130,9 +129,6 @@ static int check_macro_placement_consistency(const BlkLocRegistry& blk_loc_regis
 
 static int count_connections();
 
-static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
-                                      const PlacerState& placer_state);
-
 static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const t_analysis_opts& analysis_opts,
                                                const SetupTimingInfo& timing_info,
@@ -236,8 +232,7 @@ void try_place(const Netlist<>& net_list,
 
     int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
 
-    PlacerState placer_state;
-    auto& place_move_ctx = placer_state.mutable_move();
+    PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven());
     auto& blk_loc_registry = placer_state.mutable_blk_loc_registry();
     const auto& p_timing_ctx = placer_state.timing();
     const auto& p_runtime_ctx = placer_state.runtime();
@@ -777,36 +772,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac
         max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins);
     }
 
-    if (placer_opts.place_algorithm.is_timing_driven()) {
-        /* Allocate structures associated with timing driven placement */
-        /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]  */
-
-        auto& p_timing_ctx = placer_state.mutable_timing();
-
-        p_timing_ctx.connection_delay = make_net_pins_matrix<float>((const Netlist<>&)cluster_ctx.clb_nlist, 0.f);
-        p_timing_ctx.proposed_connection_delay = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, 0.f);
-
-        p_timing_ctx.connection_setup_slack = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, std::numeric_limits<float>::infinity());
-
-        p_timing_ctx.connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist);
-        p_timing_ctx.proposed_connection_timing_cost = make_net_pins_matrix<double>(cluster_ctx.clb_nlist, 0.);
-        p_timing_ctx.net_timing_cost.resize(num_nets, 0.);
-
-        for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
-            for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) {
-                p_timing_ctx.connection_delay[net_id][ipin] = 0;
-                p_timing_ctx.proposed_connection_delay[net_id][ipin] = INVALID_DELAY;
-
-                p_timing_ctx.proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY;
-
-                if (cluster_ctx.clb_nlist.net_is_ignored(net_id))
-                    continue;
-
-                p_timing_ctx.connection_timing_cost[net_id][ipin] = INVALID_DELAY;
-            }
-        }
-    }
-
     auto& place_move_ctx = placer_state.mutable_move();
 
     if (place_ctx.cube_bb) {
diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp
new file mode 100644
index 00000000000..ab9edd12836
--- /dev/null
+++ b/vpr/src/place/placer_state.cpp
@@ -0,0 +1,58 @@
+
+#include "placer_state.h"
+
+#include "globals.h"
+#include "move_transactions.h"
+
+
+PlacerTimingContext::PlacerTimingContext(bool placement_is_timing_driven) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    const size_t num_nets = cluster_ctx.clb_nlist.nets().size();
+
+    if (placement_is_timing_driven) { // otherwise nothing is allocated by this constructor
+        connection_delay = make_net_pins_matrix<float>((const Netlist<>&)cluster_ctx.clb_nlist, 0.f);
+        proposed_connection_delay = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, 0.f);
+
+        connection_setup_slack = make_net_pins_matrix<float>(cluster_ctx.clb_nlist, std::numeric_limits<float>::infinity());
+
+        connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist);
+        proposed_connection_timing_cost = make_net_pins_matrix<double>(cluster_ctx.clb_nlist, 0.);
+        net_timing_cost.resize(num_nets, 0.);
+
+        for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+            for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { // pin index 0 is skipped
+                connection_delay[net_id][ipin] = 0;
+                proposed_connection_delay[net_id][ipin] = INVALID_DELAY; // same value commit_td_cost() leaves behind
+
+                proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY;
+
+                if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+                    continue; // connection_timing_cost is not written for ignored nets
+                }
+
+                connection_timing_cost[net_id][ipin] = INVALID_DELAY;
+            }
+        }
+    }
+}
+
+void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    // Visit every pin recorded in blocks_affected.affected_pins
+    for (ClusterPinId pin_id : blocks_affected.affected_pins) {
+        ClusterNetId net_id = clb_nlist.pin_net(pin_id);
+        int ipin = clb_nlist.pin_net_index(pin_id);
+
+        // Promote each proposed value to the committed one, then reset the proposed slot to INVALID_DELAY
+        connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin];
+        proposed_connection_delay[net_id][ipin] = INVALID_DELAY;
+        connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin];
+        proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY;
+    }
+}
+
+PlacerState::PlacerState(bool placement_is_timing_driven)
+    : timing_(placement_is_timing_driven) {}
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index 344839a1bd5..9c99830c994 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -24,6 +24,23 @@
  * use mutable_timing() to access it. For more, see PlacerTimingCosts.
  */
 struct PlacerTimingContext : public Context {
+    PlacerTimingContext() = delete;
+
+    /**
+     * @brief Allocates the timing driven placement structures when requested.
+     * @param placement_is_timing_driven If false, no timing structures are allocated.
+     */
+    explicit PlacerTimingContext(bool placement_is_timing_driven);
+
+    /**
+     * @brief Commit the proposed connection delays and timing costs of the
+     *        affected connections, then reset the proposed values to INVALID_DELAY.
+     *
+     * All the connections have already been gathered by blocks_affected.affected_pins
+     * after running the routine find_affected_nets_and_update_costs() in try_swap().
+     */
+    void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
+
     /**
      * @brief Net connection delays based on the committed block positions.
      *
@@ -74,6 +91,8 @@ struct PlacerTimingContext : public Context {
      * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1]
      */
     vtr::vector<ClusterNetId, double> net_timing_cost;
+
+    static constexpr float INVALID_DELAY = std::numeric_limits<float>::quiet_NaN();
 };
 
 /**
@@ -143,6 +162,9 @@ struct PlacerMoveContext : public Context {
  * how to use this class due to similar implementation style.
  */
 class PlacerState : public Context {
+  public:
+    PlacerState(bool placement_is_timing_driven);
+
   public:
     inline const PlacerTimingContext& timing() const { return timing_; }
     inline PlacerTimingContext& mutable_timing() { return timing_; }

From 2ed4cac5cab92d1728ab91fd376b650bcf03bfbb Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 14:02:24 -0400
Subject: [PATCH 06/31] add a constructor for PlacerMoveContext

---
 vpr/src/place/place.cpp        | 21 +--------------------
 vpr/src/place/placer_state.cpp | 32 ++++++++++++++++++++++++++++++--
 vpr/src/place/placer_state.h   | 16 +++++++---------
 3 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9bab97519aa..38a4a3508d8 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -232,7 +232,7 @@ void try_place(const Netlist<>& net_list,
 
     int move_lim = (int)(placer_opts.anneal_sched.inner_num * pow(net_list.blocks().size(), 1.3333));
 
-    PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven());
+    PlacerState placer_state(placer_opts.place_algorithm.is_timing_driven(), cube_bb);
     auto& blk_loc_registry = placer_state.mutable_blk_loc_registry();
     const auto& p_timing_ctx = placer_state.timing();
     const auto& p_runtime_ctx = placer_state.runtime();
@@ -763,8 +763,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac
 
     size_t num_nets = cluster_ctx.clb_nlist.nets().size();
 
-    const int num_layers = device_ctx.grid.get_num_layers();
-
     init_placement_context(placer_state.mutable_blk_loc_registry(), directs);
 
     int max_pins_per_clb = 0;
@@ -772,23 +770,6 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac
         max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins);
     }
 
-    auto& place_move_ctx = placer_state.mutable_move();
-
-    if (place_ctx.cube_bb) {
-        place_move_ctx.bb_coords.resize(num_nets, t_bb());
-        place_move_ctx.bb_num_on_edges.resize(num_nets, t_bb());
-    } else {
-        VTR_ASSERT_SAFE(!place_ctx.cube_bb);
-        place_move_ctx.layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-        place_move_ctx.layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-    }
-
-    place_move_ctx.num_sink_pin_layer.resize({num_nets, size_t(num_layers)});
-    for (size_t flat_idx = 0; flat_idx < place_move_ctx.num_sink_pin_layer.size(); flat_idx++) {
-        auto& elem = place_move_ctx.num_sink_pin_layer.get(flat_idx);
-        elem = OPEN;
-    }
-
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 
     if (noc_opts.noc) {
diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp
index ab9edd12836..57fe8735685 100644
--- a/vpr/src/place/placer_state.cpp
+++ b/vpr/src/place/placer_state.cpp
@@ -4,6 +4,33 @@
 #include "globals.h"
 #include "move_transactions.h"
 
+PlacerMoveContext::PlacerMoveContext(bool cube_bb) {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    // allocate helper vectors that are used by many move generators
+    X_coord.resize(10, 0);
+    Y_coord.resize(10, 0);
+    layer_coord.resize(10, 0);
+
+    const size_t num_nets = cluster_ctx.clb_nlist.nets().size();
+
+    const int num_layers = device_ctx.grid.get_num_layers();
+
+    if (cube_bb) { // one t_bb per net
+        bb_coords.resize(num_nets, t_bb());
+        bb_num_on_edges.resize(num_nets, t_bb());
+    } else { // one t_2D_bb per net per layer
+        layer_bb_num_on_edges.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+        layer_bb_coords.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
+    }
+
+    num_sink_pin_layer.resize({num_nets, size_t(num_layers)});
+    for (size_t flat_idx = 0; flat_idx < num_sink_pin_layer.size(); flat_idx++) { // set every [net][layer] entry to OPEN
+        auto& elem = num_sink_pin_layer.get(flat_idx);
+        elem = OPEN;
+    }
+}
 
 PlacerTimingContext::PlacerTimingContext(bool placement_is_timing_driven) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -54,5 +81,6 @@ void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_a
     }
 }
 
-PlacerState::PlacerState(bool placement_is_timing_driven)
-    : timing_(placement_is_timing_driven) {}
+PlacerState::PlacerState(bool placement_is_timing_driven, bool cube_bb)
+    : timing_(placement_is_timing_driven)
+    , move_(cube_bb) {}
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index 9c99830c994..b35e56dab9c 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -109,6 +109,10 @@ struct PlacerRuntimeContext : public Context {
  * @brief Placement Move generators data
  */
 struct PlacerMoveContext : public Context {
+  public:
+    PlacerMoveContext() = delete;
+    explicit PlacerMoveContext(bool cube_bb);
+
   public:
     // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     vtr::vector<ClusterNetId, t_bb> bb_num_on_edges;
@@ -138,16 +142,10 @@ struct PlacerMoveContext : public Context {
 
     // Container to save the highly critical pins (higher than a timing criticality limit set by commandline option)
     std::vector<std::pair<ClusterNetId, int>> highly_crit_pins;
-
-  public:
-    PlacerMoveContext() {
-        // allocate helper vectors that are used by many move generators
-        X_coord.resize(10, 0);
-        Y_coord.resize(10, 0);
-        layer_coord.resize(10, 0);
-    }
 };
 
+
+
 /**
  * @brief This object encapsulates VPR placer's state.
  *
@@ -163,7 +161,7 @@ struct PlacerMoveContext : public Context {
  */
 class PlacerState : public Context {
   public:
-    PlacerState(bool placement_is_timing_driven);
+    PlacerState(bool placement_is_timing_driven, bool cube_bb);
 
   public:
     inline const PlacerTimingContext& timing() const { return timing_; }

From ecbcba969beb6b183862f39114a0942ee4bc802c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 14:15:33 -0400
Subject: [PATCH 07/31] added invalidate_affected_connections() to
 NetPinTimingInvalidator

---
 vpr/src/place/annealer.cpp               | 31 +++---------------------
 vpr/src/timing/NetPinTimingInvalidator.h | 19 +++++++++++++++
 2 files changed, 22 insertions(+), 28 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 204b7456c35..91da212ae67 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -88,19 +88,6 @@
 
 #endif
 
-
-/**
- * @brief Invalidates the connections affected by the specified block moves.
- *
- * All the connections recorded in blocks_affected.affected_pins have different
- * values for `proposed_connection_delay` and `connection_delay`.
- *
- * Invalidate all the timing graph edges associated with these connections via
- * the NetPinTimingInvalidator class.
- */
-static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
-                                            NetPinTimingInvalidator* pin_tedges_invalidator,
-                                            TimingInfo* timing_info);
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
  *
@@ -124,18 +111,6 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
 
 static e_move_result assess_swap(double delta_c, double t);
 
-static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected,
-                                            NetPinTimingInvalidator* pin_tedges_invalidator,
-                                            TimingInfo* timing_info) {
-    VTR_ASSERT_SAFE(timing_info);
-    VTR_ASSERT_SAFE(pin_tedges_invalidator);
-
-    // Invalidate timing graph edges affected by the move
-    for (ClusterPinId pin : blocks_affected.affected_pins) {
-        pin_tedges_invalidator->invalidate_connection(pin, timing_info);
-    }
-}
-
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -662,7 +637,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         //If rejected, we reject the proposed block moves and revert this timing analysis.
         if (place_algorithm == SLACK_TIMING_PLACE) {
             // Invalidates timing of modified connections for incremental timing updates.
-            invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+            pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
             /* Update the connection_timing_cost and connection_delay *
              * values from the temporary values.                      */
@@ -745,7 +720,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                 /* Invalidates timing of modified connections for incremental *
                  * timing updates. These invalidations are accumulated for a  *
                  * big timing update in the outer loop.                       */
-                invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
                 /* Update the connection_timing_cost and connection_delay *
                  * values from the temporary values.                      */
@@ -792,7 +767,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                 /* Re-invalidate the affected sink pins since the proposed
                  * move is rejected, and the same blocks are reverted to
                  * their original positions. */
-                invalidate_affected_connections(blocks_affected_, pin_timing_invalidator_, timing_info_);
+                pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
                 // Revert the timing update
                 update_timing_classes(crit_params, timing_info_, criticalities_,
diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h
index f452b95bd7a..754d118aef2 100644
--- a/vpr/src/timing/NetPinTimingInvalidator.h
+++ b/vpr/src/timing/NetPinTimingInvalidator.h
@@ -4,6 +4,7 @@
 #include "tatum/TimingGraphFwd.hpp"
 #include "timing_info.h"
 #include "vtr_range.h"
+#include "move_transactions.h"
 
 #include "vtr_vec_id_set.h"
 
@@ -21,6 +22,24 @@ class NetPinTimingInvalidator {
     virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0;
     virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0;
     virtual void reset() = 0;
+
+    /**
+     * @brief Invalidates the connections affected by the specified block moves.
+     *
+     * All the connections recorded in blocks_affected.affected_pins have different
+     * values for `proposed_connection_delay` and `connection_delay`.
+     *
+     * Invalidate all the timing graph edges associated with these connections by
+     * calling invalidate_connection() on each affected pin.
+     */
+    void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, TimingInfo* timing_info) {
+        VTR_ASSERT_SAFE(timing_info);
+
+        // Invalidate timing graph edges affected by the move
+        for (ClusterPinId pin : blocks_affected.affected_pins) {
+            invalidate_connection(pin, timing_info);
+        }
+    }
 };
 
 //Helper class for iterating through the timing edges associated with a particular

From 34e0e909841f1a8afe52fcddba805b573f26847e Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 14:16:06 -0400
Subject: [PATCH 08/31] added PlacerTimingContext::revert_td_cost()

---
 vpr/src/place/annealer.cpp     | 27 +--------------------------
 vpr/src/place/placer_state.cpp | 20 ++++++++++++++++++++
 vpr/src/place/placer_state.h   |  6 ++++++
 3 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 91da212ae67..003f0ea3045 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -173,31 +173,6 @@ static e_move_result assess_swap(double delta_c, double t) {
     return REJECTED;
 }
 
-//Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on
-//the move proposed in blocks_affected
-static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected,
-                           PlacerTimingContext& p_timing_ctx) {
-#ifndef VTR_ASSERT_SAFE_ENABLED
-    (void)blocks_affected;
-    (void)p_timing_ctx;
-#else
-    //Invalidate temp delay & timing cost values to match sanity checks in
-    //comp_td_connection_cost()
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& clb_nlist = cluster_ctx.clb_nlist;
-
-    auto& proposed_connection_delay = p_timing_ctx.proposed_connection_delay;
-    auto& proposed_connection_timing_cost = p_timing_ctx.proposed_connection_timing_cost;
-
-    for (ClusterPinId pin : blocks_affected.affected_pins) {
-        ClusterNetId net = clb_nlist.pin_net(pin);
-        int ipin = clb_nlist.pin_net_index(pin);
-        proposed_connection_delay[net][ipin] = INVALID_DELAY;
-        proposed_connection_timing_cost[net][ipin] = INVALID_DELAY;
-    }
-#endif
-}
-
 /**
  * @brief Updates all the cost normalization factors during the outer
  * loop iteration of the placement. At each temperature change, these
@@ -780,7 +755,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 
             if (place_algorithm == CRITICALITY_TIMING_PLACE) {
                 // Un-stage the values stored in proposed_* data structures
-                revert_td_cost(blocks_affected_, placer_state_.mutable_timing());
+                placer_state_.mutable_timing().revert_td_cost(blocks_affected_);
             }
 
             if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp
index 57fe8735685..e83d74bbe2c 100644
--- a/vpr/src/place/placer_state.cpp
+++ b/vpr/src/place/placer_state.cpp
@@ -81,6 +81,26 @@ void PlacerTimingContext::commit_td_cost(const t_pl_blocks_to_be_moved& blocks_a
     }
 }
 
+void PlacerTimingContext::revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) {
+#ifndef VTR_ASSERT_SAFE_ENABLED
+    (void)blocks_affected;
+#else
+    //Invalidate temp delay & timing cost values to match sanity checks in
+    //comp_td_connection_cost()
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& clb_nlist = cluster_ctx.clb_nlist;
+
+
+    for (ClusterPinId pin : blocks_affected.affected_pins) {
+        ClusterNetId net = clb_nlist.pin_net(pin);
+        int ipin = clb_nlist.pin_net_index(pin);
+        proposed_connection_delay[net][ipin] = INVALID_DELAY;
+        proposed_connection_timing_cost[net][ipin] = INVALID_DELAY;
+    }
+#endif
+}
+
 PlacerState::PlacerState(bool placement_is_timing_driven, bool cube_bb)
     : timing_(placement_is_timing_driven)
     , move_(cube_bb) {}
+
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index b35e56dab9c..c727ac181e5 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -41,6 +41,12 @@ struct PlacerTimingContext : public Context {
      */
     void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
 
+    /**
+     * @brief Un-stages the move proposed in blocks_affected by setting the proposed_connection_delay and proposed_connection_timing_cost
+     * entries of its affected pins to INVALID_DELAY. This is a no-op unless VTR_ASSERT_SAFE_ENABLED is defined.
+     */
+    void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected);
+
     /**
      * @brief Net connection delays based on the committed block positions.
      *

From 15540aef82b48d4129522c88becdabe47d782494 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 16:49:05 -0400
Subject: [PATCH 09/31] add select_move_generator() function

---
 vpr/src/place/RL_agent_util.cpp | 38 ++++++-----------------
 vpr/src/place/RL_agent_util.h   | 23 +++++---------
 vpr/src/place/annealer.cpp      | 55 +++++++++------------------------
 vpr/src/place/annealer.h        |  3 +-
 vpr/src/place/place.cpp         | 44 ++++++++------------------
 vpr/src/place/place_util.cpp    |  4 +++
 vpr/src/place/place_util.h      |  8 +++--
 7 files changed, 55 insertions(+), 120 deletions(-)

diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index e418a7db6ee..e080b335fe7 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -122,40 +122,20 @@ std::pair<std::unique_ptr<MoveGenerator>, std::unique_ptr<MoveGenerator>> create
     return move_generators;
 }
 
-void assign_current_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
-                                   std::unique_ptr<MoveGenerator>& move_generator2,
-                                   e_agent_state agent_state,
-                                   const t_placer_opts& placer_opts,
-                                   bool in_quench,
-                                   std::unique_ptr<MoveGenerator>& current_move_generator) {
+MoveGenerator& select_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
+                                     std::unique_ptr<MoveGenerator>& move_generator2,
+                                     e_agent_state agent_state,
+                                     const t_placer_opts& placer_opts,
+                                     bool in_quench) {
     if (in_quench) {
         if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate)
-            current_move_generator = std::move(move_generator2);
+            return *move_generator2;
         else
-            current_move_generator = std::move(move_generator);
+            return *move_generator;
     } else {
         if (agent_state == e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate)
-            current_move_generator = std::move(move_generator);
+            return *move_generator;
         else
-            current_move_generator = std::move(move_generator2);
-    }
-}
-
-void update_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
-                           std::unique_ptr<MoveGenerator>& move_generator2,
-                           e_agent_state agent_state,
-                           const t_placer_opts& placer_opts,
-                           bool in_quench,
-                           std::unique_ptr<MoveGenerator>& current_move_generator) {
-    if (in_quench) {
-        if (placer_opts.place_quench_algorithm.is_timing_driven() && placer_opts.place_agent_multistate)
-            move_generator2 = std::move(current_move_generator);
-        else
-            move_generator = std::move(current_move_generator);
-    } else {
-        if (agent_state == e_agent_state::EARLY_IN_THE_ANNEAL || !placer_opts.place_agent_multistate)
-            move_generator = std::move(current_move_generator);
-        else
-            move_generator2 = std::move(current_move_generator);
+            return *move_generator2;
     }
 }
\ No newline at end of file
diff --git a/vpr/src/place/RL_agent_util.h b/vpr/src/place/RL_agent_util.h
index afe8bf3b6cb..166ddccbf2a 100644
--- a/vpr/src/place/RL_agent_util.h
+++ b/vpr/src/place/RL_agent_util.h
@@ -31,22 +31,13 @@ std::pair<std::unique_ptr<MoveGenerator>, std::unique_ptr<MoveGenerator>> create
                                                                                                  double noc_attraction_weight);
 
 /**
- * @brief copy one of the available move_generators to be the current move_generator that would be used in the placement based on the placer_options and the agent state
+ * @brief Returns a reference to the move generator (one of the two available) that should currently be used
+ * in the placement, based on the placer options, the agent state, and whether the placement is in the quench stage
  */
-void assign_current_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
-                                   std::unique_ptr<MoveGenerator>& move_generator2,
-                                   e_agent_state agent_state,
-                                   const t_placer_opts& placer_opts,
-                                   bool in_quench,
-                                   std::unique_ptr<MoveGenerator>& current_move_generator);
+MoveGenerator& select_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
+                                     std::unique_ptr<MoveGenerator>& move_generator2,
+                                     e_agent_state agent_state,
+                                     const t_placer_opts& placer_opts,
+                                     bool in_quench);
 
-/**
- * @brief move the updated current_move_generator to its original move_Generator structure based on the placer_options and the agent state
- */
-void update_move_generator(std::unique_ptr<MoveGenerator>& move_generator,
-                           std::unique_ptr<MoveGenerator>& move_generator2,
-                           e_agent_state agent_state,
-                           const t_placer_opts& placer_opts,
-                           bool in_quench,
-                           std::unique_ptr<MoveGenerator>& current_move_generator);
 #endif
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 003f0ea3045..51508e65792 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -173,35 +173,6 @@ static e_move_result assess_swap(double delta_c, double t) {
     return REJECTED;
 }
 
-/**
- * @brief Updates all the cost normalization factors during the outer
- * loop iteration of the placement. At each temperature change, these
- * values are updated so that we can balance the tradeoff between the
- * different placement cost components (timing, wirelength and NoC).
- * Depending on the placement mode the corresponding normalization factors are
- * updated.
- *
- * @param costs Contains the normalization factors which need to be updated
- * @param placer_opts Determines the placement mode
- * @param noc_opts Determines if placement includes the NoC
- * @param noc_cost_handler Computes normalization factors for NoC-related cost terms
- */
-static void update_placement_cost_normalization_factors(t_placer_costs* costs,
-                                                        const t_placer_opts& placer_opts,
-                                                        const t_noc_opts& noc_opts,
-                                                        const std::optional<NocCostHandler>& noc_cost_handler) {
-    /* Update the cost normalization factors */
-    costs->update_norm_factors();
-
-    // update the noc normalization factors if the placement includes the NoC
-    if (noc_opts.noc) {
-        noc_cost_handler->update_noc_normalization_factors(*costs);
-    }
-
-    // update the current total placement cost
-    costs->cost = costs->get_total_cost(placer_opts, noc_opts);
-}
-
 ///@brief Constructor: Initialize all annealing state variables and macros.
 t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
                                      float first_t,
@@ -369,6 +340,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , move_stats_file_(nullptr, vtr::fclose)
     , outer_crit_iter_count_(1)
     , blocks_affected_(placer_state.block_locs().size())
+    , quench_started_(false)
 {
     const auto& device_ctx = g_vpr_ctx.device();
 
@@ -388,8 +360,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
         // don't do an inner recompute
         inner_recompute_limit_ = first_move_lim + 1;
     }
-    moves_since_cost_recompute_ = 0;
-    tot_iter_ = 0;
 
     /* calculate the number of moves in the quench that we should recompute timing after based on the value of *
      * the commandline option quench_recompute_divider                                                         */
@@ -400,6 +370,9 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
         quench_recompute_limit_ = first_move_lim + 1;
     }
 
+    moves_since_cost_recompute_ = 0;
+    tot_iter_ = 0;
+
     // Get the first range limiter
     placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
 
@@ -807,19 +780,15 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 }
 
 /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */
-void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) {
+void PlacementAnnealer::outer_loop_update_timing_info() {
     if (placer_opts_.place_algorithm.is_timing_driven()) {
         /* At each temperature change we update these values to be used
          * for normalizing the tradeoff between timing and wirelength (bb) */
-        if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter
-            || placer_opts_.inner_loop_recompute_divider != 0) {
+        if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter ||
+            placer_opts_.inner_loop_recompute_divider != 0) {
 #ifdef VERBOSE
             VTR_LOG("Outer loop recompute criticalities\n");
 #endif
-            // Avoid division by zero
-            num_connections = std::max(num_connections, 1);
-            VTR_ASSERT(num_connections > 0);
-
             PlaceCritParams crit_params;
             crit_params.crit_exponent = annealing_state_.crit_exponent;
             crit_params.crit_limit = placer_opts_.place_crit_limit;
@@ -834,7 +803,10 @@ void PlacementAnnealer::outer_loop_update_timing_info(int num_connections) {
     }
 
     // Update the cost normalization factors
-    update_placement_cost_normalization_factors(&costs_, placer_opts_, noc_opts_, noc_cost_handler_);
+    costs_.update_norm_factors();
+
+    // update the current total placement cost
+    costs_.cost = costs_.get_total_cost(placer_opts_, noc_opts_);
 }
 
 /* Function which contains the inner loop of the simulated annealing */
@@ -867,8 +839,9 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
              * We do this only once in a while, since it is expensive.
              */
 
+            const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_;
             // on last iteration don't recompute
-            if (inner_crit_iter_count >= inner_recompute_limit_ && inner_iter != annealing_state_.move_lim - 1) {
+            if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) {
 
                 inner_crit_iter_count = 0;
 #ifdef VERBOSE
@@ -934,6 +907,8 @@ bool PlacementAnnealer::outer_loop_update_state() {
 }
 
 void PlacementAnnealer::start_quench() {
+    quench_started_ = true;
+
     // Freeze out: only accept solutions that improve placement.
     annealing_state_.t = 0;
 
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 0c6b2e66ec3..b932ed56d7e 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -174,7 +174,7 @@ class PlacementAnnealer {
     void placement_inner_loop(MoveGenerator& move_generator,
                               float timing_bb_factor);
 
-    void outer_loop_update_timing_info(int num_connections);
+    void outer_loop_update_timing_info();
 
     bool outer_loop_update_state();
 
@@ -248,6 +248,7 @@ class PlacementAnnealer {
     int quench_recompute_limit_;
     int moves_since_cost_recompute_;
     int tot_iter_;
+    bool quench_started_;
 
   private:
     ///@brief Find the starting temperature for the annealing loop.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 38a4a3508d8..f9c51b28e37 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -185,9 +185,8 @@ void try_place(const Netlist<>& net_list,
 
     auto& timing_ctx = g_vpr_ctx.timing();
     auto pre_place_timing_stats = timing_ctx.stats;
-    int num_connections, outer_crit_iter_count;
 
-    t_placer_costs costs(placer_opts.place_algorithm);
+    t_placer_costs costs(placer_opts.place_algorithm, noc_opts.noc);
 
     tatum::TimingPathInfo critical_path;
     float sTNS = NAN;
@@ -297,7 +296,7 @@ void try_place(const Netlist<>& net_list,
     if (placer_opts.place_algorithm.is_timing_driven()) {
         costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL);
 
-        num_connections = count_connections();
+        int num_connections = count_connections();
         VTR_LOG("\n");
         VTR_LOG("There are %d point to point connections in this circuit.\n",
                 num_connections);
@@ -359,8 +358,6 @@ void try_place(const Netlist<>& net_list,
                 *timing_info, debug_tnode);
         }
 
-        outer_crit_iter_count = 1;
-
         /* Initialize the normalization factors. Calling costs.update_norm_factors() *
          * here would fail the golden results of strong_sdc benchmark                */
         costs.timing_cost_norm = 1 / costs.timing_cost;
@@ -375,10 +372,6 @@ void try_place(const Netlist<>& net_list,
         /* Timing cost and normalization factors are not used */
         costs.timing_cost = INVALID_COST;
         costs.timing_cost_norm = INVALID_COST;
-
-        /* Other initializations */
-        outer_crit_iter_count = 0;
-        num_connections = 0;
     }
 
     if (noc_opts.noc) {
@@ -467,8 +460,6 @@ void try_place(const Netlist<>& net_list,
     //RL agent state definition
     e_agent_state agent_state = e_agent_state::EARLY_IN_THE_ANNEAL;
 
-    std::unique_ptr<MoveGenerator> current_move_generator;
-
     //Define the timing bb weight factor for the agent's reward function
     float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT;
 
@@ -488,7 +479,7 @@ void try_place(const Netlist<>& net_list,
         do {
             vtr::Timer temperature_timer;
 
-            annealer.outer_loop_update_timing_info(num_connections);
+            annealer.outer_loop_update_timing_info();
 
             if (placer_opts.place_algorithm.is_timing_driven()) {
                 critical_path = timing_info->least_slack_critical_path();
@@ -503,18 +494,14 @@ void try_place(const Netlist<>& net_list,
                 }
             }
 
-            //move the appropriate move_generator to be the current used move generator
-            assign_current_move_generator(move_generator, move_generator2,
-                                          agent_state, placer_opts, false, current_move_generator);
+            // select the appropriate move generator
+            MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2,
+                                                                          agent_state, placer_opts, false);
 
             // do a complete inner loop iteration
-            annealer.placement_inner_loop(*current_move_generator,
+            annealer.placement_inner_loop(current_move_generator,
                                           timing_bb_factor);
 
-            //move the update used move_generator to its original variable
-            update_move_generator(move_generator, move_generator2, agent_state,
-                                  placer_opts, false, current_move_generator);
-
             print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
                                critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
                                noc_opts.noc, costs.noc_cost_terms);
@@ -530,8 +517,7 @@ void try_place(const Netlist<>& net_list,
 
             sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
                     costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t);
-            update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT,
-                          timing_info);
+            update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info);
 
             //#ifdef VERBOSE
             //            if (getEchoEnabled()) {
@@ -550,19 +536,15 @@ void try_place(const Netlist<>& net_list,
 
         vtr::ScopedFinishTimer temperature_timer("Placement Quench");
 
-        annealer.outer_loop_update_timing_info(num_connections);
+        annealer.outer_loop_update_timing_info();
 
-        //move the appropriate move_generator to be the current used move generator
-        assign_current_move_generator(move_generator, move_generator2,
-                                      agent_state, placer_opts, true, current_move_generator);
+        // select the appropriate move generator
+        MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2,
+                                                                      agent_state, placer_opts, true);
 
         /* Run inner loop again with temperature = 0 so as to accept only swaps
          * which reduce the cost of the placement */
-        annealer.placement_inner_loop(*current_move_generator, timing_bb_factor);
-
-        // move the update used move_generator to its original variable
-        update_move_generator(move_generator, move_generator2, agent_state,
-                              placer_opts, true, current_move_generator);
+        annealer.placement_inner_loop(current_move_generator, timing_bb_factor);
 
         if (placer_opts.place_quench_algorithm.is_timing_driven()) {
             critical_path = timing_info->least_slack_critical_path();
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index e09bb2d5dd6..aa65a15110a 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -64,6 +64,10 @@ void t_placer_costs::update_norm_factors() {
         VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
         bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost
     }
+
+    if (noc_enabled) {
+        NocCostHandler::update_noc_normalization_factors(*this);
+    }
 }
 
 double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index d76e738f70e..2a816e01350 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -101,8 +101,9 @@ class t_placer_costs {
     NocCostTerms noc_cost_norm_factors;
 
   public: //Constructor
-    explicit t_placer_costs(t_place_algorithm algo)
-        : place_algorithm(algo) {}
+    explicit t_placer_costs(t_place_algorithm algo, bool noc)
+        : place_algorithm(algo)
+        , noc_enabled(noc) {}
     t_placer_costs() = default;
 
   public: //Mutator
@@ -134,8 +135,9 @@ class t_placer_costs {
     t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost);
 
   private:
-    double MAX_INV_TIMING_COST = 1.e12;
+    static constexpr double MAX_INV_TIMING_COST = 1.e12;
     t_place_algorithm place_algorithm;
+    bool noc_enabled;
 };
 
 /**

From c005b70a1e76f5ffdb048ef0dc6582d61b01d008 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 17:02:53 -0400
Subject: [PATCH 10/31] enum class e_place_algorithm and
 e_place_bounding_box_mode

---
 vpr/src/base/ShowSetup.cpp         |  6 +++---
 vpr/src/base/read_options.cpp      | 30 +++++++++++++++---------------
 vpr/src/base/vpr_types.h           |  6 +++---
 vpr/src/place/annealer.cpp         | 14 +++++++-------
 vpr/src/place/net_cost_handler.cpp |  2 +-
 vpr/src/place/place.cpp            | 10 +++++-----
 vpr/src/place/place_util.cpp       |  4 ++--
 7 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index f0280669cd9..66170063d67 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -516,13 +516,13 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) {
         || (PLACE_ALWAYS == PlacerOpts.place_freq)) {
         VTR_LOG("PlacerOpts.place_algorithm: ");
         switch (PlacerOpts.place_algorithm.get()) {
-            case BOUNDING_BOX_PLACE:
+            case e_place_algorithm::BOUNDING_BOX_PLACE:
                 VTR_LOG("BOUNDING_BOX_PLACE\n");
                 break;
-            case CRITICALITY_TIMING_PLACE:
+            case e_place_algorithm::CRITICALITY_TIMING_PLACE:
                 VTR_LOG("CRITICALITY_TIMING_PLACE\n");
                 break;
-            case SLACK_TIMING_PLACE:
+            case e_place_algorithm::SLACK_TIMING_PLACE:
                 VTR_LOG("SLACK_TIMING_PLACE\n");
                 break;
             default:
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index eeb4bbfaee0..ce20940cda5 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -393,11 +393,11 @@ struct ParsePlaceAlgorithm {
     ConvertedValue<e_place_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_place_algorithm> conv_value;
         if (str == "bounding_box") {
-            conv_value.set_value(BOUNDING_BOX_PLACE);
+            conv_value.set_value(e_place_algorithm::BOUNDING_BOX_PLACE);
         } else if (str == "criticality_timing") {
-            conv_value.set_value(CRITICALITY_TIMING_PLACE);
+            conv_value.set_value(e_place_algorithm::CRITICALITY_TIMING_PLACE);
         } else if (str == "slack_timing") {
-            conv_value.set_value(SLACK_TIMING_PLACE);
+            conv_value.set_value(e_place_algorithm::SLACK_TIMING_PLACE);
         } else {
             std::stringstream msg;
             msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -415,12 +415,12 @@ struct ParsePlaceAlgorithm {
 
     ConvertedValue<std::string> to_str(e_place_algorithm val) {
         ConvertedValue<std::string> conv_value;
-        if (val == BOUNDING_BOX_PLACE) {
+        if (val == e_place_algorithm::BOUNDING_BOX_PLACE) {
             conv_value.set_value("bounding_box");
-        } else if (val == CRITICALITY_TIMING_PLACE) {
+        } else if (val == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
             conv_value.set_value("criticality_timing");
         } else {
-            VTR_ASSERT(val == SLACK_TIMING_PLACE);
+            VTR_ASSERT(val == e_place_algorithm::SLACK_TIMING_PLACE);
             conv_value.set_value("slack_timing");
         }
         return conv_value;
@@ -435,11 +435,11 @@ struct ParsePlaceBoundingBox {
     ConvertedValue<e_place_bounding_box_mode> from_str(const std::string& str) {
         ConvertedValue<e_place_bounding_box_mode> conv_value;
         if (str == "auto_bb") {
-            conv_value.set_value(AUTO_BB);
+            conv_value.set_value(e_place_bounding_box_mode::AUTO_BB);
         } else if (str == "cube_bb") {
-            conv_value.set_value(CUBE_BB);
+            conv_value.set_value(e_place_bounding_box_mode::CUBE_BB);
         } else if (str == "per_layer_bb") {
-            conv_value.set_value(PER_LAYER_BB);
+            conv_value.set_value(e_place_bounding_box_mode::PER_LAYER_BB);
         } else {
             std::stringstream msg;
             msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -450,12 +450,12 @@ struct ParsePlaceBoundingBox {
 
     ConvertedValue<std::string> to_str(e_place_bounding_box_mode val) {
         ConvertedValue<std::string> conv_value;
-        if (val == AUTO_BB) {
+        if (val == e_place_bounding_box_mode::AUTO_BB) {
             conv_value.set_value("auto_bb");
-        } else if (val == CUBE_BB) {
+        } else if (val == e_place_bounding_box_mode::CUBE_BB) {
             conv_value.set_value("cube_bb");
         } else {
-            VTR_ASSERT(val == PER_LAYER_BB);
+            VTR_ASSERT(val == e_place_bounding_box_mode::PER_LAYER_BB);
             conv_value.set_value("per_layer_bb");
         }
         return conv_value;
@@ -3089,9 +3089,9 @@ void set_conditional_defaults(t_options& args) {
     //Which placement algorithm to use?
     if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) {
         if (args.timing_analysis) {
-            args.PlaceAlgorithm.set(CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
+            args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
         } else {
-            args.PlaceAlgorithm.set(BOUNDING_BOX_PLACE, Provenance::INFERRED);
+            args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED);
         }
     }
 
@@ -3105,7 +3105,7 @@ void set_conditional_defaults(t_options& args) {
     // Check for correct options combinations
     // If you are running WLdriven placement, the RL reward function should be
     // either basic or nonPenalizing basic
-    if (args.RL_agent_placement && (args.PlaceAlgorithm == BOUNDING_BOX_PLACE || !args.timing_analysis)) {
+    if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) {
         if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") {
             VTR_LOG_WARN(
                 "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n"
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index cb280ff36ec..9ea6f8d1f70 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -876,13 +876,13 @@ struct t_annealing_sched {
  * is used when there is no timing information available (wiring only).
  * SLACK_TIMING_PLACE is mainly feasible during placement quench.
  */
-enum e_place_algorithm {
+enum class e_place_algorithm {
     BOUNDING_BOX_PLACE,
     CRITICALITY_TIMING_PLACE,
     SLACK_TIMING_PLACE
 };
 
-enum e_place_bounding_box_mode {
+enum class e_place_bounding_box_mode {
     AUTO_BB,
     CUBE_BB,
     PER_LAYER_BB
@@ -929,7 +929,7 @@ class t_place_algorithm {
 
     ///@brief Check if the algorithm belongs to the timing driven category.
     inline bool is_timing_driven() const {
-        return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE;
+        return algo == e_place_algorithm::CRITICALITY_TIMING_PLACE || algo == e_place_algorithm::SLACK_TIMING_PLACE;
     }
 
     ///@brief Accessor: returns the underlying e_place_algorithm enum value.
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 51508e65792..6aad4e512eb 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -583,7 +583,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         //slack values resulted from the proposed block moves. If the move turns out
         //to be accepted, we keep the updated slack values and commit the block moves.
         //If rejected, we reject the proposed block moves and revert this timing analysis.
-        if (place_algorithm == SLACK_TIMING_PLACE) {
+        if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
             // Invalidates timing of modified connections for incremental timing updates.
             pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
@@ -607,7 +607,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             /* Get the setup slack analysis cost */
             //TODO: calculate a weighted average of the slack cost and wiring cost
             delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm;
-        } else if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+        } else if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
             /* Take delta_c as a combination of timing and wiring cost. In
              * addition to `timing_tradeoff`, we normalize the cost values */
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
@@ -621,7 +621,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
                       + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
         } else {
-            VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
+            VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
                            "\t\tMove bb_delta_c %e, bb_cost_norm %e\n",
                            bb_delta_c,
@@ -653,7 +653,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             costs_.cost += delta_c;
             costs_.bb_cost += bb_delta_c;
 
-            if (place_algorithm == SLACK_TIMING_PLACE) {
+            if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
                 // Update the timing driven cost as usual
                 costs_.timing_cost += timing_delta_c;
 
@@ -662,7 +662,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                 commit_setup_slacks(setup_slacks_, placer_state_);
             }
 
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+            if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
                 costs_.timing_cost += timing_delta_c;
 
                 /* Invalidates timing of modified connections for incremental *
@@ -705,7 +705,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             // Restore the blk_loc_registry.block_locs data structures to their state before the move.
             blk_loc_registry.revert_move_blocks(blocks_affected_);
 
-            if (place_algorithm == SLACK_TIMING_PLACE) {
+            if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
                 /* Revert the timing delays and costs to pre-update values.       */
                 /* These routines must be called after reverting the block moves. */
                 //TODO: make this process incremental
@@ -726,7 +726,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                     "The current setup slacks should be identical to the values before the try swap timing info update.");
             }
 
-            if (place_algorithm == CRITICALITY_TIMING_PLACE) {
+            if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
                 // Un-stage the values stored in proposed_* data structures
                 placer_state_.mutable_timing().revert_td_cost(blocks_affected_);
             }
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 99ab6fa5ce2..a161fba3b3e 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1589,7 +1589,7 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m
         check_and_print_cost(new_timing_cost, costs.timing_cost, "timing_cost");
         costs.timing_cost = new_timing_cost;
     } else {
-        VTR_ASSERT(placer_opts_.place_algorithm == BOUNDING_BOX_PLACE);
+        VTR_ASSERT(placer_opts_.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
         costs.cost = new_bb_cost * costs.bb_cost_norm;
     }
 }
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f9c51b28e37..5d06a546cc5 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -363,7 +363,7 @@ void try_place(const Netlist<>& net_list,
         costs.timing_cost_norm = 1 / costs.timing_cost;
         costs.bb_cost_norm = 1 / costs.bb_cost;
     } else {
-        VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE);
+        VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
 
         /* Total cost is the same as wirelength cost normalized*/
         costs.bb_cost = net_cost_handler.comp_bb_cost(e_cost_methods::NORMAL);
@@ -710,7 +710,7 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
     bool cube_bb;
     const int number_layers = g_vpr_ctx.device().grid.get_num_layers();
 
-    if (place_bb_mode == AUTO_BB) {
+    if (place_bb_mode == e_place_bounding_box_mode::AUTO_BB) {
         // If the auto_bb is used, we analyze the RR graph to see whether is there any inter-layer connection that is not
         // originated from OPIN. If there is any, cube BB is chosen, otherwise, per-layer bb is chosen.
         if (number_layers > 1 && inter_layer_connections_limited_to_opin(rr_graph)) {
@@ -718,12 +718,12 @@ static bool is_cube_bb(const e_place_bounding_box_mode place_bb_mode,
         } else {
             cube_bb = true;
         }
-    } else if (place_bb_mode == CUBE_BB) {
+    } else if (place_bb_mode == e_place_bounding_box_mode::CUBE_BB) {
         // The user has specifically asked for CUBE_BB
         cube_bb = true;
     } else {
         // The user has specifically asked for PER_LAYER_BB
-        VTR_ASSERT_SAFE(place_bb_mode == PER_LAYER_BB);
+        VTR_ASSERT_SAFE(place_bb_mode == e_place_bounding_box_mode::PER_LAYER_BB);
         cube_bb = false;
     }
 
@@ -1143,4 +1143,4 @@ static void copy_locs_to_global_state(const BlkLocRegistry& blk_loc_registry) {
     // update the graphics' reference to placement location variables
     get_draw_state_vars()->set_graphics_blk_loc_registry_ref(global_blk_loc_registry);
 #endif
-}
+}
\ No newline at end of file
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index aa65a15110a..e3f3d9da567 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -61,7 +61,7 @@ void t_placer_costs::update_norm_factors() {
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
     } else {
-        VTR_ASSERT_SAFE(place_algorithm == BOUNDING_BOX_PLACE);
+        VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
         bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost
     }
 
@@ -73,7 +73,7 @@ void t_placer_costs::update_norm_factors() {
 double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_noc_opts& noc_opts) {
     double total_cost = 0.0;
 
-    if (placer_opts.place_algorithm == BOUNDING_BOX_PLACE) {
+    if (placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE) {
         // in bounding box mode we only care about wirelength
         total_cost = bb_cost * bb_cost_norm;
     } else if (placer_opts.place_algorithm.is_timing_driven()) {

From 8777a4b317b0b32e92f917c6387b575d726cdac6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Oct 2024 17:25:29 -0400
Subject: [PATCH 11/31] enum class e_agent_algorithm

---
 vpr/src/base/SetupVPR.cpp       | 1 -
 vpr/src/base/read_options.cpp   | 8 ++++----
 vpr/src/base/vpr_types.h        | 8 +-------
 vpr/src/pack/cluster.cpp        | 2 --
 vpr/src/place/RL_agent_util.cpp | 2 +-
 5 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 38ac3c595c7..5b9adcaea2d 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -607,7 +607,6 @@ void SetupPackerOpts(const t_options& Options,
     //TODO: document?
     PackerOpts->inter_cluster_net_delay = 1.0; /* DEFAULT */
     PackerOpts->auto_compute_inter_cluster_net_delay = true;
-    PackerOpts->packer_algorithm = PACK_GREEDY; /* DEFAULT */
 
     PackerOpts->device_layout = Options.device_layout;
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index ce20940cda5..658fb245ce6 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -470,9 +470,9 @@ struct ParsePlaceAgentAlgorithm {
     ConvertedValue<e_agent_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_agent_algorithm> conv_value;
         if (str == "e_greedy")
-            conv_value.set_value(E_GREEDY);
+            conv_value.set_value(e_agent_algorithm::E_GREEDY);
         else if (str == "softmax")
-            conv_value.set_value(SOFTMAX);
+            conv_value.set_value(e_agent_algorithm::SOFTMAX);
         else {
             std::stringstream msg;
             msg << "Invalid conversion from '" << str << "' to e_agent_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -483,10 +483,10 @@ struct ParsePlaceAgentAlgorithm {
 
     ConvertedValue<std::string> to_str(e_agent_algorithm val) {
         ConvertedValue<std::string> conv_value;
-        if (val == E_GREEDY)
+        if (val == e_agent_algorithm::E_GREEDY)
             conv_value.set_value("e_greedy");
         else {
-            VTR_ASSERT(val == SOFTMAX);
+            VTR_ASSERT(val == e_agent_algorithm::SOFTMAX);
             conv_value.set_value("softmax");
         }
         return conv_value;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 9ea6f8d1f70..035f2d5abd0 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -789,11 +789,6 @@ enum e_stage_action {
  *
  * TODO: document each packing parameter
  */
-enum e_packer_algorithm {
-    PACK_GREEDY,
-    PACK_BRUTE_FORCE
-};
-
 struct t_packer_opts {
     std::string circuit_file_name;
     std::string sdc_file_name;
@@ -818,7 +813,6 @@ struct t_packer_opts {
     int transitive_fanout_threshold;
     int feasible_block_array_size;
     e_stage_action doPacking;
-    enum e_packer_algorithm packer_algorithm;
     std::string device_layout;
     e_timing_update_type timing_update_type;
     bool use_attraction_groups;
@@ -952,7 +946,7 @@ enum class e_pad_loc_type {
  * Currently, the supported algorithms are: epsilon greedy and softmax
  * For more details, check simpleRL_move_generator.cpp
  */
-enum e_agent_algorithm {
+enum class e_agent_algorithm {
     E_GREEDY,
     SOFTMAX
 };
diff --git a/vpr/src/pack/cluster.cpp b/vpr/src/pack/cluster.cpp
index 93683858f3f..0e9099f2ea2 100644
--- a/vpr/src/pack/cluster.cpp
+++ b/vpr/src/pack/cluster.cpp
@@ -97,8 +97,6 @@ std::map<t_logical_block_type_ptr, size_t> do_clustering(const t_packer_opts& pa
     /****************************************************************
      * Initialization
      *****************************************************************/
-    VTR_ASSERT(packer_opts.packer_algorithm == PACK_GREEDY);
-
     t_cluster_progress_stats cluster_stats;
 
     //int num_molecules, num_molecules_processed, mols_since_last_print, blocks_since_last_analysis,
diff --git a/vpr/src/place/RL_agent_util.cpp b/vpr/src/place/RL_agent_util.cpp
index e080b335fe7..1f0f45ae053 100644
--- a/vpr/src/place/RL_agent_util.cpp
+++ b/vpr/src/place/RL_agent_util.cpp
@@ -59,7 +59,7 @@ std::pair<std::unique_ptr<MoveGenerator>, std::unique_ptr<MoveGenerator>> create
             second_state_avail_moves.push_back(e_move_type::NOC_ATTRACTION_CENTROID);
         }
 
-        if (placer_opts.place_agent_algorithm == E_GREEDY) {
+        if (placer_opts.place_agent_algorithm == e_agent_algorithm::E_GREEDY) {
             std::unique_ptr<EpsilonGreedyAgent> karmed_bandit_agent1, karmed_bandit_agent2;
             //agent's 1st state
             if (placer_opts.place_agent_space == e_agent_space::MOVE_BLOCK_TYPE) {

From f146fd9b6bcc93a51610cba75acadc827314f969 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 20 Oct 2024 14:18:17 -0400
Subject: [PATCH 12/31] remove unused types from vpr_types.h

---
 vpr/src/base/vpr_types.h       | 31 +++----------------------------
 vpr/src/place/place.cpp        |  3 +--
 vpr/src/place/timing_place.cpp | 12 +++++++-----
 vpr/src/place/timing_place.h   |  4 ++--
 vpr/src/timing/timing_util.cpp |  4 ++--
 vpr/src/timing/timing_util.h   |  2 +-
 6 files changed, 16 insertions(+), 40 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 035f2d5abd0..0daa84b5148 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -126,7 +126,7 @@ enum class e_router_lookahead {
 
 enum class e_route_bb_update {
     STATIC, ///<Router net bounding boxes are not updated
-    DYNAMIC ///<Rotuer net bounding boxes are updated
+    DYNAMIC ///<Router net bounding boxes are updated
 };
 
 enum class e_router_initial_timing {
@@ -487,7 +487,7 @@ struct t_net_power {
     float probability;
 
     /**
-     * @brief Transistion density - average # of transitions per clock cycle
+     * @brief Transition density - average # of transitions per clock cycle
      *
      * For example, a clock would have density = 2
      */
@@ -721,13 +721,6 @@ struct hash<t_pl_loc> {
 };
 } // namespace std
 
-struct t_place_region {
-    float capacity; ///<Capacity of this region, in tracks.
-    float inv_capacity;
-    float occupancy; ///<Expected number of tracks that will be occupied.
-    float cost;      ///<Current cost of this usage.
-};
-
 /**
  * @brief  Represents the placement location of a clustered block
  *
@@ -1491,7 +1484,7 @@ struct t_det_routing_arch {
 struct t_seg_details {
     int length = 0;
     int start = 0;
-    bool longline = 0;
+    bool longline = false;
     std::unique_ptr<bool[]> sb;
     std::unique_ptr<bool[]> cb;
     short arch_wire_switch = 0;
@@ -1575,16 +1568,6 @@ class t_chan_seg_details {
  */
 typedef vtr::NdMatrix<t_chan_seg_details, 3> t_chan_details;
 
-/**
- * @brief A linked list of float pointers.
- *
- * Used for keeping track of which pathcosts in the router have been changed.
- */
-struct t_linked_f_pointer {
-    t_linked_f_pointer* next;
-    float* fptr;
-};
-
 constexpr bool is_pin(e_rr_type type) { return (type == IPIN || type == OPIN); }
 constexpr bool is_chan(e_rr_type type) { return (type == CHANX || type == CHANY); }
 constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == SINK); }
@@ -1685,8 +1668,6 @@ struct t_non_configurable_rr_sets {
     std::set<std::set<t_node_edge>> edge_sets;
 };
 
-#define NO_PREVIOUS -1
-
 ///@brief Power estimation options
 struct t_power_opts {
     bool do_power; ///<Perform power estimation?
@@ -1704,12 +1685,6 @@ struct t_power_opts {
  * (imagine a 2D Cartesian grid with vertical lines starting at every grid point on a line parallel to the x-axis)
  */
 
-///@brief Type to store our list of token to enum pairings
-struct t_TokenPair {
-    const char* Str;
-    int Enum;
-};
-
 struct t_lb_type_rr_node; /* Defined in pack_types.h */
 
 /// @brief Stores settings for VPR server mode
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 5d06a546cc5..2c07a2354f7 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -316,8 +316,7 @@ void try_place(const Netlist<>& net_list,
         placement_delay_calc->set_tsu_margin_relative(placer_opts.tsu_rel_margin);
         placement_delay_calc->set_tsu_margin_absolute(placer_opts.tsu_abs_margin);
 
-        timing_info = make_setup_timing_info(placement_delay_calc,
-                                             placer_opts.timing_update_type);
+        timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type);
 
         placer_setup_slacks = std::make_unique<PlacerSetupSlacks>(cluster_ctx.clb_nlist, netlist_pin_lookup);
 
diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp
index 1e4985b7852..021bb6211fb 100644
--- a/vpr/src/place/timing_place.cpp
+++ b/vpr/src/place/timing_place.cpp
@@ -63,7 +63,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf
         ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin);
         int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin);
         // Routing for placement is not flat (at least for the time being)
-        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), false);
+        float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false);
 
         float new_crit = pow(clb_pin_crit, crit_params.crit_exponent);
         /*
@@ -74,13 +74,15 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf
          */
         if (!first_time_update_criticality) {
             if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) {
-                place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net));
+                place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
             } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) {
-                place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), place_move_ctx.highly_crit_pins.end());
+                place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)),
+                                                      place_move_ctx.highly_crit_pins.end());
             }
         } else {
-            if (new_crit > crit_params.crit_limit)
-                place_move_ctx.highly_crit_pins.push_back(std::make_pair(clb_net, pin_index_in_net));
+            if (new_crit > crit_params.crit_limit) {
+                place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net);
+            }
         }
 
         /* The placer likes a great deal of contrast between criticalities.
diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h
index 7ccf73c12f4..852c1aa6297 100644
--- a/vpr/src/place/timing_place.h
+++ b/vpr/src/place/timing_place.h
@@ -102,8 +102,8 @@ class PlacerCriticalities {
 
   public: //Lifetime
     PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup);
-    PlacerCriticalities(const PlacerCriticalities& clb_nlist) = delete;
-    PlacerCriticalities& operator=(const PlacerCriticalities& clb_nlist) = delete;
+    PlacerCriticalities(const PlacerCriticalities&) = delete;
+    PlacerCriticalities& operator=(const PlacerCriticalities&) = delete;
 
   public: //Accessors
     ///@brief Returns the criticality of the specified connection.
diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp
index 536667faa51..a210c0dbdcd 100644
--- a/vpr/src/timing/timing_util.cpp
+++ b/vpr/src/timing/timing_util.cpp
@@ -697,14 +697,14 @@ std::map<tatum::DomainId, size_t> count_clock_fanouts(const tatum::TimingGraph&
  */
 float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info,
                                         const ClusteredPinAtomPinsLookup& pin_lookup,
-                                        const ParentPinId& pin_id,
+                                        const ParentPinId pin_id,
                                         bool is_flat) {
     float pin_crit = 0.;
     if (is_flat) {
         pin_crit = timing_info.setup_pin_criticality(convert_to_atom_pin_id(pin_id));
     } else {
         //There may be multiple atom netlist pins connected to this CLB pin
-        for (const auto atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) {
+        for (const AtomPinId atom_pin : pin_lookup.connected_atom_pins(convert_to_cluster_pin_id(pin_id))) {
             //Take the maximum of the atom pin criticality as the CLB pin criticality
             pin_crit = std::max(pin_crit, timing_info.setup_pin_criticality(atom_pin));
         }
diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h
index 51fc0491c28..e4d45c84213 100644
--- a/vpr/src/timing/timing_util.h
+++ b/vpr/src/timing/timing_util.h
@@ -89,7 +89,7 @@ std::map<tatum::DomainId, size_t> count_clock_fanouts(const tatum::TimingGraph&
 //Return the criticality of a net's pin in the CLB netlist
 float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info,
                                         const ClusteredPinAtomPinsLookup& pin_lookup,
-                                        const ParentPinId& clb_pin,
+                                        const ParentPinId clb_pin,
                                         bool is_flat);
 
 //Return the setup slack of a net's pin in the CLB netlist

From 7a64e46db81c3354c4bd5b8871003c2866591cf1 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 21 Oct 2024 16:22:31 -0400
Subject: [PATCH 13/31] converted logging macros in annealer.cpp to methods

---
 vpr/src/base/vpr_types.h               |   9 +-
 vpr/src/draw/draw.cpp                  |  15 --
 vpr/src/draw/draw_basic.cpp            |  13 --
 vpr/src/draw/draw_floorplanning.cpp    |   3 -
 vpr/src/draw/draw_mux.cpp              |  70 +++------
 vpr/src/draw/draw_rr.cpp               |  14 --
 vpr/src/draw/draw_rr_edges.cpp         |  17 ---
 vpr/src/draw/draw_searchbar.cpp        |  24 +---
 vpr/src/draw/draw_toggle_functions.cpp |  33 +----
 vpr/src/draw/draw_triangle.cpp         |  30 +---
 vpr/src/place/annealer.cpp             | 189 +++++++++++--------------
 vpr/src/place/annealer.h               |   5 +
 vpr/src/place/initial_placement.cpp    |   9 +-
 vpr/src/place/move_utils.cpp           |  32 ++---
 vpr/src/place/move_utils.h             |   2 -
 vpr/src/place/net_cost_handler.cpp     |   2 +-
 vpr/src/place/net_cost_handler.h       |   6 +-
 vpr/src/place/place.cpp                |  42 +-----
 vpr/src/place/placer_breakpoint.cpp    |  39 ++---
 vpr/src/place/placer_breakpoint.h      |   5 +-
 vpr/src/route/connection_router.cpp    |  68 ++++-----
 vpr/src/route/route.cpp                |  34 ++---
 vpr/src/route/route_debug.cpp          |   6 +-
 23 files changed, 216 insertions(+), 451 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 0daa84b5148..3a4d89f0b19 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -86,6 +86,12 @@ enum class ScreenUpdatePriority {
     MAJOR = 1
 };
 
+#ifdef VTR_ENABLE_DEBUG_LOGGING
+constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = true;
+#else
+constexpr bool VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR = false;
+#endif
+
 #define MAX_SHORT 32767
 
 /* Values large enough to be way out of range for any data, but small enough
@@ -1101,7 +1107,6 @@ struct t_placer_opts {
     float place_agent_gamma;
     float place_dm_rlim;
     e_agent_space place_agent_space;
-    //int place_timing_cost_func;
     std::string place_reward_fun;
     float place_crit_limit;
     int place_constraint_expand;
@@ -1492,7 +1497,7 @@ struct t_seg_details {
     short arch_opin_between_dice_switch = 0;
     float Rmetal = 0;
     float Cmetal = 0;
-    bool twisted = 0;
+    bool twisted = false;
     enum Direction direction = Direction::NONE;
     int group_start = 0;
     int group_size = 0;
diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp
index 716647c7f36..c77ab624c5c 100644
--- a/vpr/src/draw/draw.cpp
+++ b/vpr/src/draw/draw.cpp
@@ -13,14 +13,11 @@
  */
 
 #include <cstdio>
-#include <cfloat>
 #include <cstring>
 #include <cmath>
 #include <algorithm>
-#include <sstream>
 #include <array>
 #include <iostream>
-#include <time.h>
 
 #include "vtr_assert.h"
 #include "vtr_ndoffsetmatrix.h"
@@ -29,7 +26,6 @@
 #include "vtr_color_map.h"
 #include "vtr_path.h"
 
-#include "vpr_utils.h"
 #include "vpr_error.h"
 
 #include "globals.h"
@@ -37,15 +33,10 @@
 #include "draw.h"
 #include "draw_basic.h"
 #include "draw_rr.h"
-#include "draw_rr_edges.h"
 #include "draw_toggle_functions.h"
-#include "draw_triangle.h"
-#include "draw_mux.h"
 #include "draw_searchbar.h"
-#include "read_xml_arch_file.h"
 #include "draw_global.h"
 #include "intra_logic_block.h"
-#include "atom_netlist.h"
 #include "tatum/report/TimingPathCollector.hpp"
 #include "hsl.h"
 #include "route_export.h"
@@ -53,19 +44,13 @@
 #include "save_graphics.h"
 #include "timing_info.h"
 #include "physical_types.h"
-#include "route_common.h"
-#include "breakpoint.h"
 #include "manual_moves.h"
 #include "draw_noc.h"
 #include "draw_floorplanning.h"
 
 #include "move_utils.h"
 #include "ui_setup.h"
-#include "buttons.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp
index 64b3d49979f..43fc2b78b32 100644
--- a/vpr/src/draw/draw_basic.cpp
+++ b/vpr/src/draw/draw_basic.cpp
@@ -2,20 +2,15 @@
  * that aren't RR nodes or muxes (they have their own file).
  * All functions in this file contain the prefix draw_. */
 #include <cstdio>
-#include <cfloat>
-#include <cstring>
 #include <cmath>
 #include <algorithm>
 #include <sstream>
 #include <array>
-#include <iostream>
 
 #include "vtr_assert.h"
 #include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
 #include "vtr_log.h"
 #include "vtr_color_map.h"
-#include "vtr_path.h"
 
 #include "vpr_utils.h"
 #include "vpr_error.h"
@@ -26,21 +21,13 @@
 #include "draw_rr.h"
 #include "draw_rr_edges.h"
 #include "draw_basic.h"
-#include "draw_toggle_functions.h"
 #include "draw_triangle.h"
-#include "draw_searchbar.h"
-#include "draw_mux.h"
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
-#include "intra_logic_block.h"
 #include "move_utils.h"
 #include "route_export.h"
 #include "tatum/report/TimingPathCollector.hpp"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
-
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
 #    include <time.h>
diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp
index 8e93d0ca7bd..9ba201987aa 100644
--- a/vpr/src/draw/draw_floorplanning.cpp
+++ b/vpr/src/draw/draw_floorplanning.cpp
@@ -23,9 +23,6 @@
 #include "route_export.h"
 #include "tatum/report/TimingPathCollector.hpp"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp
index 746af57d811..e741112b6ba 100644
--- a/vpr/src/draw/draw_mux.cpp
+++ b/vpr/src/draw/draw_mux.cpp
@@ -1,41 +1,15 @@
 /*draw_mux.cpp contains all functions that draw muxes.*/
-#include <cstdio>
-#include <cfloat>
-#include <cstring>
-#include <cmath>
+
 #include <algorithm>
-#include <sstream>
 #include <array>
-#include <iostream>
 
 #include "vtr_assert.h"
-#include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
 #include "vtr_color_map.h"
-#include "vtr_path.h"
-
-#include "vpr_utils.h"
-#include "vpr_error.h"
-
-#include "globals.h"
 #include "draw_color.h"
-#include "draw.h"
-#include "draw_rr.h"
-#include "draw_rr_edges.h"
-#include "draw_basic.h"
-#include "draw_toggle_functions.h"
-#include "draw_triangle.h"
-#include "draw_searchbar.h"
+
 #include "draw_mux.h"
 #include "read_xml_arch_file.h"
-#include "draw_global.h"
-
-#include "move_utils.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
@@ -64,39 +38,31 @@ ezgl::rectangle draw_mux(ezgl::point2d origin, e_side orientation, float height,
     switch (orientation) {
         case TOP:
             //Clock-wise from bottom left
-            mux_polygon.push_back({origin.x - height / 2, origin.y - width / 2});
-            mux_polygon.push_back(
-                {origin.x - (scale * height) / 2, origin.y + width / 2});
-            mux_polygon.push_back(
-                {origin.x + (scale * height) / 2, origin.y + width / 2});
-            mux_polygon.push_back({origin.x + height / 2, origin.y - width / 2});
+            mux_polygon.emplace_back(origin.x - height / 2, origin.y - width / 2);
+            mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y + width / 2);
+            mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y + width / 2);
+            mux_polygon.emplace_back(origin.x + height / 2, origin.y - width / 2);
             break;
         case BOTTOM:
             //Clock-wise from bottom left
-            mux_polygon.push_back(
-                {origin.x - (scale * height) / 2, origin.y - width / 2});
-            mux_polygon.push_back({origin.x - height / 2, origin.y + width / 2});
-            mux_polygon.push_back({origin.x + height / 2, origin.y + width / 2});
-            mux_polygon.push_back(
-                {origin.x + (scale * height) / 2, origin.y - width / 2});
+            mux_polygon.emplace_back(origin.x - (scale * height) / 2, origin.y - width / 2);
+            mux_polygon.emplace_back(origin.x - height / 2, origin.y + width / 2);
+            mux_polygon.emplace_back(origin.x + height / 2, origin.y + width / 2);
+            mux_polygon.emplace_back(origin.x + (scale * height) / 2, origin.y - width / 2);
             break;
         case LEFT:
             //Clock-wise from bottom left
-            mux_polygon.push_back(
-                {origin.x - width / 2, origin.y - (scale * height) / 2});
-            mux_polygon.push_back(
-                {origin.x - width / 2, origin.y + (scale * height) / 2});
-            mux_polygon.push_back({origin.x + width / 2, origin.y + height / 2});
-            mux_polygon.push_back({origin.x + width / 2, origin.y - height / 2});
+            mux_polygon.emplace_back(origin.x - width / 2, origin.y - (scale * height) / 2);
+            mux_polygon.emplace_back(origin.x - width / 2, origin.y + (scale * height) / 2);
+            mux_polygon.emplace_back(origin.x + width / 2, origin.y + height / 2);
+            mux_polygon.emplace_back(origin.x + width / 2, origin.y - height / 2);
             break;
         case RIGHT:
             //Clock-wise from bottom left
-            mux_polygon.push_back({origin.x - width / 2, origin.y - height / 2});
-            mux_polygon.push_back({origin.x - width / 2, origin.y + height / 2});
-            mux_polygon.push_back(
-                {origin.x + width / 2, origin.y + (scale * height) / 2});
-            mux_polygon.push_back(
-                {origin.x + width / 2, origin.y - (scale * height) / 2});
+            mux_polygon.emplace_back(origin.x - width / 2, origin.y - height / 2);
+            mux_polygon.emplace_back(origin.x - width / 2, origin.y + height / 2);
+            mux_polygon.emplace_back(origin.x + width / 2, origin.y + (scale * height) / 2);
+            mux_polygon.emplace_back(origin.x + width / 2, origin.y - (scale * height) / 2);
             break;
 
         default:
diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp
index 02645f6baf5..abfbf0babe8 100644
--- a/vpr/src/draw/draw_rr.cpp
+++ b/vpr/src/draw/draw_rr.cpp
@@ -1,20 +1,13 @@
 /*draw_rr.cpp contains all functions that relate to drawing routing resources.*/
 #include <cstdio>
-#include <cfloat>
-#include <cstring>
 #include <cmath>
 #include <algorithm>
-#include <sstream>
 #include <array>
-#include <iostream>
 
 #include "rr_graph_fwd.h"
 #include "vtr_assert.h"
 #include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
 #include "vtr_color_map.h"
-#include "vtr_path.h"
 
 #include "vpr_utils.h"
 #include "vpr_error.h"
@@ -25,19 +18,12 @@
 #include "draw_rr.h"
 #include "draw_rr_edges.h"
 #include "draw_basic.h"
-#include "draw_toggle_functions.h"
 #include "draw_triangle.h"
 #include "draw_searchbar.h"
 #include "draw_mux.h"
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
 
-#include "move_utils.h"
-
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
-
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
 #    include <time.h>
diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp
index 274e02e0fb3..c4e8cbe507b 100644
--- a/vpr/src/draw/draw_rr_edges.cpp
+++ b/vpr/src/draw/draw_rr_edges.cpp
@@ -1,19 +1,8 @@
 /*draw_rr_edges.cpp contains all functions that draw lines between RR nodes.*/
-#include <cstdio>
-#include <cfloat>
-#include <cstring>
-#include <cmath>
 #include <algorithm>
-#include <sstream>
-#include <array>
-#include <iostream>
 
 #include "vtr_assert.h"
-#include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
 #include "vtr_color_map.h"
-#include "vtr_path.h"
 
 #include "vpr_utils.h"
 #include "vpr_error.h"
@@ -23,18 +12,12 @@
 #include "draw.h"
 #include "draw_rr.h"
 #include "draw_rr_edges.h"
-#include "draw_toggle_functions.h"
 #include "draw_triangle.h"
 #include "draw_searchbar.h"
-#include "draw_mux.h"
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
 #include "draw_basic.h"
-#include "move_utils.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp
index 00a1208bcba..834457e9263 100644
--- a/vpr/src/draw/draw_searchbar.cpp
+++ b/vpr/src/draw/draw_searchbar.cpp
@@ -1,42 +1,20 @@
 /*draw_searchbar.cpp contains all functions related to searchbar actions.*/
 #include <cstdio>
-#include <cfloat>
-#include <cstring>
-#include <cmath>
-#include <algorithm>
-#include <sstream>
 #include <array>
-#include <iostream>
 
 #include "netlist_fwd.h"
-#include "vtr_assert.h"
-#include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
-#include "vtr_color_map.h"
-#include "vtr_path.h"
 
 #include "vpr_utils.h"
-#include "vpr_error.h"
 
 #include "globals.h"
 #include "draw_color.h"
 #include "draw.h"
 #include "draw_rr.h"
-#include "draw_rr_edges.h"
 #include "draw_basic.h"
-#include "draw_toggle_functions.h"
-#include "draw_triangle.h"
 #include "draw_searchbar.h"
-#include "draw_mux.h"
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
 #include "intra_logic_block.h"
-#include "move_utils.h"
-
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
@@ -261,7 +239,7 @@ std::set<RRNodeId> draw_expand_non_configurable_rr_nodes(RRNodeId from_node) {
 
 void deselect_all() {
     // Sets the color of all clbs, nets and rr_nodes to the default.
-    // as well as clearing the highlighed sub-block
+    // as well as clearing the highlighted sub-block
 
     t_draw_state* draw_state = get_draw_state_vars();
     const auto& cluster_ctx = g_vpr_ctx.clustering();
diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp
index 0f69b4c6087..9dab5955450 100644
--- a/vpr/src/draw/draw_toggle_functions.cpp
+++ b/vpr/src/draw/draw_toggle_functions.cpp
@@ -1,43 +1,19 @@
 
-
-#include <cstdio>
-#include <cfloat>
 #include <cstring>
-#include <cmath>
-#include <algorithm>
-#include <sstream>
 #include <array>
 #include <iostream>
 
-#include "vtr_assert.h"
-#include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
-#include "vtr_color_map.h"
-#include "vtr_path.h"
-
 #include "vpr_utils.h"
 #include "vpr_error.h"
 
 #include "globals.h"
 #include "draw_color.h"
 #include "draw.h"
-#include "draw_rr.h"
-#include "draw_rr_edges.h"
 #include "draw_toggle_functions.h"
-#include "draw_triangle.h"
-#include "draw_searchbar.h"
-#include "draw_mux.h"
-#include "read_xml_arch_file.h"
+
 #include "draw_global.h"
 #include "draw_basic.h"
-#include "hsl.h"
-#include "move_utils.h"
-#include "intra_logic_block.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
@@ -70,13 +46,10 @@ constexpr float EMPTY_BLOCK_LIGHTEN_FACTOR = 0.20;
  * @param app ezgl::application
  */
 void toggle_nets_cbk(GtkComboBox* self, ezgl::application* app) {
-    std::cout << "Nets toggled" << std::endl;
     enum e_draw_nets new_state;
     t_draw_state* draw_state = get_draw_state_vars();
-    std::cout << draw_state << std::endl;
     gchar* setting = gtk_combo_box_text_get_active_text(
         GTK_COMBO_BOX_TEXT(self));
-    std::cout << setting << std::endl;
     // assign corresponding enum value to draw_state->show_nets
     if (strcmp(setting, "None") == 0)
         new_state = DRAW_NO_NETS;
@@ -467,7 +440,7 @@ void select_layer_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/
             // Only iterate through checkboxes with name "Layer ...", skip Cross Layer Connection
             if (std::string(name).find("Layer") != std::string::npos
                 && std::string(name).find("Cross") == std::string::npos) {
-                // Change the the boolean of the draw_layer_display vector depending on checkbox
+                // Change the boolean of the draw_layer_display vector depending on checkbox
                 if (state) {
                     draw_state->draw_layer_display[index].visible = true;
                 } else {
@@ -492,7 +465,7 @@ void transparency_cbk(GtkWidget* widget, gint /*response_id*/, gpointer /*data*/
 
     int index = 0;
     // Iterate over transparency layers
-    for (GList* iter = children; iter != NULL; iter = g_list_next(iter)) {
+    for (GList* iter = children; iter != nullptr; iter = g_list_next(iter)) {
         if (GTK_IS_SPIN_BUTTON(iter->data)) {
             GtkWidget* spin_button = GTK_WIDGET(iter->data);
             const gchar* name = gtk_widget_get_name(spin_button);
diff --git a/vpr/src/draw/draw_triangle.cpp b/vpr/src/draw/draw_triangle.cpp
index 370868efbbc..b37785b3ab1 100644
--- a/vpr/src/draw/draw_triangle.cpp
+++ b/vpr/src/draw/draw_triangle.cpp
@@ -1,40 +1,12 @@
-#include <cstdio>
-#include <cfloat>
-#include <cstring>
+
 #include <cmath>
-#include <algorithm>
-#include <sstream>
-#include <array>
-#include <iostream>
 
 #include "vtr_assert.h"
-#include "vtr_ndoffsetmatrix.h"
-#include "vtr_memory.h"
-#include "vtr_log.h"
 #include "vtr_color_map.h"
-#include "vtr_path.h"
-
-#include "vpr_utils.h"
-#include "vpr_error.h"
-
-#include "globals.h"
 #include "draw_color.h"
-#include "draw.h"
-#include "draw_rr.h"
-#include "draw_rr_edges.h"
-#include "draw_toggle_functions.h"
 #include "draw_triangle.h"
-#include "draw_searchbar.h"
-#include "draw_mux.h"
-#include "read_xml_arch_file.h"
 #include "draw_global.h"
-#include "draw_basic.h"
 
-#include "move_utils.h"
-
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "move_utils.h"
-#endif
 
 #ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
               * track CPU runtime.														   */
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 6aad4e512eb..5876f468e6f 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -15,78 +15,7 @@
 #include "NetPinTimingInvalidator.h"
 #include "place_timing_update.h"
 #include "read_place.h"
-
-#ifdef VTR_ENABLE_DEBUG_LOGGIING
-#    define LOG_MOVE_STATS_HEADER()                               \
-        do {                                                      \
-            if (f_move_stats_file) {                              \
-                fprintf(f_move_stats_file.get(),                  \
-                        "temp,from_blk,to_blk,from_type,to_type," \
-                        "blk_count,"                              \
-                        "delta_cost,delta_bb_cost,delta_td_cost," \
-                        "outcome,reason\n");                      \
-            }                                                     \
-        } while (false)
-
-#    define LOG_MOVE_STATS_PROPOSED(t, affected_blocks)                                        \
-        do {                                                                                   \
-            if (f_move_stats_file) {                                                           \
-                auto& place_ctx = g_vpr_ctx.placement();                                       \
-                auto& cluster_ctx = g_vpr_ctx.clustering();                                    \
-                ClusterBlockId b_from = affected_blocks.moved_blocks[0].block_num;             \
-                                                                                               \
-                t_pl_loc to = affected_blocks.moved_blocks[0].new_loc;                         \
-                ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile];   \
-                                                                                               \
-                t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from); \
-                t_logical_block_type_ptr to_type = nullptr;                                    \
-                if (b_to) {                                                                    \
-                    to_type = cluster_ctx.clb_nlist.block_type(b_to);                          \
-                }                                                                              \
-                                                                                               \
-                fprintf(f_move_stats_file.get(),                                               \
-                        "%g,"                                                                  \
-                        "%d,%d,"                                                               \
-                        "%s,%s,"                                                               \
-                        "%d,",                                                                 \
-                        t,                                                                     \
-                        int(b_from), int(b_to),                                                \
-                        from_type->name, (to_type ? to_type->name : "EMPTY"),                  \
-                        affected_blocks.moved_blocks.size());                                  \
-            }                                                                                  \
-        } while (false)
-
-#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
-                                   outcome, reason)                          \
-        do {                                                                 \
-            if (f_move_stats_file) {                                         \
-                fprintf(f_move_stats_file.get(),                             \
-                        "%g,%g,%g,"                                          \
-                        "%s,%s\n",                                           \
-                        delta_cost, delta_bb_cost, delta_td_cost,            \
-                        outcome, reason);                                    \
-            }                                                                \
-        } while (false)
-
-#else
-
-#    define LOG_MOVE_STATS_HEADER()                      \
-        do {                                             \
-            fprintf(move_stats_file_.get(),             \
-                    "VTR_ENABLE_DEBUG_LOGGING disabled " \
-                    "-- No move stats recorded\n");      \
-        } while (false)
-
-#    define LOG_MOVE_STATS_PROPOSED(t, blocks_affected) \
-        do {                                            \
-        } while (false)
-
-#    define LOG_MOVE_STATS_OUTCOME(delta_cost, delta_bb_cost, delta_td_cost, \
-                                   outcome, reason)                          \
-        do {                                                                 \
-        } while (false)
-
-#endif
+#include "placer_breakpoint.h"
 
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
@@ -456,11 +385,6 @@ float PlacementAnnealer::estimate_starting_temperature() {
                      num_accepted, move_lim);
     }
 
-#ifdef VERBOSE
-    /* Print stats related to finding the initital temp. */
-    VTR_LOG("std_dev: %g, average cost: %g, starting temp: %g\n", std_dev, av, 20. * std_dev);
-#endif
-
     // Improved initial placement uses a fast SA for NoC routers and centroid placement
     // for other blocks. The temperature is reduced to prevent SA from destroying the initial placement
     float init_temp = std_dev / 64;
@@ -483,9 +407,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     float rlim_escape_fraction = placer_opts_.rlim_escape_fraction;
     float timing_tradeoff = placer_opts_.timing_tradeoff;
 
-    PlaceCritParams crit_params;
-    crit_params.crit_exponent = annealing_state_.crit_exponent;
-    crit_params.crit_limit = placer_opts_.place_crit_limit;
+    PlaceCritParams crit_params{annealing_state_.crit_exponent,
+                                placer_opts_.place_crit_limit};
 
     // move type and block type chosen by the agent
     t_propose_action proposed_action{e_move_type::UNIFORM, -1};
@@ -537,7 +460,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
         ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
     }
-    LOG_MOVE_STATS_PROPOSED(t, blocks_affected_);
+
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) LOG_MOVE_STATS_PROPOSED();
 
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
                    "\t\tBefore move Place cost %e, bb_cost %e, timing cost %e\n",
@@ -546,10 +470,12 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     e_move_result move_outcome = e_move_result::ABORTED;
 
     if (create_move_outcome == e_create_move::ABORT) {
-        LOG_MOVE_STATS_OUTCOME(std::numeric_limits<float>::quiet_NaN(),
-                               std::numeric_limits<float>::quiet_NaN(),
-                               std::numeric_limits<float>::quiet_NaN(), "ABORTED",
-                               "illegal move");
+        if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+            LOG_MOVE_STATS_OUTCOME(std::numeric_limits<double>::quiet_NaN(),
+                                   std::numeric_limits<double>::quiet_NaN(),
+                                   std::numeric_limits<double>::quiet_NaN(), "ABORTED",
+                                   "illegal move");
+        }
 
         move_outcome = ABORTED;
 
@@ -747,7 +673,9 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         move_outcome_stats.delta_bb_cost_abs = bb_delta_c;
         move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
 
-        LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), "");
+        if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+            LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), "");
+        }
     }
     move_outcome_stats.outcome = move_outcome;
 
@@ -759,20 +687,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor);
     }
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    ifndef NO_GRAPHICS
-    stop_placement_and_check_breakpoints(blocks_affected, move_outcome, delta_c, bb_delta_c, timing_delta_c);
-#    endif
+#ifndef NO_GRAPHICS
+    stop_placement_and_check_breakpoints(blocks_affected_, move_outcome, delta_c, bb_delta_c, timing_delta_c);
 #endif
 
+
     // Clear the data structure containing block move info
     blocks_affected_.clear_move_blocks();
 
-#if 0
-    // Check that each accepted swap yields a valid placement. This will
-    // greatly slow the placer, but can debug some issues.
-    check_place(*costs, delay_model, criticalities, place_algorithm, noc_opts);
-#endif
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
                    "\t\tAfter move Place cost %e, bb_cost %e, timing cost %e\n",
                    costs_.cost, costs_.bb_cost, costs_.timing_cost);
@@ -786,12 +708,9 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
          * for normalizing the tradeoff between timing and wirelength (bb) */
         if (outer_crit_iter_count_ >= placer_opts_.recompute_crit_iter ||
             placer_opts_.inner_loop_recompute_divider != 0) {
-#ifdef VERBOSE
-            VTR_LOG("Outer loop recompute criticalities\n");
-#endif
-            PlaceCritParams crit_params;
-            crit_params.crit_exponent = annealing_state_.crit_exponent;
-            crit_params.crit_limit = placer_opts_.place_crit_limit;
+
+            PlaceCritParams crit_params{annealing_state_.crit_exponent,
+                                        placer_opts_.place_crit_limit};
 
             //Update all timing related classes
             perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_,
@@ -844,13 +763,9 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
             if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) {
 
                 inner_crit_iter_count = 0;
-#ifdef VERBOSE
-                VTR_LOG("Inner loop recompute criticalities\n");
-#endif
 
-                PlaceCritParams crit_params;
-                crit_params.crit_exponent = annealing_state_.crit_exponent;
-                crit_params.crit_limit = placer_opts_.place_crit_limit;
+                PlaceCritParams crit_params{annealing_state_.crit_exponent,
+                                            placer_opts_.place_crit_limit};
 
                 // Update all timing related classes
                 perform_full_timing_update(crit_params, delay_model_, criticalities_,
@@ -919,3 +834,63 @@ void PlacementAnnealer::start_quench() {
 std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> PlacementAnnealer::get_stats() const {
     return {swap_stats_, move_type_stats_, placer_stats_};
 }
+
+void PlacementAnnealer::LOG_MOVE_STATS_HEADER() {
+    // Writes the header line of the move-stats file; does nothing when no such file is open.
+    if (!move_stats_file_) {
+        return;
+    }
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        // Debug logging compiled in: emit the CSV column names.
+        fprintf(move_stats_file_.get(),
+                "temp,from_blk,to_blk,from_type,to_type,"
+                "blk_count,"
+                "delta_cost,delta_bb_cost,delta_td_cost,"
+                "outcome,reason\n");
+    } else {
+        fprintf(move_stats_file_.get(),
+                "VTR_ENABLE_DEBUG_LOGGING disabled "
+                "-- No move stats recorded\n");
+    }
+}
+
+// Writes the leading columns of one move-stats row: temperature, from/to block ids and types, moved-block count.
+void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& grid_blocks = placer_state_.grid_blocks();
+    if (move_stats_file_) {
+        // Only the first moved block is logged. NOTE(review): assumes moved_blocks is non-empty here -- confirm for aborted proposals.
+        ClusterBlockId b_from = blocks_affected_.moved_blocks[0].block_num;
+
+        // Block (if any) currently sitting at the first moved block's destination.
+        t_pl_loc to = blocks_affected_.moved_blocks[0].new_loc;
+        ClusterBlockId b_to = grid_blocks.block_at_location(to);
+
+        t_logical_block_type_ptr from_type = cluster_ctx.clb_nlist.block_type(b_from);
+        t_logical_block_type_ptr to_type = nullptr;
+        if (b_to) {
+            to_type = cluster_ctx.clb_nlist.block_type(b_to);
+        }
+
+        fprintf(move_stats_file_.get(),
+                "%g,"
+                "%d,%d,"
+                "%s,%s,"
+                "%zu,", // moved_blocks.size() is a size_t, so it needs %zu rather than %d
+                annealing_state_.t,
+                int(size_t(b_from)), int(size_t(b_to)), // ids go through size_t, as done for ClusterBlockId elsewhere
+                from_type->name, (to_type ? to_type->name : "EMPTY"),
+                blocks_affected_.moved_blocks.size());
+    }
+}
+
+void PlacementAnnealer::LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost,
+                                               const char* outcome, const char* reason) {
+    // Appends the three cost deltas, the outcome and the reason, then ends the line.
+    if (!move_stats_file_) {
+        return; // no move-stats file open: nothing to record
+    }
+    fprintf(move_stats_file_.get(),
+            "%g,%g,%g," "%s,%s\n",
+            delta_cost, delta_bb_cost, delta_td_cost, outcome, reason);
+}
\ No newline at end of file
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index b932ed56d7e..dc281e33a02 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -250,6 +250,11 @@ class PlacementAnnealer {
     int tot_iter_;
     bool quench_started_;
 
+    void LOG_MOVE_STATS_HEADER();
+    void LOG_MOVE_STATS_PROPOSED();
+    void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost,
+                                const char* outcome, const char* reason);
+
   private:
     ///@brief Find the starting temperature for the annealing loop.
     float estimate_starting_temperature();
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index bb76d0d6cc8..8ff45fa96dd 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -1038,11 +1038,10 @@ static void place_all_blocks(const t_placer_opts& placer_opts,
 
             auto blk_id_type = cluster_ctx.clb_nlist.block_type(blk_id);
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-            enable_placer_debug(placer_opts, blk_id);
-#else
-            (void)placer_opts;
-#endif
+            if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+                enable_placer_debug(placer_opts, blk_id);
+            }
+
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "Popped Block %d\n", size_t(blk_id));
 
             blocks_placed_since_heap_update++;
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 262a801c611..7b2d02ea965 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -12,10 +12,20 @@
 #include "place_constraints.h"
 #include "placer_state.h"
 
-//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true.
+//f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached.
+// When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true.
 //Note: The flag is only effective if compiled with VTR_ENABLE_DEBUG_LOGGING
 bool f_placer_breakpoint_reached = false;
 
+//Accessor for f_placer_breakpoint_reached: returns the current value of the flag
+bool placer_breakpoint_reached() {
+    return f_placer_breakpoint_reached;
+}
+//Mutator for f_placer_breakpoint_reached: overwrites the flag with the given value
+void set_placer_breakpoint_reached(bool flag) {
+    f_placer_breakpoint_reached = flag;
+}
+
 e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected,
                           ClusterBlockId b_from,
                           t_pl_loc to,
@@ -487,7 +497,6 @@ bool is_legal_swap_to_location(ClusterBlockId blk,
     return true;
 }
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 void enable_placer_debug(const t_placer_opts& placer_opts,
                          ClusterBlockId blk_id) {
     if (!blk_id.is_valid()) {
@@ -535,7 +544,6 @@ void enable_placer_debug(const t_placer_opts& placer_opts,
     if (active_blk_debug) f_placer_debug &= match_blk;
     if (active_net_debug) f_placer_debug &= match_net;
 }
-#endif
 
 ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts,
                                      int& logical_blk_type_index,
@@ -564,11 +572,10 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts,
             b_from = pick_from_block(logical_blk_type_index);
         }
     }
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-    enable_placer_debug(placer_opts, b_from);
-#else
-    (void)placer_opts;
-#endif
+
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        enable_placer_debug(placer_opts, b_from);
+    }
 
     return b_from;
 }
@@ -771,15 +778,6 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type,
     return true;
 }
 
-//Accessor for f_placer_breakpoint_reached
-bool placer_breakpoint_reached() {
-    return f_placer_breakpoint_reached;
-}
-
-void set_placer_breakpoint_reached(bool flag) {
-    f_placer_breakpoint_reached = flag;
-}
-
 bool find_to_loc_median(t_logical_block_type_ptr blk_type,
                         const t_pl_loc& from_loc,
                         const t_bb* limit_coords,
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 99151695dab..0ca993abe93 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -439,7 +439,6 @@ t_bb union_2d_bb(const std::vector<t_2D_bb>& tbb_vec);
 std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
                                        const std::vector<t_2D_bb>& bb_vec);
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 /**
  * @brief If the block ID passed to the placer_debug_net parameter of the command line is equal to blk_id, or if any of the nets
  * connected to the block share the same ID as the net ID passed to the placer_debug_net parameter of the command line,
@@ -450,6 +449,5 @@ std::pair<t_bb, t_bb> union_2d_bb_incr(const std::vector<t_2D_bb>& num_edge_vec,
  */
 void enable_placer_debug(const t_placer_opts& placer_opts,
                          ClusterBlockId blk_id);
-#endif
 
 #endif
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index a161fba3b3e..d213e2827ab 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -104,12 +104,12 @@ static double wirelength_crossing_count(size_t fanout);
 
 NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
                                PlacerState& placer_state,
-                               size_t num_nets,
                                bool cube_bb)
     : cube_bb_(cube_bb)
     , placer_state_(placer_state)
     , placer_opts_(placer_opts) {
     const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size();
 
     // Either 3D BB or per layer BB data structure are used, not both.
     if (cube_bb_) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 65fab00afc2..12ded6db838 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -43,11 +43,11 @@ class NetCostHandler {
      * are affected by a move and data needed per net about where their terminals are in order to quickly (incrementally) update
      * their wirelength costs. These data structures are (layer_)ts_bb_edge_new, (layer_)ts_bb_coord_new, ts_layer_sink_pin_count,
      * and ts_nets_to_update.
-     * @param num_nets Number of nets in the netlist used by the placement engine (currently clustered netlist)
+     * @param placer_opts Contains some parameters that determine how the bounding box is computed.
+     * @param placer_state Contains information about block locations and net bounding boxes.
      * @param cube_bb True if the 3D bounding box should be used, false otherwise.
-     * @param place_cost_exp It is an exponent to which you take the average inverse channel
      */
-    NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, size_t num_nets, bool cube_bb);
+    NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb);
 
     /**
      * @brief Finds the bb cost from scratch.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 2c07a2354f7..522d6877489 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -1,9 +1,6 @@
 #include <cstdio>
 #include <cmath>
 #include <memory>
-#include <fstream>
-#include <iostream>
-#include <numeric>
 #include <chrono>
 #include <optional>
 
@@ -11,29 +8,21 @@
 #include "vtr_assert.h"
 #include "vtr_log.h"
 #include "vtr_util.h"
-#include "vtr_random.h"
-#include "vtr_geometry.h"
 #include "vtr_time.h"
 #include "vtr_math.h"
-#include "vtr_ndmatrix.h"
 
 #include "vpr_types.h"
 #include "vpr_error.h"
 #include "vpr_utils.h"
-#include "vpr_net_pins_matrix.h"
 
 #include "globals.h"
 #include "place.h"
 #include "annealer.h"
 #include "read_place.h"
 #include "draw.h"
-#include "place_and_route.h"
-#include "net_delay.h"
-#include "timing_place_lookup.h"
 #include "timing_place.h"
 #include "read_xml_arch_file.h"
 #include "echo_files.h"
-#include "place_macro.h"
 #include "histogram.h"
 #include "place_util.h"
 #include "analytic_placer.h"
@@ -43,7 +32,6 @@
 #include "move_transactions.h"
 #include "move_utils.h"
 #include "place_constraints.h"
-#include "manual_moves.h"
 #include "buttons.h"
 
 #include "manual_move_generator.h"
@@ -56,14 +44,11 @@
 #include "tatum/echo_writer.hpp"
 #include "tatum/TimingReporter.hpp"
 
-#include "placer_breakpoint.h"
 #include "RL_agent_util.h"
 #include "place_checkpoint.h"
 
 #include "clustered_netlist_utils.h"
 
-#include "cluster_placement.h"
-
 #include "noc_place_utils.h"
 
 #include "net_cost_handler.h"
@@ -75,15 +60,6 @@
  */
 static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-#    include "draw_types.h"
-#    include "draw_global.h"
-#    include "draw_color.h"
-#endif
-
-/************** Types and defines local to place.c ***************************/
-constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
-
 /********************* Static subroutines local to place.c *******************/
 #ifdef VERBOSE
 void print_clb_placement(const char* fname);
@@ -369,6 +345,7 @@ void try_place(const Netlist<>& net_list,
         costs.bb_cost_norm = 1 / costs.bb_cost;
 
         /* Timing cost and normalization factors are not used */
+        constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
         costs.timing_cost = INVALID_COST;
         costs.timing_cost_norm = INVALID_COST;
     }
@@ -673,10 +650,8 @@ void try_place(const Netlist<>& net_list,
 
     free_placement_structs();
 
-    print_timing_stats("Placement Quench", post_quench_timing_stats,
-                       pre_quench_timing_stats);
-    print_timing_stats("Placement Total ", timing_ctx.stats,
-                       pre_place_timing_stats);
+    print_timing_stats("Placement Quench", post_quench_timing_stats, pre_quench_timing_stats);
+    print_timing_stats("Placement Total ", timing_ctx.stats, pre_place_timing_stats);
 
     VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n",
             p_runtime_ctx.f_update_td_costs_connections_elapsed_sec,
@@ -736,28 +711,19 @@ static NetCostHandler alloc_and_load_placement_structs(const t_placer_opts& plac
                                                        const std::vector<t_direct_inf>& directs,
                                                        PlacerState& placer_state,
                                                        std::optional<NocCostHandler>& noc_cost_handler) {
-    const auto& device_ctx = g_vpr_ctx.device();
-    const auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
 
     place_ctx.lock_loc_vars();
 
-    size_t num_nets = cluster_ctx.clb_nlist.nets().size();
-
     init_placement_context(placer_state.mutable_blk_loc_registry(), directs);
 
-    int max_pins_per_clb = 0;
-    for (const t_physical_tile_type& type : device_ctx.physical_tile_types) {
-        max_pins_per_clb = std::max(max_pins_per_clb, type.num_pins);
-    }
-
     place_ctx.compressed_block_grids = create_compressed_block_grids();
 
     if (noc_opts.noc) {
         noc_cost_handler.emplace(placer_state.block_locs());
     }
 
-    return NetCostHandler{placer_opts, placer_state, num_nets, place_ctx.cube_bb};
+    return NetCostHandler{placer_opts, placer_state, place_ctx.cube_bb};
 }
 
 /* Frees the major structures needed by the placer (and not needed       *
diff --git a/vpr/src/place/placer_breakpoint.cpp b/vpr/src/place/placer_breakpoint.cpp
index b576bc64f04..7b0f561f152 100644
--- a/vpr/src/place/placer_breakpoint.cpp
+++ b/vpr/src/place/placer_breakpoint.cpp
@@ -1,45 +1,52 @@
 #include "placer_breakpoint.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 
 //map of the available move types and their corresponding type number
 std::map<int, std::string> available_move_types = {
     {0, "Uniform"}};
 
-#    ifndef NO_GRAPHICS
+#ifndef NO_GRAPHICS
 //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state
 void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected) {
-    get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.clear();
+    BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state();
+
+    bp_state->blocks_affected_by_move.clear();
     for (size_t i = 0; i < blocksAffected.moved_blocks.size(); i++) {
         //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. And this vector has to be of type int to be recognized in expr_eval class
 
-        get_bp_state_globals()->get_glob_breakpoint_state()->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num));
+        bp_state->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num));
     }
 }
 
-void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c) {
+void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome,
+                                          double delta_c, double bb_delta_c, double timing_delta_c) {
     t_draw_state* draw_state = get_draw_state_vars();
-    if (draw_state->list_of_breakpoints.size() != 0) {
+    BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state();
+
+    if (!draw_state->list_of_breakpoints.empty()) {
         //update current information
         transform_blocks_affected(blocks_affected);
-        get_bp_state_globals()->get_glob_breakpoint_state()->move_num++;
-        get_bp_state_globals()->get_glob_breakpoint_state()->from_block = size_t(blocks_affected.moved_blocks[0].block_num);
+        bp_state->move_num++;
+        bp_state->from_block = size_t(blocks_affected.moved_blocks[0].block_num);
 
         //check for breakpoints
         set_placer_breakpoint_reached(check_for_breakpoints(true)); // the passed flag is true as we are in the placer
-        if (placer_breakpoint_reached())
-            breakpoint_info_window(get_bp_state_globals()->get_glob_breakpoint_state()->bp_description, *get_bp_state_globals()->get_glob_breakpoint_state(), true);
-    } else
+        if (placer_breakpoint_reached()) {
+            breakpoint_info_window(bp_state->bp_description, *bp_state, true);
+        }
+    } else {
         set_placer_breakpoint_reached(false);
+    }
 
     if (placer_breakpoint_reached() && draw_state->show_graphics) {
         std::string msg = available_move_types[0];
-        if (move_outcome == 0)
+        if (move_outcome == 0) {
             msg += vtr::string_fmt(", Rejected");
-        else if (move_outcome == 1)
+        } else if (move_outcome == 1) {
             msg += vtr::string_fmt(", Accepted");
-        else
+        } else {
             msg += vtr::string_fmt(", Aborted");
+        }
 
         msg += vtr::string_fmt(", Delta_cost: %1.6f (bb_delta_cost= %1.5f , timing_delta_c= %6.1e)", delta_c, bb_delta_c, timing_delta_c);
 
@@ -48,6 +55,4 @@ void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affect
     }
 }
 
-#    endif //NO_GRAPHICS
-
-#endif //VTR_ENABLE_DEBUG_LOGGING
+#endif //NO_GRAPHICS
diff --git a/vpr/src/place/placer_breakpoint.h b/vpr/src/place/placer_breakpoint.h
index c01ef77450c..510b7071e0d 100644
--- a/vpr/src/place/placer_breakpoint.h
+++ b/vpr/src/place/placer_breakpoint.h
@@ -7,14 +7,13 @@
 #include "breakpoint.h"
 #include "draw.h"
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 
 //transforms the vector moved_blocks to a vector of ints and adds it in glob_breakpoint_state
 void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected);
 
 //checks the breakpoint and see whether one of them was reached and pause place,emt accordingly
-void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome, double delta_c, double bb_delta_c, double timing_delta_c);
+void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affected, e_move_result move_outcome,
+                                          double delta_c, double bb_delta_c, double timing_delta_c);
 
-#endif
 
 #endif
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 5409d5ec49f..1cdb4c8cfd5 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -11,17 +11,10 @@ static bool relevant_node_to_target(const RRGraphView* rr_graph,
                                     RRNodeId node_to_add,
                                     RRNodeId target_node);
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 static void update_router_stats(RouterStats* router_stats,
                                 bool is_push,
-                                RRNodeId rr_node_id,
+                                RRNodeId rr_node_i,
                                 const RRGraphView* rr_graph);
-#else
-static void update_router_stats(RouterStats* router_stats,
-                                bool is_push,
-                                RRNodeId /*rr_node_id*/,
-                                const RRGraphView* /*rr_graph*/);
-#endif
 
 /** return tuple <found_path, retry_with_full_bb, cheapest> */
 template<typename Heap>
@@ -225,7 +218,7 @@ t_heap* ConnectionRouter<Heap>::timing_driven_route_connection_from_heap(RRNodeI
         // cheapest t_heap in current route tree to be expanded on
         cheapest = heap_.get_heap_head();
         update_router_stats(router_stats_,
-                            false,
+                            /*is_push=*/false,
                             cheapest->index,
                             rr_graph_);
 
@@ -315,7 +308,7 @@ vtr::vector<RRNodeId, t_heap> ConnectionRouter<Heap>::timing_driven_find_all_sho
         // cheapest t_heap in current route tree to be expanded on
         t_heap* cheapest = heap_.get_heap_head();
         update_router_stats(router_stats_,
-                            false,
+                            /*is_push=*/false,
                             cheapest->index,
                             rr_graph_);
 
@@ -610,7 +603,7 @@ void ConnectionRouter<Heap>::timing_driven_add_to_heap(const t_conn_cost_params&
 
         heap_.add_to_heap(next_ptr);
         update_router_stats(router_stats_,
-                            true,
+                            /*is_push=*/true,
                             to_node,
                             rr_graph_);
 
@@ -925,13 +918,13 @@ void ConnectionRouter<Heap>::add_route_tree_node_to_heap(
     }
 
     update_router_stats(router_stats_,
-                        true,
+                        /*is_push=*/true,
                         inode,
                         rr_graph_);
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-    router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++;
-#endif
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++;
+    }
 }
 
 /* Expand bb by inode's extents and clip against net_bb */
@@ -1073,45 +1066,38 @@ static inline bool relevant_node_to_target(const RRGraphView* rr_graph,
     return false;
 }
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
 static inline void update_router_stats(RouterStats* router_stats,
                                        bool is_push,
                                        RRNodeId rr_node_id,
                                        const RRGraphView* rr_graph) {
-#else
-static inline void update_router_stats(RouterStats* router_stats,
-                                       bool is_push,
-                                       RRNodeId /*rr_node_id*/,
-                                       const RRGraphView* /*rr_graph*/) {
-#endif
     if (is_push) {
         router_stats->heap_pushes++;
     } else {
         router_stats->heap_pops++;
     }
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-    auto node_type = rr_graph->node_type(rr_node_id);
-    VTR_ASSERT(node_type != NUM_RR_TYPES);
-
-    if (is_inter_cluster_node(*rr_graph, rr_node_id)) {
-        if (is_push) {
-            router_stats->inter_cluster_node_pushes++;
-            router_stats->inter_cluster_node_type_cnt_pushes[node_type]++;
-        } else {
-            router_stats->inter_cluster_node_pops++;
-            router_stats->inter_cluster_node_type_cnt_pops[node_type]++;
-        }
-    } else {
-        if (is_push) {
-            router_stats->intra_cluster_node_pushes++;
-            router_stats->intra_cluster_node_type_cnt_pushes[node_type]++;
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        auto node_type = rr_graph->node_type(rr_node_id);
+        VTR_ASSERT(node_type != NUM_RR_TYPES);
+
+        if (is_inter_cluster_node(*rr_graph, rr_node_id)) {
+            if (is_push) {
+                router_stats->inter_cluster_node_pushes++;
+                router_stats->inter_cluster_node_type_cnt_pushes[node_type]++;
+            } else {
+                router_stats->inter_cluster_node_pops++;
+                router_stats->inter_cluster_node_type_cnt_pops[node_type]++;
+            }
         } else {
-            router_stats->intra_cluster_node_pops++;
-            router_stats->intra_cluster_node_type_cnt_pops[node_type]++;
+            if (is_push) {
+                router_stats->intra_cluster_node_pushes++;
+                router_stats->intra_cluster_node_type_cnt_pushes[node_type]++;
+            } else {
+                router_stats->intra_cluster_node_pops++;
+                router_stats->intra_cluster_node_type_cnt_pops[node_type]++;
+            }
         }
     }
-#endif
 }
 
 std::unique_ptr<ConnectionRouterInterface> make_connection_router(e_heap_type heap_type,
diff --git a/vpr/src/route/route.cpp b/vpr/src/route/route.cpp
index 24ee0b5a8cf..5cd8f3faee3 100644
--- a/vpr/src/route/route.cpp
+++ b/vpr/src/route/route.cpp
@@ -592,10 +592,12 @@ bool route(const Netlist<>& net_list,
         //If the routing fails, print the overused info
         print_overused_nodes_status(router_opts, overuse_info);
 
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-        if (f_router_debug)
-            print_invalid_routing_info(net_list, is_flat);
-#endif
+        if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+            if (f_router_debug) {
+                print_invalid_routing_info(net_list, is_flat);
+            }
+        }
+
     }
 
     if (router_opts.with_timing_analysis) {
@@ -608,19 +610,19 @@ bool route(const Netlist<>& net_list,
     VTR_LOG(
         "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu ",
         router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops);
-#ifdef VTR_ENABLE_DEBUG_LOGGING
-    VTR_LOG(
-        "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ",
-        router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops,
-        router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops);
-    for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
-        VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]);
-        VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]);
-        VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]);
+    if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        VTR_LOG(
+            "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ",
+            router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops,
+            router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops);
+        for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) {
+            VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]);
+            VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]);
+            VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]);
+            VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]);
+            VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]);
+        }
     }
-#endif
     VTR_LOG("\n");
 
     return success;
diff --git a/vpr/src/route/route_debug.cpp b/vpr/src/route/route_debug.cpp
index 022b8da8071..6745b67013a 100644
--- a/vpr/src/route/route_debug.cpp
+++ b/vpr/src/route/route_debug.cpp
@@ -24,7 +24,7 @@ void enable_router_debug(
 
     router->set_router_debug(f_router_debug);
 
-#ifndef VTR_ENABLE_DEBUG_LOGGING
-    VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n");
-#endif
+    if constexpr (!VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
+        VTR_LOGV_WARN(f_router_debug, "Limited router debug output provided since compiled without VTR_ENABLE_DEBUG_LOGGING defined\n");
+    }
 }

From b240d7ff755417c6943ef5b14642df93eae921c1 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 4 Nov 2024 10:59:31 -0500
Subject: [PATCH 14/31] call c_str() for name member variable

---
 vpr/src/place/annealer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 5876f468e6f..b5e847eeb77 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -879,7 +879,7 @@ void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() {
                 "%d,",
                 annealing_state_.t,
                 int(b_from), int(b_to),
-                from_type->name, (to_type ? to_type->name : "EMPTY"),
+                from_type->name.c_str(), (to_type ? to_type->name.c_str() : "EMPTY"),
                 blocks_affected_.moved_blocks.size());
     }
 }

From e21e1321472ad83f37bcb55d82892721d381e588 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 6 Nov 2024 12:42:58 -0500
Subject: [PATCH 15/31] enum class e_move_result

---
 vpr/src/draw/manual_moves.cpp       |  6 +++---
 vpr/src/draw/manual_moves.h         |  4 ++--
 vpr/src/place/annealer.cpp          | 24 ++++++++++++------------
 vpr/src/place/move_generator.h      |  2 +-
 vpr/src/place/move_utils.cpp        | 19 +++++++++++++++++--
 vpr/src/place/move_utils.h          |  2 +-
 vpr/src/place/placer_breakpoint.cpp |  9 ++++-----
 7 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp
index 0becc4917a9..9554d1491eb 100644
--- a/vpr/src/draw/manual_moves.cpp
+++ b/vpr/src/draw/manual_moves.cpp
@@ -254,16 +254,16 @@ void manual_move_cost_summary_dialog() {
     switch (result) {
         //If the user accepts the manual move
         case GTK_RESPONSE_ACCEPT:
-            draw_state->manual_moves_state.manual_move_info.user_move_outcome = ACCEPTED;
+            draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ACCEPTED;
             application.update_message(msg);
             break;
         //If the user rejects the manual move
         case GTK_RESPONSE_REJECT:
-            draw_state->manual_moves_state.manual_move_info.user_move_outcome = REJECTED;
+            draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::REJECTED;
             application.update_message("Manual move was rejected");
             break;
         default:
-            draw_state->manual_moves_state.manual_move_info.user_move_outcome = ABORTED;
+            draw_state->manual_moves_state.manual_move_info.user_move_outcome = e_move_result::ABORTED;
             break;
     }
 
diff --git a/vpr/src/draw/manual_moves.h b/vpr/src/draw/manual_moves.h
index 7f78ff0e876..a5a90fb037f 100644
--- a/vpr/src/draw/manual_moves.h
+++ b/vpr/src/draw/manual_moves.h
@@ -56,8 +56,8 @@ struct ManualMovesInfo {
     double delta_bounding_box = 0;
     bool valid_input = true;
     t_pl_loc to_location;
-    e_move_result placer_move_outcome = ABORTED;
-    e_move_result user_move_outcome = ABORTED;
+    e_move_result placer_move_outcome = e_move_result::ABORTED;
+    e_move_result user_move_outcome = e_move_result::ABORTED;
 };
 
 /**
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 29defbada23..a5740cfe20a 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -339,12 +339,12 @@ float PlacementAnnealer::estimate_starting_temperature() {
         e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm,
                                              REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled);
 
-        if (swap_result == ACCEPTED) {
+        if (swap_result == e_move_result::ACCEPTED) {
             num_accepted++;
             av += costs_.cost;
             sum_of_squares += costs_.cost * costs_.cost;
             swap_stats_.num_swap_accepted++;
-        } else if (swap_result == ABORTED) {
+        } else if (swap_result == e_move_result::ABORTED) {
             swap_stats_.num_swap_aborted++;
         } else {
             swap_stats_.num_swap_rejected++;
@@ -455,7 +455,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                                    "illegal move");
         }
 
-        move_outcome = ABORTED;
+        move_outcome = e_move_result::ABORTED;
 
     } else {
         VTR_ASSERT(create_move_outcome == e_create_move::VALID);
@@ -553,7 +553,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         }
 #endif //NO_GRAPHICS
 
-        if (move_outcome == ACCEPTED) {
+        if (move_outcome == e_move_result::ACCEPTED) {
             costs_.cost += delta_c;
             costs_.bb_cost += bb_delta_c;
 
@@ -601,7 +601,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 #endif //NO_GRAPHICS
 
         } else {
-            VTR_ASSERT_SAFE(move_outcome == REJECTED);
+            VTR_ASSERT_SAFE(move_outcome == e_move_result::REJECTED);
 
             // Reset the net cost function flags first.
             net_cost_handler_.reset_move_nets();
@@ -652,7 +652,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         move_outcome_stats.delta_timing_cost_abs = timing_delta_c;
 
         if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
-            LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome ? "ACCEPTED" : "REJECTED"), "");
+            LOG_MOVE_STATS_OUTCOME(delta_c, bb_delta_c, timing_delta_c, (move_outcome == e_move_result::ACCEPTED ? "ACCEPTED" : "REJECTED"), "");
         }
     }
     move_outcome_stats.outcome = move_outcome;
@@ -721,11 +721,11 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
         e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm,
                                              timing_bb_factor, manual_move_enabled);
 
-        if (swap_result == ACCEPTED) {
+        if (swap_result == e_move_result::ACCEPTED) {
             // Move was accepted.  Update statistics that are useful for the annealing schedule.
             placer_stats_.single_swap_update(costs_);
             swap_stats_.num_swap_accepted++;
-        } else if (swap_result == ABORTED) {
+        } else if (swap_result == e_move_result::ABORTED) {
             swap_stats_.num_swap_aborted++;
         } else { // swap_result == REJECTED
             swap_stats_.num_swap_rejected++;
@@ -879,20 +879,20 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) {
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\tTemperature is: %e delta_c is %e\n", t, delta_c);
     if (delta_c <= 0) {
         VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(delta_c < 0)\n");
-        return ACCEPTED;
+        return e_move_result::ACCEPTED;
     }
 
     if (t == 0.) {
         VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(t == 0)\n");
-        return REJECTED;
+        return e_move_result::REJECTED;
     }
 
     float fnum = rng_.frand();
     float prob_fac = std::exp(-delta_c / t);
     if (prob_fac > fnum) {
         VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is accepted(hill climbing)\n");
-        return ACCEPTED;
+        return e_move_result::ACCEPTED;
     }
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n");
-    return REJECTED;
+    return e_move_result::REJECTED;
 }
\ No newline at end of file
diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h
index 42e318cc0a4..172c04d34f5 100644
--- a/vpr/src/place/move_generator.h
+++ b/vpr/src/place/move_generator.h
@@ -17,7 +17,7 @@ struct MoveOutcomeStats {
     float delta_bb_cost_abs = std::numeric_limits<float>::quiet_NaN();
     float delta_timing_cost_abs = std::numeric_limits<float>::quiet_NaN();
 
-    e_move_result outcome = ABORTED;
+    e_move_result outcome = e_move_result::ABORTED;
     float elapsed_time = std::numeric_limits<float>::quiet_NaN();
 };
 
diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp
index 0ddd7062ee0..4cf2086c277 100644
--- a/vpr/src/place/move_utils.cpp
+++ b/vpr/src/place/move_utils.cpp
@@ -1265,8 +1265,23 @@ bool intersect_range_limit_with_floorplan_constraints(ClusterBlockId b_from,
 }
 
 std::string e_move_result_to_string(e_move_result move_outcome) {
-    std::string move_result_to_string[] = {"Rejected", "Accepted", "Aborted"};
-    return move_result_to_string[move_outcome];
+    switch (move_outcome) {
+        case e_move_result::REJECTED:
+            return "Rejected";
+            break;
+
+        case e_move_result::ACCEPTED:
+            return "Accepted";
+            break;
+
+        case e_move_result::ABORTED:
+            return "Aborted";
+            break;
+
+        default:
+            return "Unsupported Move Outcome!";
+            break;
+    }
 }
 
 int find_free_layer(t_logical_block_type_ptr logical_block,
diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h
index 295d5fc6f4e..e5555648866 100644
--- a/vpr/src/place/move_utils.h
+++ b/vpr/src/place/move_utils.h
@@ -19,7 +19,7 @@ constexpr size_t SMALL_NET = 4;
 /* This is for the placement swap routines. A swap attempt could be       *
  * rejected, accepted or aborted (due to the limitations placed on the    *
  * carry chain support at this point).                                    */
-enum e_move_result {
+enum class e_move_result {
     REJECTED,
     ACCEPTED,
     ABORTED
diff --git a/vpr/src/place/placer_breakpoint.cpp b/vpr/src/place/placer_breakpoint.cpp
index 7b0f561f152..a31a0add053 100644
--- a/vpr/src/place/placer_breakpoint.cpp
+++ b/vpr/src/place/placer_breakpoint.cpp
@@ -11,10 +11,9 @@ void transform_blocks_affected(const t_pl_blocks_to_be_moved& blocksAffected) {
     BreakpointState* bp_state = get_bp_state_globals()->get_glob_breakpoint_state();
 
     bp_state->blocks_affected_by_move.clear();
-    for (size_t i = 0; i < blocksAffected.moved_blocks.size(); i++) {
+    for (const t_pl_moved_block& moved_block : blocksAffected.moved_blocks) {
         //size_t conversion is required since block_num is of type ClusterBlockId and can't be cast to an int. And this vector has to be of type int to be recognized in expr_eval class
-
-        bp_state->blocks_affected_by_move.push_back(size_t(blocksAffected.moved_blocks[i].block_num));
+        bp_state->blocks_affected_by_move.push_back(size_t(moved_block.block_num));
     }
 }
 
@@ -40,9 +39,9 @@ void stop_placement_and_check_breakpoints(t_pl_blocks_to_be_moved& blocks_affect
 
     if (placer_breakpoint_reached() && draw_state->show_graphics) {
         std::string msg = available_move_types[0];
-        if (move_outcome == 0) {
+        if (move_outcome == e_move_result::REJECTED) {
             msg += vtr::string_fmt(", Rejected");
-        } else if (move_outcome == 1) {
+        } else if (move_outcome == e_move_result::ACCEPTED) {
             msg += vtr::string_fmt(", Accepted");
         } else {
             msg += vtr::string_fmt(", Aborted");

From a69ee82f7d16a2e40b99e7f4d6b35c53b7f0625b Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 9 Nov 2024 17:55:16 -0500
Subject: [PATCH 16/31] add some comments and update golden results

---
 vpr/src/base/place_and_route.cpp              |  4 +-
 vpr/src/place/annealer.cpp                    | 44 +++++++++----------
 vpr/src/place/annealer.h                      | 16 ++++++-
 .../config/golden_results.txt                 |  2 +-
 .../config/golden_results.txt                 |  4 +-
 .../config/golden_results.txt                 |  2 +-
 .../config/golden_results.txt                 |  2 +-
 .../config/golden_results.txt                 |  2 +-
 .../config/golden_results.txt                 |  2 +-
 9 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index 6c448af6c7b..ba7e20ccd80 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -326,8 +326,8 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                 placer_opts.place_chan_width = current;
                 try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts,
                           arch->Chans, det_routing_arch, segment_inf,
-                          /*is_flat=*/arch->directs,
-                          false);
+                          arch->directs,
+                          /*is_flat=*/false);
             }
 
             success = route(router_net_list,
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index a5740cfe20a..44fd2e85f12 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -491,18 +491,18 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             // Invalidates timing of modified connections for incremental timing updates.
             pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
-            /* Update the connection_timing_cost and connection_delay *
-             * values from the temporary values.                      */
+            /* Update the connection_timing_cost and connection_delay
+             * values from the temporary values. */
             placer_state_.mutable_timing().commit_td_cost(blocks_affected_);
 
-            /* Update timing information. Since we are analyzing setup slacks,   *
-             * we only update those values and keep the criticalities stale      *
-             * so as not to interfere with the original timing driven algorithm. *
+            /* Update timing information. Since we are analyzing setup slacks,
+             * we only update those values and keep the criticalities stale
+             * so as not to interfere with the original timing driven algorithm.
              *
-             * Note: the timing info must be updated after applying block moves  *
-             * and committing the timing driven delays and costs.                *
-             * If we wish to revert this timing update due to move rejection,    *
-             * we need to revert block moves and restore the timing values.      */
+             * Note: the timing info must be updated after applying block moves
+             * and committing the timing driven delays and costs.
+             * If we wish to revert this timing update due to move rejection,
+             * we need to revert block moves and restore the timing values. */
             criticalities_->disable_update();
             setup_slacks_->enable_update();
             update_timing_classes(crit_params, timing_info_, criticalities_,
@@ -543,7 +543,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_);
         }
 
-        /* 1 -> move accepted, 0 -> rejected. */
+        // 1 -> move accepted, 0 -> rejected.
         move_outcome = assess_swap_(delta_c, annealing_state_.t);
 
         //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move.
@@ -569,25 +569,27 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
                 costs_.timing_cost += timing_delta_c;
 
-                /* Invalidates timing of modified connections for incremental *
-                 * timing updates. These invalidations are accumulated for a  *
-                 * big timing update in the outer loop.                       */
+                /* Invalidates timing of modified connections for incremental
+                 * timing updates. These invalidations are accumulated for a
+                 * big timing update in the outer loop. */
                 pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
-                /* Update the connection_timing_cost and connection_delay *
-                 * values from the temporary values.                      */
+                /* Update the connection_timing_cost and connection_delay
+                 * values from the temporary values. */
                 placer_state_.mutable_timing().commit_td_cost(blocks_affected_);
             }
 
-            /* Update net cost functions and reset flags. */
+            // Update net cost functions and reset flags.
             net_cost_handler_.update_move_nets();
 
-            /* Update clb data structures since we kept the move. */
+            // Update clb data structures since we kept the move.
             blk_loc_registry.commit_move_blocks(blocks_affected_);
 
-            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
+            // if the agent proposed the block type, then collect the block type stat
+            if (proposed_action.logical_blk_type_index != -1) {
                 ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
             }
+
             if (noc_opts_.noc){
                 noc_cost_handler_->commit_noc_costs();
                 costs_ += noc_delta_c;
@@ -787,6 +789,7 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
     ++annealing_state_.num_temps;
 }
 
+
 int PlacementAnnealer::get_total_iteration() const {
     return tot_iter_;
 }
@@ -805,7 +808,7 @@ void PlacementAnnealer::start_quench() {
     // Freeze out: only accept solutions that improve placement.
     annealing_state_.t = 0;
 
-    //Revert the move limit to initial value.
+    // Revert the move limit to initial value.
     annealing_state_.move_lim = annealing_state_.move_lim_max;
 }
 
@@ -814,7 +817,6 @@ std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&>
 }
 
 void PlacementAnnealer::LOG_MOVE_STATS_HEADER() {
-
     if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
         if (move_stats_file_) {
             fprintf(move_stats_file_.get(),
@@ -837,10 +839,8 @@ void PlacementAnnealer::LOG_MOVE_STATS_PROPOSED() {
     const auto& grid_blocks = placer_state_.grid_blocks();
 
     if (move_stats_file_) {
-
         ClusterBlockId b_from = blocks_affected_.moved_blocks[0].block_num;
 
-
         t_pl_loc to = blocks_affected_.moved_blocks[0].new_loc;
         ClusterBlockId b_to = grid_blocks.block_at_location(to);
 
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 2252367ad04..730f01eec96 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -171,7 +171,7 @@ class PlacementAnnealer {
                       NetPinTimingInvalidator* pin_timing_invalidator,
                       int move_lim);
 
-    /* Function which contains the inner loop of the simulated annealing */
+    ///@brief Contains the inner loop of the simulated annealing
     void placement_inner_loop(MoveGenerator& move_generator,
                               float timing_bb_factor);
 
@@ -200,12 +200,19 @@ class PlacementAnnealer {
                            float timing_bb_factor,
                            bool manual_move_enabled);
 
+    ///@brief Returns the total number iterations or attempted swaps
     int get_total_iteration() const;
 
+    ///@brief Returns a constant reference to the annealing state
     const t_annealing_state& get_annealing_state() const;
 
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
 
+    /**
+     * @brief Starts the quench stage in simulated annealing by
+     * setting the temperature to zero and reverting the move range limit
+     * to the initial value.
+     */
     void start_quench();
 
   private:
@@ -233,7 +240,7 @@ class PlacementAnnealer {
     int outer_crit_iter_count_;
 
     t_annealing_state annealing_state_;
-    /// Swap statistics keep record of the number accepted/rejected/aborted swaps.
+    ///Swap statistics keep record of the number accepted/rejected/aborted swaps.
     t_swap_stats swap_stats_;
     MoveTypeStat move_type_stats_;
     t_placer_statistics placer_stats_;
@@ -249,10 +256,15 @@ class PlacementAnnealer {
      */
     static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
 
+    ///Specifies how often timing information is recomputed when the annealer isn't in the quench stage
     int inner_recompute_limit_;
+    ///Specifies how often timing information is recomputed when the annealer is in the quench stage
     int quench_recompute_limit_;
+    ///Used to trigger a BB and NoC cost re-computation from scratch
     int moves_since_cost_recompute_;
+    ///Total number of iterations or attempted swaps
     int tot_iter_;
+    ///Indicates whether the annealer has entered into the quench stage
     bool quench_started_;
 
     void LOG_MOVE_STATS_HEADER();
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt
index 28a1bb52736..c5e2acb803a 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_buf/config/golden_results.txt
@@ -1,2 +1,2 @@
 arch	circuit	script_params	crit_path_delay_mcw	clk_to_clk_cpd	clk_to_clk2_cpd	clk_to_input_cpd	clk_to_output_cpd	clk2_to_clk2_cpd	clk2_to_clk_cpd	clk2_to_input_cpd	clk2_to_output_cpd	input_to_input_cpd	input_to_clk_cpd	input_to_clk2_cpd	input_to_output_cpd	output_to_output_cpd	output_to_clk_cpd	output_to_clk2_cpd	output_to_input_cpd	clk_to_clk_setup_slack	clk_to_clk2_setup_slack	clk_to_input_setup_slack	clk_to_output_setup_slack	clk2_to_clk2_setup_slack	clk2_to_clk_setup_slack	clk2_to_input_setup_slack	clk2_to_output_setup_slack	input_to_input_setup_slack	input_to_clk_setup_slack	input_to_clk2_setup_slack	input_to_output_setup_slack	output_to_output_setup_slack	output_to_clk_setup_slack	output_to_clk2_setup_slack	output_to_input_setup_slack	clk_to_clk_hold_slack	clk_to_clk2_hold_slack	clk_to_input_hold_slack	clk_to_output_hold_slack	clk2_to_clk2_hold_slack	clk2_to_clk_hold_slack	clk2_to_input_hold_slack	clk2_to_output_hold_slack	input_to_input_hold_slack	input_to_clk_hold_slack	input_to_clk2_hold_slack	input_to_output_hold_slack	output_to_output_hold_slack	output_to_clk_hold_slack	output_to_clk2_hold_slack	output_to_input_hold_slack	
-k6_frac_N10_mem32K_40nm_clk_buf.xml	multiclock_buf.blif	common	1.48876	0.545	-1	-1	-1	0.545	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0.293	-1	-1	-1	0.293	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	
+k6_frac_N10_mem32K_40nm_clk_buf.xml	multiclock_buf.blif	common	1.6599674	0.545	-1	-1	-1	0.545	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0.293	-1	-1	-1	0.293	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt
index e7a944100ab..70910d2d59a 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_clock_modeling/config/golden_results.txt
@@ -1,8 +1,8 @@
 arch	circuit	script_params	vtr_flow_elapsed_time	vtr_max_mem_stage	vtr_max_mem	error	odin_synth_time	max_odin_mem	parmys_synth_time	max_parmys_mem	abc_depth	abc_synth_time	abc_cec_time	abc_sec_time	max_abc_mem	ace_time	max_ace_mem	num_clb	num_io	num_memories	num_mult	vpr_status	vpr_revision	vpr_build_info	vpr_compiler	vpr_compiled	hostname	rundir	max_vpr_mem	num_primary_inputs	num_primary_outputs	num_pre_packed_nets	num_pre_packed_blocks	num_netlist_clocks	num_post_packed_nets	num_post_packed_blocks	device_width	device_height	device_grid_tiles	device_limiting_resources	device_name	pack_mem	pack_time	placed_wirelength_est	total_swap	accepted_swap	rejected_swap	aborted_swap	place_mem	place_time	place_quench_time	placed_CPD_est	placed_setup_TNS_est	placed_setup_WNS_est	placed_geomean_nonvirtual_intradomain_critical_path_delay_est	place_delay_matrix_lookup_time	place_quench_timing_analysis_time	place_quench_sta_time	place_total_timing_analysis_time	place_total_sta_time	min_chan_width	routed_wirelength	min_chan_width_route_success_iteration	logic_block_area_total	logic_block_area_used	min_chan_width_routing_area_total	min_chan_width_routing_area_per_tile	min_chan_width_route_time	min_chan_width_total_timing_analysis_time	min_chan_width_total_sta_time	crit_path_num_rr_graph_nodes	crit_path_num_rr_graph_edges	crit_path_collapsed_nodes	crit_path_routed_wirelength	crit_path_route_success_iteration	crit_path_total_nets_routed	crit_path_total_connections_routed	crit_path_total_heap_pushes	crit_path_total_heap_pops	critical_path_delay	geomean_nonvirtual_intradomain_critical_path_delay	setup_TNS	setup_WNS	hold_TNS	hold_WNS	crit_path_routing_area_total	crit_path_routing_area_per_tile	router_lookahead_computation_time	crit_path_route_time	crit_path_create_rr_graph_time	crit_path_create_intra_cluster_rr_graph_time	crit_path_tile_lookahead_computation_time	crit_path_router_lookahead_computation_time	crit_path_total_timing_analysis_time	crit_path_total_sta_time	num_global_nets	
num_routed_nets	
 timing/k6_N10_40nm.xml	microbenchmarks/d_flip_flop.v	common_--clock_modeling_ideal_--route_chan_width_60	0.30	vpr	57.61 MiB		-1	-1	0.06	19388	1	0.02	-1	-1	33516	-1	-1	1	2	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	58988	2	1	3	4	1	3	4	3	3	9	-1	auto	19.1 MiB	0.00	4	9	6	3	0	57.6 MiB	0.00	0.00	0.55447	-0.91031	-0.55447	0.55447	0.00	1.4209e-05	1.0635e-05	0.000112608	8.885e-05	-1	2	1	18000	18000	14049.7	1561.07	0.00	0.00111531	0.00103596	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	1	2	
 timing/k6_N10_40nm.xml	microbenchmarks/d_flip_flop.v	common_--clock_modeling_route_--route_chan_width_60	0.30	vpr	57.69 MiB		-1	-1	0.06	19244	1	0.02	-1	-1	33536	-1	-1	1	2	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	59076	2	1	3	4	1	3	4	3	3	9	-1	auto	19.2 MiB	0.00	6	9	5	2	2	57.7 MiB	0.00	0.00	0.48631	-0.91031	-0.48631	0.48631	0.00	1.4475e-05	1.0195e-05	0.000102982	7.9111e-05	-1	4	1	18000	18000	15707.9	1745.32	0.00	0.00110914	0.00104203	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0	3	
-timing/k6_N10_40nm.xml	verilog/mkPktMerge.v	common_--clock_modeling_ideal_--route_chan_width_60	26.57	parmys	203.92 MiB		-1	-1	21.33	208816	2	1.49	-1	-1	61188	-1	-1	155	5	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	61088	5	156	191	347	1	163	316	15	15	225	clb	auto	21.3 MiB	0.03	22	75566	54444	2848	18274	59.7 MiB	0.07	0.00	1.49664	-15.129	-1.49664	1.49664	0.00	0.000225009	0.000209684	0.0166386	0.0154931	-1	38	6	3.042e+06	2.79e+06	863192.	3836.41	0.01	0.0221087	0.0205962	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	154	9	
-timing/k6_N10_40nm.xml	verilog/mkPktMerge.v	common_--clock_modeling_route_--route_chan_width_60	26.99	parmys	204.15 MiB		-1	-1	21.52	209052	2	1.49	-1	-1	60656	-1	-1	155	5	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	60972	5	156	191	347	1	163	316	15	15	225	clb	auto	21.3 MiB	0.03	25	77716	55619	3345	18752	59.5 MiB	0.13	0.00	1.47823	-14.9031	-1.47823	1.47823	0.00	0.000388878	0.000358886	0.0289108	0.0266306	-1	38	3	3.042e+06	2.79e+06	892591.	3967.07	0.01	0.0351201	0.0324031	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	153	10	
+timing/k6_N10_40nm.xml	verilog/mkPktMerge.v	common_--clock_modeling_ideal_--route_chan_width_60	26.57	parmys	203.92 MiB		-1	-1	21.33	208816	2	1.49	-1	-1	61188	-1	-1	155	5	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	61088	5	156	191	347	1	163	316	15	15	225	clb	auto	21.3 MiB	0.03	22	75566	54444	2848	18274	59.7 MiB	0.07	0.00	1.49664	-15.129	-1.49664	1.49664	0.00	0.000225009	0.000209684	0.0166386	0.0154931	-1	57	6	3.042e+06	2.79e+06	863192.	3836.41	0.01	0.0221087	0.0205962	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	154	9	
+timing/k6_N10_40nm.xml	verilog/mkPktMerge.v	common_--clock_modeling_route_--route_chan_width_60	26.99	parmys	204.15 MiB		-1	-1	21.52	209052	2	1.49	-1	-1	60656	-1	-1	155	5	-1	-1	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	60972	5	156	191	347	1	163	316	15	15	225	clb	auto	21.3 MiB	0.03	25	77716	55619	3345	18752	59.5 MiB	0.13	0.00	1.47823	-14.9031	-1.47823	1.47823	0.00	0.000388878	0.000358886	0.0289108	0.0266306	-1	57	3	3.042e+06	2.79e+06	892591.	3967.07	0.01	0.0351201	0.0324031	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	153	10	
 timing/k6_N10_mem32K_40nm.xml	microbenchmarks/d_flip_flop.v	common_--clock_modeling_ideal_--route_chan_width_60	0.35	vpr	63.08 MiB		-1	-1	0.08	19324	1	0.02	-1	-1	33472	-1	-1	1	2	0	0	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	64592	2	1	3	4	1	3	4	3	3	9	-1	auto	24.5 MiB	0.00	4	9	6	2	1	63.1 MiB	0.00	0.00	0.55247	-0.90831	-0.55247	0.55247	0.00	1.3129e-05	9.703e-06	0.000103951	8.1123e-05	-1	2	2	53894	53894	12370.0	1374.45	0.00	0.00116445	0.00109439	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	1	2	
 timing/k6_N10_mem32K_40nm.xml	microbenchmarks/d_flip_flop.v	common_--clock_modeling_route_--route_chan_width_60	0.35	vpr	62.96 MiB		-1	-1	0.08	19876	1	0.02	-1	-1	33484	-1	-1	1	2	0	0	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	64468	2	1	3	4	1	3	4	3	3	9	-1	auto	24.3 MiB	0.00	6	9	5	2	2	63.0 MiB	0.00	0.00	0.48631	-0.90831	-0.48631	0.48631	0.00	1.5477e-05	1.1104e-05	0.000110622	8.6576e-05	-1	8	1	53894	53894	14028.3	1558.70	0.00	0.00113491	0.00106717	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0	3	
 timing/k6_N10_mem32K_40nm.xml	verilog/mkPktMerge.v	common_--clock_modeling_ideal_--route_chan_width_60	6.10	vpr	71.24 MiB		-1	-1	1.09	28164	2	0.15	-1	-1	37372	-1	-1	32	311	15	0	success	84e0337	release IPO VTR_ASSERT_LEVEL=3	GNU 9.5.0 on Linux-5.10.35-v8 x86_64	2024-08-22T23:40:08	gh-actions-runner-vtr-auto-spawned3	/root/vtr-verilog-to-routing/vtr-verilog-to-routing	72952	311	156	972	1128	1	953	514	28	28	784	memory	auto	33.0 MiB	0.48	8979	193966	70726	114124	9116	71.2 MiB	1.31	0.03	4.11528	-4394.91	-4.11528	4.11528	0.00	0.00488787	0.00418834	0.465058	0.395185	-1	13380	12	4.25198e+07	9.94461e+06	2.96205e+06	3778.13	0.38	0.643724	0.557601	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	15	938	
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt
index 5d73f4813a6..1263c9d0fca 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_eblif_vpr/config/golden_results.txt
@@ -1,3 +1,3 @@
  arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  min_chan_width	  routed_wirelength	  min_chan_width_route_success_iteration	  logic_block_area_total	  logic_block_area_used	  min_chan_width_routing_area_total	  min_chan_width_routing_area_per_tile	  min_chan_width_route_time	  min_chan_width_total_timing_analysis_time	  min_chan_width_total_sta_time	  crit_path_num_rr_graph_nodes	  crit_path_num_rr_graph_edges	  crit_path_collapsed_nodes	  crit_path_routed_wirelength	  crit_path_route_success_iteration	  crit_path_total_nets_routed	  crit_path_total_connections_routed	  crit_path_total_heap_pushes	  crit_path_total_heap_pops	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  crit_path_routing_area_total	  crit_path_routing_area_per_tile	  router_lookahead_computation_time	  crit_path_route_time	  crit_path_create_rr_graph_time	  
crit_path_create_intra_cluster_rr_graph_time	  crit_path_tile_lookahead_computation_time	  crit_path_router_lookahead_computation_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	 
  k6_frac_N10_40nm.xml	  test_eblif.eblif	  common	  0.12	  vpr	  60.11 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  3	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:42:01	  fv-az1118-845	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61552	  3	  1	  5	  6	  1	  4	  5	  3	  3	  9	  -1	  auto	  21.6 MiB	  0.00	  9	  12	  1	  9	  2	  60.1 MiB	  0.00	  0.00	  0.52647	  -0.88231	  -0.52647	  0.52647	  0.00	  1.0349e-05	  7.103e-06	  8.7732e-05	  6.8547e-05	  20	  10	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.00111687	  0.00105846	  379	  725	  -1	  6	  1	  3	  3	  36	  25	  0.605178	  0.605178	  -1.1507	  -0.605178	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00107362	  0.00104552	 
- k6_frac_N10_40nm.xml	  conn_order.eblif	  common	  0.12	  vpr	  59.98 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  2	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:42:01	  fv-az1118-845	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61420	  2	  1	  4	  5	  1	  3	  4	  3	  3	  9	  -1	  auto	  21.6 MiB	  0.00	  6	  9	  2	  3	  4	  60.0 MiB	  0.00	  0.00	  0.69084	  -1.21731	  -0.69084	  0.69084	  0.00	  1.4366e-05	  1.0429e-05	  0.000128779	  0.000106057	  20	  9	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.00110538	  0.00104614	  379	  725	  -1	  5	  1	  2	  2	  25	  19	  0.940178	  0.940178	  -1.48482	  -0.940178	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00106677	  0.00104008	 
+ k6_frac_N10_40nm.xml	  conn_order.eblif	  common	  0.12	  vpr	  59.98 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  2	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:42:01	  fv-az1118-845	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61420	  2	  1	  4	  5	  1	  3	  4	  3	  3	  9	  -1	  auto	  21.6 MiB	  0.00	  6	  9	  2	  3	  4	  60.0 MiB	  0.00	  0.00	  0.69084	  -1.21731	  -0.69084	  0.69084	  0.00	  1.4366e-05	  1.0429e-05	  0.000128779	  0.000106057	  20	  9	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.00110538	  0.00104614	  379	  725	  -1	  3	  1	  2	  2	  25	  19	  1.6923204	  1.6923204	  -2.22723	  -1.6923204	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00106677	  0.00104008	 
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt
index 28a1bb52736..ce46084d4cd 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_clock_buf/config/golden_results.txt
@@ -1,2 +1,2 @@
 arch	circuit	script_params	crit_path_delay_mcw	clk_to_clk_cpd	clk_to_clk2_cpd	clk_to_input_cpd	clk_to_output_cpd	clk2_to_clk2_cpd	clk2_to_clk_cpd	clk2_to_input_cpd	clk2_to_output_cpd	input_to_input_cpd	input_to_clk_cpd	input_to_clk2_cpd	input_to_output_cpd	output_to_output_cpd	output_to_clk_cpd	output_to_clk2_cpd	output_to_input_cpd	clk_to_clk_setup_slack	clk_to_clk2_setup_slack	clk_to_input_setup_slack	clk_to_output_setup_slack	clk2_to_clk2_setup_slack	clk2_to_clk_setup_slack	clk2_to_input_setup_slack	clk2_to_output_setup_slack	input_to_input_setup_slack	input_to_clk_setup_slack	input_to_clk2_setup_slack	input_to_output_setup_slack	output_to_output_setup_slack	output_to_clk_setup_slack	output_to_clk2_setup_slack	output_to_input_setup_slack	clk_to_clk_hold_slack	clk_to_clk2_hold_slack	clk_to_input_hold_slack	clk_to_output_hold_slack	clk2_to_clk2_hold_slack	clk2_to_clk_hold_slack	clk2_to_input_hold_slack	clk2_to_output_hold_slack	input_to_input_hold_slack	input_to_clk_hold_slack	input_to_clk2_hold_slack	input_to_output_hold_slack	output_to_output_hold_slack	output_to_clk_hold_slack	output_to_clk2_hold_slack	output_to_input_hold_slack	
-k6_frac_N10_mem32K_40nm_clk_buf.xml	multiclock_buf.blif	common	1.48876	0.545	-1	-1	-1	0.545	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0.293	-1	-1	-1	0.293	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	
+k6_frac_N10_mem32K_40nm_clk_buf.xml	multiclock_buf.blif	common	1.65996	0.545	-1	-1	-1	0.545	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	0.293	-1	-1	-1	0.293	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	-1	
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt
index f922c82810e..24a98bd463d 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_eblif_vpr/config/golden_results.txt
@@ -1,3 +1,3 @@
  arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  min_chan_width	  routed_wirelength	  min_chan_width_route_success_iteration	  logic_block_area_total	  logic_block_area_used	  min_chan_width_routing_area_total	  min_chan_width_routing_area_per_tile	  min_chan_width_route_time	  min_chan_width_total_timing_analysis_time	  min_chan_width_total_sta_time	  crit_path_num_rr_graph_nodes	  crit_path_num_rr_graph_edges	  crit_path_collapsed_nodes	  crit_path_routed_wirelength	  crit_path_route_success_iteration	  crit_path_total_nets_routed	  crit_path_total_connections_routed	  crit_path_total_heap_pushes	  crit_path_total_heap_pops	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  crit_path_routing_area_total	  crit_path_routing_area_per_tile	  router_lookahead_computation_time	  crit_path_route_time	  crit_path_create_rr_graph_time	  
crit_path_create_intra_cluster_rr_graph_time	  crit_path_tile_lookahead_computation_time	  crit_path_router_lookahead_computation_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	 
  k6_frac_N10_40nm.xml	  test_eblif.eblif	  common	  0.12	  vpr	  59.98 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  3	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:47:29	  fv-az775-518	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61424	  3	  1	  5	  6	  1	  4	  5	  3	  3	  9	  -1	  auto	  21.5 MiB	  0.00	  9	  12	  1	  9	  2	  60.0 MiB	  0.00	  0.00	  0.52647	  -0.88231	  -0.52647	  0.52647	  0.00	  1.035e-05	  7.094e-06	  8.8915e-05	  6.9209e-05	  20	  10	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.0011057	  0.00104635	  379	  725	  -1	  6	  1	  3	  3	  36	  25	  0.605178	  0.605178	  -1.1507	  -0.605178	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00105006	  0.00102258	 
- k6_frac_N10_40nm.xml	  conn_order.eblif	  common	  0.12	  vpr	  59.98 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  2	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:47:29	  fv-az775-518	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61424	  2	  1	  4	  5	  1	  3	  4	  3	  3	  9	  -1	  auto	  21.5 MiB	  0.00	  6	  9	  2	  3	  4	  60.0 MiB	  0.00	  0.00	  0.69084	  -1.21731	  -0.69084	  0.69084	  0.00	  1.0129e-05	  6.963e-06	  0.000104936	  8.568e-05	  20	  9	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.00111784	  0.00105927	  379	  725	  -1	  5	  1	  2	  2	  25	  19	  0.940178	  0.940178	  -1.48482	  -0.940178	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00106769	  0.00103834	 
+ k6_frac_N10_40nm.xml	  conn_order.eblif	  common	  0.12	  vpr	  59.98 MiB	  	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  -1	  1	  2	  -1	  -1	  success	  e1c7cb1	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.8.0-1014-azure x86_64	  2024-09-24T03:47:29	  fv-az775-518	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  61424	  2	  1	  4	  5	  1	  3	  4	  3	  3	  9	  -1	  auto	  21.5 MiB	  0.00	  6	  9	  2	  3	  4	  60.0 MiB	  0.00	  0.00	  0.69084	  -1.21731	  -0.69084	  0.69084	  0.00	  1.0129e-05	  6.963e-06	  0.000104936	  8.568e-05	  20	  9	  1	  53894	  53894	  4880.82	  542.314	  0.00	  0.00111784	  0.00105927	  379	  725	  -1	  15	  1	  2	  2	  25	  19	  1.701722	  1.701722	  -2.22723	  -1.701722	  0	  0	  6579.40	  731.044	  0.00	  0.00	  0.00	  -1	  -1	  0.00	  0.00106769	  0.00103834	 
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt
index e6e7c8778e5..3813343ed35 100644
--- a/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong_odin/strong_global_nonuniform/config/golden_results.txt
@@ -1,7 +1,7 @@
  arch	  circuit	  script_params	  vtr_flow_elapsed_time	  vtr_max_mem_stage	  vtr_max_mem	  error	  odin_synth_time	  max_odin_mem	  parmys_synth_time	  max_parmys_mem	  abc_depth	  abc_synth_time	  abc_cec_time	  abc_sec_time	  max_abc_mem	  ace_time	  max_ace_mem	  num_clb	  num_io	  num_memories	  num_mult	  vpr_status	  vpr_revision	  vpr_build_info	  vpr_compiler	  vpr_compiled	  hostname	  rundir	  max_vpr_mem	  num_primary_inputs	  num_primary_outputs	  num_pre_packed_nets	  num_pre_packed_blocks	  num_netlist_clocks	  num_post_packed_nets	  num_post_packed_blocks	  device_width	  device_height	  device_grid_tiles	  device_limiting_resources	  device_name	  pack_mem	  pack_time	  placed_wirelength_est	  total_swap	  accepted_swap	  rejected_swap	  aborted_swap	  place_mem	  place_time	  place_quench_time	  placed_CPD_est	  placed_setup_TNS_est	  placed_setup_WNS_est	  placed_geomean_nonvirtual_intradomain_critical_path_delay_est	  place_delay_matrix_lookup_time	  place_quench_timing_analysis_time	  place_quench_sta_time	  place_total_timing_analysis_time	  place_total_sta_time	  ap_mem	  ap_time	  ap_full_legalizer_mem	  ap_full_legalizer_time	  min_chan_width	  routed_wirelength	  min_chan_width_route_success_iteration	  logic_block_area_total	  logic_block_area_used	  min_chan_width_routing_area_total	  min_chan_width_routing_area_per_tile	  min_chan_width_route_time	  min_chan_width_total_timing_analysis_time	  min_chan_width_total_sta_time	  crit_path_num_rr_graph_nodes	  crit_path_num_rr_graph_edges	  crit_path_collapsed_nodes	  crit_path_routed_wirelength	  crit_path_route_success_iteration	  crit_path_total_nets_routed	  crit_path_total_connections_routed	  crit_path_total_heap_pushes	  crit_path_total_heap_pops	  critical_path_delay	  geomean_nonvirtual_intradomain_critical_path_delay	  setup_TNS	  setup_WNS	  hold_TNS	  hold_WNS	  crit_path_routing_area_total	  crit_path_routing_area_per_tile	  router_lookahead_computation_time	  
crit_path_route_time	  crit_path_create_rr_graph_time	  crit_path_create_intra_cluster_rr_graph_time	  crit_path_tile_lookahead_computation_time	  crit_path_router_lookahead_computation_time	  crit_path_total_timing_analysis_time	  crit_path_total_sta_time	 
  x_gaussian_y_uniform.xml	  stereovision3.v	  common	  1.42	  vpr	  65.81 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  37836	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67388	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  27.0 MiB	  0.12	  431	  2298	  449	  1774	  75	  65.8 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000483914	  0.000384949	  0.0147089	  0.012636	  -1	  -1	  -1	  -1	  12	  326	  3	  1.07788e+06	  700622	  -1	  -1	  0.20	  0.0742174	  0.0638404	  2680	  3516	  -1	  316	  3	  175	  255	  10988	  5508	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.0145719	  0.0139138	 
  x_uniform_y_gaussian.xml	  stereovision3.v	  common	  1.44	  vpr	  65.54 MiB	  	  0.05	  9856	  -1	  -1	  4	  0.17	  -1	  -1	  37820	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67108	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  26.9 MiB	  0.11	  392	  1890	  346	  1476	  68	  65.5 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000458868	  0.000376323	  0.0123402	  0.0106294	  -1	  -1	  -1	  -1	  12	  287	  5	  1.07788e+06	  700622	  -1	  -1	  0.21	  0.0867101	  0.074128	  2680	  3516	  -1	  268	  3	  167	  248	  10043	  4782	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.015461	  0.0147632	 
- x_gaussian_y_gaussian.xml	  stereovision3.v	  common	  1.50	  vpr	  65.58 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  37476	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67156	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  26.9 MiB	  0.12	  398	  2196	  430	  1697	  69	  65.6 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000468656	  0.000387965	  0.0139473	  0.0119918	  -1	  -1	  -1	  -1	  12	  284	  8	  1.07788e+06	  700622	  -1	  -1	  0.28	  0.0788417	  0.0678402	  2680	  3516	  -1	  273	  3	  184	  266	  11521	  5744	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.0151497	  0.0144591	 
+ x_gaussian_y_gaussian.xml	  stereovision3.v	  common	  1.50	  vpr	  65.58 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  37476	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67156	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  26.9 MiB	  0.12	  398	  2196	  430	  1697	  69	  65.6 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000468656	  0.000387965	  0.0139473	  0.0119918	  -1	  -1	  -1	  -1	  16	  284	  8	  1.07788e+06	  700622	  -1	  -1	  0.28	  0.0788417	  0.0678402	  2680	  3516	  -1	  273	  3	  184	  266	  11521	  5744	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.0151497	  0.0144591	 
  x_delta_y_uniform.xml	  stereovision3.v	  common	  1.67	  vpr	  65.78 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  40712	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67356	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  27.0 MiB	  0.11	  474	  1992	  348	  1574	  70	  65.8 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000450631	  0.000369149	  0.0127092	  0.0109666	  -1	  -1	  -1	  -1	  48	  367	  4	  1.07788e+06	  700622	  -1	  -1	  0.46	  0.187113	  0.157611	  2680	  3516	  -1	  363	  2	  162	  240	  11458	  5656	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.0141159	  0.0135524	 
  x_delta_y_delta.xml	  stereovision3.v	  common	  1.41	  vpr	  65.68 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  38292	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67260	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  26.9 MiB	  0.12	  411	  2094	  373	  1653	  68	  65.7 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000478875	  0.000382715	  0.0140865	  0.0122714	  -1	  -1	  -1	  -1	  48	  306	  4	  1.07788e+06	  700622	  -1	  -1	  0.20	  0.107373	  0.0919185	  2680	  3516	  -1	  300	  3	  176	  263	  11898	  5867	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.014938	  0.0142467	 
  x_uniform_y_delta.xml	  stereovision3.v	  common	  1.47	  vpr	  65.57 MiB	  	  0.05	  9984	  -1	  -1	  4	  0.17	  -1	  -1	  37488	  -1	  -1	  13	  11	  0	  0	  success	  30aea82	  Release IPO VTR_ASSERT_LEVEL=3	  GNU 11.4.0 on Linux-6.5.0-1025-azure x86_64	  2024-10-28T23:46:21	  fv-az1380-902	  /home/runner/work/vtr-verilog-to-routing/vtr-verilog-to-routing	  67144	  11	  30	  262	  292	  2	  110	  54	  7	  7	  49	  clb	  auto	  26.9 MiB	  0.11	  405	  2196	  394	  1718	  84	  65.6 MiB	  0.03	  0.00	  1.91988	  -135.359	  -1.91988	  1.85222	  0.01	  0.000448588	  0.000370342	  0.0136716	  0.0117962	  -1	  -1	  -1	  -1	  58	  286	  2	  1.07788e+06	  700622	  -1	  -1	  0.28	  0.112457	  0.0956247	  2680	  3516	  -1	  286	  2	  161	  239	  8848	  4226	  1.91988	  1.85222	  -135.359	  -1.91988	  0	  0	  -1	  -1	  0.00	  0.03	  0.00	  -1	  -1	  0.00	  0.0140539	  0.0134498	 

From 11074f3c707e24e46a124a419ec2abdaf86c9ba3 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 11 Nov 2024 12:31:02 -0500
Subject: [PATCH 17/31] types and pass by reference in TimingGraph.cpp

---
 .../libtatum/libtatum/tatum/TimingGraph.cpp   | 16 ++++-----
 .../libtatum/libtatum/tatum/TimingGraph.hpp   | 36 +++++++++++--------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
index 33392a6e8d1..3b861d054c6 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
@@ -200,7 +200,7 @@ NodeId TimingGraph::add_node(const NodeType type) {
 
 EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const NodeId sink_node) {
     //We require that the source/sink node must already be in the graph,
-    //  so we can update them with thier edge references
+    //  so we can update them with their edge references
     TATUM_ASSERT(valid_node_id(src_node));
     TATUM_ASSERT(valid_node_id(sink_node));
 
@@ -211,7 +211,7 @@ EdgeId TimingGraph::add_edge(const EdgeType type, const NodeId src_node, const N
     EdgeId edge_id = EdgeId(edge_ids_.size());
     edge_ids_.push_back(edge_id);
 
-    //Create the edgge
+    //Create the edge
     edge_types_.push_back(type);
     edge_src_nodes_.push_back(src_node);
     edge_sink_nodes_.push_back(sink_node);
@@ -318,7 +318,7 @@ GraphIdMaps TimingGraph::compress() {
     levelize();
     validate();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 void TimingGraph::levelize() {
@@ -474,7 +474,7 @@ GraphIdMaps TimingGraph::optimize_layout() {
 
     levelize();
 
-    return {node_id_map, edge_id_map};
+    return {std::move(node_id_map), std::move(edge_id_map)};
 }
 
 tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const {
@@ -483,7 +483,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
     //Determine the edges driven by each level of the graph
     std::vector<std::vector<EdgeId>> edge_levels;
     for(LevelId level_id : levels()) {
-        edge_levels.push_back(std::vector<EdgeId>());
+        edge_levels.emplace_back();
         for(auto node_id : level_nodes(level_id)) {
 
             //We walk the nodes according to the input-edge order.
@@ -874,7 +874,7 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 }
 
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -890,7 +890,7 @@ std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth) {
     std::vector<NodeId> nodes;
 
@@ -905,7 +905,7 @@ std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg,
 }
 
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg, 
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth) {
     std::vector<NodeId> nodes;
 
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
index 72a05cad9da..f4cd54ad8d9 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.hpp
@@ -11,8 +11,8 @@
  * store all edges as bi-directional edges.
  *
  * NOTE: We store only the static connectivity and node information in the 'TimingGraph' class.
- *       Other dynamic information (edge delays, node arrival/required times) is stored seperately.
- *       This means that most actions opearting on the timing graph (e.g. TimingAnalyzers) only
+ *       Other dynamic information (edge delays, node arrival/required times) is stored separately.
+ *       This means that most actions operating on the timing graph (e.g. TimingAnalyzers) only
  *       require read-only access to the timing graph.
  *
  * Accessing Graph Data
@@ -28,9 +28,9 @@
  * rather than the more typical "Array of Structs (AoS)" data layout.
  *
  * By using a SoA layout we keep all data for a particular field (e.g. node types) in contiguous
- * memory.  Using an AoS layout the various fields accross nodes would *not* be contiguous
+ * memory.  Using an AoS layout the various fields across nodes would *not* be contiguous
  * (although the different fields within each object (e.g. a TimingNode class) would be contiguous.
- * Since we typically perform operations on particular fields accross nodes the SoA layout performs
+ * Since we typically perform operations on particular fields across nodes the SoA layout performs
  * better (and enables memory ordering optimizations). The edges are also stored in a SOA format.
  *
  * The SoA layout also motivates the ID based approach, which allows direct indexing into the required
@@ -48,11 +48,12 @@
  * and ensures that each cache line pulled into the cache will (likely) be accessed multiple times
  * before being evicted.
  *
- * Note that performing these optimizations is currently done explicity by calling the optimize_edge_layout()
- * and optimize_node_layout() member functions.  In the future (particularily if incremental modification
+ * Note that performing these optimizations is currently done explicitly by calling the optimize_edge_layout()
+ * and optimize_node_layout() member functions.  In the future (particularly if incremental modification
  * support is added), it may be a good idea apply these modifications automatically as needed.
  *
  */
+#include <utility>
 #include <vector>
 #include <set>
 #include <limits>
@@ -149,7 +150,7 @@ class TimingGraph {
 
         ///\pre The graph must be levelized.
         ///\returns A range containing the nodes which are primary inputs (i.e. SOURCE's with no fanin, corresponding to top level design inputs pins)
-        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incomming edges from the clock network)
+        ///\warning Not all SOURCE nodes in the graph are primary inputs (e.g. FF Q pins are SOURCE's but have incoming edges from the clock network)
         ///\see levelize()
         node_range primary_inputs() const { 
             TATUM_ASSERT_MSG(is_levelized_, "Timing graph must be levelized");
@@ -282,7 +283,7 @@ class TimingGraph {
         //Node data
         tatum::util::linear_map<NodeId,NodeId> node_ids_; //The node IDs in the graph
         tatum::util::linear_map<NodeId,NodeType> node_types_; //Type of node
-        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incomiing edge IDs for node
+        tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_in_edges_; //Incoming edge IDs for node
         tatum::util::linear_map<NodeId,std::vector<EdgeId>> node_out_edges_; //Out going edge IDs for node
         tatum::util::linear_map<NodeId,LevelId> node_levels_; //Out going edge IDs for node
 
@@ -293,12 +294,12 @@ class TimingGraph {
         tatum::util::linear_map<EdgeId,NodeId> edge_src_nodes_; //Source node for each edge
         tatum::util::linear_map<EdgeId,bool>   edges_disabled_;
 
-        //Auxilary graph-level info, filled in by levelize()
+        //Auxiliary graph-level info, filled in by levelize()
         tatum::util::linear_map<LevelId,LevelId> level_ids_; //The level IDs in the graph
         tatum::util::linear_map<LevelId,std::vector<NodeId>> level_nodes_; //Nodes in each level
         std::vector<NodeId> primary_inputs_; //Primary input nodes of the timing graph.
         std::vector<NodeId> logical_outputs_; //Logical output nodes of the timing graph.
-        bool is_levelized_ = false; //Inidcates if the current levelization is valid
+        bool is_levelized_ = false; //Indicates if the current levelization is valid
 
         bool allow_dangling_combinational_nodes_ = false;
 
@@ -310,26 +311,31 @@ std::vector<std::vector<NodeId>> identify_combinational_loops(const TimingGraph&
 //Returns the set of nodes transitively connected (either fanin or fanout) to nodes in through_nodes
 //up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitively_connected_nodes(const TimingGraph& tg, 
-                                                      const std::vector<NodeId> through_nodes, 
+                                                      const std::vector<NodeId>& through_nodes,
                                                       size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanin of nodes in sinks up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanin_nodes(const TimingGraph& tg, 
-                                                const std::vector<NodeId> sinks, 
+                                                const std::vector<NodeId>& sinks,
                                                 size_t max_depth=std::numeric_limits<size_t>::max());
 
 //Returns the set of nodes in the transitive fanout of nodes in sources up to max_depth (default infinite) hops away
 std::vector<NodeId> find_transitive_fanout_nodes(const TimingGraph& tg,
-                                                 const std::vector<NodeId> sources, 
+                                                 const std::vector<NodeId>& sources,
                                                  size_t max_depth=std::numeric_limits<size_t>::max());
 
 EdgeType infer_edge_type(const TimingGraph& tg, EdgeId edge);
 
 //Mappings from old to new IDs
 struct GraphIdMaps {
-    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId> node_map,
-                tatum::util::linear_map<EdgeId,EdgeId> edge_map)
+    GraphIdMaps(const tatum::util::linear_map<NodeId,NodeId>& node_map,
+                const tatum::util::linear_map<EdgeId,EdgeId>& edge_map)
         : node_id_map(node_map), edge_id_map(edge_map) {}
+
+    GraphIdMaps(tatum::util::linear_map<NodeId,NodeId>&& node_map,
+                tatum::util::linear_map<EdgeId,EdgeId>&& edge_map)
+        : node_id_map(std::move(node_map)), edge_id_map(std::move(edge_map)) {}
+
     tatum::util::linear_map<NodeId,NodeId> node_id_map;
     tatum::util::linear_map<EdgeId,EdgeId> edge_id_map;
 };

From 4b3f7956b4de2991ea4e82bf7cf1ea30b20f8a37 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 12:11:35 -0500
Subject: [PATCH 18/31] fixed a few typos

---
 .../libtatum/tatum/analyzer_factory.hpp        |  6 +++---
 .../tatum/delay_calc/FixedDelayCalculator.hpp  |  2 +-
 .../tatum/graph_visitors/GraphVisitor.hpp      |  2 +-
 vpr/src/base/atom_lookup.cpp                   |  2 +-
 vpr/src/base/atom_lookup_fwd.h                 |  2 +-
 vpr/src/base/atom_netlist_utils.cpp            | 13 +++++++------
 vpr/src/timing/timing_graph_builder.cpp        | 18 +++++++++---------
 7 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
index 9ac444bc61f..db34f59a049 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/analyzer_factory.hpp
@@ -18,7 +18,7 @@ namespace tatum {
  * This file defines the AnalyzerFactory class used to construct timing analyzers.
  *
  * We assume that the user has already defined the timing graph, constraints and
- * thier own delay calculator: 
+ * their own delay calculator:
  *
  *      TimingGraph timing_graph;
  *      TimingConstraints timing_constraints;
@@ -33,7 +33,7 @@ namespace tatum {
  *                                                                 timing_constraints,
  *                                                                 delay_calculator);
  *
- * We can similarily generate analyzers for other types of analysis, for instance Hold:
+ * We can similarly generate analyzers for other types of analysis, for instance Hold:
  *
  *      auto hold_analyzer = AnalyzerFactory<SetupAnalysis>::make(timing_graph,
  *                                                                timing_constraints,
@@ -45,7 +45,7 @@ namespace tatum {
  *                                                                                         timing_constraints,
  *                                                                                         delay_calculator);
  *
- * The AnalzyerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
+ * The AnalyzerFactory returns a std::unique_ptr to the appropriate TimingAnalyzer sub-class:
  *
  *      SetupAnalysis       =>  SetupTimingAnalyzer
  *      HoldAnalysis        =>  HoldTimingAnalyzer
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp
index bfa1f0fa037..9d0a86ec217 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/delay_calc/FixedDelayCalculator.hpp
@@ -8,7 +8,7 @@
 namespace tatum {
 
 /** 
- * An exmaple DelayCalculator implementation which takes 
+ * An example DelayCalculator implementation which takes
  * a vector of fixed pre-calculated edge delays
  *
  * \see DelayCalculator
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp
index 2672560d155..be9680d20b5 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_visitors/GraphVisitor.hpp
@@ -21,7 +21,7 @@ class GraphVisitor {
         virtual void do_reset_node_arrival_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;
         virtual void do_reset_node_required_tags_from_origin(const NodeId node_id, const NodeId origin) = 0;
 
-        //Returns true if the specified source/sink is unconstrainted
+        //Returns true if the specified source/sink is unconstrained
         virtual bool do_arrival_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;
         virtual bool do_required_pre_traverse_node(const TimingGraph& tg, const TimingConstraints& tc, const NodeId node_id) = 0;
 
diff --git a/vpr/src/base/atom_lookup.cpp b/vpr/src/base/atom_lookup.cpp
index f771b8af154..eb597ff8abd 100644
--- a/vpr/src/base/atom_lookup.cpp
+++ b/vpr/src/base/atom_lookup.cpp
@@ -173,7 +173,7 @@ AtomLookup::pin_tnode_range AtomLookup::atom_pin_tnodes(BlockTnode block_tnode_t
 }
 
 void AtomLookup::set_atom_pin_tnode(const AtomPinId pin, const tatum::NodeId node, BlockTnode block_tnode_type) {
-    //A pin always expands to an external tnode (i.e. it's external connectivity in the netlist)
+    //A pin always expands to an external tnode (i.e. its external connectivity in the netlist)
     //but some pins may expand to an additional tnode (i.e. to SOURCE/SINK to cover internal sequential paths within a block)
     if (block_tnode_type == BlockTnode::EXTERNAL) {
         atom_pin_tnode_external_[pin] = node;
diff --git a/vpr/src/base/atom_lookup_fwd.h b/vpr/src/base/atom_lookup_fwd.h
index 02ab349cb94..1adb2e68bb0 100644
--- a/vpr/src/base/atom_lookup_fwd.h
+++ b/vpr/src/base/atom_lookup_fwd.h
@@ -5,7 +5,7 @@ class AtomLookup;
 
 enum class BlockTnode {
     INTERNAL, ///<tnodes corresponding to internal paths withing atom blocks
-    EXTERNAL  ///<tnodes corresponding to exteranl interface of atom blocks
+    EXTERNAL  ///<tnodes corresponding to external interface of atom blocks
 };
 
 #endif
diff --git a/vpr/src/base/atom_netlist_utils.cpp b/vpr/src/base/atom_netlist_utils.cpp
index cbfed079c25..5172db9261a 100644
--- a/vpr/src/base/atom_netlist_utils.cpp
+++ b/vpr/src/base/atom_netlist_utils.cpp
@@ -1342,9 +1342,10 @@ std::set<AtomNetId> find_netlist_physical_clock_nets(const AtomNetlist& netlist)
     //clock generators
     //
     //Since we don't have good information about what pins are clock generators we build a lookup as we go
-    for (auto blk_id : netlist.blocks()) {
+    for (AtomBlockId blk_id : netlist.blocks()) {
         if (!blk_id) continue;
 
+        // Ignore I/O blocks
         AtomBlockType type = netlist.block_type(blk_id);
         if (type != AtomBlockType::BLOCK) continue;
 
@@ -1352,7 +1353,7 @@ std::set<AtomNetId> find_netlist_physical_clock_nets(const AtomNetlist& netlist)
         const t_model* model = netlist.block_model(blk_id);
         VTR_ASSERT(model);
         if (clock_gen_ports.find(model) == clock_gen_ports.end()) {
-            //First time we've seen this model, intialize it
+            //First time we've seen this model, initialize it
             clock_gen_ports[model] = {};
 
             //Look at all the ports to find clock generators
@@ -1366,7 +1367,7 @@ std::set<AtomNetId> find_netlist_physical_clock_nets(const AtomNetlist& netlist)
         }
 
         //Look for connected input clocks
-        for (auto pin_id : netlist.block_clock_pins(blk_id)) {
+        for (AtomPinId pin_id : netlist.block_clock_pins(blk_id)) {
             if (!pin_id) continue;
 
             AtomNetId clk_net_id = netlist.pin_net(pin_id);
@@ -1402,7 +1403,7 @@ std::set<AtomNetId> find_netlist_physical_clock_nets(const AtomNetlist& netlist)
 
 ///@brief Finds all logical clock drivers in the netlist (by back-tracing through logic)
 std::set<AtomPinId> find_netlist_logical_clock_drivers(const AtomNetlist& netlist) {
-    auto clock_nets = find_netlist_physical_clock_nets(netlist);
+    std::set<AtomNetId> clock_nets = find_netlist_physical_clock_nets(netlist);
 
     //We now have a set of nets which drive clock pins
     //
@@ -1415,7 +1416,7 @@ std::set<AtomPinId> find_netlist_logical_clock_drivers(const AtomNetlist& netlis
         prev_clock_nets = clock_nets;
         clock_nets.clear();
 
-        for (auto clk_net : prev_clock_nets) {
+        for (AtomNetId clk_net : prev_clock_nets) {
             AtomPinId driver_pin = netlist.net_driver(clk_net);
             AtomPortId driver_port = netlist.pin_port(driver_pin);
             AtomBlockId driver_blk = netlist.port_block(driver_port);
@@ -1467,7 +1468,7 @@ std::set<AtomPinId> find_netlist_logical_clock_drivers(const AtomNetlist& netlis
 
     //Extract the net drivers
     std::set<AtomPinId> clock_drivers;
-    for (auto net : clock_nets) {
+    for (AtomNetId net : clock_nets) {
         AtomPinId driver = netlist.net_driver(net);
 
         if (netlist.pin_is_constant(driver)) {
diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp
index c0462429648..49534b9d380 100644
--- a/vpr/src/timing/timing_graph_builder.cpp
+++ b/vpr/src/timing/timing_graph_builder.cpp
@@ -295,7 +295,7 @@ void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) {
     }
 
     //Walk through the netlist nets adding the edges representing each net to
-    //the timiing graph. This connects the timing graph nodes of each netlist
+    //the timing graph. This connects the timing graph nodes of each netlist
     //block together.
     for (AtomNetId net : netlist_.nets()) {
         add_net_to_timing_graph(net);
@@ -359,13 +359,13 @@ void TimingGraphBuilder::add_io_to_timing_graph(const AtomBlockId blk) {
 //Creates the timing graph nodes and internal edges for a netlist block
 void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) {
     /*
-     * How the code builds the primtive timing sub-graph
+     * How the code builds the primitive timing sub-graph
      * -------------------------------------------------
      *
-     * The code below builds the timing sub-graph corresponding corresponding to the
+     * The code below builds the timing sub-graph corresponding to the
      * current netlist primitive/block. This is accomplished by walking through
      * the primitive's input, clock and output pins and creating the corresponding
-     * tnodes (note that if internal sequentail paths exist within the primitive 
+     * tnodes (note that if internal sequential paths exist within the primitive
      * this also creates the appropriate internal tnodes).
      *
      * Once all nodes have been created the edges are added between them according
@@ -374,12 +374,12 @@ void TimingGraphBuilder::add_block_to_timing_graph(const AtomBlockId blk) {
      * Note that to minimize the size of the timing graph we only create tnodes and 
      * edges where they actually exist within the netlist. This means we do not create 
      * tnodes or tedges to/from pins which are disconnected in the netlist (even if 
-     * they exist in the archtiecture).
+     * they exist in the architecture).
      *
      *
      * Clock Generators
      * ----------------
-     * An additional wrinkle in the above process is the presense of clock generators,
+     * An additional wrinkle in the above process is the presence of clock generators,
      * such as PLLs, which may define new clocks at their output (in contrast with a
      * primary input which is always a SOURCE type tnode).
      *
@@ -545,7 +545,7 @@ void TimingGraphBuilder::create_block_internal_clock_timing_edges(const AtomBloc
             AtomPinId clk_pin = netlist_.port_pin(clk_port, 0);
             VTR_ASSERT(clk_pin);
 
-            //Convert the pin to it's tnode
+            //Convert the pin to its tnode
             NodeId clk_tnode = netlist_lookup_.atom_pin_tnode(clk_pin);
             VTR_ASSERT(clk_tnode);
 
@@ -605,7 +605,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock
     //to OPIN), the end of a timing path (i.e. IPIN to SINK), or an internal timing path
     //(i.e. SOURCE to SINK).
     //
-    //Note that the creation of these edges is driven by the 'combinationl_sink_ports' specified
+    //Note that the creation of these edges is driven by the 'combinational_sink_ports' specified
     //in the architecture primitive model
     for (AtomPinId src_pin : netlist_.block_input_pins(blk)) {
         //Note that we have already created all the relevant nodes, and appropriately labelled them as
@@ -615,7 +615,7 @@ void TimingGraphBuilder::create_block_internal_data_timing_edges(const AtomBlock
 
         if (!src_tnode) continue;
 
-        auto src_type = tg_->node_type(src_tnode);
+        NodeType src_type = tg_->node_type(src_tnode);
 
         //Look-up the combinationally connected sink ports name on the port model
         AtomPortId src_port = netlist_.pin_port(src_pin);

From 4a9423d3ae58ee321d4b2565a1570719706806d0 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 12:29:18 -0500
Subject: [PATCH 19/31] get rid of DUSTY_SCHED

---
 vpr/src/base/SetupVPR.cpp     | 29 +------------
 vpr/src/base/ShowSetup.cpp    | 17 ++------
 vpr/src/base/read_options.cpp | 44 ++------------------
 vpr/src/base/read_options.h   |  5 ---
 vpr/src/base/vpr_types.h      | 13 ------
 vpr/src/place/annealer.cpp    | 76 +++++++++--------------------------
 vpr/src/place/annealer.h      | 15 ++-----
 7 files changed, 31 insertions(+), 168 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 43d7605a1fd..d6f1b69efad 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -394,7 +394,7 @@ static void SetupSwitches(const t_arch& Arch,
     device_ctx.delayless_switch_idx = RoutingArch->delayless_switch;
 
     //Warn about non-zero Cout values for the ipin switch, since these values have no effect.
-    //VPR do not model the R/C's of block internal routing connectsion.
+    //VPR does not model the R/C's of block internal routing connections.
     //
     //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO)
     if (device_ctx.arch_switch_inf[RoutingArch->wire_to_arch_ipin_switch].Cout != 0.) {
@@ -530,31 +530,6 @@ static void SetupAnnealSched(const t_options& Options,
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n");
     }
 
-    AnnealSched->alpha_min = Options.PlaceAlphaMin;
-    if (AnnealSched->alpha_min >= 1 || AnnealSched->alpha_min <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_min must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->alpha_max = Options.PlaceAlphaMax;
-    if (AnnealSched->alpha_max >= 1 || AnnealSched->alpha_max <= AnnealSched->alpha_min) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_max must be between alpha_min and 1 exclusive.\n");
-    }
-
-    AnnealSched->alpha_decay = Options.PlaceAlphaDecay;
-    if (AnnealSched->alpha_decay >= 1 || AnnealSched->alpha_decay <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_decay must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->success_min = Options.PlaceSuccessMin;
-    if (AnnealSched->success_min >= 1 || AnnealSched->success_min <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_min must be between 0 and 1 exclusive.\n");
-    }
-
-    AnnealSched->success_target = Options.PlaceSuccessTarget;
-    if (AnnealSched->success_target >= 1 || AnnealSched->success_target <= 0) {
-        VPR_FATAL_ERROR(VPR_ERROR_OTHER, "success_target must be between 0 and 1 exclusive.\n");
-    }
-
     AnnealSched->type = Options.anneal_sched_type;
 }
 
@@ -782,7 +757,7 @@ static void SetupServerOpts(const t_options& Options, t_server_opts* ServerOpts)
 }
 
 static void find_ipin_cblock_switch_index(const t_arch& Arch, int& wire_to_arch_ipin_switch, int& wire_to_arch_ipin_switch_between_dice) {
-    for (auto cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
+    for (int cb_switch_name_index = 0; cb_switch_name_index < (int)Arch.ipin_cblock_switch_name.size(); cb_switch_name_index++) {
         int ipin_cblock_switch_index = UNDEFINED;
         for (int iswitch = 0; iswitch < (int)Arch.switches.size(); ++iswitch) {
             if (Arch.switches[iswitch].name == Arch.ipin_cblock_switch_name[cb_switch_name_index]) {
diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index 9e6f1ed87eb..66d1da27d0a 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -3,7 +3,6 @@
 
 #include "vtr_assert.h"
 #include "vtr_log.h"
-#include "vtr_memory.h"
 
 #include "vpr_types.h"
 #include "vpr_error.h"
@@ -126,7 +125,6 @@ ClusteredNetlistStats::ClusteredNetlistStats() {
     auto& device_ctx = g_vpr_ctx.device();
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    int j;
     L_num_p_inputs = 0;
     L_num_p_outputs = 0;
     num_blocks_type = std::vector<int>(device_ctx.logical_block_types.size(), 0);
@@ -135,12 +133,12 @@ ClusteredNetlistStats::ClusteredNetlistStats() {
     logical_block_types = device_ctx.logical_block_types;
 
     /* Count I/O input and output pads */
-    for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
+    for (ClusterBlockId blk_id : cluster_ctx.clb_nlist.blocks()) {
         auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id);
         auto physical_tile = pick_physical_type(logical_block);
         num_blocks_type[logical_block->index]++;
         if (is_io_type(physical_tile)) {
-            for (j = 0; j < logical_block->pb_type->num_pins; j++) {
+            for (int j = 0; j < logical_block->pb_type->num_pins; j++) {
                 int physical_pin = get_physical_pin(physical_tile, logical_block, j);
 
                 if (cluster_ctx.clb_nlist.block_net(blk_id, j) != ClusterNetId::INVALID()) {
@@ -178,7 +176,7 @@ void ClusteredNetlistStats::write(OutputFormat fmt, std::ostream& output) const
 void writeClusteredNetlistStats(const std::string& block_usage_filename) {
     const auto stats = ClusteredNetlistStats();
 
-    // Print out the human readable version to stdout
+    // Print out the human-readable version to stdout
 
     stats.write(ClusteredNetlistStats::OutputFormat::HumanReadable, std::cout);
 
@@ -212,9 +210,6 @@ static void ShowAnnealSched(const t_annealing_sched& AnnealSched) {
         case e_sched_type::USER_SCHED:
             VTR_LOG("USER_SCHED\n");
             break;
-        case e_sched_type::DUSTY_SCHED:
-            VTR_LOG("DUSTY_SCHED\n");
-            break;
         default:
             VTR_LOG_ERROR("Unknown annealing schedule\n");
     }
@@ -225,12 +220,6 @@ static void ShowAnnealSched(const t_annealing_sched& AnnealSched) {
         VTR_LOG("AnnealSched.init_t: %f\n", AnnealSched.init_t);
         VTR_LOG("AnnealSched.alpha_t: %f\n", AnnealSched.alpha_t);
         VTR_LOG("AnnealSched.exit_t: %f\n", AnnealSched.exit_t);
-    } else if (e_sched_type::DUSTY_SCHED == AnnealSched.type) {
-        VTR_LOG("AnnealSched.alpha_min: %f\n", AnnealSched.alpha_min);
-        VTR_LOG("AnnealSched.alpha_max: %f\n", AnnealSched.alpha_max);
-        VTR_LOG("AnnealSched.alpha_decay: %f\n", AnnealSched.alpha_decay);
-        VTR_LOG("AnnealSched.success_min: %f\n", AnnealSched.success_min);
-        VTR_LOG("AnnealSched.success_target: %f\n", AnnealSched.success_target);
     }
 }
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index ec45d2e764f..1641e255b89 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1957,36 +1957,6 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("0.8")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceAlphaMin, "--alpha_min")
-        .help(
-            "For placement using Dusty's annealing schedule. Minimum (starting) value of alpha.")
-        .default_value("0.2")
-        .show_in(argparse::ShowIn::HELP_ONLY);
-
-    place_grp.add_argument(args.PlaceAlphaMax, "--alpha_max")
-        .help(
-            "For placement using Dusty's annealing schedule. Maximum (stopping) value of alpha.")
-        .default_value("0.9")
-        .show_in(argparse::ShowIn::HELP_ONLY);
-
-    place_grp.add_argument(args.PlaceAlphaDecay, "--alpha_decay")
-        .help(
-            "For placement using Dusty's annealing schedule. The value that alpha is scaled by after reset.")
-        .default_value("0.7")
-        .show_in(argparse::ShowIn::HELP_ONLY);
-
-    place_grp.add_argument(args.PlaceSuccessMin, "--anneal_success_min")
-        .help(
-            "For placement using Dusty's annealing schedule. Minimum success ratio when annealing before resetting the temperature to maintain the target success ratio.")
-        .default_value("0.1")
-        .show_in(argparse::ShowIn::HELP_ONLY);
-
-    place_grp.add_argument(args.PlaceSuccessTarget, "--anneal_success_target")
-        .help(
-            "For placement using Dusty's annealing schedule. Target success ratio when annealing.")
-        .default_value("0.25")
-        .show_in(argparse::ShowIn::HELP_ONLY);
-
     place_grp.add_argument<e_pad_loc_type, ParseFixPins>(args.pad_loc_type, "--fix_pins")
         .help(
             "Fixes I/O pad locations randomly during placement. Valid options:\n"
@@ -2996,7 +2966,7 @@ void set_conditional_defaults(t_options& args) {
      * Filenames
      */
 
-    //We may have recieved the full circuit filepath in the circuit name,
+    //We may have received the full circuit filepath in the circuit name,
     //remove the extension and any leading path elements
     VTR_ASSERT(args.CircuitName.provenance() == Provenance::SPECIFIED);
     auto name_ext = vtr::split_ext(args.CircuitName);
@@ -3132,15 +3102,9 @@ void set_conditional_defaults(t_options& args) {
     }
 
     //Which schedule?
-    if (args.PlaceAlphaMin.provenance() == Provenance::SPECIFIED // Any of these flags select Dusty's schedule
-        || args.PlaceAlphaMax.provenance() == Provenance::SPECIFIED
-        || args.PlaceAlphaDecay.provenance() == Provenance::SPECIFIED
-        || args.PlaceSuccessMin.provenance() == Provenance::SPECIFIED
-        || args.PlaceSuccessTarget.provenance() == Provenance::SPECIFIED) {
-        args.anneal_sched_type.set(e_sched_type::DUSTY_SCHED, Provenance::INFERRED);
-    } else if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
-               || args.PlaceExitT.provenance() == Provenance::SPECIFIED
-               || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) {
+    if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
+        || args.PlaceExitT.provenance() == Provenance::SPECIFIED
+        || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) {
         args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED);
     } else {
         args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index ca7f30d196f..b43e3734de1 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -119,11 +119,6 @@ struct t_options {
     argparse::ArgValue<float> PlaceInitT;
     argparse::ArgValue<float> PlaceExitT;
     argparse::ArgValue<float> PlaceAlphaT;
-    argparse::ArgValue<float> PlaceAlphaMin;
-    argparse::ArgValue<float> PlaceAlphaMax;
-    argparse::ArgValue<float> PlaceAlphaDecay;
-    argparse::ArgValue<float> PlaceSuccessMin;
-    argparse::ArgValue<float> PlaceSuccessTarget;
     argparse::ArgValue<e_sched_type> anneal_sched_type;
     argparse::ArgValue<e_place_algorithm> PlaceAlgorithm;
     argparse::ArgValue<e_place_algorithm> PlaceQuenchAlgorithm;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index af85be64a32..c18a9fe3665 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -466,7 +466,6 @@ constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3;
 /* Timing data structures end */
 enum class e_sched_type {
     AUTO_SCHED,
-    DUSTY_SCHED,
     USER_SCHED
 };
 /* Annealing schedule */
@@ -832,18 +831,6 @@ struct t_annealing_sched {
     float init_t;
     float alpha_t;
     float exit_t;
-
-    /* Parameters for DUSTY_SCHED                                         *
-     * The alpha ranges from alpha_min to alpha_max, decaying each        *
-     * iteration by `alpha_decay`.                                        *
-     * `restart_filter` is the low-pass coefficient (EWMA) for updating   *
-     * the new starting temperature for each alpha.                       *
-     * Give up after `wait` alphas.                                       */
-    float alpha_min;
-    float alpha_max;
-    float alpha_decay;
-    float success_min;
-    float success_target;
 };
 
 /******************************************************************
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 44fd2e85f12..04e2367fd31 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -79,25 +79,17 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
 }
 
 ///@brief Constructor: Initialize all annealing state variables and macros.
-t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
-                                     float first_t,
+t_annealing_state::t_annealing_state(float first_t,
                                      float first_rlim,
                                      int first_move_lim,
                                      float first_crit_exponent) {
     num_temps = 0;
-    alpha = annealing_sched.alpha_min;
+    alpha = 1.f;
     t = first_t;
-    restart_t = first_t;
     rlim = first_rlim;
     move_lim_max = first_move_lim;
     crit_exponent = first_crit_exponent;
-
-    /* Determine the current move_lim based on the schedule type */
-    if (annealing_sched.type == e_sched_type::DUSTY_SCHED) {
-        move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target));
-    } else {
-        move_lim = move_lim_max;
-    }
+    move_lim = move_lim_max;
 
     /* Store this inverse value for speed when updating crit_exponent. */
     INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM);
@@ -132,52 +124,23 @@ bool t_annealing_state::outer_loop_update(float success_rate,
     auto& cluster_ctx = g_vpr_ctx.clustering();
     float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
 
-    if (placer_opts.anneal_sched.type == e_sched_type::DUSTY_SCHED) {
-        // May get nan if there are no nets
-        bool restart_temp = t < t_exit || std::isnan(t_exit);
 
-        /* If the success rate or the temperature is *
-         * too low, reset the temperature and alpha. */
-        if (success_rate < placer_opts.anneal_sched.success_min || restart_temp) {
-            // Only exit anneal when alpha gets too large.
-            if (alpha > placer_opts.anneal_sched.alpha_max) {
-                return false;
-            }
-
-            // Take a half step from the restart temperature.
-            t = restart_t / sqrt(alpha);
-            // Update alpha.
-            alpha = 1.0 - ((1.0 - alpha) * placer_opts.anneal_sched.alpha_decay);
-        } else {
-            /* If the success rate is promising, next time   *
-             * reset t to the current annealing temperature. */
-            if (success_rate > placer_opts.anneal_sched.success_target) {
-                restart_t = t;
-            }
-            // Update t.
-            t *= alpha;
-        }
-
-        // Update move lim.
-        update_move_lim(placer_opts.anneal_sched.success_target, success_rate);
+    VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
+    // Automatically adjust alpha according to success rate.
+    if (success_rate > 0.96) {
+        alpha = 0.5;
+    } else if (success_rate > 0.8) {
+        alpha = 0.9;
+    } else if (success_rate > 0.15 || rlim > 1.) {
+        alpha = 0.95;
     } else {
-        VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
-        // Automatically adjust alpha according to success rate.
-        if (success_rate > 0.96) {
-            alpha = 0.5;
-        } else if (success_rate > 0.8) {
-            alpha = 0.9;
-        } else if (success_rate > 0.15 || rlim > 1.) {
-            alpha = 0.95;
-        } else {
-            alpha = 0.8;
-        }
-        // Update temp.
-        t *= alpha;
-        // Must be duplicated to retain previous behavior.
-        if (t < t_exit || std::isnan(t_exit)) {
-            return false;
-        }
+        alpha = 0.8;
+    }
+    // Update temp.
+    t *= alpha;
+    // Must be duplicated to retain previous behavior.
+    if (t < t_exit || std::isnan(t_exit)) {
+        return false;
     }
 
     // Update the range limiter.
@@ -283,8 +246,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     // Get the first range limiter
     placer_state_.mutable_move().first_rlim = (float)std::max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
 
-    annealing_state_ = t_annealing_state(placer_opts_.anneal_sched,
-                                         EPSILON,    // Set the temperature low to ensure that initial placement quality will be preserved
+    annealing_state_ = t_annealing_state(EPSILON,    // Set the temperature low to ensure that initial placement quality will be preserved
                                          placer_state_.move().first_rlim,
                                          first_move_lim,
                                          first_crit_exponent);
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 730f01eec96..bf3fb62692e 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -42,23 +42,18 @@ struct t_swap_stats {
  * Public members:
  *   @param t
  *              Temperature for simulated annealing.
- *   @param restart_t
- *              Temperature used after restart due to minimum success ratio.
- *              Currently only used and updated by DUSTY_SCHED.
  *   @param alpha
  *              Temperature decays factor (multiplied each outer loop iteration).
  *   @param num_temps
  *              The count of how many temperature iterations have passed.
- *
  *   @param rlim
  *              Range limit for block swaps.
- *              Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *              Currently only updated by AUTO_SCHED.
  *   @param crit_exponent
  *              Used by timing-driven placement to "sharpen" the timing criticality.
- *              Depends on rlim. Currently only updated by DUSTY_SCHED and AUTO_SCHED.
+ *              Depends on rlim. Currently only updated by AUTO_SCHED.
  *   @param move_lim
  *              Current block move limit.
- *              Currently only updated by DUSTY_SCHED.
  *   @param move_lim_max
  *              Maximum block move limit.
  *
@@ -80,7 +75,6 @@ struct t_swap_stats {
 class t_annealing_state {
   public:
     float t;
-    float restart_t;
     float alpha;
     int num_temps;
 
@@ -96,8 +90,7 @@ class t_annealing_state {
 
   public: //Constructor
     t_annealing_state() = default;
-    t_annealing_state(const t_annealing_sched& annealing_sched,
-                      float first_t,
+    t_annealing_state(float first_t,
                       float first_rlim,
                       int first_move_lim,
                       float first_crit_exponent);
@@ -108,8 +101,6 @@ class t_annealing_state {
      *
      *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
      *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
-     *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
-     *                See doc/src/vpr/dusty_sa.rst for more details.
      *
      * @return True->continues the annealing. False->exits the annealing.
      */

From f1aaf528bad5fe68c98ae181ef501850e128abe0 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 12:41:33 -0500
Subject: [PATCH 20/31] change the order of if statement so that
 e_place_algorithm::CRITICALITY_TIMING_PLACE is checked first

---
 vpr/src/place/annealer.cpp          | 63 ++++++++++++++++-------------
 vpr/src/place/annealer.h            |  4 +-
 vpr/src/route/connection_router.cpp |  2 +-
 3 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 04e2367fd31..4bba30fb103 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -445,11 +445,34 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_,
                                                               bb_delta_c, timing_delta_c);
 
-        //For setup slack analysis, we first do a timing analysis to get the newest
-        //slack values resulted from the proposed block moves. If the move turns out
-        //to be accepted, we keep the updated slack values and commit the block moves.
-        //If rejected, we reject the proposed block moves and revert this timing analysis.
-        if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
+
+        if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
+            /* Take delta_c as a combination of timing and wiring cost. In
+             * addition to `timing_tradeoff`, we normalize the cost values.
+             * CRITICALITY_TIMING_PLACE algorithm works with somewhat stale
+             * timing information to save CPU time.
+             */
+            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
+                           "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, "
+                           "timing_delta_c %e, timing_cost_norm %e\n",
+                           bb_delta_c,
+                           costs_.bb_cost_norm,
+                           timing_tradeoff,
+                           timing_delta_c,
+                           costs_.timing_cost_norm);
+            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
+                      + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
+        } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
+            /* For setup slack analysis, we first do a timing analysis to get the newest
+             * slack values resulted from the proposed block moves. If the move turns out
+             * to be accepted, we keep the updated slack values and commit the block moves.
+             * If rejected, we reject the proposed block moves and revert this timing analysis.
+             *
+             * It should be noted that when SLACK_TIMING_PLACE algorithm is used, proposed moves
+             * are evaluated with up-to-date timing information, which is more expensive but more
+             * accurate.
+             */
+
             // Invalidates timing of modified connections for incremental timing updates.
             pin_timing_invalidator_->invalidate_affected_connections(blocks_affected_, timing_info_);
 
@@ -473,19 +496,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             /* Get the setup slack analysis cost */
             //TODO: calculate a weighted average of the slack cost and wiring cost
             delta_c = analyze_setup_slack_cost(setup_slacks_, placer_state_) * costs_.timing_cost_norm;
-        } else if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
-            /* Take delta_c as a combination of timing and wiring cost. In
-             * addition to `timing_tradeoff`, we normalize the cost values */
-            VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
-                           "\t\tMove bb_delta_c %e, bb_cost_norm %e, timing_tradeoff %f, "
-                           "timing_delta_c %e, timing_cost_norm %e\n",
-                           bb_delta_c,
-                           costs_.bb_cost_norm,
-                           timing_tradeoff,
-                           timing_delta_c,
-                           costs_.timing_cost_norm);
-            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
-                      + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
         } else {
             VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
             VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug,
@@ -519,15 +529,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             costs_.cost += delta_c;
             costs_.bb_cost += bb_delta_c;
 
-            if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
-                // Update the timing driven cost as usual
-                costs_.timing_cost += timing_delta_c;
-
-                // Commit the setup slack information
-                // The timing delay and cost values should be committed already
-                commit_setup_slacks(setup_slacks_, placer_state_);
-            }
-
             if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
                 costs_.timing_cost += timing_delta_c;
 
@@ -539,6 +540,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                 /* Update the connection_timing_cost and connection_delay
                  * values from the temporary values. */
                 placer_state_.mutable_timing().commit_td_cost(blocks_affected_);
+
+            } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
+                // Update the timing driven cost as usual
+                costs_.timing_cost += timing_delta_c;
+
+                // Commit the setup slack information
+                // The timing delay and cost values should be committed already
+                commit_setup_slacks(setup_slacks_, placer_state_);
             }
 
             // Update net cost functions and reset flags.
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index bf3fb62692e..1197181f44e 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -191,7 +191,7 @@ class PlacementAnnealer {
                            float timing_bb_factor,
                            bool manual_move_enabled);
 
-    ///@brief Returns the total number iterations or attempted swaps
+    ///@brief Returns the total number of iterations (attempted swaps).
     int get_total_iteration() const;
 
     ///@brief Returns a constant reference to the annealing state
@@ -253,7 +253,7 @@ class PlacementAnnealer {
     int quench_recompute_limit_;
     ///Used to trigger a BB and NoC cost re-computation from scratch
     int moves_since_cost_recompute_;
-    ///Total number of iterations or attempted swaps
+    ///Total number of iterations (attempted swaps).
     int tot_iter_;
     ///Indicates whether the annealer has entered into the quench stage
     bool quench_started_;
diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp
index 6f2b5d2ebe6..7fd0f0d1680 100644
--- a/vpr/src/route/connection_router.cpp
+++ b/vpr/src/route/connection_router.cpp
@@ -10,7 +10,7 @@ static bool relevant_node_to_target(const RRGraphView* rr_graph,
 
 static void update_router_stats(RouterStats* router_stats,
                                 bool is_push,
-                                RRNodeId rr_node_i,
+                                RRNodeId rr_node_id,
                                 const RRGraphView* rr_graph);
 
 /** return tuple <found_path, retry_with_full_bb, cheapest> */

From e6e02acd20c10ed8172d49c0d0b6a97edb6762ad Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 12:58:51 -0500
Subject: [PATCH 21/31] don't resize PlacerMoveContext.??_coord in the
 constructor

---
 vpr/src/place/annealer.cpp     | 63 ++++++++++++++++------------------
 vpr/src/place/placer_state.cpp |  7 +---
 vpr/src/place/placer_state.h   |  2 +-
 3 files changed, 31 insertions(+), 41 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 4bba30fb103..4fae47d7544 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -344,8 +344,8 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
      */
     auto& blk_loc_registry = placer_state_.mutable_blk_loc_registry();
 
-    float rlim_escape_fraction = placer_opts_.rlim_escape_fraction;
-    float timing_tradeoff = placer_opts_.timing_tradeoff;
+    // increment the call counter
+    swap_stats_.num_ts_called++;
 
     PlaceCritParams crit_params{annealing_state_.crit_exponent,
                                 placer_opts_.place_crit_limit};
@@ -353,27 +353,19 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     // move type and block type chosen by the agent
     t_propose_action proposed_action{e_move_type::UNIFORM, -1};
 
-    swap_stats_.num_ts_called++;
-
     MoveOutcomeStats move_outcome_stats;
 
     /* I'm using negative values of proposed_net_cost as a flag,
      * so DO NOT use cost functions that can go negative. */
-
     double delta_c = 0;        //Change in cost due to this swap.
     double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
     double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
 
-    // Determine whether we need to force swap two router blocks
-    bool router_block_move = false;
-    if (noc_opts_.noc) {
-        router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_);
-    }
 
     /* Allow some fraction of moves to not be restricted by rlim,
      * in the hopes of better escaping local minima. */
     float rlim;
-    if (rlim_escape_fraction > 0. && rng_.frand() < rlim_escape_fraction) {
+    if (placer_opts_.rlim_escape_fraction > 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) {
         rlim = std::numeric_limits<float>::infinity();
     } else {
         rlim = annealing_state_.rlim;
@@ -381,6 +373,12 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
 
     e_create_move create_move_outcome = e_create_move::ABORT;
 
+    // Determine whether we need to force swap two NoC router blocks
+    bool router_block_move = false;
+    if (noc_opts_.noc) {
+        router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_);
+    }
+
     //When manual move toggle button is active, the manual move window asks the user for input.
     if (manual_move_enabled) {
 #ifndef NO_GRAPHICS
@@ -422,30 +420,28 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     } else {
         VTR_ASSERT(create_move_outcome == e_create_move::VALID);
 
-        /*
-         * To make evaluating the move simpler (e.g. calculating changed bounding box),
+        /* To make evaluating the move simpler (e.g. calculating changed bounding box),
          * we first move the blocks to their new locations (apply the move to
          * blk_loc_registry.block_locs) and then compute the change in cost. If the move
-         * is accepted, the inverse look-up in place_ctx.grid_blocks is updated
+         * is accepted, the inverse look-up in blk_loc_registry.grid_blocks is updated
          * (committing the move). If the move is rejected, the blocks are returned to
          * their original positions (reverting blk_loc_registry.block_locs to its original state).
          *
-         * Note that the inverse look-up place_ctx.grid_blocks is only updated after
+         * Note that the inverse look-up blk_loc_registry.grid_blocks is only updated after
          * move acceptance is determined, so it should not be used when evaluating a move.
          */
 
-        /* Update the block positions */
+        // Update the block positions
         blk_loc_registry.apply_move_blocks(blocks_affected_);
 
-        //Find all the nets affected by this swap and update the wiring costs.
-        //This cost value doesn't depend on the timing info.
-        //
-        //Also find all the pins affected by the swap, and calculates new connection
-        //delays and timing costs and store them in proposed_* data structures.
+        /* Find all the nets affected by this swap and update the wiring costs.
+         * This cost value doesn't depend on the timing info.
+         * Also find all the pins affected by the swap, and calculate new connection
+         * delays and timing costs and store them in proposed_* data structures.
+         */
         net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_,
                                                               bb_delta_c, timing_delta_c);
 
-
         if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
             /* Take delta_c as a combination of timing and wiring cost. In
              * addition to `timing_tradeoff`, we normalize the cost values.
@@ -457,11 +453,11 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                            "timing_delta_c %e, timing_cost_norm %e\n",
                            bb_delta_c,
                            costs_.bb_cost_norm,
-                           timing_tradeoff,
+                           placer_opts_.timing_tradeoff,
                            timing_delta_c,
                            costs_.timing_cost_norm);
-            delta_c = (1 - timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
-                      + timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
+            delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
+                      + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
         } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
             /* For setup slack analysis, we first do a timing analysis to get the newest
              * slack values resulted from the proposed block moves. If the move turns out
@@ -515,7 +511,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             delta_c += calculate_noc_cost(noc_delta_c, costs_.noc_cost_norm_factors, noc_opts_);
         }
 
-        // 1 -> move accepted, 0 -> rejected.
+        // determine whether the move is accepted or rejected
         move_outcome = assess_swap_(delta_c, annealing_state_.t);
 
         //Updates the manual_move_state members and displays costs to the user to decide whether to ACCEPT/REJECT manual move.
@@ -582,9 +578,13 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             // Restore the blk_loc_registry.block_locs data structures to their state before the move.
             blk_loc_registry.revert_move_blocks(blocks_affected_);
 
-            if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
-                /* Revert the timing delays and costs to pre-update values.       */
-                /* These routines must be called after reverting the block moves. */
+            if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
+                // Un-stage the values stored in proposed_* data structures
+                placer_state_.mutable_timing().revert_td_cost(blocks_affected_);
+            } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
+                /* Revert the timing delays and costs to pre-update values.
+                 * These routines must be called after reverting the block moves.
+                 */
                 //TODO: make this process incremental
                 comp_td_connection_delays(delay_model_, placer_state_);
                 comp_td_costs(delay_model_, *criticalities_, placer_state_, &costs_.timing_cost);
@@ -603,11 +603,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                     "The current setup slacks should be identical to the values before the try swap timing info update.");
             }
 
-            if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
-                // Un-stage the values stored in proposed_* data structures
-                placer_state_.mutable_timing().revert_td_cost(blocks_affected_);
-            }
-
             if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
                 ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
             }
diff --git a/vpr/src/place/placer_state.cpp b/vpr/src/place/placer_state.cpp
index e83d74bbe2c..998c097b4ca 100644
--- a/vpr/src/place/placer_state.cpp
+++ b/vpr/src/place/placer_state.cpp
@@ -8,11 +8,6 @@ PlacerMoveContext::PlacerMoveContext(bool cube_bb) {
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    // allocate helper vectors that are used by many move generators
-    X_coord.resize(10, 0);
-    Y_coord.resize(10, 0);
-    layer_coord.resize(10, 0);
-
     const size_t num_nets = cluster_ctx.clb_nlist.nets().size();
 
     const int num_layers = device_ctx.grid.get_num_layers();
@@ -27,7 +22,7 @@ PlacerMoveContext::PlacerMoveContext(bool cube_bb) {
 
     num_sink_pin_layer.resize({num_nets, size_t(num_layers)});
     for (size_t flat_idx = 0; flat_idx < num_sink_pin_layer.size(); flat_idx++) {
-        auto& elem = num_sink_pin_layer.get(flat_idx);
+        int& elem = num_sink_pin_layer.get(flat_idx);
         elem = OPEN;
     }
 }
diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h
index c727ac181e5..8f3b966a56d 100644
--- a/vpr/src/place/placer_state.h
+++ b/vpr/src/place/placer_state.h
@@ -138,7 +138,7 @@ struct PlacerMoveContext : public Context {
     // The first range limit calculated by the annealer
     float first_rlim;
 
-    // Scratch vectors that are used by different directed moves for temporary calculations (allocated here to save runtime)
+    // Scratch vectors that are used by different directed moves for temporary calculations
     // These vectors will grow up with the net size as it is mostly used to save coords of the net pins or net bb edges
     // Given that placement moves involve operations on each coordinate independently, we chose to 
     // utilize a Struct of Arrays (SoA) rather than an Array of Struct (AoS).

From f43d22d4bf27f0a80daec6dd22f6599d0a12bd5b Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 13:07:17 -0500
Subject: [PATCH 22/31] add incr_blk_type_moves() and incr_accept_reject()
 methods

---
 vpr/src/place/annealer.cpp     | 16 ++++------------
 vpr/src/place/move_generator.h | 21 +++++++++++++++++++++
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 4fae47d7544..6106f88e0e1 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -395,9 +395,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
         create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_);
     }
 
-    if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
-        ++move_type_stats_.blk_type_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-    }
+    move_type_stats_.incr_blk_type_moves(proposed_action);
 
     if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) LOG_MOVE_STATS_PROPOSED();
 
@@ -552,11 +550,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
             // Update clb data structures since we kept the move.
             blk_loc_registry.commit_move_blocks(blocks_affected_);
 
-            // if the agent proposed the block type, then collect the block type stat
-            if (proposed_action.logical_blk_type_index != -1) {
-                ++move_type_stats_.accepted_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-            }
-
             if (noc_opts_.noc){
                 noc_cost_handler_->commit_noc_costs();
                 costs_ += noc_delta_c;
@@ -603,15 +596,14 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                     "The current setup slacks should be identical to the values before the try swap timing info update.");
             }
 
-            if (proposed_action.logical_blk_type_index != -1) { //if the agent proposed the block type, then collect the block type stat
-                ++move_type_stats_.rejected_moves[proposed_action.logical_blk_type_index][(int)proposed_action.move_type];
-            }
-            /* Revert the traffic flow routes within the NoC*/
+            // Revert the traffic flow routes within the NoC
             if (noc_opts_.noc) {
                 noc_cost_handler_->revert_noc_traffic_flow_routes(blocks_affected_);
             }
         }
 
+        move_type_stats_.incr_accept_reject(proposed_action, move_outcome);
+
         move_outcome_stats.delta_cost_norm = delta_c;
         move_outcome_stats.delta_bb_cost_norm = bb_delta_c * costs_.bb_cost_norm;
         move_outcome_stats.delta_timing_cost_norm = timing_delta_c * costs_.timing_cost_norm;
diff --git a/vpr/src/place/move_generator.h b/vpr/src/place/move_generator.h
index 172c04d34f5..e39493e16c6 100644
--- a/vpr/src/place/move_generator.h
+++ b/vpr/src/place/move_generator.h
@@ -38,6 +38,27 @@ struct MoveTypeStat {
      * @brief Prints placement perturbation distribution by block and move type.
      */
     void print_placement_move_types_stats() const;
+
+    inline void incr_blk_type_moves(const t_propose_action& proposed_action) {
+        // Per-block-type stats are only collected when the agent proposed the block type (index != -1).
+        if (proposed_action.logical_blk_type_index == -1) return;
+        ++blk_type_moves[proposed_action.logical_blk_type_index][static_cast<int>(proposed_action.move_type)];
+    }
+
+    inline void incr_accept_reject(const t_propose_action& proposed_action,
+                                   e_move_result move_result) {
+        const bool accepted = (move_result == e_move_result::ACCEPTED);
+        if (!accepted) { // any outcome other than ACCEPTED is expected to be REJECTED
+            VTR_ASSERT_SAFE(move_result == e_move_result::REJECTED);
+        }
+        // Per-block-type stats are only collected when the agent proposed the block type (index != -1).
+        if (proposed_action.logical_blk_type_index == -1) return;
+        if (accepted) {
+            ++accepted_moves[proposed_action.logical_blk_type_index][static_cast<int>(proposed_action.move_type)];
+        } else {
+            ++rejected_moves[proposed_action.logical_blk_type_index][static_cast<int>(proposed_action.move_type)];
+        }
+    }
 };
 
 /**

From 64dcd5bee131188f2886d4477c7c6f03d231e90a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 13:17:17 -0500
Subject: [PATCH 23/31] don't include time.h as we no longer call clock()

---
 vpr/src/draw/draw.cpp                  | 11 -----------
 vpr/src/draw/draw_basic.cpp            | 10 ----------
 vpr/src/draw/draw_floorplanning.cpp    | 22 +---------------------
 vpr/src/draw/draw_mux.cpp              | 11 -----------
 vpr/src/draw/draw_rr.cpp               | 10 ----------
 vpr/src/draw/draw_rr_edges.cpp         | 11 -----------
 vpr/src/draw/draw_searchbar.cpp        | 10 ----------
 vpr/src/draw/draw_toggle_functions.cpp | 11 -----------
 vpr/src/draw/draw_triangle.cpp         | 10 ----------
 vpr/src/draw/search_bar.cpp            | 24 ++++++------------------
 10 files changed, 7 insertions(+), 123 deletions(-)

diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp
index c77ab624c5c..546bc8b55f3 100644
--- a/vpr/src/draw/draw.cpp
+++ b/vpr/src/draw/draw.cpp
@@ -51,17 +51,6 @@
 #include "move_utils.h"
 #include "ui_setup.h"
 
-
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp
index 43fc2b78b32..99058b0975c 100644
--- a/vpr/src/draw/draw_basic.cpp
+++ b/vpr/src/draw/draw_basic.cpp
@@ -28,16 +28,6 @@
 #include "route_export.h"
 #include "tatum/report/TimingPathCollector.hpp"
 
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp
index 9ba201987aa..9e56cfda5ac 100644
--- a/vpr/src/draw/draw_floorplanning.cpp
+++ b/vpr/src/draw/draw_floorplanning.cpp
@@ -1,39 +1,19 @@
 #include <vector>
 
-#include "vpr_utils.h"
 #include "vpr_error.h"
 
 #include "globals.h"
-#include "atom_netlist.h"
+
 #include "draw_floorplanning.h"
 #include "user_place_constraints.h"
 #include "draw_color.h"
 #include "draw.h"
-#include "draw_rr.h"
-#include "draw_rr_edges.h"
-#include "draw_basic.h"
-#include "draw_toggle_functions.h"
-#include "draw_triangle.h"
-#include "draw_searchbar.h"
-#include "draw_mux.h"
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
 #include "intra_logic_block.h"
-#include "move_utils.h"
 #include "route_export.h"
 #include "tatum/report/TimingPathCollector.hpp"
 
-
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_mux.cpp b/vpr/src/draw/draw_mux.cpp
index e741112b6ba..e83fde50296 100644
--- a/vpr/src/draw/draw_mux.cpp
+++ b/vpr/src/draw/draw_mux.cpp
@@ -10,17 +10,6 @@
 #include "draw_mux.h"
 #include "read_xml_arch_file.h"
 
-
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp
index abfbf0babe8..e3c2467cd9e 100644
--- a/vpr/src/draw/draw_rr.cpp
+++ b/vpr/src/draw/draw_rr.cpp
@@ -24,16 +24,6 @@
 #include "read_xml_arch_file.h"
 #include "draw_global.h"
 
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp
index c4e8cbe507b..0815be661a6 100644
--- a/vpr/src/draw/draw_rr_edges.cpp
+++ b/vpr/src/draw/draw_rr_edges.cpp
@@ -18,17 +18,6 @@
 #include "draw_global.h"
 #include "draw_basic.h"
 
-
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp
index 834457e9263..a90583f42ce 100644
--- a/vpr/src/draw/draw_searchbar.cpp
+++ b/vpr/src/draw/draw_searchbar.cpp
@@ -16,16 +16,6 @@
 #include "draw_global.h"
 #include "intra_logic_block.h"
 
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_toggle_functions.cpp b/vpr/src/draw/draw_toggle_functions.cpp
index 9dab5955450..968808c2906 100644
--- a/vpr/src/draw/draw_toggle_functions.cpp
+++ b/vpr/src/draw/draw_toggle_functions.cpp
@@ -14,17 +14,6 @@
 #include "draw_global.h"
 #include "draw_basic.h"
 
-
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/draw_triangle.cpp b/vpr/src/draw/draw_triangle.cpp
index b37785b3ab1..82b5bd45376 100644
--- a/vpr/src/draw/draw_triangle.cpp
+++ b/vpr/src/draw/draw_triangle.cpp
@@ -8,16 +8,6 @@
 #include "draw_global.h"
 
 
-#ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-              * track CPU runtime.														   */
-#    include <time.h>
-#else /* For X11. The clock() function in time.h will not output correct time difference   *
-       * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-       * which means tracking CPU time will not be the same as the actual wall clock time. *
-       * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#    include <sys/time.h>
-#endif
-
 #ifndef NO_GRAPHICS
 
 //To process key presses we need the X11 keysym definitions,
diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp
index b557b27d5ca..5e78934841a 100644
--- a/vpr/src/draw/search_bar.cpp
+++ b/vpr/src/draw/search_bar.cpp
@@ -42,16 +42,6 @@
 #    include "route_export.h"
 #    include "search_bar.h"
 
-#    ifdef WIN32 /* For runtime tracking in WIN32. The clock() function defined in time.h will *
-                  * track CPU runtime.														   */
-#        include <time.h>
-#    else /* For X11. The clock() function in time.h will not output correct time difference   *
-           * for X11, because the graphics is processed by the Xserver rather than local CPU,  *
-           * which means tracking CPU time will not be the same as the actual wall clock time. *
-           * Thus, so use gettimeofday() in sys/time.h to track actual calendar time.          */
-#        include <sys/time.h>
-#    endif
-
 //To process key presses we need the X11 keysym definitions,
 //which are unavailable when building with MINGW
 #    if defined(X11) && !defined(__MINGW32__)
@@ -76,7 +66,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) {
     std::stringstream ss(user_input);
 
     auto search_type = get_search_type(app);
-    if (search_type == "")
+    if (search_type.empty())
         return;
 
     // reset
@@ -119,7 +109,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) {
          *
          * If the block does not exist in the atom netlist, we will check the CLB netlist to see if
          * they searched for a cluster block*/
-        std::string block_name = "";
+        std::string block_name;
         ss >> block_name;
 
         AtomBlockId atom_blk_id = atom_ctx.nlist.find_block(block_name);
@@ -159,7 +149,7 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) {
     else if (search_type == "Net Name") {
         //in this case, all nets (clb and non-clb) are contained in the atom netlist
         //So we only need to search this one
-        std::string net_name = "";
+        std::string net_name;
         ss >> net_name;
         AtomNetId atom_net_id = atom_ctx.nlist.find_net(net_name);
 
@@ -376,8 +366,6 @@ void warning_dialog_box(const char* message) {
                              "response",
                              G_CALLBACK(gtk_widget_destroy),
                              dialog);
-
-    return;
 }
 
 /**
@@ -411,7 +399,7 @@ void search_type_changed(GtkComboBox* self, ezgl::application* app) {
     } else if (searchType == "Net Name") {
         gtk_entry_completion_set_model(completion, netNames);
     } else { //setting to null if option does not require auto-complete
-        gtk_entry_completion_set_model(completion, NULL);
+        gtk_entry_completion_set_model(completion, nullptr);
         gtk_entry_set_completion(searchBar, nullptr);
     }
 }
@@ -506,10 +494,10 @@ void enable_autocomplete(ezgl::application* app) {
     auto draw_state = get_draw_state_vars();
 
     std::string searchType = get_search_type(app);
-    if (searchType == "")
+    if (searchType.empty())
         return;
     //Checking to make sure that we are on a mode that uses auto-complete
-    if (gtk_entry_completion_get_model(completion) == NULL) {
+    if (gtk_entry_completion_get_model(completion) == nullptr) {
         std::cout << "NO MODEL SELECTED" << std::endl;
         return;
     }

From b3c7e72844ef1497ef63093f76bf323094534219 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 15:04:05 -0500
Subject: [PATCH 24/31] move the ownership of move generators to annealer

---
 vpr/src/place/annealer.cpp | 47 +++++++++++++++++++++++--------------
 vpr/src/place/annealer.h   | 48 ++++++++++++++++++++++++--------------
 vpr/src/place/place.cpp    | 38 ++++--------------------------
 3 files changed, 64 insertions(+), 69 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index cc726283aa0..415374a61bc 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -16,6 +16,7 @@
 #include "place_timing_update.h"
 #include "read_place.h"
 #include "placer_breakpoint.h"
+#include "RL_agent_util.h"
 
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
@@ -31,7 +32,7 @@
  * If no slack values have changed, then return an arbitrary positive number. A
  * move resulting in no change in the slack values should probably be unnecessary.
  *
- * The sorting is need to prevent in the unlikely circumstances that a bad slack
+ * The sorting is needed to handle the unlikely circumstance that a bad slack
  * value suddenly got very good due to the block move, while a good slack value
  * got very bad, perhaps even worse than the original worse slack value.
  */
@@ -183,8 +184,8 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
                                      std::optional<NocCostHandler>& noc_cost_handler,
                                      const t_noc_opts& noc_opts,
                                      vtr::RngContainer& rng,
-                                     MoveGenerator& move_generator_1,
-                                     MoveGenerator& move_generator_2,
+                                     std::unique_ptr<MoveGenerator>&& move_generator_1,
+                                     std::unique_ptr<MoveGenerator>&& move_generator_2,
                                      ManualMoveGenerator& manual_move_generator,
                                      const PlaceDelayModel* delay_model,
                                      PlacerCriticalities* criticalities,
@@ -199,9 +200,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , noc_cost_handler_(noc_cost_handler)
     , noc_opts_(noc_opts)
     , rng_(rng)
-    , move_generator_1_(move_generator_1)
-    , move_generator_2_(move_generator_2)
+    , move_generator_1_(std::move(move_generator_1))
+    , move_generator_2_(std::move(move_generator_2))
     , manual_move_generator_(manual_move_generator)
+    , agent_state_(e_agent_state::EARLY_IN_THE_ANNEAL)
     , delay_model_(delay_model)
     , criticalities_(criticalities)
     , setup_slacks_(setup_slacks)
@@ -294,12 +296,8 @@ float PlacementAnnealer::estimate_starting_temperature() {
         }
 #endif /*NO_GRAPHICS*/
 
-        // TODO: remove this
-        constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
-
         // Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack
-        e_move_result swap_result = try_swap(move_generator_1_, placer_opts_.place_algorithm,
-                                             REWARD_BB_TIMING_RELATIVE_WEIGHT, manual_move_enabled);
+        e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             num_accepted++;
@@ -334,7 +332,6 @@ float PlacementAnnealer::estimate_starting_temperature() {
 
 e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
                                           const t_place_algorithm& place_algorithm,
-                                          float timing_bb_factor,
                                           bool manual_move_enabled) {
     /* Picks some block and moves it to another spot.  If this spot is
      * occupied, switch the blocks.  Assess the change in cost function.
@@ -622,7 +619,7 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     // the move generators status since this outcome is not a direct
     // consequence of the move generator
     if (!router_block_move) {
-        move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, timing_bb_factor);
+        move_generator.calculate_reward_and_process_outcome(move_outcome_stats, delta_c, REWARD_BB_TIMING_RELATIVE_WEIGHT);
     }
 
 #ifndef NO_GRAPHICS
@@ -666,9 +663,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
     costs_.cost = costs_.get_total_cost(placer_opts_, noc_opts_);
 }
 
-/* Function which contains the inner loop of the simulated annealing */
-void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
-                                             float timing_bb_factor) {
+void PlacementAnnealer::placement_inner_loop() {
     // How many times have we dumped placement to a file this temperature?
     int inner_placement_save_count = 0;
 
@@ -676,10 +671,12 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
 
     bool manual_move_enabled = false;
 
+    MoveGenerator& move_generator = select_move_generator(move_generator_1_, move_generator_2_, agent_state_,
+                                                          placer_opts_, quench_started_);
+
     // Inner loop begins
     for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm,
-                                             timing_bb_factor, manual_move_enabled);
+        e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             // Move was accepted.  Update statistics that are useful for the annealing schedule.
@@ -743,6 +740,18 @@ void PlacementAnnealer::placement_inner_loop(MoveGenerator& move_generator,
     // Calculate the success_rate and std_dev of the costs.
     placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim);
 
+    // update the RL agent's state
+    if (!quench_started_) {
+        if (placer_opts_.place_algorithm.is_timing_driven() &&
+            placer_opts_.place_agent_multistate &&
+            agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) {
+            if (annealing_state_.alpha < 0.85 && annealing_state_.alpha > 0.6) {
+                agent_state_ = e_agent_state::LATE_IN_THE_ANNEAL;
+                VTR_LOG("Agent's 2nd state: \n");
+            }
+        }
+    }
+
     tot_iter_ += annealing_state_.move_lim;
     ++annealing_state_.num_temps;
 }
@@ -752,6 +761,10 @@ int PlacementAnnealer::get_total_iteration() const {
     return tot_iter_;
 }
 
+e_agent_state PlacementAnnealer::get_agent_state() const {
+    return agent_state_; // RL agent state as currently tracked by this annealer
+}
+
 const t_annealing_state& PlacementAnnealer::get_annealing_state() const {
     return annealing_state_;
 }
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 1197181f44e..3a84a05756d 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -12,6 +12,7 @@
 class PlacerState;
 class t_placer_costs;
 struct t_placer_opts;
+enum class e_agent_state;
 
 class NocCostHandler;
 class ManualMoveGenerator;
@@ -152,8 +153,8 @@ class PlacementAnnealer {
                       std::optional<NocCostHandler>& noc_cost_handler,
                       const t_noc_opts& noc_opts,
                       vtr::RngContainer& rng,
-                      MoveGenerator& move_generator_1,
-                      MoveGenerator& move_generator_2,
+                      std::unique_ptr<MoveGenerator>&& move_generator_1,
+                      std::unique_ptr<MoveGenerator>&& move_generator_2,
                       ManualMoveGenerator& manual_move_generator,
                       const PlaceDelayModel* delay_model,
                       PlacerCriticalities* criticalities,
@@ -162,9 +163,8 @@ class PlacementAnnealer {
                       NetPinTimingInvalidator* pin_timing_invalidator,
                       int move_lim);
 
-    ///@brief Contains the inner loop of the simulated annealing
-    void placement_inner_loop(MoveGenerator& move_generator,
-                              float timing_bb_factor);
+    /// @brief Contains the inner loop of the simulated annealing
+    void placement_inner_loop();
 
     void outer_loop_update_timing_info();
 
@@ -188,13 +188,15 @@ class PlacementAnnealer {
      */
     e_move_result try_swap(MoveGenerator& move_generator,
                            const t_place_algorithm& place_algorithm,
-                           float timing_bb_factor,
                            bool manual_move_enabled);
 
-    ///@brief Returns the total number iterations (attempted swaps).
+    /// @brief Returns the total number iterations (attempted swaps).
     int get_total_iteration() const;
 
-    ///@brief Returns a constant reference to the annealing state
+    /// @brief Return the RL agent's state
+    e_agent_state get_agent_state() const;
+
+    /// @brief Returns a constant reference to the annealing state
     const t_annealing_state& get_annealing_state() const;
 
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
@@ -209,7 +211,7 @@ class PlacementAnnealer {
   private:
     e_move_result assess_swap_(double delta_c, double t);
 
-  public:
+  private:
     const t_placer_opts& placer_opts_;
     PlacerState& placer_state_;
     t_placer_costs& costs_;
@@ -218,9 +220,11 @@ class PlacementAnnealer {
     const t_noc_opts& noc_opts_;
     vtr::RngContainer& rng_;
 
-    MoveGenerator& move_generator_1_;
-    MoveGenerator& move_generator_2_;
+    std::unique_ptr<MoveGenerator> move_generator_1_;
+    std::unique_ptr<MoveGenerator> move_generator_2_;
     ManualMoveGenerator& manual_move_generator_;
+    /// RL agent state definition
+    e_agent_state agent_state_;
 
     const PlaceDelayModel* delay_model_;
     PlacerCriticalities* criticalities_;
@@ -231,7 +235,7 @@ class PlacementAnnealer {
     int outer_crit_iter_count_;
 
     t_annealing_state annealing_state_;
-    ///Swap statistics keep record of the number accepted/rejected/aborted swaps.
+    /// Swap statistics keep record of the number accepted/rejected/aborted swaps.
     t_swap_stats swap_stats_;
     MoveTypeStat move_type_stats_;
     t_placer_statistics placer_stats_;
@@ -247,15 +251,16 @@ class PlacementAnnealer {
      */
     static constexpr int MAX_MOVES_BEFORE_RECOMPUTE = 500000;
 
-    ///Specifies how often timing information is recomputed when the annealer isn't in the quench stage
+    /// Specifies how often (after how many swaps) timing information is recomputed
+    /// when the annealer isn't in the quench stage
     int inner_recompute_limit_;
-    ///Specifies how often timing information is recomputed when the annealer is in the quench stage
+    /// Specifies how often timing information is recomputed when the annealer is in the quench stage
     int quench_recompute_limit_;
-    ///Used to trigger a BB and NoC cost re-computation from scratch
+    /// Used to trigger a BB and NoC cost re-computation from scratch
     int moves_since_cost_recompute_;
-    ///Total number of iterations (attempted swaps).
+    /// Total number of iterations (attempted swaps).
     int tot_iter_;
-    ///Indicates whether the annealer has entered into the quench stage
+    /// Indicates whether the annealer has entered into the quench stage
     bool quench_started_;
 
     void LOG_MOVE_STATS_HEADER();
@@ -263,7 +268,14 @@ class PlacementAnnealer {
     void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost,
                                 const char* outcome, const char* reason);
 
+    /**
+     * @brief Defines the RL agent's reward function factor constant. This factor controls the weight of bb cost
+     * compared to the timing cost in the agent's reward function. The reward is calculated as
+     * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost)
+     */
+    static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
+
   private:
-    ///@brief Find the starting temperature for the annealing loop.
+    /// @brief Find the starting temperature for the annealing loop.
     float estimate_starting_temperature();
 };
\ No newline at end of file
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 50a568ac0db..1a3616232de 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -34,7 +34,6 @@
 #include "place_timing_update.h"
 #include "move_transactions.h"
 #include "move_utils.h"
-#include "place_constraints.h"
 #include "buttons.h"
 
 #include "manual_move_generator.h"
@@ -57,11 +56,6 @@
 #include "net_cost_handler.h"
 #include "placer_state.h"
 
-/*  define the RL agent's reward function factor constant. This factor controls the weight of bb cost *
- *  compared to the timing cost in the agent's reward function. The reward is calculated as           *
- * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost)
- */
-static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
 
 /********************* Static subroutines local to place.c *******************/
 #ifdef VERBOSE
@@ -432,14 +426,8 @@ void try_place(const Netlist<>& net_list,
     }
 #endif /* ENABLE_ANALYTIC_PLACE */
 
-    //RL agent state definition
-    e_agent_state agent_state = e_agent_state::EARLY_IN_THE_ANNEAL;
-
-    //Define the timing bb weight factor for the agent's reward function
-    float timing_bb_factor = REWARD_BB_TIMING_RELATIVE_WEIGHT;
-
     PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
-                               noc_opts, rng, *move_generator, *move_generator2, manual_move_generator, place_delay_model.get(),
+                               noc_opts, rng, std::move(move_generator), std::move(move_generator2), manual_move_generator, place_delay_model.get(),
                                placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim);
 
     const t_annealing_state& annealing_state = annealer.get_annealing_state();
@@ -462,34 +450,20 @@ void try_place(const Netlist<>& net_list,
                 sWNS = timing_info->setup_worst_negative_slack();
 
                 // see if we should save the current placement solution as a checkpoint
-                if (placer_opts.place_checkpointing && agent_state == e_agent_state::LATE_IN_THE_ANNEAL) {
+                if (placer_opts.place_checkpointing && annealer.get_agent_state() == e_agent_state::LATE_IN_THE_ANNEAL) {
                     save_placement_checkpoint_if_needed(blk_loc_registry.block_locs(),
                                                         placement_checkpoint,
                                                         timing_info, costs, critical_path.delay());
                 }
             }
 
-            // select the appropriate move generator
-            MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2,
-                                                                          agent_state, placer_opts, false);
-
             // do a complete inner loop iteration
-            annealer.placement_inner_loop(current_move_generator,
-                                          timing_bb_factor);
+            annealer.placement_inner_loop();
 
             print_place_status(annealing_state, placer_stats, temperature_timer.elapsed_sec(),
                                critical_path.delay(), sTNS, sWNS, annealer.get_total_iteration(),
                                noc_opts.noc, costs.noc_cost_terms);
 
-            if (placer_opts.place_algorithm.is_timing_driven()
-                && placer_opts.place_agent_multistate
-                && agent_state == e_agent_state::EARLY_IN_THE_ANNEAL) {
-                if (annealing_state.alpha < 0.85 && annealing_state.alpha > 0.6) {
-                    agent_state = e_agent_state::LATE_IN_THE_ANNEAL;
-                    VTR_LOG("Agent's 2nd state: \n");
-                }
-            }
-
             sprintf(msg, "Cost: %g  BB Cost %g  TD Cost %g  Temperature: %g",
                     costs.cost, costs.bb_cost, costs.timing_cost, annealing_state.t);
             update_screen(ScreenUpdatePriority::MINOR, msg, PLACEMENT, timing_info);
@@ -513,13 +487,9 @@ void try_place(const Netlist<>& net_list,
 
         annealer.outer_loop_update_timing_info();
 
-        // select the appropriate move generator
-        MoveGenerator& current_move_generator = select_move_generator(move_generator, move_generator2,
-                                                                      agent_state, placer_opts, true);
-
         /* Run inner loop again with temperature = 0 so as to accept only swaps
          * which reduce the cost of the placement */
-        annealer.placement_inner_loop(current_move_generator, timing_bb_factor);
+        annealer.placement_inner_loop();
 
         if (placer_opts.place_quench_algorithm.is_timing_driven()) {
             critical_path = timing_info->least_slack_critical_path();

From 346446874e732b68226abb5e5f6cb12b1ff24667 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 15:09:53 -0500
Subject: [PATCH 25/31] construct manual move generator in annealer's
 constructor

---
 vpr/src/place/annealer.cpp | 7 +++----
 vpr/src/place/annealer.h   | 6 ++----
 vpr/src/place/place.cpp    | 6 +-----
 3 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 415374a61bc..3d686895812 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -163,10 +163,10 @@ void t_annealing_state::update_rlim(float success_rate) {
 }
 
 void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
-    /* If rlim == FINAL_RLIM, then scale == 0. */
+    // If rlim == FINAL_RLIM, then scale == 0.
     float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
 
-    /* Apply the scaling factor on crit_exponent. */
+    // Apply the scaling factor on crit_exponent.
     crit_exponent = scale * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
                     + placer_opts.td_place_exp_first;
 }
@@ -186,7 +186,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
                                      vtr::RngContainer& rng,
                                      std::unique_ptr<MoveGenerator>&& move_generator_1,
                                      std::unique_ptr<MoveGenerator>&& move_generator_2,
-                                     ManualMoveGenerator& manual_move_generator,
                                      const PlaceDelayModel* delay_model,
                                      PlacerCriticalities* criticalities,
                                      PlacerSetupSlacks* setup_slacks,
@@ -202,7 +201,7 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , rng_(rng)
     , move_generator_1_(std::move(move_generator_1))
     , move_generator_2_(std::move(move_generator_2))
-    , manual_move_generator_(manual_move_generator)
+    , manual_move_generator_(placer_state, rng)
     , agent_state_(e_agent_state::EARLY_IN_THE_ANNEAL)
     , delay_model_(delay_model)
     , criticalities_(criticalities)
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 3a84a05756d..4131a1ebbe2 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -5,6 +5,7 @@
 
 #include "move_generator.h" // movestats
 #include "net_cost_handler.h"
+#include "manual_move_generator.h"
 
 #include <optional>
 #include <tuple>
@@ -15,7 +16,6 @@ struct t_placer_opts;
 enum class e_agent_state;
 
 class NocCostHandler;
-class ManualMoveGenerator;
 class NetPinTimingInvalidator;
 
 /**
@@ -155,7 +155,6 @@ class PlacementAnnealer {
                       vtr::RngContainer& rng,
                       std::unique_ptr<MoveGenerator>&& move_generator_1,
                       std::unique_ptr<MoveGenerator>&& move_generator_2,
-                      ManualMoveGenerator& manual_move_generator,
                       const PlaceDelayModel* delay_model,
                       PlacerCriticalities* criticalities,
                       PlacerSetupSlacks* setup_slacks,
@@ -222,7 +221,7 @@ class PlacementAnnealer {
 
     std::unique_ptr<MoveGenerator> move_generator_1_;
     std::unique_ptr<MoveGenerator> move_generator_2_;
-    ManualMoveGenerator& manual_move_generator_;
+    ManualMoveGenerator manual_move_generator_;
     /// RL agent state definition
     e_agent_state agent_state_;
 
@@ -243,7 +242,6 @@ class PlacementAnnealer {
     t_pl_blocks_to_be_moved blocks_affected_;
 
   private:
-
     /**
      * @brief The maximum number of swap attempts before invoking the
      * once-in-a-while placement legality check as well as floating point
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 1a3616232de..dc907a751ea 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -36,8 +36,6 @@
 #include "move_utils.h"
 #include "buttons.h"
 
-#include "manual_move_generator.h"
-
 #include "PlacementDelayCalculator.h"
 #include "VprTimingGraphResolver.h"
 #include "timing_util.h"
@@ -216,8 +214,6 @@ void try_place(const Netlist<>& net_list,
     }
 #endif
 
-    ManualMoveGenerator manual_move_generator(placer_state, rng);
-
     vtr::ScopedStartFinishTimer timer("Placement");
 
     if (noc_opts.noc) {
@@ -427,7 +423,7 @@ void try_place(const Netlist<>& net_list,
 #endif /* ENABLE_ANALYTIC_PLACE */
 
     PlacementAnnealer annealer(placer_opts, placer_state, costs, net_cost_handler, noc_cost_handler,
-                               noc_opts, rng, std::move(move_generator), std::move(move_generator2), manual_move_generator, place_delay_model.get(),
+                               noc_opts, rng, std::move(move_generator), std::move(move_generator2), place_delay_model.get(),
                                placer_criticalities.get(), placer_setup_slacks.get(), timing_info.get(), pin_timing_invalidator.get(), move_lim);
 
     const t_annealing_state& annealing_state = annealer.get_annealing_state();

From f471b40f9590aaecce1b3ba5c0980ff4e4a1a94a Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 15:49:26 -0500
Subject: [PATCH 26/31] add some comments

---
 libs/libarchfpga/src/device_grid.h |  4 ++
 vpr/src/place/annealer.cpp         |  1 -
 vpr/src/place/annealer.h           | 64 ++++++++++++++++++++----------
 vpr/src/place/place.cpp            |  4 +-
 4 files changed, 48 insertions(+), 25 deletions(-)

diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h
index a82dd043da5..8e1332559ed 100644
--- a/libs/libarchfpga/src/device_grid.h
+++ b/libs/libarchfpga/src/device_grid.h
@@ -38,6 +38,10 @@ class DeviceGrid {
     size_t width() const { return grid_.dim_size(1); }
     ///@brief Return the height of the grid at the specified layer
     size_t height() const { return grid_.dim_size(2); }
+    ///@brief Return the grid dimensions in (# of layers, width, height) format
+    std::tuple<size_t, size_t, size_t> dim_sizes() const {
+        return {grid_.dim_size(0), grid_.dim_size(1), grid_.dim_size(2)};
+    }
 
     ///@brief Return the size of the flattened grid on the given layer
     inline size_t grid_size() const {
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 3d686895812..e3aab3316ee 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -635,7 +635,6 @@ e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
     return move_outcome;
 }
 
-/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */
 void PlacementAnnealer::outer_loop_update_timing_info() {
     if (placer_opts_.place_algorithm.is_timing_driven()) {
         /* At each temperature change we update these values to be used
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 4131a1ebbe2..039ecfb652f 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -37,9 +37,6 @@ struct t_swap_stats {
  * loop iteration. It stores various important variables that need to
  * be accessed during the placement inner loop.
  *
- * Private variables are not given accessor functions. They serve as
- * macros originally defined in place.cpp as global scope variables.
- *
  * Public members:
  *   @param t
  *              Temperature for simulated annealing.
@@ -134,13 +131,6 @@ class t_annealing_state {
      * factor is calculated and applied linearly.
      */
     inline void update_crit_exponent(const t_placer_opts& placer_opts);
-
-    /**
-     * @brief Update the move limit based on the success rate.
-     *
-     * The value is bounded between 1 and move_lim_max.
-     */
-    inline void update_move_lim(float success_target, float success_rate);
 };
 
 
@@ -162,11 +152,23 @@ class PlacementAnnealer {
                       NetPinTimingInvalidator* pin_timing_invalidator,
                       int move_lim);
 
-    /// @brief Contains the inner loop of the simulated annealing
+    /**
+     * @brief Contains the inner loop of the simulated annealing that performs
+     * a certain number of swaps with a single temperature
+     */
     void placement_inner_loop();
 
+    /**
+     * @brief Updates the setup slacks and criticalities before the inner loop
+     * of the annealing/quench. It also updates normalization factors for different
+     * placement cost terms.
+     */
     void outer_loop_update_timing_info();
 
+    /**
+     * @brief Update the annealing state according to the annealing schedule selected.
+     * @return True->continues the annealing. False->exits the annealing.
+     */
     bool outer_loop_update_state();
 
     /**
@@ -189,6 +191,13 @@ class PlacementAnnealer {
                            const t_place_algorithm& place_algorithm,
                            bool manual_move_enabled);
 
+    /**
+     * @brief Starts the quench stage in simulated annealing by
+     * setting the temperature to zero and reverting the move range limit
+     * to the initial value.
+     */
+    void start_quench();
+
     /// @brief Returns the total number iterations (attempted swaps).
     int get_total_iteration() const;
 
@@ -198,31 +207,45 @@ class PlacementAnnealer {
     /// @brief Returns a constant reference to the annealing state
     const t_annealing_state& get_annealing_state() const;
 
+    /// @brief Returns constant references to different statistics objects
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
 
+  private:
     /**
-     * @brief Starts the quench stage in simulated annealing by
-     * setting the temperature to zero and reverting the move range limit
-     * to the initial value.
+     * @brief Determines whether a move should be accepted or not.
+     * Moves with negative delta cost are always accepted, but
+     * moves that increase the total cost are accepted with a
+     * probability that diminishes as the temperature decreases.
+     * @param delta_c The cost difference if the move is accepted.
+     * @param t The annealer's temperature.
+     * @return Whether the move is accepted or not.
      */
-    void start_quench();
-
-  private:
     e_move_result assess_swap_(double delta_c, double t);
 
+    /// @brief Find the starting temperature for the annealing loop.
+    float estimate_starting_temperature();
+
   private:
     const t_placer_opts& placer_opts_;
     PlacerState& placer_state_;
+    /// Stores different placement cost terms
     t_placer_costs& costs_;
+    /// Computes bounding box for each cluster net
     NetCostHandler& net_cost_handler_;
+    /// Computes NoC-related cost terms when NoC optimization is enabled
     std::optional<NocCostHandler>& noc_cost_handler_;
+    /// Contains weighting factors for NoC-related cost terms
     const t_noc_opts& noc_opts_;
+    /// Random number generator for selecting random blocks and random locations
     vtr::RngContainer& rng_;
 
+    /// The move generator used in the first state of RL agent and initial temperature computation
     std::unique_ptr<MoveGenerator> move_generator_1_;
+    /// The move generator used in the second state of RL agent
     std::unique_ptr<MoveGenerator> move_generator_2_;
+    /// Handles manual swaps proposed by the user through graphical user interface
     ManualMoveGenerator manual_move_generator_;
-    /// RL agent state definition
+    /// RL agent state
     e_agent_state agent_state_;
 
     const PlaceDelayModel* delay_model_;
@@ -239,6 +262,7 @@ class PlacementAnnealer {
     MoveTypeStat move_type_stats_;
     t_placer_statistics placer_stats_;
 
+    /// Keep record of moved blocks and affected pins in a swap
     t_pl_blocks_to_be_moved blocks_affected_;
 
   private:
@@ -272,8 +296,4 @@ class PlacementAnnealer {
      * -1*(1.5-REWARD_BB_TIMING_RELATIVE_WEIGHT)*timing_cost + (1+REWARD_BB_TIMING_RELATIVE_WEIGHT)*bb_cost)
      */
     static constexpr float REWARD_BB_TIMING_RELATIVE_WEIGHT = 0.4;
-
-  private:
-    /// @brief Find the starting temperature for the annealing loop.
-    float estimate_starting_temperature();
 };
\ No newline at end of file
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index dc907a751ea..9af234a13f8 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -800,8 +800,8 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
                                                const PlacementDelayCalculator& delay_calc,
                                                bool is_flat,
                                                const BlkLocRegistry& blk_loc_registry) {
-    auto& timing_ctx = g_vpr_ctx.timing();
-    auto& atom_ctx = g_vpr_ctx.atom();
+    const auto& timing_ctx = g_vpr_ctx.timing();
+    const auto& atom_ctx = g_vpr_ctx.atom();
 
     VprTimingGraphResolver resolver(atom_ctx.nlist, atom_ctx.lookup, *timing_ctx.graph,
                                     delay_calc, is_flat, blk_loc_registry);

From 216893b2ae3535128d2aa25ec57f96ab85e22949 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 13 Nov 2024 17:59:04 -0500
Subject: [PATCH 27/31] fix compilation error by removing definition for
 t_annealing_state::update_move_lim()

---
 vpr/src/place/annealer.cpp                    | 7 -------
 vpr/src/timing/PostClusterDelayCalculator.tpp | 7 ++-----
 vpr/src/timing/clb_delay_calc.inl             | 2 +-
 vpr/src/timing/timing_info.h                  | 2 +-
 4 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index e3aab3316ee..56f419477e2 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -10,7 +10,6 @@
 #include "place_util.h"
 #include "placer_state.h"
 #include "move_utils.h"
-#include "manual_move_generator.h"
 #include "noc_place_utils.h"
 #include "NetPinTimingInvalidator.h"
 #include "place_timing_update.h"
@@ -171,12 +170,6 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
                     + placer_opts.td_place_exp_first;
 }
 
-void t_annealing_state::update_move_lim(float success_target, float success_rate) {
-    move_lim = move_lim_max * (success_target / success_rate);
-    move_lim = std::min(move_lim, move_lim_max);
-    move_lim = std::max(move_lim, 1);
-}
-
 PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
                                      PlacerState& placer_state,
                                      t_placer_costs& costs,
diff --git a/vpr/src/timing/PostClusterDelayCalculator.tpp b/vpr/src/timing/PostClusterDelayCalculator.tpp
index 7f6cda39022..9c989cec03f 100644
--- a/vpr/src/timing/PostClusterDelayCalculator.tpp
+++ b/vpr/src/timing/PostClusterDelayCalculator.tpp
@@ -247,12 +247,9 @@ inline tatum::Time PostClusterDelayCalculator::atom_net_delay(const tatum::Timin
                 set_cached_pins(edge_id, delay_type, (ParentPinId&)atom_src_pin, (ParentPinId&)atom_sink_pin);
 
             } else {
-                ClusterBlockId clb_src_block;
-                ClusterBlockId clb_sink_block;
-
-                clb_src_block = netlist_lookup_.atom_clb(atom_src_block);
+                ClusterBlockId clb_src_block = netlist_lookup_.atom_clb(atom_src_block);
                 VTR_ASSERT(clb_src_block != ClusterBlockId::INVALID());
-                clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block);
+                ClusterBlockId clb_sink_block = netlist_lookup_.atom_clb(atom_sink_block);
                 VTR_ASSERT(clb_sink_block != ClusterBlockId::INVALID());
 
                 const t_pb_graph_pin* src_gpin = netlist_lookup_.atom_pin_pb_graph_pin(atom_src_pin);
diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl
index 67524a82f7b..acfbc92a189 100644
--- a/vpr/src/timing/clb_delay_calc.inl
+++ b/vpr/src/timing/clb_delay_calc.inl
@@ -22,7 +22,7 @@ inline float ClbDelayCalc::internal_src_to_internal_sink_delay(const ClusterBloc
 }
 
 inline float ClbDelayCalc::trace_delay(ClusterBlockId clb, int src_pb_route_id, int sink_pb_route_id, DelayType delay_type) const {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
 
     VTR_ASSERT(src_pb_route_id < cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins);
     VTR_ASSERT(sink_pb_route_id < cluster_ctx.clb_nlist.block_pb(clb)->pb_graph_node->total_pb_pins);
diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h
index 323ac7efbb6..14d3b08f939 100644
--- a/vpr/src/timing/timing_info.h
+++ b/vpr/src/timing/timing_info.h
@@ -122,7 +122,7 @@ class HoldTimingInfo : public virtual TimingInfo {
 //Generic interface which provides both setup and hold related timing information
 //
 //This is useful for algorithms which require access to both setup and hold timing
-//information (e.g. simulatneously optimizing setup and hold)
+//information (e.g. simultaneously optimizing setup and hold)
 //
 //This class supports both the SetupTimingInfo and HoldTimingInfo interfaces and
 //can be used in place of them in any algorithm requiring setup or hold related

From c413d222e653e0ba396089c3e5f3374658de1a82 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 16 Nov 2024 20:22:33 -0500
Subject: [PATCH 28/31] add comment for PlacementAnnealer class and make a few
 methods private

---
 vpr/src/place/annealer.cpp | 18 +++++------
 vpr/src/place/annealer.h   | 64 +++++++++++++++++++++++++-------------
 2 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 56f419477e2..4696fc37b85 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -217,7 +217,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
 
     int first_move_lim = get_initial_move_lim(placer_opts, placer_opts_.anneal_sched);
 
-
     if (placer_opts.inner_loop_recompute_divider != 0) {
         inner_recompute_limit_ = static_cast<int>(0.5 + (float)first_move_lim / (float)placer_opts.inner_loop_recompute_divider);
     } else {
@@ -258,10 +257,10 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     move_type_stats_.rejected_moves.resize({device_ctx.logical_block_types.size(), (int)e_move_type::NUMBER_OF_AUTO_MOVES}, 0);
 
     // Update the starting temperature for placement annealing to a more appropriate value
-    annealing_state_.t = estimate_starting_temperature();
+    annealing_state_.t = estimate_starting_temperature_();
 }
 
-float PlacementAnnealer::estimate_starting_temperature() {
+float PlacementAnnealer::estimate_starting_temperature_() {
     if (placer_opts_.anneal_sched.type == e_sched_type::USER_SCHED) {
         return placer_opts_.anneal_sched.init_t;
     }
@@ -289,7 +288,7 @@ float PlacementAnnealer::estimate_starting_temperature() {
 #endif /*NO_GRAPHICS*/
 
         // Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack
-        e_move_result swap_result = try_swap(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
+        e_move_result swap_result = try_swap_(*move_generator_1_, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             num_accepted++;
@@ -322,9 +321,9 @@ float PlacementAnnealer::estimate_starting_temperature() {
     return init_temp;
 }
 
-e_move_result PlacementAnnealer::try_swap(MoveGenerator& move_generator,
-                                          const t_place_algorithm& place_algorithm,
-                                          bool manual_move_enabled) {
+e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
+                                           const t_place_algorithm& place_algorithm,
+                                           bool manual_move_enabled) {
     /* Picks some block and moves it to another spot.  If this spot is
      * occupied, switch the blocks.  Assess the change in cost function.
      * rlim is the range limiter.
@@ -638,7 +637,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
             PlaceCritParams crit_params{annealing_state_.crit_exponent,
                                         placer_opts_.place_crit_limit};
 
-            //Update all timing related classes
+            // Update all timing related classes
             perform_full_timing_update(crit_params, delay_model_, criticalities_, setup_slacks_,
                                        pin_timing_invalidator_, timing_info_, &costs_, placer_state_);
 
@@ -667,7 +666,7 @@ void PlacementAnnealer::placement_inner_loop() {
 
     // Inner loop begins
     for (int inner_iter = 0, inner_crit_iter_count = 1; inner_iter < annealing_state_.move_lim; inner_iter++) {
-        e_move_result swap_result = try_swap(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
+        e_move_result swap_result = try_swap_(move_generator, placer_opts_.place_algorithm, manual_move_enabled);
 
         if (swap_result == e_move_result::ACCEPTED) {
             // Move was accepted.  Update statistics that are useful for the annealing schedule.
@@ -683,7 +682,6 @@ void PlacementAnnealer::placement_inner_loop() {
             /* Do we want to re-timing analyze the circuit to get updated slack and criticality values?
              * We do this only once in a while, since it is expensive.
              */
-
             const int recompute_limit = quench_started_ ? quench_recompute_limit_ : inner_recompute_limit_;
             // on last iteration don't recompute
             if (inner_crit_iter_count >= recompute_limit && inner_iter != annealing_state_.move_lim - 1) {
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 039ecfb652f..eec0e23106e 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -133,7 +133,26 @@ class t_annealing_state {
     inline void update_crit_exponent(const t_placer_opts& placer_opts);
 };
 
-
+/**
+ * @class PlacementAnnealer
+ * @brief Implements a simulated annealing optimizer that minimizes the placement cost
+ * by swapping clustered blocks. It always accepts swaps that reduce the placement cost,
+ * but accepts the swaps that increase the cost with a diminishing probability.
+ *
+ * @details Swaps are performed in two nested loops. The inner loop is implemented in
+ * the placement_inner_loop() method. Each iteration of the inner loop performs a single swap,
+ * and all swaps performed in each iteration of the outer loop are evaluated using the same
+ * temperature.
+ *
+ * The user is expected to call outer_loop_update_timing_info() before calling
+ * placement_inner_loop(). Then, outer_loop_update_state() should be called to
+ * determine whether another iteration of the outer loop is required.
+ * If outer_loop_update_state() returns false, start_quench() can be called to
+ * set the temperature to zero so that the annealer behaves greedily. Then,
+ * outer_loop_update_timing_info() and placement_inner_loop() can be called
+ * to run the quench stage.
+ *
+ */
 class PlacementAnnealer {
   public:
     PlacementAnnealer(const t_placer_opts& placer_opts,
@@ -171,26 +190,6 @@ class PlacementAnnealer {
      */
     bool outer_loop_update_state();
 
-    /**
-     * @brief Pick some block and moves it to another spot.
-     *
-     * If the new location is empty, directly move the block. If the new location
-     * is occupied, switch the blocks. Due to the different sizes of the blocks,
-     * this block switching may occur for multiple times. It might also cause the
-     * current swap attempt to abort due to inability to find suitable locations
-     * for moved blocks.
-     *
-     * The move generator will record all the switched blocks in the variable
-     * `blocks_affected`. Afterwards, the move will be assessed by the chosen
-     * cost formulation. Currently, there are three ways to assess move cost,
-     * which are stored in the enum type `t_place_algorithm`.
-     *
-     * @return Whether the block swap is accepted, rejected or aborted.
-     */
-    e_move_result try_swap(MoveGenerator& move_generator,
-                           const t_place_algorithm& place_algorithm,
-                           bool manual_move_enabled);
-
     /**
      * @brief Starts the quench stage in simulated annealing by
      * setting the temperature to zero and reverting the move range limit
@@ -211,6 +210,27 @@ class PlacementAnnealer {
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
 
   private:
+
+    /**
+     * @brief Picks a block and moves it to another spot.
+     *
+     * If the new location is empty, directly move the block. If the new location
+     * is occupied, switch the blocks. Due to the different sizes of the blocks,
+     * this block switching may occur multiple times. It might also cause the
+     * current swap attempt to abort due to inability to find suitable locations
+     * for moved blocks.
+     *
+     * The move generator will record all the switched blocks in the variable
+     * `blocks_affected`. Afterwards, the move will be assessed by the chosen
+     * cost formulation. Currently, there are three ways to assess move cost,
+     * which are stored in the enum type `t_place_algorithm`.
+     *
+     * @return Whether the block swap is accepted, rejected or aborted.
+     */
+    e_move_result try_swap_(MoveGenerator& move_generator,
+                            const t_place_algorithm& place_algorithm,
+                            bool manual_move_enabled);
+
     /**
      * @brief Determines whether a move should be accepted or not.
      * Moves with negative delta cost are always accepted, but
@@ -223,7 +243,7 @@ class PlacementAnnealer {
     e_move_result assess_swap_(double delta_c, double t);
 
     /// @brief Find the starting temperature for the annealing loop.
-    float estimate_starting_temperature();
+    float estimate_starting_temperature_();
 
   private:
     const t_placer_opts& placer_opts_;

From f7c239f8b82a2b1ba7e53bc8e28643ccdc06c74d Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 17 Nov 2024 12:48:51 -0500
Subject: [PATCH 29/31] apply Alex's comments

---
 .../libtatum/libtatum/tatum/TimingGraph.cpp   |  9 ++--
 vpr/src/place/annealer.h                      | 41 ++++++++++++-------
 2 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
index 3b861d054c6..d67e7c7afae 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/TimingGraph.cpp
@@ -481,14 +481,13 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
     //Make all edges in a level be contiguous in memory
 
     //Determine the edges driven by each level of the graph
-    std::vector<std::vector<EdgeId>> edge_levels;
+    std::vector<std::vector<EdgeId>> edge_levels(levels().size());
     for(LevelId level_id : levels()) {
-        edge_levels.emplace_back();
-        for(auto node_id : level_nodes(level_id)) {
+        for(NodeId node_id : level_nodes(level_id)) {
 
             //We walk the nodes according to the input-edge order.
             //This is the same order used by the arrival-time traversal (which is responsible
-            //for most of the analyzer run-time), so matching it's order exactly results in
+            //for most of the analyzer run-time), so matching its order exactly results in
             //better cache locality
             for(EdgeId edge_id : node_in_edges(node_id)) {
 
@@ -498,7 +497,7 @@ tatum::util::linear_map<EdgeId,EdgeId> TimingGraph::optimize_edge_layout() const
         }
     }
 
-    //Maps from from original to new edge id, used to update node to edge refs
+    //Maps from original to new edge id, used to update node to edge refs
     tatum::util::linear_map<EdgeId,EdgeId> orig_to_new_edge_id(edges().size());
 
     //Determine the new order
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index eec0e23106e..a0a7ef3aa9b 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -135,23 +135,36 @@ class t_annealing_state {
 
 /**
  * @class PlacementAnnealer
- * @brief Implements a simulated annealing optimizer that minimizes the placement cost
- * by swapping clustered blocks. It always accepts swaps that reduce the placement cost,
- * but accepts the swaps that increase the cost with a diminishing probability.
+ * @brief Simulated annealing optimizer for minimizing placement cost via block swaps.
  *
- * @details Swaps are performed in a two nested loops. The inner loop is implemented in
- * placement_inner_loop() method. Each iteration of the inner loop performs a single swap,
- * and all swaps performed in each iteration of the other loop are evaluated using the same
- * temperature.
+ * @details This class implements simulated annealing to optimize placement cost by swapping clustered blocks.
+ * Swaps that reduce the cost are always accepted, while those that increase the cost are accepted
+ * with a diminishing probability.
  *
- * The user is expected to call outer_loop_update_timing_info() before calling
- * placement_inner_loop(). Then, outer_loop_update_state() should be called to
- * determine whether another iteration of the outer loop is required.
- * If outer_loop_update_state() returns false, start_quench() can be called to
- * set the temperate to zero so that the annealer behaves greedily. Then,
- * outer_loop_update_timing_info() and placement_inner_loop() can be called
- * to run the quench stage.
+ * The annealing process consists of two nested loops:
+ * - The **inner loop** (implemented in `placement_inner_loop()`) performs individual swaps, all evaluated at a fixed temperature.
+ * - The **outer loop** adjusts the temperature and determines whether further iterations are needed.
  *
+ * Usage workflow:
+ * 1. Call `outer_loop_update_timing_info()` to update timing information.
+ * 2. Execute `placement_inner_loop()` for swap evaluations.
+ * 3. Call `outer_loop_update_state()` to check if more outer loop iterations are needed.
+ * 4. Optionally, use `start_quench()` to set the temperature to zero for a greedy optimization (quenching stage),
+ *    then repeat steps 1 and 2.
+ *
+ *    Usage example:
+ *    **************************************
+ *    PlacementAnnealer annealer(...);
+ *
+ *    do {
+ *      annealer.outer_loop_update_timing_info();
+ *      annealer.placement_inner_loop();
+ *    } while (annealer.outer_loop_update_state());
+ *
+ *    annealer.start_quench();
+ *    annealer.outer_loop_update_timing_info();
+ *    annealer.placement_inner_loop();
+ *    **************************************
  */
 class PlacementAnnealer {
   public:

From 834e5891d07151ae07a650c59569c0c0a67917d5 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 17 Nov 2024 12:57:25 -0500
Subject: [PATCH 30/31] add separators to annealer.cpp

---
 vpr/src/place/annealer.cpp | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 4696fc37b85..f0d2fc50e83 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -17,6 +17,10 @@
 #include "placer_breakpoint.h"
 #include "RL_agent_util.h"
 
+/**************************************************************************/
+/*************** Static Function Declarations *****************************/
+/**************************************************************************/
+
 /**
  * @brief Check if the setup slack has gotten better or worse due to block swap.
  *
@@ -38,6 +42,9 @@
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state);
 
+/*************************************************************************/
+/*************** Static Function Definitions *****************************/
+/*************************************************************************/
 
 static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
                                       const PlacerState& placer_state) {
@@ -78,6 +85,10 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks,
     return 1;
 }
 
+/**************************************************************************************/
+/*************** Member Function Definitions for t_annealing_state ********************/
+/**************************************************************************************/
+
 ///@brief Constructor: Initialize all annealing state variables and macros.
 t_annealing_state::t_annealing_state(float first_t,
                                      float first_rlim,
@@ -170,6 +181,10 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
                     + placer_opts.td_place_exp_first;
 }
 
+/**************************************************************************************/
+/*************** Member Function Definitions for PlacementAnnealer ********************/
+/**************************************************************************************/
+
 PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
                                      PlacerState& placer_state,
                                      t_placer_costs& costs,

From 8c0fdfc220879a44ad1495d09bbedec41df6247e Mon Sep 17 00:00:00 2001
From: soheil <soheilqs@gmail.com>
Date: Mon, 18 Nov 2024 00:55:04 -0500
Subject: [PATCH 31/31] add get_move_abortion_logger() to PlacementAnnealer

---
 vpr/src/place/annealer.cpp | 6 +++++-
 vpr/src/place/annealer.h   | 7 +++++++
 vpr/src/place/place.cpp    | 4 +---
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index f0d2fc50e83..1d8836956ab 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -791,6 +791,10 @@ std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&>
     return {swap_stats_, move_type_stats_, placer_stats_};
 }
 
+const MoveAbortionLogger& PlacementAnnealer::get_move_abortion_logger() const {
+    return blocks_affected_.move_abortion_logger;
+}
+
 void PlacementAnnealer::LOG_MOVE_STATS_HEADER() {
     if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) {
         if (move_stats_file_) {
@@ -870,4 +874,4 @@ e_move_result PlacementAnnealer::assess_swap_(double delta_c, double t) {
     }
     VTR_LOGV_DEBUG(g_vpr_ctx.placement().f_placer_debug, "\t\tMove is rejected(hill climbing)\n");
     return e_move_result::REJECTED;
-}
\ No newline at end of file
+}
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index a0a7ef3aa9b..fd9b0dbd928 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -222,6 +222,13 @@ class PlacementAnnealer {
     /// @brief Returns constant references to different statistics objects
     std::tuple<const t_swap_stats&, const MoveTypeStat&, const t_placer_statistics&> get_stats() const;
 
+    /**
+     * @brief Returns MoveAbortionLogger to report how many moves
+     * were aborted for each reason.
+     * @return A constant reference to a MoveAbortionLogger object.
+     */
+    const MoveAbortionLogger& get_move_abortion_logger() const;
+
   private:
 
     /**
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 9af234a13f8..69617b278a2 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -169,8 +169,6 @@ void try_place(const Netlist<>& net_list,
     std::unique_ptr<PlacerCriticalities> placer_criticalities;
     std::unique_ptr<NetPinTimingInvalidator> pin_timing_invalidator;
 
-    t_pl_blocks_to_be_moved blocks_affected(net_list.blocks().size());
-
     if (placer_opts.place_algorithm.is_timing_driven()) {
         /*do this before the initial placement to avoid messing up the initial placement */
         place_delay_model = alloc_lookups_and_delay_model(net_list,
@@ -552,7 +550,7 @@ void try_place(const Netlist<>& net_list,
     //Some stats
     VTR_LOG("\n");
     VTR_LOG("Swaps called: %d\n", swap_stats.num_ts_called);
-    blocks_affected.move_abortion_logger.report_aborted_moves();
+    annealer.get_move_abortion_logger().report_aborted_moves();
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         //Final timing estimate