From 2deff2b01ebbdcfea566e8c2c02b4340c55039a2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 28 Nov 2024 16:00:06 -0500 Subject: [PATCH 01/39] move place_delay files to a directory --- vpr/src/place/{ => timing/delay_model}/place_delay_model.cpp | 0 vpr/src/place/{ => timing/delay_model}/place_delay_model.h | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename vpr/src/place/{ => timing/delay_model}/place_delay_model.cpp (100%) rename vpr/src/place/{ => timing/delay_model}/place_delay_model.h (100%) diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp similarity index 100% rename from vpr/src/place/place_delay_model.cpp rename to vpr/src/place/timing/delay_model/place_delay_model.cpp diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h similarity index 100% rename from vpr/src/place/place_delay_model.h rename to vpr/src/place/timing/delay_model/place_delay_model.h From be8519b26ad6a27af7fc2d6e33d4a6dc5b290b7b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 28 Nov 2024 16:00:35 -0500 Subject: [PATCH 02/39] remove unused struct frp, timing_place_lookup.cpp --- vpr/src/place/timing_place_lookup.cpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 86dc396e2b8..fa6a9acb0bb 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -40,22 +40,6 @@ constexpr float UNINITIALIZED_DELTA = -1; //Ind constexpr float EMPTY_DELTA = -2; //Indicates delta delay from/to an EMPTY block constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); //Indicates there is no valid delta delay -struct t_profile_loc { - t_profile_loc(int x, int y, std::vector> delta_values) - : root(x, y) - , deltas(delta_values) {} - - vtr::Point root; - std::vector> deltas; -}; - -struct t_profile_info { - std::vector locations; - - int 
max_delta_x; - int max_delta_y; -}; - /*** Function Prototypes *****/ static t_chan_width setup_chan_width(const t_router_opts& router_opts, t_chan_width_dist chan_width_dist); From 47dec5c35aa48be5a5234860b0f40809338cc3b7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 28 Nov 2024 16:17:19 -0500 Subject: [PATCH 03/39] create two files for each placement delay model --- .../timing/delay_model/delta_delay_model.cpp | 34 ++ .../timing/delay_model/delta_delay_model.h | 47 +++ .../delay_model/override_delay_model.cpp | 262 +++++++++++++++ .../timing/delay_model/override_delay_model.h | 112 +++++++ .../timing/delay_model/place_delay_model.cpp | 309 +----------------- .../timing/delay_model/place_delay_model.h | 180 +--------- .../timing/delay_model/simple_delay_model.cpp | 45 +++ .../timing/delay_model/simple_delay_model.h | 39 +++ vpr/src/place/timing_place_lookup.cpp | 56 +--- 9 files changed, 554 insertions(+), 530 deletions(-) create mode 100644 vpr/src/place/timing/delay_model/delta_delay_model.cpp create mode 100644 vpr/src/place/timing/delay_model/delta_delay_model.h create mode 100644 vpr/src/place/timing/delay_model/override_delay_model.cpp create mode 100644 vpr/src/place/timing/delay_model/override_delay_model.h create mode 100644 vpr/src/place/timing/delay_model/simple_delay_model.cpp create mode 100644 vpr/src/place/timing/delay_model/simple_delay_model.h diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp new file mode 100644 index 00000000000..55bb0104316 --- /dev/null +++ b/vpr/src/place/timing/delay_model/delta_delay_model.cpp @@ -0,0 +1,34 @@ + +#include "delta_delay_model.h" + +float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, + const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + return 
delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} + +void DeltaDelayModel::dump_echo(std::string filepath) const { + FILE* f = vtr::fopen(filepath.c_str(), "w"); + fprintf(f, " "); + for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { + fprintf(f, " %9zu", from_layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9zu", dx); + } + fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { + fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); + } + fprintf(f, "\n"); + } + } + } + vtr::fclose(f); +} + diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.h b/vpr/src/place/timing/delay_model/delta_delay_model.h new file mode 100644 index 00000000000..c3ae0d83cf7 --- /dev/null +++ b/vpr/src/place/timing/delay_model/delta_delay_model.h @@ -0,0 +1,47 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class DeltaDelayModel + * + * @brief A simple delay model based on the distance (delta) between block locations. 
+ */ +class DeltaDelayModel : public PlaceDelayModel { + public: + DeltaDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + DeltaDelayModel(float min_cross_layer_delay, + vtr::NdMatrix delta_delays, + bool is_flat) + : delays_(std::move(delta_delays)) + , cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + const vtr::NdMatrix& delays() const { + return delays_; + } + + private: + vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; +}; \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp new file mode 100644 index 00000000000..ceb8245511b --- /dev/null +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -0,0 +1,262 @@ + +#include "override_delay_model.h" + +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + +const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { + return base_delay_model_.get(); +} + +float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { + // First check to if there is an override delay value + const auto& 
device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); + t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); + + t_override override_key; + override_key.from_type = from_type_ptr->index; + override_key.from_class = from_type_ptr->pin_class[from_pin]; + override_key.to_type = to_type_ptr->index; + override_key.to_class = to_type_ptr->pin_class[to_pin]; + + //Delay overrides may be different for +/- delta so do not use + //an absolute delta for the look-up + override_key.delta_x = to_loc.x - from_loc.x; + override_key.delta_y = to_loc.y - from_loc.y; + + float delay_val = std::numeric_limits::quiet_NaN(); + auto override_iter = delay_overrides_.find(override_key); + if (override_iter != delay_overrides_.end()) { + //Found an override + delay_val = override_iter->second; + } else { + //Fall back to the base delay model if no override was found + delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); + } + + return delay_val; +} + +void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { + t_override override_key; + override_key.from_type = from_type; + override_key.from_class = from_class; + override_key.to_type = to_type; + override_key.to_class = to_class; + override_key.delta_x = delta_x; + override_key.delta_y = delta_y; + + auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); + if (!res.second) { //Key already exists + res.first->second = delay_val; //Overwrite existing delay + } +} + +void OverrideDelayModel::dump_echo(std::string filepath) const { + base_delay_model_->dump_echo(filepath); + + FILE* f = vtr::fopen(filepath.c_str(), "a"); + + fprintf(f, "\n"); + fprintf(f, "# Delay Overrides\n"); + auto& device_ctx = g_vpr_ctx.device(); + for (auto kv : delay_overrides_) { + auto override_key = kv.first; + float 
delay_val = kv.second; + fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", + device_ctx.physical_tile_types[override_key.from_type].name.c_str(), + device_ctx.physical_tile_types[override_key.to_type].name.c_str(), + override_key.from_class, + override_key.to_class, + override_key.delta_x, + override_key.delta_y, + delay_val); + } + + vtr::fclose(f); +} + +float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { + t_override key; + key.from_type = from_type; + key.from_class = from_class; + key.to_type = to_type; + key.to_class = to_class; + key.delta_x = delta_x; + key.delta_y = delta_y; + + auto iter = delay_overrides_.find(key); + if (iter == delay_overrides_.end()) { + VPR_THROW(VPR_ERROR_PLACE, "Key not found."); + } + return iter->second; +} + +void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { + base_delay_model_ = std::move(base_delay_model_obj); +} + +/** + * When writing capnp targetted serialization, always allow compilation when + * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. + */ +#ifndef VTR_ENABLE_CAPNPROTO + +# define DISABLE_ERROR \ + "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." 
+ +void DeltaDelayModel::read(const std::string& /*file*/) { + VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::read " DISABLE_ERROR); +} + +void DeltaDelayModel::write(const std::string& /*file*/) const { + VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write " DISABLE_ERROR); +} + +void OverrideDelayModel::read(const std::string& /*file*/) { + VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); +} + +void OverrideDelayModel::write(const std::string& /*file*/) const { + VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write " DISABLE_ERROR); +} + +#else /* VTR_ENABLE_CAPNPROTO */ + +static void ToFloat(float* out, const VprFloatEntry::Reader& in) { + // Getting a scalar field is always "get()". + *out = in.getValue(); +} + +static void FromFloat(VprFloatEntry::Builder* out, const float& in) { + // Setting a scalar field is always "set(value)". + out->setValue(in); +} + +void DeltaDelayModel::read(const std::string& file) { + // MmapFile object creates an mmap of the specified path, and will munmap + // when the object leaves scope. + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + + // FlatArrayMessageReader is used to read the message from the data array + // provided by MmapFile. + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + // When reading capnproto files the Reader object to use is named + // ::Reader. + // + // Initially this object is an empty VprDeltaDelayModel. + VprDeltaDelayModel::Reader model; + + // The reader.getRoot performs a cast from the generic capnproto to fit + // with the specified schema. + // + // Note that capnproto does not validate that the incoming data matches the + // schema. If this property is required, some form of check would be + // required. + model = reader.getRoot(); + + // ToNdMatrix is a generic function for converting a Matrix capnproto + // to a vtr::NdMatrix. 
+ // + // The use must supply the matrix dimension (2 in this case), the source + // capnproto type (VprFloatEntry), + // target C++ type (flat), and a function to convert from the source capnproto + // type to the target C++ type (ToFloat). + // + // The second argument should be of type Matrix::Reader where X is the + // capnproto element type. + ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); +} + +void DeltaDelayModel::write(const std::string& file) const { + // MallocMessageBuilder object is the generate capnproto message builder, + // using malloc for buffer allocation. + ::capnp::MallocMessageBuilder builder; + + // initRoot returns a X::Builder object that can be used to set the + // fields in the message. + auto model = builder.initRoot(); + + // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a + // Matrix message. It is the mirror function of ToNdMatrix described in + // read above. + auto delay_values = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); + + // writeMessageToFile writes message to the specified file. + writeMessageToFile(file, &builder); +} + +void OverrideDelayModel::read(const std::string& file) { + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + vtr::NdMatrix delays; + auto model = reader.getRoot(); + ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + + base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); + + // Reading non-scalar capnproto fields is roughly equivilant to using + // a std::vector of the field type. Actual type is capnp::List::Reader. 
+ auto overrides = model.getDelayOverrides(); + std::vector > overrides_arr(overrides.size()); + for (size_t i = 0; i < overrides.size(); ++i) { + const auto& elem = overrides[i]; + overrides_arr[i].first.from_type = elem.getFromType(); + overrides_arr[i].first.to_type = elem.getToType(); + overrides_arr[i].first.from_class = elem.getFromClass(); + overrides_arr[i].first.to_class = elem.getToClass(); + overrides_arr[i].first.delta_x = elem.getDeltaX(); + overrides_arr[i].first.delta_y = elem.getDeltaY(); + + overrides_arr[i].second = elem.getDelay(); + } + + delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); +} + +void OverrideDelayModel::write(const std::string& file) const { + ::capnp::MallocMessageBuilder builder; + auto model = builder.initRoot(); + + auto delays = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + + // Non-scalar capnproto fields should be first initialized with + // init(count), and then accessed from the returned + // std::vector-like Builder object (specifically capnp::List::Builder). 
+ auto overrides = model.initDelayOverrides(delay_overrides_.size()); + auto dst_iter = overrides.begin(); + for (const auto& src : delay_overrides_) { + auto elem = *dst_iter++; + elem.setFromType(src.first.from_type); + elem.setToType(src.first.to_type); + elem.setFromClass(src.first.from_class); + elem.setToClass(src.first.to_class); + elem.setDeltaX(src.first.delta_x); + elem.setDeltaY(src.first.delta_y); + + elem.setDelay(src.second); + } + + writeMessageToFile(file, &builder); +} + +#endif \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.h b/vpr/src/place/timing/delay_model/override_delay_model.h new file mode 100644 index 00000000000..23f6d01d709 --- /dev/null +++ b/vpr/src/place/timing/delay_model/override_delay_model.h @@ -0,0 +1,112 @@ + +#pragma once + +#include "place_delay_model.h" +#include "delta_delay_model.h" + +class OverrideDelayModel : public PlaceDelayModel { + public: + OverrideDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + /** + * @brief returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the + * specified from and to pins + */ + float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + public: //Mutators + void set_base_delay_model(std::unique_ptr base_delay_model); + const DeltaDelayModel* base_delay_model() const; + float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; + void set_delay_override(int from_type, int 
from_class, int to_type, int to_class, int delta_x, int delta_y, float delay); + + private: + std::unique_ptr base_delay_model_; + /// Minimum delay of cross-layer connections + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; + + void compute_override_delay_model(RouterDelayProfiler& router, + const t_router_opts& router_opts); + + /** + * @brief Structure that allows delays to be queried from the delay model. + * + * Delay is calculated given the origin physical tile, the origin + * pin, the destination physical tile, and the destination pin. + * This structure encapsulates all these information. + * + * @param from_type, to_type + * Physical tile index (for easy array access) + * @param from_class, to_class + * The class that the pins belongs to. + * @param to_x, to_y + * The horizontal and vertical displacement + * between two physical tiles. + */ + struct t_override { + short from_type; + short to_type; + short from_class; + short to_class; + short delta_x; + short delta_y; + + /** + * @brief Comparison operator designed for performance. + * + * Operator< is important since t_override serves as the key into the + * map structure delay_overrides_. A default comparison operator would + * not be inlined by the compiler. + * + * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare + * is required for operator< to be inlined by compiler. Proper inlining of + * the function reduces place time by around 5%. 
+ * + * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 + */ + friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { + const short* left = reinterpret_cast(&lhs); + const short* right = reinterpret_cast(&rhs); + constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); + return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); + } + }; + + /** + * @brief Map data structure that returns delay values according to + * specific delay model queries. + * + * Delay model queries are provided by the t_override structure, which + * encapsulates the information regarding the origin and the destination. + */ + vtr::flat_map2 delay_overrides_; + + /** + * operator< treats memory layout of t_override as an array of short. + * This requires all members of t_override are shorts and there is no + * padding between members of t_override. + */ + static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); + static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); +}; 
\ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp index 4f626a5817f..a91547a7e5e 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/place_delay_model.cpp @@ -4,319 +4,16 @@ * routines related to the placer delay model. */ -#include #include "place_delay_model.h" + +#include + #include "globals.h" #include "router_lookahead_map.h" -#include "rr_graph2.h" - #include "timing_place_lookup.h" #include "placer_state.h" - -#include "vtr_log.h" -#include "vtr_math.h" #include "vpr_error.h" -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "place_delay_model.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif /* VTR_ENABLE_CAPNPROTO */ - -///@brief DeltaDelayModel methods. -float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -void DeltaDelayModel::dump_echo(std::string filepath) const { - FILE* f = vtr::fopen(filepath.c_str(), "w"); - fprintf(f, " "); - for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { - for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { - fprintf(f, " %9zu", from_layer_num); - fprintf(f, "\n"); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); - } - fprintf(f, "\n"); - } - } - } - vtr::fclose(f); -} - -const DeltaDelayModel* 
OverrideDelayModel::base_delay_model() const { - return base_delay_model_.get(); -} - -///@brief OverrideDelayModel methods. -float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { - //First check to if there is an override delay value - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); - t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); - - t_override override_key; - override_key.from_type = from_type_ptr->index; - override_key.from_class = from_type_ptr->pin_class[from_pin]; - override_key.to_type = to_type_ptr->index; - override_key.to_class = to_type_ptr->pin_class[to_pin]; - - //Delay overrides may be different for +/- delta so do not use - //an absolute delta for the look-up - override_key.delta_x = to_loc.x - from_loc.x; - override_key.delta_y = to_loc.y - from_loc.y; - - float delay_val = std::numeric_limits::quiet_NaN(); - auto override_iter = delay_overrides_.find(override_key); - if (override_iter != delay_overrides_.end()) { - //Found an override - delay_val = override_iter->second; - } else { - //Fall back to the base delay model if no override was found - delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); - } - - return delay_val; -} - -void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { - t_override override_key; - override_key.from_type = from_type; - override_key.from_class = from_class; - override_key.to_type = to_type; - override_key.to_class = to_class; - override_key.delta_x = delta_x; - override_key.delta_y = delta_y; - - auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); - if (!res.second) { //Key already exists - res.first->second = delay_val; //Overwrite existing delay - } -} - -void 
OverrideDelayModel::dump_echo(std::string filepath) const { - base_delay_model_->dump_echo(filepath); - - FILE* f = vtr::fopen(filepath.c_str(), "a"); - - fprintf(f, "\n"); - fprintf(f, "# Delay Overrides\n"); - auto& device_ctx = g_vpr_ctx.device(); - for (auto kv : delay_overrides_) { - auto override_key = kv.first; - float delay_val = kv.second; - fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", - device_ctx.physical_tile_types[override_key.from_type].name.c_str(), - device_ctx.physical_tile_types[override_key.to_type].name.c_str(), - override_key.from_class, - override_key.to_class, - override_key.delta_x, - override_key.delta_y, - delay_val); - } - - vtr::fclose(f); -} - -float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { - t_override key; - key.from_type = from_type; - key.from_class = from_class; - key.to_type = to_type; - key.to_class = to_class; - key.delta_x = delta_x; - key.delta_y = delta_y; - - auto iter = delay_overrides_.find(key); - if (iter == delay_overrides_.end()) { - VPR_THROW(VPR_ERROR_PLACE, "Key not found."); - } - return iter->second; -} - -void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { - base_delay_model_ = std::move(base_delay_model_obj); -} - -float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; - return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -/** - * When writing capnp targetted serialization, always allow compilation when - * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. 
- */ -#ifndef VTR_ENABLE_CAPNPROTO - -# define DISABLE_ERROR \ - "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." - -void DeltaDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::read " DISABLE_ERROR); -} - -void DeltaDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write " DISABLE_ERROR); -} - -void OverrideDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); -} - -void OverrideDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write " DISABLE_ERROR); -} - -#else /* VTR_ENABLE_CAPNPROTO */ - -static void ToFloat(float* out, const VprFloatEntry::Reader& in) { - // Getting a scalar field is always "get()". - *out = in.getValue(); -} - -static void FromFloat(VprFloatEntry::Builder* out, const float& in) { - // Setting a scalar field is always "set(value)". - out->setValue(in); -} - -void DeltaDelayModel::read(const std::string& file) { - // MmapFile object creates an mmap of the specified path, and will munmap - // when the object leaves scope. - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - - // FlatArrayMessageReader is used to read the message from the data array - // provided by MmapFile. - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - // When reading capnproto files the Reader object to use is named - // ::Reader. - // - // Initially this object is an empty VprDeltaDelayModel. - VprDeltaDelayModel::Reader model; - - // The reader.getRoot performs a cast from the generic capnproto to fit - // with the specified schema. - // - // Note that capnproto does not validate that the incoming data matches the - // schema. 
If this property is required, some form of check would be - // required. - model = reader.getRoot(); - - // ToNdMatrix is a generic function for converting a Matrix capnproto - // to a vtr::NdMatrix. - // - // The use must supply the matrix dimension (2 in this case), the source - // capnproto type (VprFloatEntry), - // target C++ type (flat), and a function to convert from the source capnproto - // type to the target C++ type (ToFloat). - // - // The second argument should be of type Matrix::Reader where X is the - // capnproto element type. - ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); -} - -void DeltaDelayModel::write(const std::string& file) const { - // MallocMessageBuilder object is the generate capnproto message builder, - // using malloc for buffer allocation. - ::capnp::MallocMessageBuilder builder; - - // initRoot returns a X::Builder object that can be used to set the - // fields in the message. - auto model = builder.initRoot(); - - // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a - // Matrix message. It is the mirror function of ToNdMatrix described in - // read above. - auto delay_values = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); - - // writeMessageToFile writes message to the specified file. - writeMessageToFile(file, &builder); -} - -void OverrideDelayModel::read(const std::string& file) { - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - vtr::NdMatrix delays; - auto model = reader.getRoot(); - ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); - - base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); - - // Reading non-scalar capnproto fields is roughly equivilant to using - // a std::vector of the field type. 
Actual type is capnp::List::Reader. - auto overrides = model.getDelayOverrides(); - std::vector > overrides_arr(overrides.size()); - for (size_t i = 0; i < overrides.size(); ++i) { - const auto& elem = overrides[i]; - overrides_arr[i].first.from_type = elem.getFromType(); - overrides_arr[i].first.to_type = elem.getToType(); - overrides_arr[i].first.from_class = elem.getFromClass(); - overrides_arr[i].first.to_class = elem.getToClass(); - overrides_arr[i].first.delta_x = elem.getDeltaX(); - overrides_arr[i].first.delta_y = elem.getDeltaY(); - - overrides_arr[i].second = elem.getDelay(); - } - - delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); -} - -void OverrideDelayModel::write(const std::string& file) const { - ::capnp::MallocMessageBuilder builder; - auto model = builder.initRoot(); - - auto delays = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); - - // Non-scalar capnproto fields should be first initialized with - // init(count), and then accessed from the returned - // std::vector-like Builder object (specifically capnp::List::Builder). - auto overrides = model.initDelayOverrides(delay_overrides_.size()); - auto dst_iter = overrides.begin(); - for (const auto& src : delay_overrides_) { - auto elem = *dst_iter++; - elem.setFromType(src.first.from_type); - elem.setToType(src.first.to_type); - elem.setFromClass(src.first.from_class); - elem.setToClass(src.first.to_class); - elem.setDeltaX(src.first.delta_x); - elem.setDeltaY(src.first.delta_y); - - elem.setDelay(src.second); - } - - writeMessageToFile(file, &builder); -} - -#endif - ///@brief Initialize the placer delay model. 
std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, t_chan_width_dist chan_width_dist, diff --git a/vpr/src/place/timing/delay_model/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h index 0aa01385e6e..e361f8cc197 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.h +++ b/vpr/src/place/timing/delay_model/place_delay_model.h @@ -5,6 +5,7 @@ */ #pragma once + #include "vtr_ndmatrix.h" #include "vtr_flat_map.h" #include "vpr_types.h" @@ -54,11 +55,10 @@ class PlaceDelayModel { virtual ~PlaceDelayModel() = default; ///@brief Computes place delay model. - virtual void compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) + virtual void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) = 0; /** @@ -86,175 +86,5 @@ class PlaceDelayModel { virtual void read(const std::string& file) = 0; }; -///@brief A simple delay model based on the distance (delta) between block locations. 
-class DeltaDelayModel : public PlaceDelayModel { - public: - DeltaDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - DeltaDelayModel(float min_cross_layer_delay, - vtr::NdMatrix delta_delays, - bool is_flat) - : delays_(std::move(delta_delays)) - , cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - - void compute( - RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - const vtr::NdMatrix& delays() const { - return delays_; - } - - private: - vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] - float cross_layer_delay_; - /** - * @brief Indicates whether the router is a two-stage or run-flat - */ - bool is_flat_; -}; - -class OverrideDelayModel : public PlaceDelayModel { - public: - OverrideDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - void compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the - // specified from and to pins - float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - - public: //Mutators - void set_base_delay_model(std::unique_ptr base_delay_model); - const 
DeltaDelayModel* base_delay_model() const; - float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; - void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay); - - private: - std::unique_ptr base_delay_model_; - /** - * @brief Minimum delay of cross-layer connections - */ - float cross_layer_delay_; - /** - * @brief Indicates whether the router is a two-stage or run-flat - */ - bool is_flat_; - void compute_override_delay_model(RouterDelayProfiler& router, - const t_router_opts& router_opts); - /** - * @brief Structure that allows delays to be queried from the delay model. - * - * Delay is calculated given the origin physical tile, the origin - * pin, the destination physical tile, and the destination pin. - * This structure encapsulates all these information. - * - * @param from_type, to_type - * Physical tile index (for easy array access) - * @param from_class, to_class - * The class that the pins belongs to. - * @param to_x, to_y - * The horizontal and vertical displacement - * between two physical tiles. - */ - struct t_override { - short from_type; - short to_type; - short from_class; - short to_class; - short delta_x; - short delta_y; - - /** - * @brief Comparison operator designed for performance. - * - * Operator< is important since t_override serves as the key into the - * map structure delay_overrides_. A default comparison operator would - * not be inlined by the compiler. - * - * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare - * is required for operator< to be inlined by compiler. Proper inlining of - * the function reduces place time by around 5%. 
- * - * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 - */ - friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { - const short* left = reinterpret_cast(&lhs); - const short* right = reinterpret_cast(&rhs); - constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); - return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); - } - }; - - /** - * @brief Map data structure that returns delay values according to - * specific delay model queries. - * - * Delay model queries are provided by the t_override structure, which - * encapsulates the information regarding the origin and the destination. - */ - vtr::flat_map2 delay_overrides_; - - /** - * operator< treats memory layout of t_override as an array of short. - * This requires all members of t_override are shorts and there is no - * padding between members of t_override. - */ - static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); - static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); -}; 
- -///@brief A simple delay model based on the information stored in router lookahead -/// This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router -class SimpleDelayModel : public PlaceDelayModel { - public: - SimpleDelayModel() {} - - void compute( - RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - void dump_echo(std::string /*filepath*/) const override {} - - void read(const std::string& /*file*/) override {} - void write(const std::string& /*file*/) const override {} - - private: - /** - * @brief The matrix to store the minimum delay between different points on different layers. - * - *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers - *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs - *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers - *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. - *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. 
- */ - vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] -}; diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp new file mode 100644 index 00000000000..0031d9eb1fe --- /dev/null +++ b/vpr/src/place/timing/delay_model/simple_delay_model.cpp @@ -0,0 +1,45 @@ + +#include "simple_delay_model.h" + + +void SimpleDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& /*placer_opts*/, + const t_router_opts& /*router_opts*/, + int /*longest_length*/) { + const auto& grid = g_vpr_ctx.device().grid; + const size_t num_physical_tile_types = g_vpr_ctx.device().physical_tile_types.size(); + const size_t num_layers = grid.get_num_layers(); + + // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height] + // The second index related to the layer that the source location is on and the third index is for the sink layer + delays_ = vtr::NdMatrix({num_physical_tile_types, + num_layers, + num_layers, + grid.width(), + grid.height()}); + + for (size_t physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { + for (size_t from_layer = 0; from_layer < num_layers; ++from_layer) { + for (size_t to_layer = 0; to_layer < num_layers; ++to_layer) { + for (size_t dx = 0; dx < grid.width(); ++dx) { + for (size_t dy = 0; dy < grid.height(); ++dy) { + float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, + from_layer, + to_layer, + dx, + dy); + delays_[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay; + } + } + } + } + } +} + +float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + int from_tile_idx = 
g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; + return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.h b/vpr/src/place/timing/delay_model/simple_delay_model.h new file mode 100644 index 00000000000..f5a856688cd --- /dev/null +++ b/vpr/src/place/timing/delay_model/simple_delay_model.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class SimpleDelayModel + * @brief A simple delay model based on the information stored in router lookahead + * This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router + */ +class SimpleDelayModel : public PlaceDelayModel { + public: + SimpleDelayModel() {} + + /// @brief Use the information in the router lookahead to fill the delay matrix instead of running the router + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string /*filepath*/) const override {} + + void read(const std::string& /*file*/) override {} + void write(const std::string& /*file*/) const override {} + + private: + /** + * @brief The matrix to store the minimum delay between different points on different layers. + * + *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers + *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs + *on each layer. 
Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers + *is not uniform. For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. + *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. + */ + vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] +}; \ No newline at end of file diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index fa6a9acb0bb..76b06bbc55b 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -25,6 +25,9 @@ #include "route_profiling.h" #include "router_delay_profiling.h" #include "place_delay_model.h" +#include "simple_delay_model.h" +#include "delta_delay_model.h" +#include "override_delay_model.h" /*To compute delay between blocks we calculate the delay between */ /*different nodes in the FPGA. 
From this procedure we generate @@ -123,13 +126,6 @@ static vtr::NdMatrix compute_delta_delay_model( int longest_length, bool is_flat); -/** - * @brief Use the information in the router lookahead to fill the delay matrix instead of running the router - * @param route_profiler - * @return The delay matrix that contain the minimum cost between two locations - */ -static vtr::NdMatrix compute_simple_delay_model(RouterDelayProfiler& route_profiler); - static bool find_direct_connect_sample_locations(const t_direct_inf* direct, t_physical_tile_type_ptr from_type, int from_pin, @@ -209,11 +205,10 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& return place_delay_model; } -void DeltaDelayModel::compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { +void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { delays_ = compute_delta_delay_model( route_profiler, placer_opts, router_opts, /*measure_directconnect=*/true, @@ -237,14 +232,6 @@ void OverrideDelayModel::compute( compute_override_delay_model(route_profiler, router_opts); } -void SimpleDelayModel::compute( - RouterDelayProfiler& router, - const t_placer_opts& /*placer_opts*/, - const t_router_opts& /*router_opts*/, - int /*longest_length*/) { - delays_ = compute_simple_delay_model(router); -} - /******* File Accessible Functions **********/ std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { @@ -1028,36 +1015,7 @@ static vtr::NdMatrix compute_delta_delay_model( return delta_delays; } -static vtr::NdMatrix compute_simple_delay_model(RouterDelayProfiler& route_profiler) { - const auto& grid = g_vpr_ctx.device().grid; - int num_physical_tile_types = static_cast(g_vpr_ctx.device().physical_tile_types.size()); - // Initializing the delay matrix to 
[num_physical_types][num_layers][num_layers][width][height] - // The second index related to the layer that the source location is on and the third index is for the sink layer - vtr::NdMatrix delta_delays({static_cast(num_physical_tile_types), - static_cast(grid.get_num_layers()), - static_cast(grid.get_num_layers()), - grid.width(), - grid.height()}); - - for (int physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { - for (int from_layer = 0; from_layer < grid.get_num_layers(); ++from_layer) { - for (int to_layer = 0; to_layer < grid.get_num_layers(); ++to_layer) { - for (int dx = 0; dx < static_cast(grid.width()); ++dx) { - for (int dy = 0; dy < static_cast(grid.height()); ++dy) { - float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, - from_layer, - to_layer, - dx, - dy); - delta_delays[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay; - } - } - } - } - } - return delta_delays; -} //Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification static bool find_direct_connect_sample_locations(const t_direct_inf* direct, From bf02e65dfa68bfa9e04f77a9aae37436a30af585 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 12:23:13 -0500 Subject: [PATCH 04/39] add compute_delta_delays_utils files --- .../compute_delta_delays_utils.cpp | 889 ++++++++++++++ .../delay_model/compute_delta_delays_utils.h | 27 + .../timing/delay_model/delta_delay_model.cpp | 14 + .../delay_model/override_delay_model.cpp | 95 ++ vpr/src/place/timing_place_lookup.cpp | 1024 +---------------- 5 files changed, 1026 insertions(+), 1023 deletions(-) create mode 100644 vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp create mode 100644 vpr/src/place/timing/delay_model/compute_delta_delays_utils.h diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp new file 
mode 100644 index 00000000000..78855a251b6 --- /dev/null +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -0,0 +1,889 @@ + +#include "compute_delta_delays_utils.h" + +#include "vtr_time.h" +#include "vtr_math.h" +#include "physical_types.h" +#include "globals.h" +#include "timing_place_lookup.h" + +/// Indicates the delta delay value has not been calculated +static constexpr float UNINITIALIZED_DELTA = -1; +/// Indicates delta delay from/to an EMPTY block +static constexpr float EMPTY_DELTA = -2; +/// Indicates there is no valid delta delay +static constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); + +static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, + const t_placer_opts& palcer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + size_t longest_length, + bool is_flat); + +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); + +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); + +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays); + +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); + +static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool /*is_flat*/); + +static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool is_flat); + +static float route_connection_delay(RouterDelayProfiler& route_profiler, + int from_layer_num, + int 
to_layer_num, + int source_x_loc, + int source_y_loc, + int sink_x_loc, + int sink_y_loc, + const t_router_opts& router_opts, + bool measure_directconnect); + +float delay_reduce(std::vector& delays, e_reducer reducer); + +static void add_delay_to_matrix(vtr::Matrix>* matrix, + int delta_x, + int delta_y, + float delay); + +static float find_neighboring_average(vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc to_tile_loc, + int max_distance); + +/***************************************************************************************/ + +static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + size_t longest_length, + bool is_flat) { + /* To avoid edge effects we place the source at least 'longest_length' away + * from the device edge and route from there for all possible delta values < dimension + */ + + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + const size_t num_layers = grid.get_num_layers(); + const size_t device_width = grid.width(); + const size_t device_height = grid.height(); + + vtr::NdMatrix delta_delays({num_layers, num_layers, device_width, device_height}); + + for (int from_layer_num = 0; from_layer_num < (int)num_layers; from_layer_num++) { + for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) { + vtr::NdMatrix, 2> sampled_delta_delays({device_width, device_height}); + + size_t mid_x = vtr::nint(device_width / 2); + size_t mid_y = vtr::nint(device_height / 2); + + size_t low_x = std::min(longest_length, mid_x); + size_t low_y = std::min(longest_length, mid_y); + size_t high_x = mid_x; + size_t high_y = mid_y; + if (longest_length <= device_width) { + high_x = std::max(device_width - longest_length, mid_x); + } + if (longest_length <= device_height) { + high_y = std::max(device_width - longest_length, mid_y); + } + + std::set allowed_types; + if 
(!placer_opts.allowed_tiles_for_delay_model.empty()) { + auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + for (const auto& type : allowed_types_vector) { + allowed_types.insert(type); + } + } + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . = (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + + //Find the lowest y location on the left edge with a non-empty block + int y = 0; + int x = 0; + t_physical_tile_type_ptr src_type = nullptr; + for (x = 0; x < (int)grid.width(); ++x) { + for (y = 0; y < (int)grid.height(); ++y) { + auto type = grid.get_physical_type({x, y, from_layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { + continue; + } + src_type = type; + break; + } + } + if (src_type) { + break; + } + } + VTR_ASSERT(src_type != nullptr); + + auto generic_compute_matrix = (placer_opts.place_delta_delay_matrix_calculation_method == e_place_delta_delay_algorithm::ASTAR_ROUTE) ? 
generic_compute_matrix_iterative_astar : generic_compute_matrix_dijkstra_expansion; + +#ifdef VERBOSE + VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Find the lowest x location on the bottom edge with a non-empty block + src_type = nullptr; + for (y = 0; y < (int)grid.height(); ++y) { + for (x = 0; x < (int)grid.width(); ++x) { + auto type = grid.get_physical_type({x, y, from_layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { + continue; + } + src_type = type; + break; + } + } + if (src_type) { + break; + } + } + VTR_ASSERT(src_type != nullptr); +#ifdef VERBOSE + VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions B, C, E, F +#ifdef VERBOSE + VTR_LOG("Computing from low/low:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, low_y, + low_x, low_y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions D, E, G, H +#ifdef VERBOSE + VTR_LOG("Computing from high/high:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + high_x, high_y, + 0, 0, + high_x, high_y, + router_opts, + 
measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions A, B, D, E +#ifdef VERBOSE + VTR_LOG("Computing from high/low:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + high_x, low_y, + 0, low_y, + high_x, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions E, F, H, I +#ifdef VERBOSE + VTR_LOG("Computing from low/high:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, high_y, + low_x, 0, + grid.width() - 1, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { + for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { + delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + } + } + } + } + + return delta_delays; +} + +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { + // Set any empty delta's to the average of it's neighbours + + for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) { + for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) { + for (size_t delta_y = 0; delta_y < delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { + delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; + } + } + } + } + } +} + +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { + // Set any empty delta's to the average of it's neighbours 
+ // + // Empty coordinates may occur if the sampling location happens to not have + // a connection at that location. However a more through sampling likely + // would return a result, so we fill in the empty holes with a small + // neighbour average. + constexpr int kMaxAverageDistance = 2; + for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { + for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) { + delta_delays[from_layer][to_layer][delta_x][delta_y] = + find_neighboring_average(delta_delays, + from_layer, + {delta_x, delta_y, to_layer}, + kMaxAverageDistance); + } + } + } + } + } +} + +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { + // Set any impossible delta's to the average of its neighbours + // + // Impossible coordinates may occur if an IPIN cannot be reached from the + // sampling OPIN. This might occur if the IPIN or OPIN used for sampling + // is specialized, and therefore cannot be reached via the by the pins + // sampled. Leaving this value in the delay matrix will result in invalid + // slacks if the delay matrix uses this value. + // + // A max average distance of 5 is used to provide increased effort in + // filling these gaps. It is more important to have a poor predication, + // than an invalid value and causing a slack assertion. 
+ constexpr int kMaxAverageDistance = 5; + for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { + delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neighboring_average( + delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance); + } + } + } + } + } +} + +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + + for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { + for (size_t x = 0; x < grid.width(); ++x) { + for (size_t y = 0; y < grid.height(); ++y) { + float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y]; + + if (delta_delay < 0.) 
{ + VPR_ERROR(VPR_ERROR_PLACE, + "Found invaild negative delay %g for delta [%d,%d,%d,%d]", + delta_delay, from_layer_num, to_layer_num, x, y); + } + } + } + } + } + + return true; +} + +static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool /*is_flat*/) { + //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling from (%d,%d)", source_x, source_y)); + + int delta_x, delta_y; + int sink_x, sink_y; + + auto& device_ctx = g_vpr_ctx.device(); + + for (sink_x = start_x; sink_x <= end_x; sink_x++) { + for (sink_y = start_y; sink_y <= end_y; sink_y++) { + delta_x = abs(sink_x - source_x); + delta_y = abs(sink_y - source_y); + + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); + + bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE + || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); + + bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); + + if (src_or_target_empty || !is_allowed_type) { + if (matrix[delta_x][delta_y].empty()) { + //Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + } + } else { + //Valid start/end + + float delay = route_connection_delay(route_profiler, + from_layer_num, + to_layer_num, + source_x, + source_y, + sink_x, + sink_y, + router_opts, + measure_directconnect); + +#ifdef 
VERBOSE + VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", + delay, + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { + //Overwrite empty delta + matrix[delta_x][delta_y][0] = delay; + } else { + //Collect delta + matrix[delta_x][delta_y].push_back(delay); + } + } + } + } +} + +static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*route_profiler*/, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool is_flat) { + auto& device_ctx = g_vpr_ctx.device(); + + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); + bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); + if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + + if (matrix[delta_x][delta_y].empty()) { + //Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + } + } + } + + return; + } + + vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); + + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); + for (int driver_ptc : best_driver_ptcs) { + VTR_ASSERT(driver_ptc != OPEN); + RRNodeId source_rr_node = 
device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); + + VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); + auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); + + bool path_to_all_sinks = true; + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + + if (found_matrix[delta_x][delta_y]) { + continue; + } + + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); + if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (matrix[delta_x][delta_y].empty()) { + //Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + found_matrix[delta_x][delta_y] = true; + } + } else { + bool found_a_sink = false; + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); + for (int sink_ptc : best_sink_ptcs) { + VTR_ASSERT(sink_ptc != OPEN); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); + + if (sink_rr_node == RRNodeId::INVALID()) + continue; + + if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { + //Skip if we shouldn't measure direct connects and a direct connect exists + continue; + } + + if (std::isnan(delays[sink_rr_node])) { + // This sink was not found + continue; + } + +#ifdef VERBOSE + VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", + delays[size_t(sink_rr_node)], + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + found_matrix[delta_x][delta_y] = true; + + 
add_delay_to_matrix(&matrix, delta_x, delta_y, delays[sink_rr_node]); + + found_a_sink = true; + break; + } + + if (!found_a_sink) { + path_to_all_sinks = false; + } + } + } + } + + if (path_to_all_sinks) { + break; + } + } + + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + if (!found_matrix[delta_x][delta_y]) { + add_delay_to_matrix(&matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + source_x, + source_y, + from_layer_num, + sink_x, + sink_y, + to_layer_num, + IMPOSSIBLE_DELTA); + } + } + } +} + +static float route_connection_delay(RouterDelayProfiler& route_profiler, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int sink_x, + int sink_y, + const t_router_opts& router_opts, + bool measure_directconnect) { + //Routes between the source and sink locations and calculates the delay + + float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */ + + auto& device_ctx = g_vpr_ctx.device(); + + bool successfully_routed = false; + + //Get the rr nodes to route between + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); + + for (int driver_ptc : best_driver_ptcs) { + VTR_ASSERT(driver_ptc != OPEN); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); + + VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); + + for (int sink_ptc : best_sink_ptcs) { + VTR_ASSERT(sink_ptc != OPEN); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); + + if 
(sink_rr_node == RRNodeId::INVALID()) + continue; + + if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { + //Skip if we shouldn't measure direct connects and a direct connect exists + continue; + } + + { + successfully_routed = route_profiler.calculate_delay( + source_rr_node, sink_rr_node, + router_opts, + &net_delay_value); + } + + if (successfully_routed) break; + } + if (successfully_routed) break; + } + + if (!successfully_routed) { + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + source_x, source_y, from_layer_num, sink_x, sink_y, to_layer_num, net_delay_value); + } + + return net_delay_value; +} + +float delay_reduce(std::vector& delays, e_reducer reducer) { + if (delays.empty()) { + return IMPOSSIBLE_DELTA; + } else if (delays.size() == 1) { + return delays[0]; + } + + VTR_ASSERT(delays.size() > 1); + + float delay; + + if (reducer == e_reducer::MIN) { + auto itr = std::min_element(delays.begin(), delays.end()); + delay = *itr; + } else if (reducer == e_reducer::MAX) { + auto itr = std::max_element(delays.begin(), delays.end()); + delay = *itr; + } else if (reducer == e_reducer::MEDIAN) { + std::stable_sort(delays.begin(), delays.end()); + delay = vtr::median(delays.begin(), delays.end()); + } else if (reducer == e_reducer::ARITHMEAN) { + delay = vtr::arithmean(delays.begin(), delays.end()); + } else if (reducer == e_reducer::GEOMEAN) { + delay = vtr::geomean(delays.begin(), delays.end()); + } else { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer"); + } + + return delay; +} + +static void add_delay_to_matrix(vtr::Matrix>* matrix, + int delta_x, + int delta_y, + float delay) { + if ((*matrix)[delta_x][delta_y].size() == 1 && (*matrix)[delta_x][delta_y][0] == EMPTY_DELTA) { + //Overwrite empty delta + (*matrix)[delta_x][delta_y][0] = delay; + } else { + //Collect delta + (*matrix)[delta_x][delta_y].push_back(delay); + } +} + +/* We 
return the average placement estimated delay for a routing spanning (x,y). + * We start with an averaging distance of 1 (i.e. from (x-1,y-1) to (x+1,y+1)) + * and look for legal delay values to average; if some are found we return the + * average and if none are found we increase the distance to average over. + * + * If no legal values are found to average over with a range of max_distance, + * we return IMPOSSIBLE_DELTA. + */ +static float find_neighboring_average(vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc to_tile_loc, + int max_distance) { + float sum = 0; + int counter = 0; + int endx = matrix.end_index(2); + int endy = matrix.end_index(3); + + int x = to_tile_loc.x; + int y = to_tile_loc.y; + int to_layer = to_tile_loc.layer_num; + + for (int distance = 1; distance <= max_distance; ++distance) { + for (int delx = x - distance; delx <= x + distance; delx++) { + for (int dely = y - distance; dely <= y + distance; dely++) { + // Check distance constraint + if (abs(delx - x) + abs(dely - y) > distance) { + continue; + } + + //check out of bounds + if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) { + continue; + } + + if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { + continue; + } + counter++; + sum += matrix[from_layer][to_layer][delx][dely]; + } + } + if (counter != 0) { + return sum / (float)counter; + } + } + + return IMPOSSIBLE_DELTA; +} + +/***************************************************************************************/ + +vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + int longest_length, + bool is_flat) { + vtr::ScopedStartFinishTimer timer("Computing delta delays"); + vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, + placer_opts, + router_opts, + 
measure_directconnect, + longest_length, + is_flat); + + fix_uninitialized_coordinates(delta_delays); + + fix_empty_coordinates(delta_delays); + + fill_impossible_coordinates(delta_delays); + + verify_delta_delays(delta_delays); + + return delta_delays; +} + +//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification +bool find_direct_connect_sample_locations(const t_direct_inf* direct, + t_physical_tile_type_ptr from_type, + int from_pin, + int from_pin_class, + t_physical_tile_type_ptr to_type, + int to_pin, + int to_pin_class, + RRNodeId& out_src_node, + RRNodeId& out_sink_node) { + VTR_ASSERT(from_type != nullptr); + VTR_ASSERT(to_type != nullptr); + + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + const auto& node_lookup = device_ctx.rr_graph.node_lookup(); + + //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, + //and which has the appropriate pins + int from_x = -1; + int from_y = -1; + int from_sub_tile = -1; + int to_x = 0, to_y = 0, to_sub_tile = 0; + bool found = false; + int found_layer_num = -1; + //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums + for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { + for (int x = 0; x < (int)grid.width() && !found; ++x) { + to_x = x + direct->x_offset; + if (to_x < 0 || to_x >= (int)grid.width()) continue; + + for (int y = 0; y < (int)grid.height() && !found; ++y) { + if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; + + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool from_pin_found = false; + if (direct->from_side != NUM_2D_SIDES) { + RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); + from_pin_found = from_pin_rr.is_valid(); + } else { + from_pin_found = 
!(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); + } + if (!from_pin_found) continue; + + to_y = y + direct->y_offset; + + if (to_y < 0 || to_y >= (int)grid.height()) continue; + if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; + + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool to_pin_found = false; + if (direct->to_side != NUM_2D_SIDES) { + RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); + to_pin_found = (to_pin_rr != RRNodeId::INVALID()); + } else { + to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); + } + if (!to_pin_found) continue; + + for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) { + to_sub_tile = sub_tile_num + direct->sub_tile_offset; + + if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; + + found = true; + found_layer_num = layer_num; + from_x = x; + from_y = y; + from_sub_tile = sub_tile_num; + + break; + } + } + } + } + + if (!found) { + return false; + } + + //Now have a legal instance of this direct connect + VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); + VTR_ASSERT(from_sub_tile < from_type->capacity); + + VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); + VTR_ASSERT(to_sub_tile < to_type->capacity); + + VTR_ASSERT(from_x + direct->x_offset == to_x); + VTR_ASSERT(from_y + direct->y_offset == to_y); + VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile); + + // + //Find a source/sink RR node associated with the pins of the direct + // + + { + RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); + VTR_ASSERT(src_rr_candidate); + out_src_node = src_rr_candidate; + } + + { + RRNodeId sink_rr_candidate = 
node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); + VTR_ASSERT(sink_rr_candidate); + out_sink_node = sink_rr_candidate; + } + + return true; +} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h new file mode 100644 index 00000000000..bacff650334 --- /dev/null +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h @@ -0,0 +1,27 @@ + +#pragma once + +#include "vtr_ndmatrix.h" +#include "physical_types.h" +#include "rr_graph_fwd.h" + +struct t_placer_opts; +struct t_router_opts; +class RouterDelayProfiler; + +vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + int longest_length, + bool is_flat); + +bool find_direct_connect_sample_locations(const t_direct_inf* direct, + t_physical_tile_type_ptr from_type, + int from_pin, + int from_pin_class, + t_physical_tile_type_ptr to_type, + int to_pin, + int to_pin_class, + RRNodeId& out_src_node, + RRNodeId& out_sink_node); \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp index 55bb0104316..f4e202e7106 100644 --- a/vpr/src/place/timing/delay_model/delta_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/delta_delay_model.cpp @@ -1,6 +1,20 @@ #include "delta_delay_model.h" +#include "compute_delta_delays_utils.h" + +void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { + delays_ = compute_delta_delay_model(route_profiler, + placer_opts, + router_opts, + /*measure_directconnect=*/true, + longest_length, + is_flat_); +} + float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int 
/*to_pin*/) const { int delta_x = std::abs(from_loc.x - to_loc.x); diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp index ceb8245511b..33106acb208 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -1,6 +1,8 @@ #include "override_delay_model.h" +#include "compute_delta_delays_utils.h" + #ifdef VTR_ENABLE_CAPNPROTO # include "capnp/serialize.h" # include "place_delay_model.capnp.h" @@ -9,6 +11,99 @@ # include "serdes_utils.h" #endif // VTR_ENABLE_CAPNPROTO +void OverrideDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { + auto delays = compute_delta_delay_model(route_profiler, + placer_opts, + router_opts, + /*measure_directconnect=*/false, + longest_length, + is_flat_); + + base_delay_model_ = std::make_unique(cross_layer_delay_, delays, false); + + compute_override_delay_model(route_profiler, router_opts); +} + +void OverrideDelayModel::compute_override_delay_model(RouterDelayProfiler& route_profiler, + const t_router_opts& router_opts) { + t_router_opts router_opts2 = router_opts; + router_opts2.astar_fac = 0.f; + router_opts2.astar_offset = 0.f; + + //Look at all the direct connections that exist, and add overrides to delay model + auto& device_ctx = g_vpr_ctx.device(); + for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) { + const t_direct_inf* direct = &device_ctx.arch->directs[idirect]; + + InstPort from_port = parse_inst_port(direct->from_pin); + InstPort to_port = parse_inst_port(direct->to_pin); + + t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types); + t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types); + + int num_conns = 
from_port.port_high_index() - from_port.port_low_index() + 1; + VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from"); + + //We now walk through all the connections associated with the current direct specification, measure + //their delay and specify that value as an override in the delay model. + // + //Note that we need to check every connection in the direct to cover the case where the pins are not + //equivalent. + // + //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK + //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in + //sampled_rr_pairs and skipping them if they occur multiple times. + int missing_instances = 0; + int missing_paths = 0; + std::set> sampled_rr_pairs; + for (int iconn = 0; iconn < num_conns; ++iconn) { + //Find the associated pins + int from_pin = find_pin(from_type, from_port.port_name(), from_port.port_low_index() + iconn); + int to_pin = find_pin(to_type, to_port.port_name(), to_port.port_low_index() + iconn); + + VTR_ASSERT(from_pin != OPEN); + VTR_ASSERT(to_pin != OPEN); + + int from_pin_class = find_pin_class(from_type, from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); + VTR_ASSERT(from_pin_class != OPEN); + + int to_pin_class = find_pin_class(to_type, to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); + VTR_ASSERT(to_pin_class != OPEN); + + bool found_sample_points; + RRNodeId src_rr, sink_rr; + found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr); + + if (!found_sample_points) { + ++missing_instances; + continue; + } + + //If some of the source/sink ports are logically equivalent we may have already + //sampled the associated source/sink pair and don't need to do so again + if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; + + float 
direct_connect_delay = std::numeric_limits::quiet_NaN(); + bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); + + if (found_routing_path) { + set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay); + } else { + ++missing_paths; + } + + //Record that we've sampled this pair of source and sink nodes + sampled_rr_pairs.insert({src_rr, sink_rr}); + } + + VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str()); + VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str()); + } +} + const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { return base_delay_model_.get(); } diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 76b06bbc55b..21ff6d69cc6 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -47,107 +47,8 @@ constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); //Ind static t_chan_width setup_chan_width(const t_router_opts& router_opts, t_chan_width_dist chan_width_dist); -static float route_connection_delay( - RouterDelayProfiler& route_profiler, - int from_layer_num, - int to_layer_num, - int source_x_loc, - int source_y_loc, - int sink_x_loc, - int sink_y_loc, - const t_router_opts& router_opts, - bool measure_directconnect); - -// Prototype for computing delta delay matrix. 
-typedef std::function>&, - int, - int, - int, - int, - int, - int, - int, - int, - const t_router_opts&, - bool, - const std::set&, - bool)> - t_compute_delta_delay_matrix; - -static void generic_compute_matrix_iterative_astar( - RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool /***/); - -static void generic_compute_matrix_dijkstra_expansion( - RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool is_flat); - -static vtr::NdMatrix compute_delta_delays( - RouterDelayProfiler& route_profiler, - const t_placer_opts& palcer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - size_t longest_length, - bool is_flat); - -float delay_reduce(std::vector& delays, e_reducer reducer); - -static vtr::NdMatrix compute_delta_delay_model( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - int longest_length, - bool is_flat); - -static bool find_direct_connect_sample_locations(const t_direct_inf* direct, - t_physical_tile_type_ptr from_type, - int from_pin, - int from_pin_class, - t_physical_tile_type_ptr to_type, - int to_pin, - int to_pin_class, - RRNodeId& out_src_node, - RRNodeId& out_sink_node); - -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); - static int get_longest_segment_length(std::vector& segment_inf); -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); - -static float 
find_neighboring_average(vtr::NdMatrix& matrix, - int from_layer, - t_physical_tile_loc to_tile_loc, - int max_distance); - /******* Globally Accessible Functions **********/ std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, @@ -205,33 +106,6 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& return place_delay_model; } -void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { - delays_ = compute_delta_delay_model( - route_profiler, - placer_opts, router_opts, /*measure_directconnect=*/true, - longest_length, - is_flat_); -} - -void OverrideDelayModel::compute( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { - auto delays = compute_delta_delay_model( - route_profiler, - placer_opts, router_opts, /*measure_directconnect=*/false, - longest_length, - is_flat_); - - base_delay_model_ = std::make_unique(cross_layer_delay_, delays, false); - - compute_override_delay_model(route_profiler, router_opts); -} - /******* File Accessible Functions **********/ std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { @@ -334,902 +208,6 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, return init_chan(width_fac, chan_width_dist, graph_directionality); } -static float route_connection_delay( - RouterDelayProfiler& route_profiler, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int sink_x, - int sink_y, - const t_router_opts& router_opts, - bool measure_directconnect) { - //Routes between the source and sink locations and calculates the delay - - float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */ - - auto& device_ctx = g_vpr_ctx.device(); - - bool successfully_routed = false; - - //Get the rr nodes to route between - auto best_driver_ptcs 
= get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); - - for (int driver_ptc : best_driver_ptcs) { - VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); - - VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); - - for (int sink_ptc : best_sink_ptcs) { - VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); - - if (sink_rr_node == RRNodeId::INVALID()) - continue; - - if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { - //Skip if we shouldn't measure direct connects and a direct connect exists - continue; - } - - { - successfully_routed = route_profiler.calculate_delay( - source_rr_node, sink_rr_node, - router_opts, - &net_delay_value); - } - - if (successfully_routed) break; - } - if (successfully_routed) break; - } - - if (!successfully_routed) { - VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, from_layer_num, sink_x, sink_y, to_layer_num, net_delay_value); - } - - return (net_delay_value); -} - -static void add_delay_to_matrix( - vtr::Matrix>* matrix, - int delta_x, - int delta_y, - float delay) { - if ((*matrix)[delta_x][delta_y].size() == 1 && (*matrix)[delta_x][delta_y][0] == EMPTY_DELTA) { - //Overwrite empty delta - (*matrix)[delta_x][delta_y][0] = delay; - } else { - //Collect delta - (*matrix)[delta_x][delta_y].push_back(delay); - } -} - -static void generic_compute_matrix_dijkstra_expansion( - RouterDelayProfiler& /*route_profiler*/, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - 
int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool is_flat) { - auto& device_ctx = g_vpr_ctx.device(); - - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); - bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); - if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - - if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - } - } - } - - return; - } - - vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); - - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); - for (int driver_ptc : best_driver_ptcs) { - VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); - - VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); - auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); - - bool path_to_all_sinks = true; - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - - if (found_matrix[delta_x][delta_y]) { - continue; - } - - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); - if 
(sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - found_matrix[delta_x][delta_y] = true; - } - } else { - bool found_a_sink = false; - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); - for (int sink_ptc : best_sink_ptcs) { - VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); - - if (sink_rr_node == RRNodeId::INVALID()) - continue; - - if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { - //Skip if we shouldn't measure direct connects and a direct connect exists - continue; - } - - if (std::isnan(delays[sink_rr_node])) { - // This sink was not found - continue; - } - -#ifdef VERBOSE - VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", - delays[size_t(sink_rr_node)], - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - found_matrix[delta_x][delta_y] = true; - - add_delay_to_matrix(&matrix, delta_x, delta_y, delays[sink_rr_node]); - - found_a_sink = true; - break; - } - - if (!found_a_sink) { - path_to_all_sinks = false; - } - } - } - } - - if (path_to_all_sinks) { - break; - } - } - - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - if (!found_matrix[delta_x][delta_y]) { - add_delay_to_matrix(&matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); - VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - 
source_x, - source_y, - from_layer_num, - sink_x, - sink_y, - to_layer_num, - IMPOSSIBLE_DELTA); - } - } - } -} - -static void generic_compute_matrix_iterative_astar( - RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool /***/) { - //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling from (%d,%d)", source_x, source_y)); - - int delta_x, delta_y; - int sink_x, sink_y; - - auto& device_ctx = g_vpr_ctx.device(); - - for (sink_x = start_x; sink_x <= end_x; sink_x++) { - for (sink_y = start_y; sink_y <= end_y; sink_y++) { - delta_x = abs(sink_x - source_x); - delta_y = abs(sink_y - source_y); - - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); - - bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE - || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); - - bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); - - if (src_or_target_empty || !is_allowed_type) { - if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - } - } else { - //Valid start/end - - float delay = route_connection_delay(route_profiler, - from_layer_num, - to_layer_num, - source_x, - source_y, - sink_x, - sink_y, - router_opts, - measure_directconnect); - -#ifdef VERBOSE - VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", 
- delay, - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { - //Overwrite empty delta - matrix[delta_x][delta_y][0] = delay; - } else { - //Collect delta - matrix[delta_x][delta_y].push_back(delay); - } - } - } - } -} - -static vtr::NdMatrix compute_delta_delays( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - size_t longest_length, - bool is_flat) { - //To avoid edge effects we place the source at least 'longest_length' away - //from the device edge - //and route from there for all possible delta values < dimension - - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - vtr::NdMatrix delta_delays({static_cast(grid.get_num_layers()), static_cast(grid.get_num_layers()), grid.width(), grid.height()}); - - for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); from_layer_num++) { - for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); to_layer_num++) { - vtr::NdMatrix, 2> sampled_delta_delays({grid.width(), grid.height()}); - - size_t mid_x = vtr::nint(grid.width() / 2); - size_t mid_y = vtr::nint(grid.height() / 2); - - size_t low_x = std::min(longest_length, mid_x); - size_t low_y = std::min(longest_length, mid_y); - size_t high_x = mid_x; - size_t high_y = mid_y; - if (longest_length <= grid.width()) { - high_x = std::max(grid.width() - longest_length, mid_x); - } - if (longest_length <= grid.height()) { - high_y = std::max(grid.height() - longest_length, mid_y); - } - - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - for (const auto& type : allowed_types_vector) { - allowed_types.insert(type); - } - } - - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | 
B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . = (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - - //Find the lowest y location on the left edge with a non-empty block - int y = 0; - int x = 0; - t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < (int)grid.width(); ++x) { - for (y = 0; y < (int)grid.height(); ++y) { - auto type = grid.get_physical_type({x, y, from_layer_num}); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { - continue; - } - src_type = type; - break; - } - } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); - - t_compute_delta_delay_matrix generic_compute_matrix; - switch (placer_opts.place_delta_delay_matrix_calculation_method) { - case e_place_delta_delay_algorithm::ASTAR_ROUTE: - generic_compute_matrix = generic_compute_matrix_iterative_astar; - break; - case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: - generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; - break; - default: - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); - } - -#ifdef VERBOSE - VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Find the lowest x location on the bottom edge with a 
non-empty block - src_type = nullptr; - for (y = 0; y < (int)grid.height(); ++y) { - for (x = 0; x < (int)grid.width(); ++x) { - auto type = grid.get_physical_type({x, y, from_layer_num}); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { - continue; - } - src_type = type; - break; - } - } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); -#ifdef VERBOSE - VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions B, C, E, F -#ifdef VERBOSE - VTR_LOG("Computing from low/low:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - low_x, low_y, - low_x, low_y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions D, E, G, H -#ifdef VERBOSE - VTR_LOG("Computing from high/high:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - high_x, high_y, - 0, 0, - high_x, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions A, B, D, E -#ifdef VERBOSE - VTR_LOG("Computing from high/low:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - high_x, low_y, - 0, low_y, - high_x, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - 
is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions E, F, H, I -#ifdef VERBOSE - VTR_LOG("Computing from low/high:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - low_x, high_y, - low_x, 0, - grid.width() - 1, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { - for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { - delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); - } - } - } - } - - return delta_delays; -} - -float delay_reduce(std::vector& delays, e_reducer reducer) { - if (delays.empty()) { - return IMPOSSIBLE_DELTA; - } else if (delays.size() == 1) { - return delays[0]; - } - - VTR_ASSERT(delays.size() > 1); - - float delay; - - if (reducer == e_reducer::MIN) { - auto itr = std::min_element(delays.begin(), delays.end()); - delay = *itr; - } else if (reducer == e_reducer::MAX) { - auto itr = std::max_element(delays.begin(), delays.end()); - delay = *itr; - } else if (reducer == e_reducer::MEDIAN) { - std::stable_sort(delays.begin(), delays.end()); - delay = vtr::median(delays.begin(), delays.end()); - } else if (reducer == e_reducer::ARITHMEAN) { - delay = vtr::arithmean(delays.begin(), delays.end()); - } else if (reducer == e_reducer::GEOMEAN) { - delay = vtr::geomean(delays.begin(), delays.end()); - } else { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer"); - } - - return delay; -} - -/* We return the average placement estimated delay for a routing spanning (x,y). - * We start with an averaging distance of 1 (i.e. from (x-1,y-1) to (x+1,y+1)) - * and look for legal delay values to average; if some are found we return the - * average and if none are found we increase the distance to average over. 
- * - * If no legal values are found to average over with a range of max_distance, - * we return IMPOSSIBLE_DELTA. - */ -static float find_neighboring_average( - vtr::NdMatrix& matrix, - int from_layer, - t_physical_tile_loc to_tile_loc, - int max_distance) { - float sum = 0; - int counter = 0; - int endx = matrix.end_index(2); - int endy = matrix.end_index(3); - - int x = to_tile_loc.x; - int y = to_tile_loc.y; - int to_layer = to_tile_loc.layer_num; - - for (int distance = 1; distance <= max_distance; ++distance) { - for (int delx = x - distance; delx <= x + distance; delx++) { - for (int dely = y - distance; dely <= y + distance; dely++) { - // Check distance constraint - if (abs(delx - x) + abs(dely - y) > distance) { - continue; - } - - //check out of bounds - if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) { - continue; - } - - if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { - continue; - } - counter++; - sum += matrix[from_layer][to_layer][delx][dely]; - } - } - if (counter != 0) { - return sum / (float)counter; - } - } - - return IMPOSSIBLE_DELTA; -} - -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { - // Set any empty delta's to the average of it's neighbours - // - // Empty coordinates may occur if the sampling location happens to not have - // a connection at that location. However a more through sampling likely - // would return a result, so we fill in the empty holes with a small - // neighbour average. 
- constexpr int kMaxAverageDistance = 2; - for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { - for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) { - delta_delays[from_layer][to_layer][delta_x][delta_y] = - find_neighboring_average(delta_delays, - from_layer, - {delta_x, delta_y, to_layer}, - kMaxAverageDistance); - } - } - } - } - } -} - -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { - // Set any empty delta's to the average of it's neighbours - - for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) { - for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) { - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { - delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; - } - } - } - } - } -} - -static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { - // Set any impossible delta's to the average of its neighbours - // - // Impossible coordinates may occur if an IPIN cannot be reached from the - // sampling OPIN. This might occur if the IPIN or OPIN used for sampling - // is specialized, and therefore cannot be reached via the by the pins - // sampled. Leaving this value in the delay matrix will result in invalid - // slacks if the delay matrix uses this value. - // - // A max average distance of 5 is used to provide increased effort in - // filling these gaps. 
It is more important to have a poor predication, - // than an invalid value and causing a slack assertion. - constexpr int kMaxAverageDistance = 5; - for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { - for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { - delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neighboring_average( - delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance); - } - } - } - } - } -} - -static vtr::NdMatrix compute_delta_delay_model( - RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - int longest_length, - bool is_flat) { - vtr::ScopedStartFinishTimer timer("Computing delta delays"); - vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, - placer_opts, - router_opts, - measure_directconnect, - longest_length, - is_flat); - - fix_uninitialized_coordinates(delta_delays); - - fix_empty_coordinates(delta_delays); - - fill_impossible_coordinates(delta_delays); - - verify_delta_delays(delta_delays); - - return delta_delays; -} - - - -//Finds a src_rr and sink_rr appropriate for measuring the delay of the current direct specification -static bool find_direct_connect_sample_locations(const t_direct_inf* direct, - t_physical_tile_type_ptr from_type, - int from_pin, - int from_pin_class, - t_physical_tile_type_ptr to_type, - int to_pin, - int to_pin_class, - RRNodeId& out_src_node, - RRNodeId& out_sink_node) { - VTR_ASSERT(from_type != nullptr); - VTR_ASSERT(to_type != nullptr); - - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - const auto& node_lookup = 
device_ctx.rr_graph.node_lookup(); - - //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, - //and which has the appropriate pins - int from_x = -1; - int from_y = -1; - int from_sub_tile = -1; - int to_x = 0, to_y = 0, to_sub_tile = 0; - bool found = false; - int found_layer_num = -1; - //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums - for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { - for (int x = 0; x < (int)grid.width() && !found; ++x) { - to_x = x + direct->x_offset; - if (to_x < 0 || to_x >= (int)grid.width()) continue; - - for (int y = 0; y < (int)grid.height() && !found; ++y) { - if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; - - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool from_pin_found = false; - if (direct->from_side != NUM_2D_SIDES) { - RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); - from_pin_found = from_pin_rr.is_valid(); - } else { - from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); - } - if (!from_pin_found) continue; - - to_y = y + direct->y_offset; - - if (to_y < 0 || to_y >= (int)grid.height()) continue; - if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; - - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool to_pin_found = false; - if (direct->to_side != NUM_2D_SIDES) { - RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); - to_pin_found = (to_pin_rr != RRNodeId::INVALID()); - } else { - to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); - } - if (!to_pin_found) continue; - - for (int sub_tile_num = 0; 
sub_tile_num < from_type->capacity; ++sub_tile_num) { - to_sub_tile = sub_tile_num + direct->sub_tile_offset; - - if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; - - found = true; - found_layer_num = layer_num; - from_x = x; - from_y = y; - from_sub_tile = sub_tile_num; - - break; - } - } - } - } - - if (!found) { - return false; - } - - //Now have a legal instance of this direct connect - VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); - VTR_ASSERT(from_sub_tile < from_type->capacity); - - VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); - VTR_ASSERT(to_sub_tile < to_type->capacity); - - VTR_ASSERT(from_x + direct->x_offset == to_x); - VTR_ASSERT(from_y + direct->y_offset == to_y); - VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile); - - // - //Find a source/sink RR node associated with the pins of the direct - // - - { - RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); - VTR_ASSERT(src_rr_candidate); - out_src_node = src_rr_candidate; - } - - { - RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); - VTR_ASSERT(sink_rr_candidate); - out_sink_node = sink_rr_candidate; - } - - return true; -} - -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - - for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { - for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y]; - - if (delta_delay < 0.) 
{ - VPR_ERROR(VPR_ERROR_PLACE, - "Found invaild negative delay %g for delta [%d,%d,%d,%d]", - delta_delay, from_layer_num, to_layer_num, x, y); - } - } - } - } - } - - return true; -} - -void OverrideDelayModel::compute_override_delay_model( - RouterDelayProfiler& route_profiler, - const t_router_opts& router_opts) { - t_router_opts router_opts2 = router_opts; - router_opts2.astar_fac = 0.f; - router_opts2.astar_offset = 0.f; - - //Look at all the direct connections that exist, and add overrides to delay model - auto& device_ctx = g_vpr_ctx.device(); - for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) { - const t_direct_inf* direct = &device_ctx.arch->directs[idirect]; - - InstPort from_port = parse_inst_port(direct->from_pin); - InstPort to_port = parse_inst_port(direct->to_pin); - - t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types); - t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types); - - int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1; - VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from"); - - //We now walk through all the connections associated with the current direct specification, measure - //their delay and specify that value as an override in the delay model. - // - //Note that we need to check every connection in the direct to cover the case where the pins are not - //equivalent. - // - //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK - //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in - //sampled_rr_pairs and skipping them if they occur multiple times. 
- int missing_instances = 0; - int missing_paths = 0; - std::set> sampled_rr_pairs; - for (int iconn = 0; iconn < num_conns; ++iconn) { - //Find the associated pins - int from_pin = find_pin(from_type, from_port.port_name(), from_port.port_low_index() + iconn); - int to_pin = find_pin(to_type, to_port.port_name(), to_port.port_low_index() + iconn); - - VTR_ASSERT(from_pin != OPEN); - VTR_ASSERT(to_pin != OPEN); - - int from_pin_class = find_pin_class(from_type, from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); - VTR_ASSERT(from_pin_class != OPEN); - - int to_pin_class = find_pin_class(to_type, to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); - VTR_ASSERT(to_pin_class != OPEN); - - bool found_sample_points; - RRNodeId src_rr, sink_rr; - found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr); - - if (!found_sample_points) { - ++missing_instances; - continue; - } - - //If some of the source/sink ports are logically equivalent we may have already - //sampled the associated source/sink pair and don't need to do so again - if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; - - float direct_connect_delay = std::numeric_limits::quiet_NaN(); - bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); - - if (found_routing_path) { - set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay); - } else { - ++missing_paths; - } - - //Record that we've sampled this pair of source and sink nodes - sampled_rr_pairs.insert({src_rr, sink_rr}); - } - - VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str()); - VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of 
inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str()); - } -} - bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) { //Returns true if there is a directconnect between the two RR nodes // @@ -1258,4 +236,4 @@ bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) { } } return false; -} +} \ No newline at end of file From cddb15210d4ba99f1a07a4ca8f83e779cd14976f Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 12:49:58 -0500 Subject: [PATCH 05/39] add doxygen comments for delay_reduce, add_delay_to_matrix, and find_neighboring_average --- .../compute_delta_delays_utils.cpp | 97 +++++++++++++------ 1 file changed, 68 insertions(+), 29 deletions(-) diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index 78855a251b6..ee7da1b2265 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -69,13 +69,56 @@ static float route_connection_delay(RouterDelayProfiler& route_profiler, const t_router_opts& router_opts, bool measure_directconnect); -float delay_reduce(std::vector& delays, e_reducer reducer); +/** + * @brief Computes a reduced value from a vector of delay values using the specified reduction method. + * + * @param delays A reference to a vector of delay values. This vector may be modified + * (e.g., sorted) depending on the reducer used. + * @param reducer The reduction method to be applied. + * + * @return The reduced delay value. If the input vector is empty, the function + * returns `IMPOSSIBLE_DELTA`. + * + * @throws VPR_FATAL_ERROR if the reducer is unrecognized. + */ +static float delay_reduce(std::vector& delays, e_reducer reducer); -static void add_delay_to_matrix(vtr::Matrix>* matrix, +/** + * @brief Adds a delay value to a 2D matrix of delay vectors. 
+ * + * Updates the delay vector at position (`delta_x`, `delta_y`) in the matrix. + * If the element contains only `EMPTY_DELTA`, it is replaced with the new delay; + * otherwise, the delay is appended to the vector. + * + * @param matrix A 2D matrix of delay vectors. + * @param delta_x The x-index in the matrix. + * @param delta_y The y-index in the matrix. + * @param delay The delay value to add. + */ +static void add_delay_to_matrix(vtr::Matrix>& matrix, int delta_x, int delta_y, float delay); +/** + * @brief Computes the average delay for a routing span. + * + * This function calculates the average placement delay for a routing span starting from a + * given layer and spanning a region defined by delta x and delta y. It iteratively searches + * for valid delay values within an expanding neighborhood (starting from a distance of 1) + * around the specified delta offsets and layer, until valid values are found or + * the maximum search distance (`max_distance`) is reached. + * + * @param matrix A 4D matrix of delay values indexed by `[from_layer][to_layer][delta_x][delta_y]`. + * @param from_layer The starting layer index of the routing span. + * @param to_tile_loc A structure holding the delta offsets (`x` and `y`) and the target layer index (`layer_num`). + * @param max_distance The maximum neighborhood distance to search for valid delay values. + * + * @return The average of valid delay values within the search range. If no valid delays + * are found up to the maximum distance, the function returns `IMPOSSIBLE_DELTA`. + * + * @note The function performs a Manhattan-distance-based neighborhood search around the target location. 
+ */ static float find_neighboring_average(vtr::NdMatrix& matrix, int from_layer, t_physical_tile_loc to_tile_loc, @@ -560,7 +603,7 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou #endif found_matrix[delta_x][delta_y] = true; - add_delay_to_matrix(&matrix, delta_x, delta_y, delays[sink_rr_node]); + add_delay_to_matrix(matrix, delta_x, delta_y, delays[sink_rr_node]); found_a_sink = true; break; @@ -583,7 +626,7 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou int delta_x = abs(sink_x - source_x); int delta_y = abs(sink_y - source_y); if (!found_matrix[delta_x][delta_y]) { - add_delay_to_matrix(&matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); + add_delay_to_matrix(matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", source_x, source_y, @@ -656,10 +699,12 @@ static float route_connection_delay(RouterDelayProfiler& route_profiler, return net_delay_value; } -float delay_reduce(std::vector& delays, e_reducer reducer) { +static float delay_reduce(std::vector& delays, e_reducer reducer) { if (delays.empty()) { return IMPOSSIBLE_DELTA; - } else if (delays.size() == 1) { + } + + if (delays.size() == 1) { return delays[0]; } @@ -687,39 +732,31 @@ float delay_reduce(std::vector& delays, e_reducer reducer) { return delay; } -static void add_delay_to_matrix(vtr::Matrix>* matrix, +static void add_delay_to_matrix(vtr::Matrix>& matrix, int delta_x, int delta_y, float delay) { - if ((*matrix)[delta_x][delta_y].size() == 1 && (*matrix)[delta_x][delta_y][0] == EMPTY_DELTA) { - //Overwrite empty delta - (*matrix)[delta_x][delta_y][0] = delay; + if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { + // Overwrite empty delta + matrix[delta_x][delta_y][0] = delay; } else { //Collect delta - (*matrix)[delta_x][delta_y].push_back(delay); + matrix[delta_x][delta_y].push_back(delay); 
} } -/* We return the average placement estimated delay for a routing spanning (x,y). - * We start with an averaging distance of 1 (i.e. from (x-1,y-1) to (x+1,y+1)) - * and look for legal delay values to average; if some are found we return the - * average and if none are found we increase the distance to average over. - * - * If no legal values are found to average over with a range of max_distance, - * we return IMPOSSIBLE_DELTA. - */ static float find_neighboring_average(vtr::NdMatrix& matrix, int from_layer, t_physical_tile_loc to_tile_loc, int max_distance) { - float sum = 0; - int counter = 0; - int endx = matrix.end_index(2); - int endy = matrix.end_index(3); + float sum = 0.f; + int num_samples = 0; + const int endx = matrix.end_index(2); + const int endy = matrix.end_index(3); - int x = to_tile_loc.x; - int y = to_tile_loc.y; - int to_layer = to_tile_loc.layer_num; + const int x = to_tile_loc.x; + const int y = to_tile_loc.y; + const int to_layer = to_tile_loc.layer_num; for (int distance = 1; distance <= max_distance; ++distance) { for (int delx = x - distance; delx <= x + distance; delx++) { @@ -737,12 +774,14 @@ static float find_neighboring_average(vtr::NdMatrix& matrix, if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { continue; } - counter++; + sum += matrix[from_layer][to_layer][delx][dely]; + num_samples++; } } - if (counter != 0) { - return sum / (float)counter; + + if (num_samples != 0) { + return sum / (float)num_samples; } } From 553ff537454aade8148de6ab62e04e4a285521ee Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 15:50:53 -0500 Subject: [PATCH 06/39] move lines that don't depend on loop vars to outside the loop --- .../compute_delta_delays_utils.cpp | 151 ++++++++---------- 1 file changed, 69 insertions(+), 82 deletions(-) diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp 
b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index ee7da1b2265..42142d428cc 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -132,9 +132,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p bool measure_directconnect, size_t longest_length, bool is_flat) { - /* To avoid edge effects we place the source at least 'longest_length' away - * from the device edge and route from there for all possible delta values < dimension - */ + const auto& device_ctx = g_vpr_ctx.device(); const auto& grid = device_ctx.grid; @@ -143,69 +141,64 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p const size_t device_width = grid.width(); const size_t device_height = grid.height(); + /* To avoid edge effects we place the source at least 'longest_length' away + * from the device edge and route from there for all possible delta values < dimension + */ + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . = (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + const size_t mid_x = vtr::nint(device_width / 2); + const size_t mid_y = vtr::nint(device_height / 2); + const size_t low_x = std::min(longest_length, mid_x); + const size_t low_y = std::min(longest_length, mid_y); + const size_t high_x = (longest_length <= device_width) ? std::max(device_width - longest_length, mid_x) : mid_x; + const size_t high_y = (longest_length <= device_height) ? 
std::max(device_width - longest_length, mid_y) : mid_y; + vtr::NdMatrix delta_delays({num_layers, num_layers, device_width, device_height}); + std::set allowed_types; + if (!placer_opts.allowed_tiles_for_delay_model.empty()) { + auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + allowed_types = std::set(allowed_types_vector.begin(), allowed_types_vector.end()); + } + for (int from_layer_num = 0; from_layer_num < (int)num_layers; from_layer_num++) { for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) { vtr::NdMatrix, 2> sampled_delta_delays({device_width, device_height}); - size_t mid_x = vtr::nint(device_width / 2); - size_t mid_y = vtr::nint(device_height / 2); - - size_t low_x = std::min(longest_length, mid_x); - size_t low_y = std::min(longest_length, mid_y); - size_t high_x = mid_x; - size_t high_y = mid_y; - if (longest_length <= device_width) { - high_x = std::max(device_width - longest_length, mid_x); - } - if (longest_length <= device_height) { - high_y = std::max(device_width - longest_length, mid_y); - } - - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - for (const auto& type : allowed_types_vector) { - allowed_types.insert(type); - } - } - - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . 
= (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - //Find the lowest y location on the left edge with a non-empty block int y = 0; int x = 0; t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < (int)grid.width(); ++x) { - for (y = 0; y < (int)grid.height(); ++y) { - auto type = grid.get_physical_type({x, y, from_layer_num}); + for (x = 0; x < (int)device_width; ++x) { + for (y = 0; y < (int)device_height; ++y) { + t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + // check if the tile type is among the allowed types if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { continue; } @@ -228,7 +221,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p from_layer_num, to_layer_num, x, y, x, y, - grid.width() - 1, grid.height() - 1, + device_width - 1, device_height - 1, router_opts, measure_directconnect, allowed_types, is_flat); @@ -237,9 +230,10 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p src_type = nullptr; for (y = 0; y < (int)grid.height(); ++y) { for (x = 0; x < (int)grid.width(); ++x) { - auto type = grid.get_physical_type({x, y, from_layer_num}); + t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + // check if the tile type is among the allowed types if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { continue; } @@ -259,7 +253,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p from_layer_num, to_layer_num, x, y, x, y, - grid.width() - 1, grid.height() - 1, + device_width - 1, device_height - 1, router_opts, measure_directconnect, allowed_types, is_flat); @@ -273,7 +267,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p from_layer_num, to_layer_num, low_x, low_y, low_x, low_y, - 
grid.width() - 1, grid.height() - 1, + device_width - 1, device_height - 1, router_opts, measure_directconnect, allowed_types, is_flat); @@ -301,7 +295,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p from_layer_num, to_layer_num, high_x, low_y, 0, low_y, - high_x, grid.height() - 1, + high_x, device_height - 1, router_opts, measure_directconnect, allowed_types, is_flat); @@ -315,7 +309,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p from_layer_num, to_layer_num, low_x, high_y, low_x, 0, - grid.width() - 1, high_y, + device_width - 1, high_y, router_opts, measure_directconnect, allowed_types, is_flat); @@ -331,8 +325,6 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p } static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { - // Set any empty delta's to the average of it's neighbours - for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) { for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) { for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) { @@ -437,15 +429,12 @@ static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr bool /*is_flat*/) { //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling from (%d,%d)", source_x, source_y)); - int delta_x, delta_y; - int sink_x, sink_y; - - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); - for (sink_x = start_x; sink_x <= end_x; sink_x++) { - for (sink_y = start_y; sink_y <= end_y; sink_y++) { - delta_x = abs(sink_x - source_x); - delta_y = abs(sink_y - source_y); + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + const int delta_x = abs(sink_x - source_x); + const int delta_y = abs(sink_y - source_y); t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, 
from_layer_num}); t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); @@ -457,7 +446,7 @@ static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr if (src_or_target_empty || !is_allowed_type) { if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay + // Only set empty target if we don't already have a valid delta delay matrix[delta_x][delta_y].push_back(EMPTY_DELTA); #ifdef VERBOSE VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", @@ -488,10 +477,10 @@ static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr sink_x, sink_y); #endif if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { - //Overwrite empty delta + // Overwrite empty delta matrix[delta_x][delta_y][0] = delay; } else { - //Collect delta + // Collect delta matrix[delta_x][delta_y].push_back(delay); } } @@ -653,7 +642,7 @@ static float route_connection_delay(RouterDelayProfiler& route_profiler, float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */ - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); bool successfully_routed = false; @@ -675,16 +664,14 @@ static float route_connection_delay(RouterDelayProfiler& route_profiler, continue; if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { - //Skip if we shouldn't measure direct connects and a direct connect exists + // Skip if we shouldn't measure direct connects and a direct connect exists continue; } - { - successfully_routed = route_profiler.calculate_delay( - source_rr_node, sink_rr_node, - router_opts, - &net_delay_value); - } + successfully_routed = route_profiler.calculate_delay(source_rr_node, + sink_rr_node, + router_opts, + &net_delay_value); if (successfully_routed) break; } @@ -740,7 +727,7 @@ static void add_delay_to_matrix(vtr::Matrix>& 
matrix, // Overwrite empty delta matrix[delta_x][delta_y][0] = delay; } else { - //Collect delta + // Collect delta matrix[delta_x][delta_y].push_back(delay); } } From 7d4fd0100892dc077745c5ed58b30391acc5e541 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 15:56:17 -0500 Subject: [PATCH 07/39] remove fix_uninitialized_coordinates --- .../compute_delta_delays_utils.cpp | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index 42142d428cc..d50bfcf1991 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -21,8 +21,6 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p size_t longest_length, bool is_flat); -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); - static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays); @@ -324,20 +322,6 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p return delta_delays; } -static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { - for (size_t from_layer_num = 0; from_layer_num < delta_delays.dim_size(0); ++from_layer_num) { - for (size_t to_layer_num = 0; to_layer_num < delta_delays.dim_size(1); ++to_layer_num) { - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(2); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { - delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; - } - } - } - } - } -} - static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours // @@ -391,8 +375,8 
@@ static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { } static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { @@ -402,7 +386,7 @@ static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { if (delta_delay < 0.) { VPR_ERROR(VPR_ERROR_PLACE, - "Found invaild negative delay %g for delta [%d,%d,%d,%d]", + "Found invalid negative delay %g for delta [%d,%d,%d,%d]", delta_delay, from_layer_num, to_layer_num, x, y); } } @@ -791,7 +775,14 @@ vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_pro longest_length, is_flat); - fix_uninitialized_coordinates(delta_delays); + const size_t num_elements = delta_delays.size(); + + // set uninitialized elements to infinity + for (size_t i = 0; i < num_elements; i++) { + if (delta_delays.get(i) == UNINITIALIZED_DELTA) { + delta_delays.get(i) = IMPOSSIBLE_DELTA; + } + } fix_empty_coordinates(delta_delays); From 9ce28bfd56c9246e7494c09be23e466add56c1f2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 16:17:58 -0500 Subject: [PATCH 08/39] doxygen comments for get_best_classes and route_connection_delay --- .../compute_delta_delays_utils.cpp | 60 ++++++++++++------- vpr/src/place/timing_place_lookup.cpp | 18 ++---- vpr/src/place/timing_place_lookup.h | 27 +++++++++ 3 files changed, 70 insertions(+), 35 deletions(-) diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index d50bfcf1991..0feaf0cc702 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ 
-57,13 +57,32 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& route const std::set& allowed_types, bool is_flat); +/** + * @brief Routes between a source and sink location to calculate the delay. + * + * This function computes the delay of a routed connection between a source and sink node + * specified by their coordinates and layers. It iterates over the best driver and sink pin + * classes to find a valid routing path and calculates the delay if a path exists. + * + * @param route_profiler Reference to the `RouterDelayProfiler` responsible for calculating routing delays. + * @param source_x The x-coordinate of the source location. + * @param source_y The y-coordinate of the source location. + * @param source_layer The layer index of the source node. + * @param sink_x The x-coordinate of the sink location. + * @param sink_y The y-coordinate of the sink location. + * @param sink_layer The layer index of the sink node. + * @param router_opts Routing options used for delay calculation. + * @param measure_directconnect If `true`, includes direct connect delays; otherwise, skips direct connections. + * + * @return The calculated routing delay. If routing fails, it returns `IMPOSSIBLE_DELTA`. + */ static float route_connection_delay(RouterDelayProfiler& route_profiler, - int from_layer_num, - int to_layer_num, - int source_x_loc, - int source_y_loc, - int sink_x_loc, - int sink_y_loc, + int source_x, + int source_y, + int source_layer, + int sink_x, + int sink_y, + int sink_layer, const t_router_opts& router_opts, bool measure_directconnect); @@ -323,10 +342,10 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p } static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { - // Set any empty delta's to the average of it's neighbours + // Set any empty delta's to the average of its neighbours // // Empty coordinates may occur if the sampling location happens to not have - // a connection at that location. 
However a more through sampling likely + // a connection at that location. However, a more thorough sampling likely // would return a result, so we fill in the empty holes with a small // neighbour average. constexpr int kMaxAverageDistance = 2; @@ -411,8 +430,6 @@ static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr bool measure_directconnect, const std::set& allowed_types, bool /*is_flat*/) { - //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling from (%d,%d)", source_x, source_y)); - const auto& device_ctx = g_vpr_ctx.device(); for (int sink_x = start_x; sink_x <= end_x; sink_x++) { @@ -444,12 +461,12 @@ static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr //Valid start/end float delay = route_connection_delay(route_profiler, - from_layer_num, - to_layer_num, source_x, source_y, + from_layer_num, sink_x, sink_y, + to_layer_num, router_opts, measure_directconnect); @@ -614,35 +631,36 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou } static float route_connection_delay(RouterDelayProfiler& route_profiler, - int from_layer_num, - int to_layer_num, int source_x, int source_y, + int source_layer, int sink_x, int sink_y, + int sink_layer, const t_router_opts& router_opts, bool measure_directconnect) { //Routes between the source and sink locations and calculates the delay - float net_delay_value = IMPOSSIBLE_DELTA; /*set to known value for debug purposes */ + // set to known value for debug purposes + float net_delay_value = IMPOSSIBLE_DELTA; const auto& device_ctx = g_vpr_ctx.device(); bool successfully_routed = false; - //Get the rr nodes to route between - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); + // Get the rr nodes to route between + auto best_driver_ptcs = 
get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, source_layer})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, sink_layer})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_layer, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_layer, sink_x, sink_y, SINK, sink_ptc); if (sink_rr_node == RRNodeId::INVALID()) continue; @@ -664,7 +682,7 @@ static float route_connection_delay(RouterDelayProfiler& route_profiler, if (!successfully_routed) { VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, from_layer_num, sink_x, sink_y, to_layer_num, net_delay_value); + source_x, source_y, source_layer, sink_x, sink_y, sink_layer, net_delay_value); } return net_delay_value; diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 21ff6d69cc6..f2a0a60edb9 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -109,22 +109,11 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& /******* File Accessible Functions **********/ std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { - /* - * This function tries to identify the best pin classes to hook up - * for delay calculation. 
The assumption is that we should pick - * the pin class with the largest number of pins. This makes - * sense, since it ensures we pick commonly used pins, and - * removes order dependence on how the pins are specified - * in the architecture (except in the case were the two largest pin classes - * of a particular pintype have the same number of pins, in which case the - * first pin class is used). - */ - std::vector best_classes; //Record any non-zero Fc pins // - //Note that we track non-zero Fc pins, since certain Fc overides + //Note that we track non-zero Fc pins, since certain Fc overrides //may apply to only a subset of wire types. This ensures we record //which pins can potentially connect to global routing. std::unordered_set non_zero_fc_pins; @@ -149,14 +138,15 @@ std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ } } - if (!any_pins_connect_to_general_routing) continue; //Skip if doesn't connect to general routing + //Skip if the pin class doesn't connect to general routing + if (!any_pins_connect_to_general_routing) continue; //Record candidate class best_classes.push_back(i); } } - //Sort classe so largest pin class is first + //Sort classes so the largest pin class is first auto cmp_class = [&](int lhs, int rhs) { return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins; }; diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h index fba3f470483..24cfc301ce6 100644 --- a/vpr/src/place/timing_place_lookup.h +++ b/vpr/src/place/timing_place_lookup.h @@ -11,6 +11,33 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& const std::vector& directs, bool is_flat); +/** + * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity. + * + * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`) + * that are suitable for delay calculations. 
It prioritizes pin classes with the largest number of pins + * that connect to general routing, ensuring commonly used pins are chosen for delay profiling. + * + * @param pintype The type of pins to filter. + * @param type Pointer to the physical tile type containing pin and class information. + * + * @return A vector of indices representing the selected pin classes. The classes are sorted + * in descending order based on the number of pins they contain. + * + * @details + * - A pin class is eligible if its type matches `pintype` and it contains at least one pin + * that connects to general routing (non-zero Fc). + * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`. + * - Classes are sorted so that the class with the largest number of pins appears first. + * If multiple classes have the same pin count, their order depends on their initial appearance + * in the architecture file. + * + * @note + * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded. + * - The function ensures stability in sorting, preserving the input order for classes + * with the same number of pins. 
+ */ + std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node); From f188b792e4e7ccf3f7c221ea7f6894876be4abb2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 29 Nov 2024 16:54:28 -0500 Subject: [PATCH 09/39] remove unused includes and constants from timing_place_lookup.cpp --- utils/route_diag/src/main.cpp | 2 +- .../compute_delta_delays_utils.cpp | 2 +- vpr/src/place/timing_place_lookup.cpp | 24 ++----------------- 3 files changed, 4 insertions(+), 24 deletions(-) diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index debd89c8bd6..8b485916532 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -238,7 +238,7 @@ static void profile_source(const Netlist<>& net_list, VTR_LOG("\n"); } -static t_chan_width setup_chan_width(t_router_opts router_opts, +static t_chan_width setup_chan_width(const t_router_opts& router_opts, t_chan_width_dist chan_width_dist) { /*we give plenty of tracks, this increases routability for the */ /*lookup table generation */ diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index 0feaf0cc702..4630ddfcfb4 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -503,7 +503,7 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou bool measure_directconnect, const std::set& allowed_types, bool is_flat) { - auto& device_ctx = g_vpr_ctx.device(); + const auto& device_ctx = g_vpr_ctx.device(); t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); diff --git a/vpr/src/place/timing_place_lookup.cpp 
b/vpr/src/place/timing_place_lookup.cpp index f2a0a60edb9..f086283a3e7 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -1,47 +1,27 @@ -#include -#include +#include "timing_place_lookup.h" #include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_ndmatrix.h" #include "vtr_log.h" #include "vtr_util.h" -#include "vtr_math.h" -#include "vtr_memory.h" + #include "vtr_time.h" -#include "vtr_geometry.h" -#include "arch_util.h" #include "vpr_types.h" #include "globals.h" #include "place_and_route.h" #include "route_net.h" -#include "timing_place_lookup.h" #include "read_xml_arch_file.h" #include "atom_netlist.h" -// all functions in profiling:: namespace, which are only activated if PROFILE is defined -#include "route_profiling.h" #include "router_delay_profiling.h" #include "place_delay_model.h" #include "simple_delay_model.h" #include "delta_delay_model.h" #include "override_delay_model.h" -/*To compute delay between blocks we calculate the delay between */ -/*different nodes in the FPGA. From this procedure we generate - * a lookup table which tells us the delay between different locations in*/ -/*the FPGA */ - -/*the delta arrays are used to contain the best case routing delay */ -/*between different locations on the FPGA. 
*/ - -//#define VERBOSE - -constexpr float UNINITIALIZED_DELTA = -1; //Indicates the delta delay value has not been calculated -constexpr float EMPTY_DELTA = -2; //Indicates delta delay from/to an EMPTY block -constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); //Indicates there is no valid delta delay /*** Function Prototypes *****/ static t_chan_width setup_chan_width(const t_router_opts& router_opts, From 107738c78adc5c91ce1952d1bf45de133443bd1b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 17:20:57 -0500 Subject: [PATCH 10/39] total_num_internal_pins member function for t_sub_tile --- libs/libarchfpga/src/arch_util.h | 6 +++--- libs/libarchfpga/src/physical_types.cpp | 18 ++++++++++++++++- libs/libarchfpga/src/physical_types.h | 3 +++ libs/libarchfpga/src/physical_types_util.cpp | 21 ++++++-------------- libs/libarchfpga/src/physical_types_util.h | 6 ++---- 5 files changed, 31 insertions(+), 23 deletions(-) diff --git a/libs/libarchfpga/src/arch_util.h b/libs/libarchfpga/src/arch_util.h index c39cf77b94f..fb251bffe10 100644 --- a/libs/libarchfpga/src/arch_util.h +++ b/libs/libarchfpga/src/arch_util.h @@ -23,8 +23,8 @@ class InstPort { InstPort() = default; InstPort(const std::string& str); - std::string instance_name() const { return instance_.name; } - std::string port_name() const { return port_.name; } + const std::string& instance_name() const { return instance_.name; } + const std::string& port_name() const { return port_.name; } int instance_low_index() const { return instance_.low_idx; } int instance_high_index() const { return instance_.high_idx; } @@ -40,7 +40,7 @@ class InstPort { private: struct name_index { - std::string name = ""; + std::string name; int low_idx = UNSPECIFIED; int high_idx = UNSPECIFIED; }; diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index 3bdabaee2a7..ff9baf87b3f 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ 
b/libs/libarchfpga/src/physical_types.cpp @@ -220,7 +220,7 @@ std::string t_pb_graph_pin::to_string(const bool full_description) const { return pin_string; } -/** +/* * t_pb_graph_edge */ @@ -253,3 +253,19 @@ bool t_pb_graph_edge::belongs_to_pattern(int pattern_index) const { // return false otherwise return false; } + +/* + * t_sub_tile + */ + +int t_sub_tile::total_num_internal_pins() const { + int num_pins = 0; + + for (t_logical_block_type_ptr eq_site : equivalent_sites) { + num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size(); + } + + num_pins *= capacity.total(); + + return num_pins; +} diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 4d415697554..bf306021d45 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -796,6 +796,9 @@ struct t_sub_tile { int num_phy_pins = 0; int index = -1; + + public: + int total_num_internal_pins() const; }; /** A logical pin defines the pin index of a logical block type (i.e. 
a top level PB type) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index 2256f81d66c..f23b2add270 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -154,7 +154,7 @@ static std::tuple get_pin_index_for_inst(t_physical_til pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst; } else { int pin_offset = get_sub_tile_inst_physical_pin_num_offset(type, sub_tile, sub_tile_cap); - int pins_per_inst = get_total_num_sub_tile_internal_pins(sub_tile) / sub_tile->capacity.total(); + int pins_per_inst = sub_tile->total_num_internal_pins() / sub_tile->capacity.total(); pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst; } @@ -225,7 +225,7 @@ static int get_sub_tile_physical_pin_num_offset(t_physical_tile_type_ptr physica if (&tmp_sub_tile == curr_sub_tile) break; else - offset += get_total_num_sub_tile_internal_pins(&tmp_sub_tile); + offset += tmp_sub_tile.total_num_internal_pins(); } return offset; @@ -235,7 +235,7 @@ static int get_sub_tile_inst_physical_pin_num_offset(t_physical_tile_type_ptr ph const t_sub_tile* curr_sub_tile, const int curr_relative_cap) { int offset = get_sub_tile_physical_pin_num_offset(physical_tile, curr_sub_tile); - int sub_tile_inst_num_pins = get_total_num_sub_tile_internal_pins(curr_sub_tile) / curr_sub_tile->capacity.total(); + int sub_tile_inst_num_pins = curr_sub_tile->total_num_internal_pins() / curr_sub_tile->capacity.total(); offset += (curr_relative_cap * sub_tile_inst_num_pins); @@ -564,7 +564,7 @@ int get_max_num_pins(t_logical_block_type_ptr logical_block) { } //Returns the pin class associated with the specified pin_index_in_port within the port port_name on type -int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type) { +int find_pin_class(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port, e_pin_type 
pin_type) { int iclass = OPEN; int ipin = find_pin(type, port_name, pin_index_in_port); @@ -579,7 +579,7 @@ int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin return iclass; } -int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port) { +int find_pin(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port) { int ipin = OPEN; int port_base_ipin = 0; int num_pins = OPEN; @@ -1009,7 +1009,7 @@ std::tuple get_sub_tile_from_pin_physical_num(t_physical int pin_offset = total_pin_counts; for (auto& sub_tile : physical_tile->sub_tiles) { - int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : get_total_num_sub_tile_internal_pins(&sub_tile); + int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : sub_tile.total_num_internal_pins(); total_pin_counts += sub_tile_num_pins; if (physical_num < total_pin_counts) { @@ -1347,15 +1347,6 @@ const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_t return pb_graph_pin->parent_node; } -int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile) { - int num_pins = 0; - for (auto eq_site : sub_tile->equivalent_sites) { - num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size(); - } - num_pins *= sub_tile->capacity.total(); - return num_pins; -} - int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat) { if (is_flat) { return tile->num_pins + (int)tile->pin_num_to_pb_pin.size(); diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index aa7b2617834..2a7ba563339 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -173,10 +173,10 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ ///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found) t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, 
const std::vector& types); -int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type); +int find_pin_class(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port, e_pin_type pin_type); ///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port -int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port); +int find_pin(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port); ///@brief Returns the maximum number of pins within a logical block int get_max_num_pins(t_logical_block_type_ptr logical_block); @@ -434,8 +434,6 @@ int get_edge_sw_arch_idx(t_physical_tile_type_ptr physical_tile, const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_type_ptr physical_type, int pin_physical_num); -int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile); - int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat); int get_tile_num_internal_pin(t_physical_tile_type_ptr tile); From b06cceb5e3b4e78cb250d2de7d0309b92b312f02 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 17:24:09 -0500 Subject: [PATCH 11/39] make get_port_by_name() a member function of t_sub_tile --- libs/libarchfpga/src/physical_types.cpp | 10 ++++++++++ libs/libarchfpga/src/physical_types.h | 5 +++++ libs/libarchfpga/src/physical_types_util.cpp | 10 ---------- libs/libarchfpga/src/physical_types_util.h | 5 ----- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index ff9baf87b3f..ac830d3a464 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -269,3 +269,13 @@ int t_sub_tile::total_num_internal_pins() const { return num_pins; } + +const t_physical_tile_port* 
t_sub_tile::get_port(std::string_view port_name) { + for (const t_physical_tile_port& port : ports) { + if (port_name == port.name) { + return &ports[port.index]; + } + } + + return nullptr; +} \ No newline at end of file diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index bf306021d45..a46650347f5 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -799,6 +799,11 @@ struct t_sub_tile { public: int total_num_internal_pins() const; + + /** + * @brief Returns the physical tile port given the port name and the corresponding sub tile + */ + const t_physical_tile_port* get_port(std::string_view port_name); }; /** A logical pin defines the pin index of a logical block type (i.e. a top level PB type) diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index f23b2add270..d4fe4127928 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -841,16 +841,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ return pin_names; } -const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name) { - for (auto port : sub_tile->ports) { - if (0 == strcmp(port.name, port_name)) { - return &sub_tile->ports[port.index]; - } - } - - return nullptr; -} - const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) { auto pb_type = type->pb_type; diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index 2a7ba563339..b5b28a79f99 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -286,11 +286,6 @@ int get_sub_tile_physical_pin(int sub_tile_index, */ t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, std::string_view port_name); -/** - * @brief Returns the physical tile port given the port name and the 
corresponding sub tile - */ -const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name); - /** * @brief Returns the logical block port given the port name and the corresponding logical block type */ From 85dcb10ed5a59cccb8c3cca053e77ee9e34ffa2e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 17:30:05 -0500 Subject: [PATCH 12/39] add get_port to t_logicl_block_type --- libs/libarchfpga/src/physical_types.cpp | 11 +++++++++++ libs/libarchfpga/src/physical_types.h | 6 ++++++ libs/libarchfpga/src/physical_types_util.cpp | 13 ------------- libs/libarchfpga/src/physical_types_util.h | 5 ----- libs/libarchfpga/src/read_xml_arch_file.cpp | 2 +- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index ac830d3a464..8b189fd7021 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -144,6 +144,17 @@ bool t_logical_block_type::is_empty() const { return name == std::string(EMPTY_BLOCK_NAME); } +const t_port* t_logical_block_type::get_port(std::string_view port_name) const { + for (int i = 0; i < pb_type->num_ports; i++) { + auto port = pb_type->ports[i]; + if (port_name == port.name) { + return &pb_type->ports[port.index]; + } + } + + return nullptr; +} + /** * t_pb_graph_node */ diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index a46650347f5..c5f3d39093e 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -958,6 +958,12 @@ struct t_logical_block_type { // Is this t_logical_block_type empty? 
bool is_empty() const; + + public: + /** + * @brief Returns the logical block port given the port name and the corresponding logical block type + */ + const t_port* get_port(std::string_view port_name) const; }; /************************************************************************************************* diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index d4fe4127928..5d4edc65b21 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -841,19 +841,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ return pin_names; } -const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) { - auto pb_type = type->pb_type; - - for (int i = 0; i < pb_type->num_ports; i++) { - auto port = pb_type->ports[i]; - if (0 == strcmp(port.name, port_name)) { - return &pb_type->ports[port.index]; - } - } - - return nullptr; -} - const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) { for (auto port : sub_tile->ports) { if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index b5b28a79f99..ae9405ef44c 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -286,11 +286,6 @@ int get_sub_tile_physical_pin(int sub_tile_index, */ t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, std::string_view port_name); -/** - * @brief Returns the logical block port given the port name and the corresponding logical block type - */ -const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name); - /** * @brief Returns the physical tile port given the pin name and the corresponding sub tile */ diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp 
b/libs/libarchfpga/src/read_xml_arch_file.cpp index 3950eb1b15b..46cde415630 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -774,7 +774,7 @@ static std::pair ProcessPinString(pugi::xml_node Locations, "No port name is present: %s\n", pin_loc_string); } - auto port = get_port_by_name(type, token.data); + auto port = type->get_port(token.data); if (port == nullptr) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations), "Port %s for %s could not be found: %s\n", From 296b589e374ef3b49fe6a1ca8f9a219316a9b7b1 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 17:43:23 -0500 Subject: [PATCH 13/39] add get_port_by_pin() to t_sub_tile and t_logical_block_type --- libs/libarchfpga/src/arch_check.cpp | 6 ++--- libs/libarchfpga/src/physical_types.cpp | 21 +++++++++++++++++ libs/libarchfpga/src/physical_types.h | 10 ++++++++ libs/libarchfpga/src/physical_types_util.cpp | 24 -------------------- libs/libarchfpga/src/physical_types_util.h | 10 -------- 5 files changed, 34 insertions(+), 37 deletions(-) diff --git a/libs/libarchfpga/src/arch_check.cpp b/libs/libarchfpga/src/arch_check.cpp index c8fb00299c4..5360d6e4c02 100644 --- a/libs/libarchfpga/src/arch_check.cpp +++ b/libs/libarchfpga/src/arch_check.cpp @@ -32,7 +32,7 @@ bool check_model_clocks(t_model* model, const char* file, uint32_t line) { bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line) { //Outputs should have no combinational sinks for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) { - if (port->combinational_sink_ports.size() != 0) { + if (!port->combinational_sink_ports.empty()) { archfpga_throw(file, line, "Model '%s' output port '%s' can not have combinational sink ports", model->name, port->name); @@ -114,9 +114,9 @@ void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_ti } for (auto pin_map : pin_direct_map) { - auto 
block_port = get_port_by_pin(logical_block, pin_map.first.pin); + const t_port* block_port = logical_block->get_port_by_pin(pin_map.first.pin); - auto sub_tile_port = get_port_by_pin(sub_tile, pin_map.second.pin); + const t_physical_tile_port* sub_tile_port = sub_tile->get_port_by_pin(pin_map.second.pin); VTR_ASSERT(block_port != nullptr); VTR_ASSERT(sub_tile_port != nullptr); diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index 8b189fd7021..79619d11df4 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -155,6 +155,17 @@ const t_port* t_logical_block_type::get_port(std::string_view port_name) const { return nullptr; } +const t_port* t_logical_block_type::get_port_by_pin(int pin) const { + for (int i = 0; i < pb_type->num_ports; i++) { + const t_port& port = pb_type->ports[i]; + if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { + return &pb_type->ports[port.index]; + } + } + + return nullptr; +} + /** * t_pb_graph_node */ @@ -288,5 +299,15 @@ const t_physical_tile_port* t_sub_tile::get_port(std::string_view port_name) { } } + return nullptr; +} + +const t_physical_tile_port* t_sub_tile::get_port_by_pin(int pin) const { + for (const t_physical_tile_port& port : ports) { + if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { + return &ports[port.index]; + } + } + return nullptr; } \ No newline at end of file diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index c5f3d39093e..a2fc676e305 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -804,6 +804,11 @@ struct t_sub_tile { * @brief Returns the physical tile port given the port name and the corresponding sub tile */ const t_physical_tile_port* get_port(std::string_view port_name); + + /** + * @brief Returns the physical tile port given the 
pin name and the corresponding sub tile + */ + const t_physical_tile_port* get_port_by_pin(int pin) const; }; /** A logical pin defines the pin index of a logical block type (i.e. a top level PB type) @@ -964,6 +969,11 @@ struct t_logical_block_type { * @brief Returns the logical block port given the port name and the corresponding logical block type */ const t_port* get_port(std::string_view port_name) const; + + /** + * @brief Returns the logical block port given the pin name and the corresponding logical block type + */ + const t_port* get_port_by_pin(int pin) const; }; /************************************************************************************************* diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index 5d4edc65b21..1374a7f7055 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -841,29 +841,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ return pin_names; } -const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) { - for (auto port : sub_tile->ports) { - if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { - return &sub_tile->ports[port.index]; - } - } - - return nullptr; -} - -const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin) { - auto pb_type = type->pb_type; - - for (int i = 0; i < pb_type->num_ports; i++) { - auto port = pb_type->ports[i]; - if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { - return &pb_type->ports[port.index]; - } - } - - return nullptr; -} - /* Access information related to pin classes */ /** get information given class physical num **/ @@ -1506,4 +1483,3 @@ std::map get_sink_choking_points(t_physical_tile_type_ptr physical_til return choking_point; } -/* */ diff --git a/libs/libarchfpga/src/physical_types_util.h 
b/libs/libarchfpga/src/physical_types_util.h index ae9405ef44c..8d2637ef048 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -286,16 +286,6 @@ int get_sub_tile_physical_pin(int sub_tile_index, */ t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, std::string_view port_name); -/** - * @brief Returns the physical tile port given the pin name and the corresponding sub tile - */ -const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin); - -/** - * @brief Returns the logical block port given the pin name and the corresponding logical block type - */ -const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin); - /************************************ Access to intra-block resources ************************************/ /* Access information related to pin classes */ From c246372717e6f0a927ffbe1f37d371aeacb37532 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 18:43:37 -0500 Subject: [PATCH 14/39] add PlacementDelayModelCreator class --- utils/route_diag/src/main.cpp | 50 +---- vpr/src/base/place_and_route.cpp | 30 +++ vpr/src/base/place_and_route.h | 6 +- vpr/src/noc/noc_routing_algorithm_creator.h | 7 +- vpr/src/place/place.cpp | 18 +- .../PlacementDelayModelCreator.cpp | 80 +++++++ .../delay_model/PlacementDelayModelCreator.h | 31 +++ .../compute_delta_delays_utils.cpp | 55 ++++- .../delay_model/compute_delta_delays_utils.h | 31 ++- .../timing/delay_model/place_delay_model.cpp | 24 +- .../timing/delay_model/place_delay_model.h | 10 - vpr/src/place/timing_place_lookup.cpp | 209 ------------------ vpr/src/place/timing_place_lookup.h | 45 ---- vpr/src/route/router_delay_profiling.cpp | 1 - vpr/src/util/vpr_utils.cpp | 27 +++ vpr/src/util/vpr_utils.h | 21 +- 16 files changed, 294 insertions(+), 351 deletions(-) create mode 100644 vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp create mode 100644 
vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h delete mode 100644 vpr/src/place/timing_place_lookup.cpp delete mode 100644 vpr/src/place/timing_place_lookup.h diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 8b485916532..626b845d13a 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -37,7 +37,7 @@ #include "route_export.h" #include "rr_graph.h" #include "rr_graph2.h" -#include "timing_place_lookup.h" +#include "compute_delta_delays_utils.h" struct t_route_util_options { /* Router diag tool Options */ @@ -238,36 +238,6 @@ static void profile_source(const Netlist<>& net_list, VTR_LOG("\n"); } -static t_chan_width setup_chan_width(const t_router_opts& router_opts, - t_chan_width_dist chan_width_dist) { - /*we give plenty of tracks, this increases routability for the */ - /*lookup table generation */ - - t_graph_type graph_directionality; - int width_fac; - - if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) { - auto& device_ctx = g_vpr_ctx.device(); - - auto type = find_most_common_tile_type(device_ctx.grid); - - width_fac = 4 * type->num_pins; - /*this is 2x the value that binary search starts */ - /*this should be enough to allow most pins to */ - /*connect to tracks in the architecture */ - } else { - width_fac = router_opts.fixed_channel_width; - } - - if (router_opts.route_type == GLOBAL) { - graph_directionality = GRAPH_BIDIR; - } else { - graph_directionality = GRAPH_UNIDIR; - } - - return init_chan(width_fac, chan_width_dist, graph_directionality); -} - t_route_util_options read_route_util_options(int argc, const char** argv) { //Explicitly initialize for zero initialization t_route_util_options args = t_route_util_options(); @@ -323,17 +293,15 @@ int main(int argc, const char **argv) { const Netlist<>& net_list = is_flat ? 
(const Netlist<>&)g_vpr_ctx.atom().nlist : (const Netlist<>&)g_vpr_ctx.clustering().clb_nlist; - t_chan_width chan_width = setup_chan_width( - vpr_setup.RouterOpts, - Arch.Chans); + t_chan_width chan_width = setup_chan_width(vpr_setup.RouterOpts, + Arch.Chans); - alloc_routing_structs( - chan_width, - vpr_setup.RouterOpts, - &vpr_setup.RoutingArch, - vpr_setup.Segments, - Arch.directs, - is_flat); + alloc_routing_structs(chan_width, + vpr_setup.RouterOpts, + &vpr_setup.RoutingArch, + vpr_setup.Segments, + Arch.directs, + is_flat); if(route_options.profile_source) { profile_source(net_list, diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index ba7e20ccd80..2ffeb26c240 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -415,6 +415,36 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, return (final); } +t_chan_width setup_chan_width(const t_router_opts& router_opts, + t_chan_width_dist chan_width_dist) { + /*we give plenty of tracks, this increases routability for the */ + /*lookup table generation */ + + t_graph_type graph_directionality; + int width_fac; + + if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) { + auto& device_ctx = g_vpr_ctx.device(); + + auto type = find_most_common_tile_type(device_ctx.grid); + + width_fac = 4 * type->num_pins; + /*this is 2x the value that binary search starts */ + /*this should be enough to allow most pins to */ + /*connect to tracks in the architecture */ + } else { + width_fac = router_opts.fixed_channel_width; + } + + if (router_opts.route_type == GLOBAL) { + graph_directionality = GRAPH_BIDIR; + } else { + graph_directionality = GRAPH_UNIDIR; + } + + return init_chan(width_fac, chan_width_dist, graph_directionality); +} + /** * @brief Assigns widths to channels (in tracks). 
* diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h index 6f191c0ff9e..538996548f2 100644 --- a/vpr/src/base/place_and_route.h +++ b/vpr/src/base/place_and_route.h @@ -2,11 +2,9 @@ #define VPR_PLACE_AND_ROUTE_H #define INFINITE -1 -#define NOT_FOUND 0 #define WNEED 1 #define WL 2 -#define PROC_TIME 3 #include "vpr_types.h" #include "timing_info.h" @@ -18,7 +16,6 @@ struct t_fmap_cell { int fc; ///& placement_net_list, const std::shared_ptr& delay_calc, bool is_flat); +t_chan_width setup_chan_width(const t_router_opts& router_opts, + t_chan_width_dist chan_width_dist); + t_chan_width init_chan(int cfactor, const t_chan_width_dist& chan_width_dist, t_graph_type graph_directionality); diff --git a/vpr/src/noc/noc_routing_algorithm_creator.h b/vpr/src/noc/noc_routing_algorithm_creator.h index 8cb9b777949..4c33d13f590 100644 --- a/vpr/src/noc/noc_routing_algorithm_creator.h +++ b/vpr/src/noc/noc_routing_algorithm_creator.h @@ -8,9 +8,10 @@ * * Overview * ======== - * There are a number of different available NoC routing algorithms. This class is a factory object for the NocRouting abstract class. This class constructs - * the appropriate routing algorithm based on the user specification in the - * command line. The user identifies a + * There are a number of different available NoC routing algorithms. + * This class is a factory object for the NocRouting abstract class. + * This class constructs the appropriate routing algorithm based on + * the user specification in the command line. The user identifies a * specific routing algorithm in the command line by providing a string * (which is the name of routing algorithm). 
* Then the corresponding routing algorithm is created here based on the diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 3506d00b801..69e4e1895a0 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -13,7 +13,7 @@ #include "read_xml_arch_file.h" #include "echo_files.h" #include "histogram.h" -#include "place_delay_model.h" +#include "PlacementDelayModelCreator.h" #include "move_utils.h" #include "buttons.h" @@ -65,14 +65,14 @@ void try_place(const Netlist<>& net_list, if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ - place_delay_model = alloc_lookups_and_delay_model(net_list, - chan_width_dist, - placer_opts, - router_opts, - det_routing_arch, - segment_inf, - directs, - is_flat); + place_delay_model = PlacementDelayModelCreator::create_delay_model(placer_opts, + router_opts, + net_list, + det_routing_arch, + segment_inf, + chan_width_dist, + directs, + is_flat); if (isEchoFileEnabled(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)) { place_delay_model->dump_echo(getEchoFileName(E_ECHO_PLACEMENT_DELTA_DELAY_MODEL)); diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp new file mode 100644 index 00000000000..3482cd091e0 --- /dev/null +++ b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp @@ -0,0 +1,80 @@ + + +#include "PlacementDelayModelCreator.h" + +#include "place_delay_model.h" +#include "simple_delay_model.h" +#include "delta_delay_model.h" +#include "override_delay_model.h" + +#include "vtr_time.h" +#include "physical_types.h" +#include "place_and_route.h" + +static int get_longest_segment_length(std::vector& segment_inf) { + int length = 0; + + for (const t_segment_inf& seg_info : segment_inf) { + if (seg_info.length > length) { + length = seg_info.length; + } + } + + return length; +} + +std::unique_ptr 
+PlacementDelayModelCreator::create_delay_model(const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + const Netlist<>& net_list, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + const std::vector& directs, + bool is_flat) { + vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up"); + + t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); + + alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, is_flat); + + const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + + RouterDelayProfiler route_profiler(net_list, router_lookahead, is_flat); + + int longest_length = get_longest_segment_length(segment_inf); + + // now setup and compute the actual arrays + std::unique_ptr place_delay_model; + float min_cross_layer_delay = get_min_cross_layer_delay(); + + if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { + place_delay_model = std::make_unique(); + } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { + place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { + place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + } else { + VTR_ASSERT_MSG(false, "Invalid placer delay model"); + } + + if (placer_opts.read_placement_delay_lookup.empty()) { + place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length); + } else { + place_delay_model->read(placer_opts.read_placement_delay_lookup); + } + + if (!placer_opts.write_placement_delay_lookup.empty()) { + place_delay_model->write(placer_opts.write_placement_delay_lookup); + } + + // free all data structures that are no longer needed + 
free_routing_structs(); + + return place_delay_model; +} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h new file mode 100644 index 00000000000..37a8e0d51c8 --- /dev/null +++ b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h @@ -0,0 +1,31 @@ + +#pragma once + +#include +#include + +#include "netlist.h" + +class PlaceDelayModel; +struct t_placer_opts; +struct t_router_opts; +struct t_det_routing_arch; +struct t_segment_inf; +struct t_chan_width_dist; +struct t_direct_inf; + +class PlacementDelayModelCreator { + public: + // nothing to do in the constructor and destructor + PlacementDelayModelCreator() = default; + ~PlacementDelayModelCreator() = default; + + static std::unique_ptr create_delay_model(const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + const Netlist<>& net_list, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + const std::vector& directs, + bool is_flat); +}; diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index 4630ddfcfb4..eb59195f055 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -5,7 +5,7 @@ #include "vtr_math.h" #include "physical_types.h" #include "globals.h" -#include "timing_place_lookup.h" +#include "router_delay_profiling.h" /// Indicates the delta delay value has not been calculated static constexpr float UNINITIALIZED_DELTA = -1; @@ -904,10 +904,7 @@ bool find_direct_connect_sample_locations(const t_direct_inf* direct, VTR_ASSERT(from_y + direct->y_offset == to_y); VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile); - // - //Find a source/sink RR node associated with the pins of the direct - // - + // Find a source/sink RR 
node associated with the pins of the direct { RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); VTR_ASSERT(src_rr_candidate); @@ -921,4 +918,52 @@ bool find_direct_connect_sample_locations(const t_direct_inf* direct, } return true; +} + +std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { + std::vector best_classes; + + //Record any non-zero Fc pins + // + //Note that we track non-zero Fc pins, since certain Fc overrides + //may apply to only a subset of wire types. This ensures we record + //which pins can potentially connect to global routing. + std::unordered_set non_zero_fc_pins; + for (const t_fc_specification& fc_spec : type->fc_specs) { + if (fc_spec.fc_value == 0) continue; + + non_zero_fc_pins.insert(fc_spec.pins.begin(), fc_spec.pins.end()); + } + + // Collect all classes of matching type which connect to general routing + for (int i = 0; i < (int)type->class_inf.size(); i++) { + if (type->class_inf[i].type == pintype) { + //Check whether all pins in this class are ignored or have zero fc + bool any_pins_connect_to_general_routing = false; + for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) { + int pin = type->class_inf[i].pinlist[ipin]; + //If the pin isn't ignored, and has a non-zero Fc to some general + //routing the class is suitable for delay profiling + if (!type->is_ignored_pin[pin] && non_zero_fc_pins.count(pin)) { + any_pins_connect_to_general_routing = true; + break; + } + } + + // Skip if the pin class doesn't connect to general routing + if (!any_pins_connect_to_general_routing) continue; + + // Record candidate class + best_classes.push_back(i); + } + } + + // Sort classes so the largest pin class is first + auto cmp_class = [&](int lhs, int rhs) { + return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins; + }; + + std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class); + + return best_classes; } \ No newline 
at end of file diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h index bacff650334..71ac632b149 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h @@ -24,4 +24,33 @@ bool find_direct_connect_sample_locations(const t_direct_inf* direct, int to_pin, int to_pin_class, RRNodeId& out_src_node, - RRNodeId& out_sink_node); \ No newline at end of file + RRNodeId& out_sink_node); + +/** + * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity. + * + * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`) + * that are suitable for delay calculations. It prioritizes pin classes with the largest number of pins + * that connect to general routing, ensuring commonly used pins are chosen for delay profiling. + * + * @param pintype The type of pins to filter. + * @param type Pointer to the physical tile type containing pin and class information. + * + * @return A vector of indices representing the selected pin classes. The classes are sorted + * in descending order based on the number of pins they contain. + * + * @details + * - A pin class is eligible if its type matches `pintype` and it contains at least one pin + * that connects to general routing (non-zero Fc). + * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`. + * - Classes are sorted so that the class with the largest number of pins appears first. + * If multiple classes have the same pin count, their order depends on their initial appearance + * in the architecture file. + * + * @note + * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded. + * - The function ensures stability in sorting, preserving the input order for classes + * with the same number of pins. 
+ */ + +std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp index a91547a7e5e..04267e0e5f1 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/place_delay_model.cpp @@ -6,33 +6,11 @@ #include "place_delay_model.h" -#include - #include "globals.h" #include "router_lookahead_map.h" -#include "timing_place_lookup.h" #include "placer_state.h" #include "vpr_error.h" -///@brief Initialize the placer delay model. -std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, - t_chan_width_dist chan_width_dist, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - const std::vector& directs, - bool is_flat) { - return compute_place_delay_model(placer_opts, - router_opts, - net_list, - det_routing_arch, - segment_inf, - chan_width_dist, - directs, - is_flat); -} - /** * @brief Returns the delay of one point to point connection. * @@ -43,7 +21,7 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, const vtr::vector_map& block_locs, ClusterNetId net_id, int ipin) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); float delay_source_to_sink = 0.; diff --git a/vpr/src/place/timing/delay_model/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h index e361f8cc197..27c89591071 100644 --- a/vpr/src/place/timing/delay_model/place_delay_model.h +++ b/vpr/src/place/timing/delay_model/place_delay_model.h @@ -29,16 +29,6 @@ class PlaceDelayModel; class PlacerState; -///@brief Initialize the placer delay model. 
-std::unique_ptr alloc_lookups_and_delay_model(const Netlist<>& net_list, - t_chan_width_dist chan_width_dist, - const t_placer_opts& place_opts, - const t_router_opts& router_opts, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - const std::vector& directs, - bool is_flat); - ///@brief Returns the delay of one point to point connection. float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, const vtr::vector_map& block_locs, diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp deleted file mode 100644 index f086283a3e7..00000000000 --- a/vpr/src/place/timing_place_lookup.cpp +++ /dev/null @@ -1,209 +0,0 @@ - -#include "timing_place_lookup.h" - -#include "rr_graph_fwd.h" -#include "vtr_assert.h" -#include "vtr_ndmatrix.h" -#include "vtr_log.h" -#include "vtr_util.h" - -#include "vtr_time.h" - -#include "vpr_types.h" -#include "globals.h" -#include "place_and_route.h" -#include "route_net.h" -#include "read_xml_arch_file.h" -#include "atom_netlist.h" - -#include "router_delay_profiling.h" -#include "place_delay_model.h" -#include "simple_delay_model.h" -#include "delta_delay_model.h" -#include "override_delay_model.h" - - -/*** Function Prototypes *****/ -static t_chan_width setup_chan_width(const t_router_opts& router_opts, - t_chan_width_dist chan_width_dist); - -static int get_longest_segment_length(std::vector& segment_inf); - -/******* Globally Accessible Functions **********/ - -std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - const Netlist<>& net_list, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - const std::vector& directs, - bool is_flat) { - vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up"); - - t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); - - alloc_routing_structs(chan_width, router_opts, 
det_routing_arch, segment_inf, directs, is_flat); - - const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - - RouterDelayProfiler route_profiler(net_list, router_lookahead, is_flat); - - int longest_length = get_longest_segment_length(segment_inf); - - /*now setup and compute the actual arrays */ - std::unique_ptr place_delay_model; - float min_cross_layer_delay = get_min_cross_layer_delay(); - - if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { - place_delay_model = std::make_unique(); - } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); - } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); - } else { - VTR_ASSERT_MSG(false, "Invalid placer delay model"); - } - - if (placer_opts.read_placement_delay_lookup.empty()) { - place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length); - } else { - place_delay_model->read(placer_opts.read_placement_delay_lookup); - } - - if (!placer_opts.write_placement_delay_lookup.empty()) { - place_delay_model->write(placer_opts.write_placement_delay_lookup); - } - - /*free all data structures that are no longer needed */ - free_routing_structs(); - - return place_delay_model; -} - -/******* File Accessible Functions **********/ - -std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { - std::vector best_classes; - - //Record any non-zero Fc pins - // - //Note that we track non-zero Fc pins, since certain Fc overrides - //may apply to only a subset of wire types. This ensures we record - //which pins can potentially connect to global routing. 
- std::unordered_set non_zero_fc_pins; - for (const t_fc_specification& fc_spec : type->fc_specs) { - if (fc_spec.fc_value == 0) continue; - - non_zero_fc_pins.insert(fc_spec.pins.begin(), fc_spec.pins.end()); - } - - //Collect all classes of matching type which connect to general routing - for (int i = 0; i < (int)type->class_inf.size(); i++) { - if (type->class_inf[i].type == pintype) { - //Check whether all pins in this class are ignored or have zero fc - bool any_pins_connect_to_general_routing = false; - for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) { - int pin = type->class_inf[i].pinlist[ipin]; - //If the pin isn't ignored, and has a non-zero Fc to some general - //routing the class is suitable for delay profiling - if (!type->is_ignored_pin[pin] && non_zero_fc_pins.count(pin)) { - any_pins_connect_to_general_routing = true; - break; - } - } - - //Skip if the pin class doesn't connect to general routing - if (!any_pins_connect_to_general_routing) continue; - - //Record candidate class - best_classes.push_back(i); - } - } - - //Sort classes so the largest pin class is first - auto cmp_class = [&](int lhs, int rhs) { - return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins; - }; - - std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class); - - return best_classes; -} - -static int get_longest_segment_length(std::vector& segment_inf) { - int length = 0; - - for (const t_segment_inf &seg_info : segment_inf) { - if (seg_info.length > length) { - length = seg_info.length; - } - } - - return length; -} - -static t_chan_width setup_chan_width(const t_router_opts& router_opts, - t_chan_width_dist chan_width_dist) { - /*we give plenty of tracks, this increases routability for the */ - /*lookup table generation */ - - t_graph_type graph_directionality; - int width_fac; - - if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) { - auto& device_ctx = g_vpr_ctx.device(); - - auto type = 
find_most_common_tile_type(device_ctx.grid); - - width_fac = 4 * type->num_pins; - /*this is 2x the value that binary search starts */ - /*this should be enough to allow most pins to */ - /*connect to tracks in the architecture */ - } else { - width_fac = router_opts.fixed_channel_width; - } - - if (router_opts.route_type == GLOBAL) { - graph_directionality = GRAPH_BIDIR; - } else { - graph_directionality = GRAPH_UNIDIR; - } - - return init_chan(width_fac, chan_width_dist, graph_directionality); -} - -bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) { - //Returns true if there is a directconnect between the two RR nodes - // - //This is checked by looking for a SOURCE -> OPIN -> IPIN -> SINK path - //which starts at src_rr_node and ends at sink_rr_node - auto& device_ctx = g_vpr_ctx.device(); - const auto& rr_graph = device_ctx.rr_graph; - - VTR_ASSERT(rr_graph.node_type(src_rr_node) == SOURCE && rr_graph.node_type(sink_rr_node) == SINK); - - //TODO: This is a constant depth search, but still may be too slow - for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(src_rr_node); ++i_src_edge) { - RRNodeId opin_rr_node = rr_graph.edge_sink_node(src_rr_node, i_src_edge); - - if (rr_graph.node_type(opin_rr_node) != OPIN) continue; - - for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(opin_rr_node); ++i_opin_edge) { - RRNodeId ipin_rr_node = rr_graph.edge_sink_node(opin_rr_node, i_opin_edge); - if (rr_graph.node_type(ipin_rr_node) != IPIN) continue; - - for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(ipin_rr_node); ++i_ipin_edge) { - if (sink_rr_node == rr_graph.edge_sink_node(ipin_rr_node, i_ipin_edge)) { - return true; - } - } - } - } - return false; -} \ No newline at end of file diff --git a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h deleted file mode 100644 index 24cfc301ce6..00000000000 --- a/vpr/src/place/timing_place_lookup.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef 
TIMING_PLACE_LOOKUP_H -#define TIMING_PLACE_LOOKUP_H -#include "place_delay_model.h" - -std::unique_ptr compute_place_delay_model(const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - const Netlist<>& net_list, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - const std::vector& directs, - bool is_flat); - -/** - * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity. - * - * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`) - * that are suitable for delay calculations. It prioritizes pin classes with the largest number of pins - * that connect to general routing, ensuring commonly used pins are chosen for delay profiling. - * - * @param pintype The type of pins to filter. - * @param type Pointer to the physical tile type containing pin and class information. - * - * @return A vector of indices representing the selected pin classes. The classes are sorted - * in descending order based on the number of pins they contain. - * - * @details - * - A pin class is eligible if its type matches `pintype` and it contains at least one pin - * that connects to general routing (non-zero Fc). - * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`. - * - Classes are sorted so that the class with the largest number of pins appears first. - * If multiple classes have the same pin count, their order depends on their initial appearance - * in the architecture file. - * - * @note - * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded. - * - The function ensures stability in sorting, preserving the input order for classes - * with the same number of pins. 
- */ - -std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); - -bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node); - -#endif diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index 5feb0e9b2f6..f9c4c1d74a8 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -6,7 +6,6 @@ #include "route_tree.h" #include "rr_graph.h" #include "vtr_time.h" -#include "draw.h" RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list, const RouterLookahead* lookahead, diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index c2aa98286c0..02446c67c05 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1857,6 +1857,33 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { } } +bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + VTR_ASSERT(rr_graph.node_type(src_rr_node) == SOURCE && rr_graph.node_type(sink_rr_node) == SINK); + + // A direct connection is defined as a specific path: `SOURCE -> OPIN -> IPIN -> SINK`. 
+ //TODO: This is a constant depth search, but still may be too slow + for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(src_rr_node); ++i_src_edge) { + RRNodeId opin_rr_node = rr_graph.edge_sink_node(src_rr_node, i_src_edge); + + if (rr_graph.node_type(opin_rr_node) != OPIN) continue; + + for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(opin_rr_node); ++i_opin_edge) { + RRNodeId ipin_rr_node = rr_graph.edge_sink_node(opin_rr_node, i_opin_edge); + if (rr_graph.node_type(ipin_rr_node) != IPIN) continue; + + for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(ipin_rr_node); ++i_ipin_edge) { + if (sink_rr_node == rr_graph.edge_sink_node(ipin_rr_node, i_ipin_edge)) { + return true; + } + } + } + } + return false; +} + std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, int i, int j, diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 8869cc55ddd..abaafadbfe7 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -264,9 +264,28 @@ RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, const int j, int class_physical_num); -// Check whether the given nodes are in the same cluster +/// @brief Check whether the given nodes are in the same cluster bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second); +/** + * @brief Checks if a direct connection exists between two RR nodes. + * + * A direct connection is defined as a specific path: `SOURCE -> OPIN -> IPIN -> SINK`. + * + * @param src_rr_node The source RR node (must be of type `SOURCE`). + * @param sink_rr_node The sink RR node (must be of type `SINK`). + * + * @return `true` if a direct connection exists between the source and sink nodes; + * otherwise, `false`. + * + * @details + * - The function performs a depth-limited search starting from the source node, + * traversing through OPIN, IPIN, and finally checking if the path reaches the sink node. 
+ * - Ensures the specified node types are respected (e.g., source node must be of type `SOURCE`). + */ + +bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node); + std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, int i, int j, From c08e8cfd92fac61feaeaff8e0c3ab0da7234318e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 30 Nov 2024 19:02:14 -0500 Subject: [PATCH 15/39] fix compilation errors --- utils/route_diag/src/main.cpp | 8 +------- vpr/test/test_connection_router.cpp | 4 +--- vpr/test/test_post_verilog.cpp | 2 +- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 626b845d13a..61b4bb644a3 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -9,13 +9,10 @@ // Tool can either perform one route between a source (--source_rr_node) and // a sink (--sink_rr_node), or profile a source to all tiles (set // --source_rr_node and "--profile_source true"). 
-#include -#include -#include + #include #include "vtr_error.h" -#include "vtr_memory.h" #include "vtr_log.h" #include "vtr_time.h" @@ -28,15 +25,12 @@ #include "globals.h" #include "net_delay.h" -#include "RoutingDelayCalculator.h" #include "place_and_route.h" #include "router_delay_profiling.h" #include "route_tree.h" #include "route_common.h" #include "route_net.h" -#include "route_export.h" #include "rr_graph.h" -#include "rr_graph2.h" #include "compute_delta_delays_utils.h" struct t_route_util_options { diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index a106ad80a80..2b584daedc3 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -8,7 +8,6 @@ #include "globals.h" #include "net_delay.h" #include "place_and_route.h" -#include "timing_place_lookup.h" static constexpr const char kArchFile[] = "../../vtr_flow/arch/timing/k6_frac_N10_mem32K_40nm.xml"; static constexpr int kMaxHops = 10; @@ -188,8 +187,7 @@ TEST_CASE("connection_router", "[vpr]") { // Clean up free_routing_structs(); - vpr_free_all(arch, - vpr_setup); + vpr_free_all(arch, vpr_setup); } } // namespace diff --git a/vpr/test/test_post_verilog.cpp b/vpr/test/test_post_verilog.cpp index a8344fa79d4..ca1a250b7d2 100644 --- a/vpr/test/test_post_verilog.cpp +++ b/vpr/test/test_post_verilog.cpp @@ -1,7 +1,7 @@ #include "catch2/catch_test_macros.hpp" #include "vpr_api.h" -#include "timing_place_lookup.h" +#include "router_delay_profiling.h" #include #include From 75a765810d290808434b762cc54a8a4d1c94a7b7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 1 Dec 2024 15:42:19 -0500 Subject: [PATCH 16/39] add find_pin() and find_pin_class() to t_physical_tile_type --- libs/libarchfpga/src/physical_types.cpp | 50 +++++++++++++++++ libs/libarchfpga/src/physical_types.h | 19 +++++-- libs/libarchfpga/src/physical_types_util.cpp | 53 +------------------ libs/libarchfpga/src/physical_types_util.h | 29 ++-------- 
vpr/src/base/read_options.cpp | 2 +- .../compute_delta_delays_utils.cpp | 19 ++++--- .../delay_model/override_delay_model.cpp | 18 +++---- .../timing/delay_model/override_delay_model.h | 4 +- vpr/src/util/vpr_utils.cpp | 2 +- 9 files changed, 90 insertions(+), 106 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index 79619d11df4..bdacf50931d 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -136,6 +136,56 @@ bool t_physical_tile_type::is_empty() const { return name == std::string(EMPTY_BLOCK_NAME); } +int t_physical_tile_type::find_pin(std::string_view port_name, int pin_index_in_port) const { + int ipin = OPEN; + int port_base_ipin = 0; + int num_port_pins = OPEN; + int pin_offset = 0; + + bool port_found = false; + for (const t_sub_tile& sub_tile : sub_tiles) { + for (const t_physical_tile_port& port : sub_tile.ports) { + if (port_name == port.name) { + port_found = true; + num_port_pins = port.num_pins; + break; + } + + port_base_ipin += port.num_pins; + } + + if (port_found) { + break; + } + + port_base_ipin = 0; + pin_offset += sub_tile.num_phy_pins; + } + + if (num_port_pins != OPEN) { + VTR_ASSERT(pin_index_in_port < num_port_pins); + + ipin = port_base_ipin + pin_index_in_port + pin_offset; + } + + return ipin; +} + +int t_physical_tile_type::find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const { + int iclass = OPEN; + + int ipin = find_pin(port_name, pin_index_in_port); + + if (ipin != OPEN) { + iclass = pin_class[ipin]; + + if (iclass != OPEN) { + VTR_ASSERT(class_inf[iclass].type == pin_type); + } + } + return iclass; +} + /* * t_logical_block_type */ diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index a2fc676e305..922b1f153f8 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -704,11 +704,7 @@ struct 
t_physical_tile_type { * tile_block_pin_directs_map[logical block index][logical block pin] -> physical tile pin */ std::unordered_map>> tile_block_pin_directs_map; - /* Returns the indices of pins that contain a clock for this physical logic block */ - std::vector get_clock_pins_indices() const; - // Returns the sub tile location of the physical tile given an input pin - int get_sub_tile_loc_from_pin(int pin_num) const; // TODO: Remove is_input_type / is_output_type as part of // https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193 @@ -719,8 +715,21 @@ struct t_physical_tile_type { // Does this t_physical_tile_type contain an outpad? bool is_output_type = false; - // Is this t_physical_tile_type an empty type? + public: // Function members + ///@brief Returns the indices of pins that contain a clock for this physical logic block + std::vector get_clock_pins_indices() const; + + ///@brief Returns the sub tile location of the physical tile given an input pin + int get_sub_tile_loc_from_pin(int pin_num) const; + + ///@brief Is this t_physical_tile_type an empty type? bool is_empty() const; + + ///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port + int find_pin(std::string_view port_name, int pin_index_in_port) const; + + ///@brief Returns the pin class associated with the specified pin_index_in_port within the port port_name on type + int find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const; }; /* Holds the capacity range of a certain sub_tile block within the parent physical tile type. 
diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp index 1374a7f7055..2ecc7fbd41c 100644 --- a/libs/libarchfpga/src/physical_types_util.cpp +++ b/libs/libarchfpga/src/physical_types_util.cpp @@ -563,57 +563,6 @@ int get_max_num_pins(t_logical_block_type_ptr logical_block) { return max_num_pins; } -//Returns the pin class associated with the specified pin_index_in_port within the port port_name on type -int find_pin_class(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port, e_pin_type pin_type) { - int iclass = OPEN; - - int ipin = find_pin(type, port_name, pin_index_in_port); - - if (ipin != OPEN) { - iclass = type->pin_class[ipin]; - - if (iclass != OPEN) { - VTR_ASSERT(type->class_inf[iclass].type == pin_type); - } - } - return iclass; -} - -int find_pin(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port) { - int ipin = OPEN; - int port_base_ipin = 0; - int num_pins = OPEN; - int pin_offset = 0; - - bool port_found = false; - for (const auto& sub_tile : type->sub_tiles) { - for (const auto& port : sub_tile.ports) { - if (0 == strcmp(port.name, port_name.c_str())) { - port_found = true; - num_pins = port.num_pins; - break; - } - - port_base_ipin += port.num_pins; - } - - if (port_found) { - break; - } - - port_base_ipin = 0; - pin_offset += sub_tile.num_phy_pins; - } - - if (num_pins != OPEN) { - VTR_ASSERT(pin_index_in_port < num_pins); - - ipin = port_base_ipin + pin_index_in_port + pin_offset; - } - - return ipin; -} - std::pair get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin) { int pins_to_remove = 0; for (const auto& sub_tile : physical_tile->sub_tiles) { @@ -638,7 +587,7 @@ std::pair get_capacity_location_from_physical_pin(t_physical_tile_type int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location) { int pins_to_add = 0; - for (auto 
sub_tile : physical_tile->sub_tiles) { + for (const t_sub_tile& sub_tile : physical_tile->sub_tiles) { auto capacity = sub_tile.capacity; int rel_capacity = capacity_location - capacity.low; int num_inst_pins = sub_tile.num_phy_pins / capacity.total(); diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index 8d2637ef048..94bc15e8082 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -13,11 +13,11 @@ * functions in this file are the following: * * - physical_tile_type: identifies a placeable tile within * * the device grid. * - * - logical_block_tpye: identifies a clustered block type * + * - logical_block_type: identifies a clustered block type * * within the clb_netlist * * * * All the following utilities are intended to ease the * - * developement to access the above mentioned classes and perform * + * development to access the above mentioned classes and perform * * some required operations with their data. * * * * Please classify such functions in this file * @@ -107,7 +107,7 @@ * * For instance, the following information are required: * - mapping between logical and sub tile pins. 
- * - mapping between sub tile pins and absoulte physical pin + * - mapping between sub tile pins and absolute physical pin * - capacity instance of the sub tile * * With all the above information we can calculate correctly the connection between the CLK (logical pin) @@ -173,11 +173,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ ///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found) t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector& types); -int find_pin_class(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port, e_pin_type pin_type); - -///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port -int find_pin(t_physical_tile_type_ptr type, const std::string& port_name, int pin_index_in_port); - ///@brief Returns the maximum number of pins within a logical block int get_max_num_pins(t_logical_block_type_ptr logical_block); @@ -316,12 +311,6 @@ inline bool is_class_on_tile(t_physical_tile_type_ptr physical_tile, int class_p /** * @brief Classes are indexed in a way that the number of classes on the same pb_graph_node is continuous - * @param physical_tile - * @param sub_tile - * @param logical_block - * @param sub_tile_relative_cap - * @param pb_graph_node - * @return */ t_class_range get_pb_graph_node_class_physical_range(t_physical_tile_type_ptr physical_tile, const t_sub_tile* sub_tile, @@ -338,15 +327,11 @@ std::vector get_tile_root_classes(t_physical_tile_type_ptr physical_type); /** * Get the number of all classes, on the tile and inside the cluster. 
- * @param physical_type - * @return */ t_class_range get_flat_tile_primitive_classes(t_physical_tile_type_ptr physical_type); /** **/ int get_tile_class_max_ptc(t_physical_tile_type_ptr tile, bool is_flat); -/* */ - /* Access information related to pins */ /** get information given pin physical number **/ @@ -437,11 +422,6 @@ float get_pin_primitive_comb_delay(t_physical_tile_type_ptr physical_type, /** * @brief This function is used during reachability analysis to check whether two classes should be put in the same group - * @param physical_tile - * @param first_class_ptc_num - * @param second_class_ptc_num - * @param is_flat - * @return */ bool classes_in_same_block(t_physical_tile_type_ptr physical_tile, int first_class_ptc_num, @@ -451,9 +431,6 @@ bool classes_in_same_block(t_physical_tile_type_ptr physical_tile, /** * @brief Given the sink group, identify the pins which can reach both sink_ptc_num and at least one of the sinks, * in the grp. - * @param physical_tile - * @param sink_ptc_num - * @param grp * @return Key is the pin number and value is the number of sinks, including sink_ptc_num, in the grp reachable by the pin */ std::map get_sink_choking_points(t_physical_tile_type_ptr physical_tile, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 78124dd85c3..145601ac66f 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2295,7 +2295,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.post_place_timing_report_file, "--post_place_timing_report") - .help("Name of the post-placement timing report file (not generated if unspecfied)") + .help("Name of the post-placement timing report file (not generated if unspecified)") .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp 
b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp index eb59195f055..725159406c0 100644 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp @@ -198,7 +198,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p std::set allowed_types; if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + std::vector allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); allowed_types = std::set(allowed_types_vector.begin(), allowed_types_vector.end()); } @@ -206,7 +206,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) { vtr::NdMatrix, 2> sampled_delta_delays({device_width, device_height}); - //Find the lowest y location on the left edge with a non-empty block + // Find the lowest y location on the left edge with a non-empty block int y = 0; int x = 0; t_physical_tile_type_ptr src_type = nullptr; @@ -223,7 +223,7 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p break; } } - if (src_type) { + if (src_type != nullptr) { break; } } @@ -243,10 +243,10 @@ static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_p measure_directconnect, allowed_types, is_flat); - //Find the lowest x location on the bottom edge with a non-empty block + // Find the lowest x location on the bottom edge with a non-empty block src_type = nullptr; - for (y = 0; y < (int)grid.height(); ++y) { - for (x = 0; x < (int)grid.width(); ++x) { + for (y = 0; y < (int)device_height; ++y) { + for (x = 0; x < (int)device_width; ++x) { t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { @@ -458,8 +458,7 @@ static void 
generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_pr #endif } } else { - //Valid start/end - + // Valid start/end float delay = route_connection_delay(route_profiler, source_x, source_y, @@ -553,7 +552,7 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay + // Only set empty target if we don't already have a valid delta delay matrix[delta_x][delta_y].push_back(EMPTY_DELTA); #ifdef VERBOSE VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", @@ -575,7 +574,7 @@ static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*rou continue; if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { - //Skip if we shouldn't measure direct connects and a direct connect exists + // Skip if we shouldn't measure direct connects and a direct connect exists continue; } diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp index 33106acb208..d496a43b5e7 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -24,17 +24,17 @@ void OverrideDelayModel::compute(RouterDelayProfiler& route_profiler, base_delay_model_ = std::make_unique(cross_layer_delay_, delays, false); - compute_override_delay_model(route_profiler, router_opts); + compute_override_delay_model_(route_profiler, router_opts); } -void OverrideDelayModel::compute_override_delay_model(RouterDelayProfiler& route_profiler, - const t_router_opts& router_opts) { +void OverrideDelayModel::compute_override_delay_model_(RouterDelayProfiler& route_profiler, + const t_router_opts& router_opts) { + const 
auto& device_ctx = g_vpr_ctx.device(); t_router_opts router_opts2 = router_opts; router_opts2.astar_fac = 0.f; router_opts2.astar_offset = 0.f; - //Look at all the direct connections that exist, and add overrides to delay model - auto& device_ctx = g_vpr_ctx.device(); + // Look at all the direct connections that exist, and add overrides to delay model for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) { const t_direct_inf* direct = &device_ctx.arch->directs[idirect]; @@ -61,16 +61,16 @@ void OverrideDelayModel::compute_override_delay_model(RouterDelayProfiler& route std::set> sampled_rr_pairs; for (int iconn = 0; iconn < num_conns; ++iconn) { //Find the associated pins - int from_pin = find_pin(from_type, from_port.port_name(), from_port.port_low_index() + iconn); - int to_pin = find_pin(to_type, to_port.port_name(), to_port.port_low_index() + iconn); + int from_pin = from_type->find_pin(from_port.port_name(), from_port.port_low_index() + iconn); + int to_pin = to_type->find_pin(to_port.port_name(), to_port.port_low_index() + iconn); VTR_ASSERT(from_pin != OPEN); VTR_ASSERT(to_pin != OPEN); - int from_pin_class = find_pin_class(from_type, from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); + int from_pin_class = from_type->find_pin_class(from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); VTR_ASSERT(from_pin_class != OPEN); - int to_pin_class = find_pin_class(to_type, to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); + int to_pin_class = to_type->find_pin_class(to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); VTR_ASSERT(to_pin_class != OPEN); bool found_sample_points; diff --git a/vpr/src/place/timing/delay_model/override_delay_model.h b/vpr/src/place/timing/delay_model/override_delay_model.h index 23f6d01d709..5965261c272 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.h +++ b/vpr/src/place/timing/delay_model/override_delay_model.h @@ -41,8 +41,8 @@ 
class OverrideDelayModel : public PlaceDelayModel { /// Indicates whether the router is a two-stage or run-flat bool is_flat_; - void compute_override_delay_model(RouterDelayProfiler& router, - const t_router_opts& router_opts); + void compute_override_delay_model_(RouterDelayProfiler& router, + const t_router_opts& router_opts); /** * @brief Structure that allows delays to be queried from the delay model. diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 02446c67c05..430b386562f 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -708,7 +708,7 @@ InstPort parse_inst_port(const std::string& str) { VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find block type named %s", inst_port.instance_name().c_str()); } - int num_pins = find_tile_port_by_name(blk_type, inst_port.port_name().c_str()).num_pins; + int num_pins = find_tile_port_by_name(blk_type, inst_port.port_name()).num_pins; if (num_pins == OPEN) { VPR_FATAL_ERROR(VPR_ERROR_ARCH, "Failed to find port %s on block type %s", inst_port.port_name().c_str(), inst_port.instance_name().c_str()); From eecfde287ac033495abecc3c0f219adbf0093d84 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 1 Dec 2024 15:58:15 -0500 Subject: [PATCH 17/39] move timing_place.cpp/.h and place_timing_update.cpp/.h to place/timing directory --- libs/libarchfpga/src/physical_types.h | 5 +---- libs/libarchfpga/src/physical_types_util.h | 14 +++++--------- vpr/src/place/{ => timing}/place_timing_update.cpp | 0 vpr/src/place/{ => timing}/place_timing_update.h | 0 vpr/src/place/{ => timing}/timing_place.cpp | 0 vpr/src/place/{ => timing}/timing_place.h | 0 6 files changed, 6 insertions(+), 13 deletions(-) rename vpr/src/place/{ => timing}/place_timing_update.cpp (100%) rename vpr/src/place/{ => timing}/place_timing_update.h (100%) rename vpr/src/place/{ => timing}/timing_place.cpp (100%) rename vpr/src/place/{ => timing}/timing_place.h (100%) diff --git a/libs/libarchfpga/src/physical_types.h 
b/libs/libarchfpga/src/physical_types.h index 922b1f153f8..c11f1c451ee 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -24,8 +24,7 @@ * Authors: Jason Luu and Kenneth Kent */ -#ifndef PHYSICAL_TYPES_H -#define PHYSICAL_TYPES_H +#pragma once #include #include @@ -2157,5 +2156,3 @@ struct t_arch { /// Stores NoC-related architectural information when there is an embedded NoC t_noc_inf* noc = nullptr; }; - -#endif diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index 94bc15e8082..d4d5dc55924 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -1,5 +1,5 @@ -#ifndef PHYSICAL_TYPES_UTIL_H -#define PHYSICAL_TYPES_UTIL_H + +#pragma once #include "physical_types.h" @@ -152,12 +152,12 @@ int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_ti * * Take the above CLOCK TILE example: * - given the CLOCK TILE and the index corresponding to the CLK_1 pin, we want the relative pin - * of one of its sub tiles at a particualr capacity location (i.e. sub tile instance). + * of one of its sub tiles at a particular capacity location (i.e. sub tile instance). 
* * std::tie(absolute_capacity, relative_pin) = get_capacity_location_from_physical_pin(clock_tile, 3) * * The value returned is (1, 0), where: - * - 1 corresponds to the capacity location (sub tile instance) where the absoulte physical pin index (CLK_1) is connected + * - 1 corresponds to the capacity location (sub tile instance) where the absolute physical pin index (CLK_1) is connected * - 0 corresponds to the relative pin index within the BUFGCTRL sub tile */ std::pair get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin); @@ -223,7 +223,7 @@ int get_physical_pin(t_physical_tile_type_ptr physical_tile, int pin); /** * @brief Returns the physical pin index (within 'physical_tile') corresponding to the - * logical index ('pin' of the first instance of 'logical_block' within the physcial tile. + * logical index ('pin' of the first instance of 'logical_block' within the physical tile. * This function considers if a given offset is in the range of sub tile capacity * * (First pin index at current sub-tile) (The wanted pin index) @@ -436,7 +436,3 @@ bool classes_in_same_block(t_physical_tile_type_ptr physical_tile, std::map get_sink_choking_points(t_physical_tile_type_ptr physical_tile, int sink_ptc_num, const std::vector& grp); - -/* */ - -#endif diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/timing/place_timing_update.cpp similarity index 100% rename from vpr/src/place/place_timing_update.cpp rename to vpr/src/place/timing/place_timing_update.cpp diff --git a/vpr/src/place/place_timing_update.h b/vpr/src/place/timing/place_timing_update.h similarity index 100% rename from vpr/src/place/place_timing_update.h rename to vpr/src/place/timing/place_timing_update.h diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing/timing_place.cpp similarity index 100% rename from vpr/src/place/timing_place.cpp rename to vpr/src/place/timing/timing_place.cpp diff --git a/vpr/src/place/timing_place.h 
b/vpr/src/place/timing/timing_place.h similarity index 100% rename from vpr/src/place/timing_place.h rename to vpr/src/place/timing/timing_place.h From 4aa3d744ca7825541866d2eef207728f9e985fb7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 1 Dec 2024 16:55:02 -0500 Subject: [PATCH 18/39] add files for PlacerSetupSlacks and PlacerCriticalities --- vpr/src/place/annealer.cpp | 2 + vpr/src/place/annealer.h | 1 + .../place/move_generators/move_generator.h | 2 +- vpr/src/place/net_cost_handler.cpp | 1 + vpr/src/place/net_cost_handler.h | 1 + vpr/src/place/place_checkpoint.cpp | 4 + vpr/src/place/placer.h | 3 + ...ming_place.cpp => PlacerCriticalities.cpp} | 124 +-------- vpr/src/place/timing/PlacerCriticalities.h | 160 +++++++++++ vpr/src/place/timing/PlacerSetupSlacks.cpp | 109 ++++++++ vpr/src/place/timing/PlacerSetupSlacks.h | 108 ++++++++ vpr/src/place/timing/place_timing_update.cpp | 9 +- vpr/src/place/timing/place_timing_update.h | 11 +- vpr/src/place/timing/timing_place.h | 261 +----------------- 14 files changed, 414 insertions(+), 382 deletions(-) rename vpr/src/place/timing/{timing_place.cpp => PlacerCriticalities.cpp} (60%) create mode 100644 vpr/src/place/timing/PlacerCriticalities.h create mode 100644 vpr/src/place/timing/PlacerSetupSlacks.cpp create mode 100644 vpr/src/place/timing/PlacerSetupSlacks.h diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index b18f60b27bd..42fd3356709 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -16,6 +16,8 @@ #include "read_place.h" #include "placer_breakpoint.h" #include "RL_agent_util.h" +#include "PlacerSetupSlacks.h" +#include "PlacerCriticalities.h" /**************************************************************************/ /*************** Static Function Declarations *****************************/ diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index fd9b0dbd928..f788aea666d 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h 
@@ -17,6 +17,7 @@ enum class e_agent_state; class NocCostHandler; class NetPinTimingInvalidator; +class PlacerSetupSlacks; /** * These variables keep track of the number of swaps diff --git a/vpr/src/place/move_generators/move_generator.h b/vpr/src/place/move_generators/move_generator.h index e39493e16c6..5ca0b4ce1f5 100644 --- a/vpr/src/place/move_generators/move_generator.h +++ b/vpr/src/place/move_generators/move_generator.h @@ -3,7 +3,7 @@ #include "vpr_types.h" #include "move_utils.h" -#include "timing_place.h" +#include "PlacerCriticalities.h" #include diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ac049995347..e2a8e902e31 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -34,6 +34,7 @@ #include "vtr_math.h" #include "vtr_ndmatrix.h" #include "vtr_ndoffsetmatrix.h" +#include "PlacerCriticalities.h" #include diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 2b8e59af88f..6436265dbda 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -15,6 +15,7 @@ #include class PlacerState; +class PlacerCriticalities; /** * @brief The method used to calculate placement cost diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp index 60b009d85ae..a6e2858e577 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -1,7 +1,11 @@ + #include "place_checkpoint.h" + #include "noc_place_utils.h" #include "placer_state.h" #include "grid_block.h" +#include "PlacerCriticalities.h" +#include "PlacerSetupSlacks.h" float t_placement_checkpoint::get_cp_cpd() const { return cpd_; } diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 99c00d7e8e5..11924314c8b 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -27,6 +27,9 @@ #include "noc_place_utils.h" #include "net_cost_handler.h" #include "placement_log_printer.h" +#include 
"PlacerSetupSlacks.h" +#include "PlacerCriticalities.h" +#include "NetPinTimingInvalidator.h" class PlacementAnnealer; namespace vtr{ diff --git a/vpr/src/place/timing/timing_place.cpp b/vpr/src/place/timing/PlacerCriticalities.cpp similarity index 60% rename from vpr/src/place/timing/timing_place.cpp rename to vpr/src/place/timing/PlacerCriticalities.cpp index badd9d1fb61..8aa248abab6 100644 --- a/vpr/src/place/timing/timing_place.cpp +++ b/vpr/src/place/timing/PlacerCriticalities.cpp @@ -1,19 +1,9 @@ -/** - * @file timing_place.cpp - * @brief Stores the method definitions of classes defined in timing_place.h. - */ - -#include -#include "vtr_util.h" - -#include "vpr_types.h" -#include "vpr_utils.h" -#include "net_delay.h" -#include "timing_place.h" -#include "placer_state.h" +#include "PlacerCriticalities.h" #include "timing_info.h" +#include "timing_util.h" +#include "placer_state.h" ///@brief Allocates space for the timing_place_crit_ data structure. PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, @@ -161,110 +151,4 @@ void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float c */ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const { return vtr::make_range(cluster_pins_with_modified_criticality_); -} - -/**************************************/ - -///@brief Allocates space for the timing_place_setup_slacks_ data structure. -PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info) - : clb_nlist_(clb_nlist) - , pin_lookup_(netlist_pin_lookup) - , timing_info_(std::move(timing_info)) - , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { -} - -/** - * @brief Updated the setup slacks in the timing_place_setup_slacks_ data structure. 
- * - * If the setup slacks are not updated immediately after each time we call - * timing_info->update(), then timing_info->pins_with_modified_setup_slack() - * cannot accurately account for all the pins that need to be updated. - * - * In this case, `recompute_required` would be true, and we update all setup slacks - * from scratch. - */ -void PlacerSetupSlacks::update_setup_slacks() { - /* If update is not enabled, exit the routine. */ - if (!update_enabled) { - /* re-computation is required on the next iteration */ - recompute_required = true; - return; - } - - /* Determine what pins need updating */ - if (!recompute_required) { - incr_update_setup_slacks(); - } else { - recompute_setup_slacks(); - } - - /* Update the affected pins */ - for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { - ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); - int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); - - float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin); - - timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; - } - - /* Setup slacks updated. In sync with timing info. */ - /* Can be incrementally updated on the next iteration. */ - recompute_required = false; -} - -/** - * @brief Collect the cluster pins which need to be updated based on the latest timing - * analysis so that incremental updates to setup slacks can be performed. - * - * Note we use the set of pins reported by the *timing_info* as having modified - * setup slacks, rather than those marked as modified by the timing analyzer. 
- */ -void PlacerSetupSlacks::incr_update_setup_slacks() { - cluster_pins_with_modified_setup_slack_.clear(); - - for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) { - ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); - - //Some atom pins correspond to connections which are completely - //contained within a cluster, and hence have no corresponding - //clustered pin. - if (!clb_pin) continue; - - cluster_pins_with_modified_setup_slack_.insert(clb_pin); - } -} - -/** - * @brief Collect all the sink pins in the netlist and prepare them update. - * - * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). - */ -void PlacerSetupSlacks::recompute_setup_slacks() { - cluster_pins_with_modified_setup_slack_.clear(); - - /* Non-incremental: all sink pins need updating */ - for (ClusterNetId net_id : clb_nlist_.nets()) { - for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { - cluster_pins_with_modified_setup_slack_.insert(pin_id); - } - } -} - -///@brief Override the setup slack of a particular connection. -void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { - VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); - VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); - - timing_place_setup_slacks_[net_id][ipin] = slack_val; -} - -/** - * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) - * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). 
- */ -PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { - return vtr::make_range(cluster_pins_with_modified_setup_slack_); -} +} \ No newline at end of file diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h new file mode 100644 index 00000000000..7f7a1975ff2 --- /dev/null +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -0,0 +1,160 @@ + +#pragma once + +#include "vtr_vec_id_set.h" +#include "timing_info_fwd.h" +#include "clustered_netlist_utils.h" +#include "place_delay_model.h" +#include "vpr_net_pins_matrix.h" + +/** + * @brief Saves the placement criticality parameters + * + * crit_exponent: The criticality exponent used to sharpen the criticalities + * crit_limit: The limit to consider a pin as timing critical + */ +struct PlaceCritParams { + float crit_exponent; + float crit_limit; +}; + +/** + * @brief PlacerCriticalities returns the clustered netlist connection criticalities + * used by the placer ('sharpened' by a criticality exponent). + * + * Usage + * ===== + * This class also serves to map atom netlist level criticalites (i.e. on AtomPinIds) + * to the clustered netlist (i.e. ClusterPinIds) used during placement. + * + * Criticalities are updated by update_criticalities(), given that `update_enabled` is + * set to true. It will update criticalities based on the atom netlist connection + * criticalities provided by the passed in SetupTimingInfo. + * + * This process can be done incrementally, based on the modified connections/AtomPinIds + * returned by SetupTimingInfo. However, the set returned only reflects the connections + * changed by the last call to the timing info update. + * + * Therefore, if SetupTimingInfo is updated twice in succession without criticalities + * getting updated (update_enabled = false), the returned set cannot account for all + * the connections that have been modified. 
In this case, we flag `recompute_required` + * as true, and we recompute the criticalities for every connection to ensure that + * they are all up to date. Hence, each time update_setup_slacks_and_criticalities() + * is called, we assign `recompute_required` the opposite value of `update_enabled`. + * + * This class also maps/transforms the modified atom connections/pins returned by the + * timing info into modified clustered netlist connections/pins after calling + * update_criticalities(). The interface then enables users to iterate over this range + * via pins_with_modified_criticality(). This is useful for incrementally re-calculating + * the timing costs. + * + * The criticalities of individual connections can then be queried by calling the + * criticality() member function. + * + * Implementation + * ============== + * To support incremental re-calculation, the class saves the last criticality exponent + * passed to PlacerCriticalities::update_criticalities(). If the next update uses the same + * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated + * from scratch, since a change in exponent changes *all* criticalities. + */ +class PlacerCriticalities { + public: //Types + typedef vtr::vec_id_set::iterator pin_iterator; + typedef vtr::vec_id_set::iterator net_iterator; + + typedef vtr::Range pin_range; + typedef vtr::Range net_range; + + public: //Lifetime + PlacerCriticalities(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info); + PlacerCriticalities(const PlacerCriticalities&) = delete; + PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; + + public: //Accessors + ///@brief Returns the criticality of the specified connection. + float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; } + + /** + * @brief Returns the range of clustered netlist pins (i.e.
ClusterPinIds) which + * were modified by the last call to PlacerCriticalities::update_criticalities(). + */ + pin_range pins_with_modified_criticality() const; + + public: //Modifiers + /** + * @brief Updates criticalities based on the atom netlist criticalities + * provided by timing_info and the criticality exponent in crit_params. + * + * Should consistently call this method after the most recent timing analysis to + * keep the criticalities stored in this class in sync with the timing analyzer. + * If out of sync, then the criticalities cannot be incrementally updated + * during the next timing analysis iteration. + */ + void update_criticalities(const PlaceCritParams& crit_params, + PlacerState& placer_state); + + ///@brief Enable the recompute_required flag to enforce from scratch update. + void set_recompute_required(); + + ///@brief From scratch update. See PlacerCriticalities.cpp for more. + void recompute_criticalities(); + + ///@brief Override the criticality of a particular connection. + void set_criticality(ClusterNetId net, int ipin, float crit_val); + + ///@brief Set `update_enabled` to true. + void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false. + void disable_update() { update_enabled = false; } + + private: //Data + ///@brief The clb netlist in the placement context. + const ClusteredNetlist& clb_nlist_; + + ///@brief The lookup table that maps atom pins to clb pins. + const ClusteredPinAtomPinsLookup& pin_lookup_; + + ///@brief A pointer to the setup timing analyzer + std::shared_ptr timing_info_; + + /** + * @brief The matrix that stores criticality value for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix timing_place_crit_; + + /** + * The criticality exponent when update_criticalities() was last called + * (used to detect if incremental update can be used).
+ */ + float last_crit_exponent_ = std::numeric_limits::quiet_NaN(); + + ///@brief Set of pins with criticalities modified by last call to update_criticalities(). + vtr::vec_id_set cluster_pins_with_modified_criticality_; + + ///@brief Incremental update. See PlacerCriticalities.cpp for more. + void incr_update_criticalities(); + + ///@brief Flag that turns on/off the update_criticalities() routine. + bool update_enabled = true; + + /** + * @brief Flag that checks if criticalities need to be recomputed for all connections. + * + * Used by the method update_criticalities(). The incremental update is not possible + * if this method wasn't called after the previous timing info update. + */ + bool recompute_required = true; + + /** + * @brief if this is first time to call update_criticality + * + * This can be used for incremental criticality update and also incrementally update the highly critical pins + */ + bool first_time_update_criticality = true; +}; diff --git a/vpr/src/place/timing/PlacerSetupSlacks.cpp b/vpr/src/place/timing/PlacerSetupSlacks.cpp new file mode 100644 index 00000000000..ffc637f423b --- /dev/null +++ b/vpr/src/place/timing/PlacerSetupSlacks.cpp @@ -0,0 +1,109 @@ + +#include "PlacerSetupSlacks.h" + +#include "timing_util.h" +#include "timing_info.h" + +///@brief Allocates space for the timing_place_setup_slacks_ data structure. +PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info) + : clb_nlist_(clb_nlist) + , pin_lookup_(netlist_pin_lookup) + , timing_info_(std::move(timing_info)) + , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { +} + +/** + * @brief Updates the setup slacks in the timing_place_setup_slacks_ data structure.
+ * + * If the setup slacks are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_slack() + * cannot accurately account for all the pins that need to be updated. + * + * In this case, `recompute_required` would be true, and we update all setup slacks + * from scratch. + */ +void PlacerSetupSlacks::update_setup_slacks() { + /* If update is not enabled, exit the routine. */ + if (!update_enabled) { + /* re-computation is required on the next iteration */ + recompute_required = true; + return; + } + + /* Determine what pins need updating */ + if (!recompute_required) { + incr_update_setup_slacks(); + } else { + recompute_setup_slacks(); + } + + /* Update the affected pins */ + for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { + ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); + int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); + + float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info_, pin_lookup_, clb_pin); + + timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; + } + + /* Setup slacks updated. In sync with timing info. */ + /* Can be incrementally updated on the next iteration. */ + recompute_required = false; +} + +/** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to setup slacks can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * setup slacks, rather than those marked as modified by the timing analyzer. 
+ */ +void PlacerSetupSlacks::incr_update_setup_slacks() { + cluster_pins_with_modified_setup_slack_.clear(); + + for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_slack()) { + ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); + + //Some atom pins correspond to connections which are completely + //contained within a cluster, and hence have no corresponding + //clustered pin. + if (!clb_pin) continue; + + cluster_pins_with_modified_setup_slack_.insert(clb_pin); + } +} + +/** + * @brief Collect all the sink pins in the netlist and prepare them update. + * + * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). + */ +void PlacerSetupSlacks::recompute_setup_slacks() { + cluster_pins_with_modified_setup_slack_.clear(); + + /* Non-incremental: all sink pins need updating */ + for (ClusterNetId net_id : clb_nlist_.nets()) { + for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { + cluster_pins_with_modified_setup_slack_.insert(pin_id); + } + } +} + +///@brief Override the setup slack of a particular connection. +void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); + VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); + + timing_place_setup_slacks_[net_id][ipin] = slack_val; +} + +/** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). 
+ */ +PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { + return vtr::make_range(cluster_pins_with_modified_setup_slack_); +} diff --git a/vpr/src/place/timing/PlacerSetupSlacks.h b/vpr/src/place/timing/PlacerSetupSlacks.h new file mode 100644 index 00000000000..580a26db2c2 --- /dev/null +++ b/vpr/src/place/timing/PlacerSetupSlacks.h @@ -0,0 +1,108 @@ + +#pragma once + +#include "vtr_vec_id_set.h" +#include "timing_info_fwd.h" +#include "clustered_netlist_utils.h" +#include "place_delay_model.h" +#include "vpr_net_pins_matrix.h" + +/** + * @brief PlacerSetupSlacks returns the RAW setup slacks of clustered netlist connection. + * + * Usage + * ===== + * This class mirrors PlacerCriticalities by both its methods and its members. The only + * difference is that this class deals with RAW setup slacks returned by SetupTimingInfo + * rather than criticalities. See the documentation on PlacerCriticalities for more. + * + * RAW setup slacks are unlike criticalities. Their values are not confined between + * 0 and 1. Their values can be either positive or negative. + * + * This class also provides iterating over the clustered netlist connections/pins that + * have modified setup slacks by the last call to update_setup_slacks(). However, this + * utility is mainly used for incrementally committing the setup slack values into the + * structure `connection_setup_slack` used by many placer routines. 
+ */ +class PlacerSetupSlacks { + public: //Types + typedef vtr::vec_id_set::iterator pin_iterator; + typedef vtr::vec_id_set::iterator net_iterator; + + typedef vtr::Range pin_range; + typedef vtr::Range net_range; + + public: //Lifetime + PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info); + PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; + PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; + + public: //Accessors + ///@brief Returns the setup slack of the specified connection. + float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; } + + /** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). + */ + pin_range pins_with_modified_setup_slack() const; + + public: //Modifiers + /** + * @brief Updates setup slacks based on the atom netlist setup slacks provided + * by timing_info_. + * + * Should consistently call this method after the most recent timing analysis to + * keep the setup slacks stored in this class in sync with the timing analyzer. + * If out of sync, then the setup slacks cannot be incrementally updated + * during the next timing analysis iteration. + */ + void update_setup_slacks(); + + ///@brief Enable the recompute_required flag to enforce from scratch update. + void set_recompute_required() { recompute_required = true; } + + ///@brief Override the setup slack of a particular connection. + void set_setup_slack(ClusterNetId net, int ipin, float slack_val); + + ///@brief Set `update_enabled` to true. + void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false.
+ void disable_update() { update_enabled = false; } + + private: //Data + const ClusteredNetlist& clb_nlist_; + const ClusteredPinAtomPinsLookup& pin_lookup_; + std::shared_ptr timing_info_; + + /** + * @brief The matrix that stores raw setup slack values for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix timing_place_setup_slacks_; + + ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() + vtr::vec_id_set cluster_pins_with_modified_setup_slack_; + + ///@brief Incremental update. See PlacerSetupSlacks.cpp for more. + void incr_update_setup_slacks(); + + ///@brief From scratch update. See PlacerSetupSlacks.cpp for more. + void recompute_setup_slacks(); + + ///@brief Flag that turns on/off the update_setup_slacks() routine. + bool update_enabled = true; + + /** + * @brief Flag that checks if setup slacks need to be recomputed for all connections. + * + * Used by the method update_setup_slacks(). The incremental update is not possible + * if this method wasn't called after the previous timing info update. + */ + bool recompute_required = true; +}; + diff --git a/vpr/src/place/timing/place_timing_update.cpp b/vpr/src/place/timing/place_timing_update.cpp index c9c53b88f90..00cad07da7b 100644 --- a/vpr/src/place/timing/place_timing_update.cpp +++ b/vpr/src/place/timing/place_timing_update.cpp @@ -3,10 +3,15 @@ * @brief Defines the routines declared in place_timing_update.h.
*/ -#include "vtr_time.h" - #include "place_timing_update.h" + +#include "NetPinTimingInvalidator.h" +#include "PlacerCriticalities.h" +#include "PlacerSetupSlacks.h" #include "placer_state.h" +#include "place_util.h" +#include "vtr_time.h" + /* Routines local to place_timing_update.cpp */ static double comp_td_connection_cost(const PlaceDelayModel* delay_model, diff --git a/vpr/src/place/timing/place_timing_update.h b/vpr/src/place/timing/place_timing_update.h index 7944c4a7552..6ced93e4487 100644 --- a/vpr/src/place/timing/place_timing_update.h +++ b/vpr/src/place/timing/place_timing_update.h @@ -4,10 +4,15 @@ */ #pragma once -#include "timing_place.h" -#include "place_util.h" -#include "NetPinTimingInvalidator.h" +class PlacerState; +struct PlaceCritParams; +class PlacerCriticalities; +class PlacerSetupSlacks; +class NetPinTimingInvalidator; +class PlaceDelayModel; +class SetupTimingInfo; +struct t_placer_costs; ///@brief Initialize the timing information and structures in the placer. void initialize_timing_info(const PlaceCritParams& crit_params, diff --git a/vpr/src/place/timing/timing_place.h b/vpr/src/place/timing/timing_place.h index 71e144334ad..bd85061065f 100644 --- a/vpr/src/place/timing/timing_place.h +++ b/vpr/src/place/timing/timing_place.h @@ -8,7 +8,7 @@ * range from negative to positive values. Also maps * atom pin setup slacks to clb pin setup slacks. * @class PlacerCriticalities - * Query connection criticalities, which are calculuated + * Query connection criticalities, which are calculated * based on the raw setup slacks and ranges from 0 to 1. * Also maps atom pin crit. to clb pin crit.
* @class PlacerTimingCosts @@ -41,257 +41,6 @@ #include "place_delay_model.h" #include "vpr_net_pins_matrix.h" -/** - * @brief Saves the placement criticality parameters - * - * crit_exponent: The criticality exponent used to sharpen the criticalities - * crit_limit: The limit to consider a pin as timing critical - */ -struct PlaceCritParams { - float crit_exponent; - float crit_limit; -}; - -/** - * @brief PlacerCriticalities returns the clustered netlist connection criticalities - * used by the placer ('sharpened' by a criticality exponent). - * - * Usage - * ===== - * This class also serves to map atom netlist level criticalites (i.e. on AtomPinIds) - * to the clustered netlist (i.e. ClusterPinIds) used during placement. - * - * Criticalities are updated by update_criticalities(), given that `update_enabled` is - * set to true. It will update criticalities based on the atom netlist connection - * criticalities provided by the passed in SetupTimingInfo. - * - * This process can be done incrementally, based on the modified connections/AtomPinIds - * returned by SetupTimingInfo. However, the set returned only reflects the connections - * changed by the last call to the timing info update. - * - * Therefore, if SetupTimingInfo is updated twice in succession without criticalities - * getting updated (update_enabled = false), the returned set cannot account for all - * the connections that have been modified. In this case, we flag `recompute_required` - * as false, and we recompute the criticalities for every connection to ensure that - * they are all up to date. Hence, each time update_setup_slacks_and_criticalities() - * is called, we assign `recompute_required` the opposite value of `update_enabled`. - * - * This class also maps/transforms the modified atom connections/pins returned by the - * timing info into modified clustered netlist connections/pins after calling - * update_criticalities(). 
The interface then enables users to iterate over this range - * via pins_with_modified_criticalities(). This is useful for incrementally re-calculating - * the timing costs. - * - * The criticalities of individual connections can then be queried by calling the - * criticality() member function. - * - * Implementation - * ============== - * To support incremental re-calculation, the class saves the last criticality exponent - * passed to PlacerCriticalities::update_criticalites(). If the next update uses the same - * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated - * from scratch, since a change in exponent changes *all* criticalities. - */ -class PlacerCriticalities { - public: //Types - typedef vtr::vec_id_set::iterator pin_iterator; - typedef vtr::vec_id_set::iterator net_iterator; - - typedef vtr::Range pin_range; - typedef vtr::Range net_range; - - public: //Lifetime - PlacerCriticalities(const ClusteredNetlist& clb_nlist, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info); - PlacerCriticalities(const PlacerCriticalities&) = delete; - PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; - - public: //Accessors - ///@brief Returns the criticality of the specified connection. - float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; } - - /** - * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which - * were modified by the last call to PlacerCriticalities::update_criticalities(). - */ - pin_range pins_with_modified_criticality() const; - - public: //Modifiers - /** - * @brief Updates criticalities based on the atom netlist criticalitites - * provided by timing_info and the provided criticality_exponent. - * - * Should consistently call this method after the most recent timing analysis to - * keep the criticalities stored in this class in sync with the timing analyzer. 
- * If out of sync, then the criticalities cannot be incrementally updated on - * during the next timing analysis iteration. - */ - void update_criticalities(const PlaceCritParams& crit_params, - PlacerState& placer_state); - - ///@bried Enable the recompute_required flag to enforce from scratch update. - void set_recompute_required(); - - ///@brief From scratch update. See timing_place.cpp for more. - void recompute_criticalities(); - - ///@brief Override the criticality of a particular connection. - void set_criticality(ClusterNetId net, int ipin, float crit_val); - - ///@brief Set `update_enabled` to true. - void enable_update() { update_enabled = true; } - - ///@brief Set `update_enabled` to true. - void disable_update() { update_enabled = false; } - - private: //Data - ///@brief The clb netlist in the placement context. - const ClusteredNetlist& clb_nlist_; - - ///@brief The lookup table that maps atom pins to clb pins. - const ClusteredPinAtomPinsLookup& pin_lookup_; - - ///@brief A pointer to the setup timing analyzer - std::shared_ptr timing_info_; - - /** - * @brief The matrix that stores criticality value for each connection. - * - * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] - */ - ClbNetPinsMatrix timing_place_crit_; - - /** - * The criticality exponent when update_criticalites() was last called - * (used to detect if incremental update can be used). - */ - float last_crit_exponent_ = std::numeric_limits::quiet_NaN(); - - ///@brief Set of pins with criticaltites modified by last call to update_criticalities(). - vtr::vec_id_set cluster_pins_with_modified_criticality_; - - ///@brief Incremental update. See timing_place.cpp for more. - void incr_update_criticalities(); - - ///@brief Flag that turns on/off the update_criticalities() routine. - bool update_enabled = true; - - /** - * @brief Flag that checks if criticalities need to be recomputed for all connections. - * - * Used by the method update_criticalities(). 
They incremental update is not possible - * if this method wasn't called updated after the previous timing info update. - */ - bool recompute_required = true; - - /** - * @brief if this is first time to call update_criticality - * - * This can be used for incremental criticality update and also incrementally update the highly critical pins - */ - bool first_time_update_criticality = true; -}; - -/** - * @brief PlacerSetupSlacks returns the RAW setup slacks of clustered netlist connection. - * - * Usage - * ===== - * This class mirrors PlacerCriticalities by both its methods and its members. The only - * difference is that this class deals with RAW setup slacks returned by SetupTimingInfo - * rather than criticalities. See the documentation on PlacerCriticalities for more. - * - * RAW setup slacks are unlike criticalities. Their values are not confined between - * 0 and 1. Their values can be either positive or negative. - * - * This class also provides iterating over the clustered netlist connections/pins that - * have modified setup slacks by the last call to update_setup_slacks(). However, this - * utility is mainly used for incrementally committing the setup slack values into the - * structure `connection_setup_slack` used by many placer routines. - */ -class PlacerSetupSlacks { - public: //Types - typedef vtr::vec_id_set::iterator pin_iterator; - typedef vtr::vec_id_set::iterator net_iterator; - - typedef vtr::Range pin_range; - typedef vtr::Range net_range; - - public: //Lifetime - PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info); - PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; - PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; - - public: //Accessors - ///@brief Returns the setup slack of the specified connection. 
- float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; } - - /** - * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) - * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). - */ - pin_range pins_with_modified_setup_slack() const; - - public: //Modifiers - /** - * @brief Updates setup slacks based on the atom netlist setup slacks provided - * by timing_info_. - * - * Should consistently call this method after the most recent timing analysis to - * keep the setup slacks stored in this class in sync with the timing analyzer. - * If out of sync, then the setup slacks cannot be incrementally updated on - * during the next timing analysis iteration. - */ - void update_setup_slacks(); - - ///@bried Enable the recompute_required flag to enforce from scratch update. - void set_recompute_required() { recompute_required = true; } - - ///@brief Override the setup slack of a particular connection. - void set_setup_slack(ClusterNetId net, int ipin, float slack_val); - - ///@brief Set `update_enabled` to true. - void enable_update() { update_enabled = true; } - - ///@brief Set `update_enabled` to true. - void disable_update() { update_enabled = false; } - - private: //Data - const ClusteredNetlist& clb_nlist_; - const ClusteredPinAtomPinsLookup& pin_lookup_; - std::shared_ptr timing_info_; - - /** - * @brief The matrix that stores raw setup slack values for each connection. - * - * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] - */ - ClbNetPinsMatrix timing_place_setup_slacks_; - - ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() - vtr::vec_id_set cluster_pins_with_modified_setup_slack_; - - ///@brief Incremental update. See timing_place.cpp for more. - void incr_update_setup_slacks(); - - ///@brief Incremental update. See timing_place.cpp for more. 
- void recompute_setup_slacks(); - - ///@brief Flag that turns on/off the update_setup_slacks() routine. - bool update_enabled = true; - - /** - * @brief Flag that checks if setup slacks need to be recomputed for all connections. - * - * Used by the method update_setup_slacks(). They incremental update is not possible - * if this method wasn't called updated after the previous timing info update. - */ - bool recompute_required = true; -}; - /** * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from: * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]. @@ -541,7 +290,7 @@ class PlacerTimingCosts { double node_cost = total_cost_recurr(left_child(inode)) + total_cost_recurr(right_child(inode)); - //Save intermedate cost at this node + //Save intermediate cost at this node connection_costs_[inode] = node_cost; return node_cost; @@ -563,7 +312,7 @@ class PlacerTimingCosts { ///@brief Friend-ed so it can call invalidate(). friend ConnectionProxy; - void invalidate(double* invalidated_cost) { + void invalidate(const double* invalidated_cost) { //Check pointer within range of internal storage VTR_ASSERT_SAFE_MSG( invalidated_cost >= &connection_costs_[0], @@ -632,12 +381,12 @@ class PlacerTimingCosts { * the tree are the intermediate nodes. * * The methods left_child()/right_child()/parent() can be used - * to traverse the tree by indicies into this vector. + * to traverse the tree by indices into this vector. */ std::vector connection_costs_; /** - * @brief Vector storing the indicies of the first connection + * @brief Vector storing the indices of the first connection * for each net in the netlist, used for indexing by net. 
*/ vtr::vector net_start_indicies_; From a5a036f7d0fecc52913e39a29a33775903ffa430 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 1 Dec 2024 17:57:29 -0500 Subject: [PATCH 19/39] move highly_crit_pins from PlacerMoveContext to PlacerCriticalities --- vpr/src/place/annealer.cpp | 4 +- .../centroid_move_generator.cpp | 1 + .../critical_uniform_move_generator.cpp | 5 +- .../feasible_region_move_generator.cpp | 1 + .../move_generators/median_move_generator.cpp | 1 + .../uniform_move_generator.cpp | 1 + .../weighted_median_move_generator.cpp | 1 + vpr/src/place/move_utils.cpp | 31 +++++---- vpr/src/place/move_utils.h | 4 ++ vpr/src/place/placer_state.h | 3 - vpr/src/place/timing/PlacerCriticalities.cpp | 63 ++++++------------- vpr/src/place/timing/PlacerCriticalities.h | 34 ++++++++-- vpr/src/place/timing/place_timing_update.cpp | 8 +-- vpr/src/place/timing/place_timing_update.h | 3 +- 14 files changed, 82 insertions(+), 78 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 42fd3356709..e6e0ffc85dd 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -490,7 +490,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, criticalities_->disable_update(); setup_slacks_->enable_update(); update_timing_classes(crit_params, timing_info_, criticalities_, - setup_slacks_, pin_timing_invalidator_, placer_state_); + setup_slacks_, pin_timing_invalidator_); /* Get the setup slack analysis cost */ //TODO: calculate a weighted average of the slack cost and wiring cost @@ -594,7 +594,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, // Revert the timing update update_timing_classes(crit_params, timing_info_, criticalities_, - setup_slacks_, pin_timing_invalidator_, placer_state_); + setup_slacks_, pin_timing_invalidator_); VTR_ASSERT_SAFE_MSG( verify_connection_setup_slacks(setup_slacks_, placer_state_), diff --git 
a/vpr/src/place/move_generators/centroid_move_generator.cpp b/vpr/src/place/move_generators/centroid_move_generator.cpp index 45ba9121719..767fbf2ce7e 100644 --- a/vpr/src/place/move_generators/centroid_move_generator.cpp +++ b/vpr/src/place/move_generators/centroid_move_generator.cpp @@ -44,6 +44,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/false, + /*placer_criticalities=*/nullptr, /*net_from=*/nullptr, /*pin_from=*/nullptr, placer_state, diff --git a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp index 7a1d39ed308..ab1039ae3d0 100644 --- a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp +++ b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp @@ -13,8 +13,8 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved t_propose_action& proposed_action, float rlim, const t_placer_opts& placer_opts, - const PlacerCriticalities* /*criticalities*/) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const PlacerCriticalities* criticalities) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& placer_state = placer_state_.get(); const auto& block_locs = placer_state.block_locs(); const auto& blk_loc_registry = placer_state.blk_loc_registry(); @@ -25,6 +25,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/true, + criticalities, &net_from, &pin_from, placer_state, diff --git a/vpr/src/place/move_generators/feasible_region_move_generator.cpp b/vpr/src/place/move_generators/feasible_region_move_generator.cpp index 75210dafd43..1c719a7b0ff 100644 --- 
a/vpr/src/place/move_generators/feasible_region_move_generator.cpp +++ b/vpr/src/place/move_generators/feasible_region_move_generator.cpp @@ -30,6 +30,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/true, + criticalities, &net_from, &pin_from, placer_state, diff --git a/vpr/src/place/move_generators/median_move_generator.cpp b/vpr/src/place/move_generators/median_move_generator.cpp index 2e982ac6425..99c1b892e17 100644 --- a/vpr/src/place/move_generators/median_move_generator.cpp +++ b/vpr/src/place/move_generators/median_move_generator.cpp @@ -28,6 +28,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/false, + /*placer_criticalities=*/nullptr, /*net_from=*/nullptr, /*pin_from=*/nullptr, placer_state, diff --git a/vpr/src/place/move_generators/uniform_move_generator.cpp b/vpr/src/place/move_generators/uniform_move_generator.cpp index 6c6e283ba94..7190918aba3 100644 --- a/vpr/src/place/move_generators/uniform_move_generator.cpp +++ b/vpr/src/place/move_generators/uniform_move_generator.cpp @@ -24,6 +24,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/false, + /*placer_criticalities=*/nullptr, /*net_from=*/nullptr, /*pin_from=*/nullptr, placer_state, diff --git a/vpr/src/place/move_generators/weighted_median_move_generator.cpp b/vpr/src/place/move_generators/weighted_median_move_generator.cpp index b391509f5c3..de949d37a75 100644 --- a/vpr/src/place/move_generators/weighted_median_move_generator.cpp +++ b/vpr/src/place/move_generators/weighted_median_move_generator.cpp @@ -30,6 +30,7 @@ e_create_move 
WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& ClusterBlockId b_from = propose_block_to_move(placer_opts, proposed_action.logical_blk_type_index, /*highly_crit_block=*/false, + /*placer_criticalities=*/nullptr, /*net_from=*/nullptr, /*pin_from=*/nullptr, placer_state, diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index b5efb699fc7..78623200f42 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -547,16 +547,17 @@ void enable_placer_debug(const t_placer_opts& placer_opts, ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, bool highly_crit_block, + const PlacerCriticalities* placer_criticalities, ClusterNetId* net_from, int* pin_from, const PlacerState& placer_state, vtr::RngContainer& rng) { ClusterBlockId b_from = ClusterBlockId::INVALID(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); if (logical_blk_type_index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, placer_state, rng); + b_from = pick_from_highly_critical_block(*net_from, *pin_from, placer_state, *placer_criticalities, rng); } else { b_from = pick_from_block(rng); } @@ -567,7 +568,7 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, } } else { //If the block type is specified, choose a random block with blk_type to be swapped with another random block if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, rng); + b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, *placer_criticalities, rng); } else { b_from = pick_from_block(logical_blk_type_index, rng); } @@ -624,22 +625,24 @@ ClusterBlockId pick_from_block(const int logical_blk_type_index, 
vtr::RngContain ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const PlacerState& placer_state, + const PlacerCriticalities& placer_criticalities, vtr::RngContainer& rng) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = placer_state.move(); - auto& block_locs = placer_state.block_locs(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = placer_state.block_locs(); //Initialize critical net and pin to be invalid net_from = ClusterNetId::INVALID(); pin_from = -1; + const auto& highly_crit_pins = placer_criticalities.get_highly_critical_pins(); + //check if any critical block is available - if (place_move_ctx.highly_crit_pins.empty()) { + if (highly_crit_pins.empty()) { return ClusterBlockId::INVALID(); } //pick a random highly critical pin and find the nets driver block - std::pair crit_pin = place_move_ctx.highly_crit_pins[rng.irand(place_move_ctx.highly_crit_pins.size() - 1)]; + std::pair crit_pin = highly_crit_pins[rng.irand(highly_crit_pins.size() - 1)]; ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first); if (block_locs[b_from].is_fixed) { @@ -660,22 +663,24 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const int logical_blk_type_index, const PlacerState& placer_state, + const PlacerCriticalities& placer_criticalities, vtr::RngContainer& rng) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& place_move_ctx = placer_state.move(); - auto& block_locs = placer_state.block_locs(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& block_locs = placer_state.block_locs(); //Initialize critical net and pin to be invalid net_from = ClusterNetId::INVALID(); pin_from = -1; + const auto& highly_crit_pins = placer_criticalities.get_highly_critical_pins(); + //check if any critical block is available - if (place_move_ctx.highly_crit_pins.empty()) { + if (highly_crit_pins.empty()) { return 
ClusterBlockId::INVALID(); } //pick a random highly critical pin and find the nets driver block - std::pair crit_pin = place_move_ctx.highly_crit_pins[rng.irand(place_move_ctx.highly_crit_pins.size() - 1)]; + std::pair crit_pin = highly_crit_pins[rng.irand(highly_crit_pins.size() - 1)]; ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first); //Check if picked block type matches with the blk_type specified, and it is not fixed diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index de3d771e7ae..1aa5591f5c8 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -7,6 +7,7 @@ class PlacerState; class BlkLocRegistry; +class PlacerCriticalities; namespace vtr { class RngContainer; } @@ -171,6 +172,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk, ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int& logical_blk_type_index, bool highly_crit_block, + const PlacerCriticalities* placer_criticalities, ClusterNetId* net_from, int* pin_from, const PlacerState& placer_state, @@ -207,6 +209,7 @@ ClusterBlockId pick_from_block(int logical_blk_type_index, vtr::RngContainer& rn ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const PlacerState& placer_state, + const PlacerCriticalities& placer_criticalities, vtr::RngContainer& rng); /** @@ -220,6 +223,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, int logical_blk_type_index, const PlacerState& placer_state, + const PlacerCriticalities& placer_criticalities, vtr::RngContainer& rng); bool find_to_loc_uniform(t_logical_block_type_ptr type, diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 8f3b966a56d..35f1ec73766 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -145,9 +145,6 @@ struct PlacerMoveContext : public Context { std::vector X_coord; std::vector Y_coord; std::vector layer_coord; - - // Container to 
save the highly critical pins (higher than a timing criticality limit set by commandline option) - std::vector> highly_crit_pins; }; diff --git a/vpr/src/place/timing/PlacerCriticalities.cpp b/vpr/src/place/timing/PlacerCriticalities.cpp index 8aa248abab6..ccf1028283c 100644 --- a/vpr/src/place/timing/PlacerCriticalities.cpp +++ b/vpr/src/place/timing/PlacerCriticalities.cpp @@ -3,9 +3,7 @@ #include "timing_info.h" #include "timing_util.h" -#include "placer_state.h" -///@brief Allocates space for the timing_place_crit_ data structure. PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info) @@ -25,55 +23,51 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, * * If the criticality exponent has changed, we also need to update from scratch. */ -void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params, - PlacerState& placer_state) { - /* If update is not enabled, exit the routine. */ +void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params) { + // If update is not enabled, exit the routine. if (!update_enabled) { - /* re-computation is required on the next iteration */ + // re-computation is required on the next iteration recompute_required = true; return; } - /* Determine what pins need updating */ + // Determine what pins need updating if (!recompute_required && crit_params.crit_exponent == last_crit_exponent_) { incr_update_criticalities(); } else { recompute_criticalities(); - /* Record new criticality exponent */ + // Record new criticality exponent last_crit_exponent_ = crit_params.crit_exponent; } - auto& place_move_ctx = placer_state.mutable_move(); - /* Performs a 1-to-1 mapping from criticality to timing_place_crit_. 
* For every pin on every net (or, equivalently, for every tedge ending * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ - /* Update the affected pins */ + // Update the affected pins for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); - // Routing for placement is not flat (at least for the time being) - float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); + float clb_pin_crit = calculate_clb_net_pin_criticality(*timing_info_, pin_lookup_, ParentPinId(size_t(clb_pin)), /*is_flat=*/false); float new_crit = pow(clb_pin_crit, crit_params.crit_exponent); - /* - * Update the highly critical pins container + + /* Update the highly critical pins container * * If the old criticality < limit and the new criticality > limit --> add this pin to the highly critical pins * If the old criticality > limit and the new criticality < limit --> remove this pin from the highly critical pins */ if (!first_time_update_criticality) { if (new_crit > crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] < crit_params.crit_limit) { - place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); + highly_crit_pins.emplace_back(clb_net, pin_index_in_net); } else if (new_crit < crit_params.crit_limit && timing_place_crit_[clb_net][pin_index_in_net] > crit_params.crit_limit) { - place_move_ctx.highly_crit_pins.erase(std::remove(place_move_ctx.highly_crit_pins.begin(), place_move_ctx.highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), - place_move_ctx.highly_crit_pins.end()); + highly_crit_pins.erase(std::remove(highly_crit_pins.begin(), highly_crit_pins.end(), std::make_pair(clb_net, pin_index_in_net)), + highly_crit_pins.end()); } } else { if (new_crit > crit_params.crit_limit) { - 
place_move_ctx.highly_crit_pins.emplace_back(clb_net, pin_index_in_net); + highly_crit_pins.emplace_back(clb_net, pin_index_in_net); } } @@ -94,42 +88,25 @@ void PlacerCriticalities::set_recompute_required() { recompute_required = true; } -/** - * @brief Collect the cluster pins which need to be updated based on the latest timing - * analysis so that incremental updates to criticalities can be performed. - * - * Note we use the set of pins reported by the *timing_info* as having modified - * criticality, rather than those marked as modified by the timing analyzer. - * - * Since timing_info uses shifted/relaxed criticality (which depends on max required - * time and worst case slacks), additional nodes may be modified when updating the - * atom pin criticalities. - */ - void PlacerCriticalities::incr_update_criticalities() { cluster_pins_with_modified_criticality_.clear(); for (AtomPinId atom_pin : timing_info_->pins_with_modified_setup_criticality()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); - //Some atom pins correspond to connections which are completely - //contained within a cluster, and hence have no corresponding - //clustered pin. + /* Some atom pins correspond to connections which are completely + * contained within a cluster, and hence have no corresponding + * clustered pin. */ if (!clb_pin) continue; cluster_pins_with_modified_criticality_.insert(clb_pin); } } -/** - * @brief Collect all the sink pins in the netlist and prepare them update. - * - * For the incremental version, see PlacerCriticalities::incr_update_criticalities(). 
- */ void PlacerCriticalities::recompute_criticalities() { cluster_pins_with_modified_criticality_.clear(); - /* Non-incremental: all sink pins need updating */ + // Non-incremental: all sink pins need updating for (ClusterNetId net_id : clb_nlist_.nets()) { for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { cluster_pins_with_modified_criticality_.insert(pin_id); @@ -145,10 +122,6 @@ void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float c timing_place_crit_[net_id][ipin] = crit_val; } -/** - * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which - * were modified by the last call to PlacerCriticalities::update_criticalities(). - */ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const { return vtr::make_range(cluster_pins_with_modified_criticality_); -} \ No newline at end of file +} diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index 7f7a1975ff2..4a6c5518eb2 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -67,9 +67,12 @@ class PlacerCriticalities { typedef vtr::Range net_range; public: //Lifetime + + ///@brief Allocates space for the timing_place_crit_ data structure. 
PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info); + PlacerCriticalities(const PlacerCriticalities&) = delete; PlacerCriticalities& operator=(const PlacerCriticalities&) = delete; @@ -83,9 +86,12 @@ class PlacerCriticalities { */ pin_range pins_with_modified_criticality() const; + /// @brief Returns a constant reference to highly critical pins + const std::vector>& get_highly_critical_pins() const { return highly_crit_pins; } + public: //Modifiers /** - * @brief Updates criticalities based on the atom netlist criticalitites + * @brief Updates criticalities based on the atom netlist criticalities * provided by timing_info and the provided criticality_exponent. * * Should consistently call this method after the most recent timing analysis to @@ -93,13 +99,16 @@ class PlacerCriticalities { * If out of sync, then the criticalities cannot be incrementally updated on * during the next timing analysis iteration. */ - void update_criticalities(const PlaceCritParams& crit_params, - PlacerState& placer_state); + void update_criticalities(const PlaceCritParams& crit_params); ///@bried Enable the recompute_required flag to enforce from scratch update. void set_recompute_required(); - ///@brief From scratch update. See timing_place.cpp for more. + /** + * @brief Collect all the sink pins in the netlist and prepare them update. + * + * For the incremental version, see PlacerCriticalities::incr_update_criticalities(). + */ void recompute_criticalities(); ///@brief Override the criticality of a particular connection. @@ -134,10 +143,20 @@ class PlacerCriticalities { */ float last_crit_exponent_ = std::numeric_limits::quiet_NaN(); - ///@brief Set of pins with criticaltites modified by last call to update_criticalities(). + ///@brief Set of pins with criticalities modified by last call to update_criticalities(). 
vtr::vec_id_set cluster_pins_with_modified_criticality_; - ///@brief Incremental update. See timing_place.cpp for more. + /** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to criticalities can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * criticality, rather than those marked as modified by the timing analyzer. + * + * Since timing_info uses shifted/relaxed criticality (which depends on max required + * time and worst case slacks), additional nodes may be modified when updating the + * atom pin criticalities. + */ void incr_update_criticalities(); ///@brief Flag that turns on/off the update_criticalities() routine. @@ -157,4 +176,7 @@ class PlacerCriticalities { * This can be used for incremental criticality update and also incrementally update the highly critical pins */ bool first_time_update_criticality = true; + + /// @brief Saves the highly critical pins (higher than a timing criticality limit set by commandline option) + std::vector> highly_crit_pins; }; diff --git a/vpr/src/place/timing/place_timing_update.cpp b/vpr/src/place/timing/place_timing_update.cpp index 00cad07da7b..246db01f97d 100644 --- a/vpr/src/place/timing/place_timing_update.cpp +++ b/vpr/src/place/timing/place_timing_update.cpp @@ -99,8 +99,7 @@ void perform_full_timing_update(const PlaceCritParams& crit_params, timing_info, criticalities, setup_slacks, - pin_timing_invalidator, - placer_state); + pin_timing_invalidator); /* Update the timing cost with new connection criticalities. 
*/ update_timing_cost(delay_model, @@ -141,13 +140,12 @@ void update_timing_classes(const PlaceCritParams& crit_params, SetupTimingInfo* timing_info, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - PlacerState& placer_state) { + NetPinTimingInvalidator* pin_timing_invalidator) { /* Run STA to update slacks and adjusted/relaxed criticalities. */ timing_info->update(); /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ - criticalities->update_criticalities(crit_params, placer_state); + criticalities->update_criticalities(crit_params); /* Update the placer's raw setup slacks. */ setup_slacks->update_setup_slacks(); diff --git a/vpr/src/place/timing/place_timing_update.h b/vpr/src/place/timing/place_timing_update.h index 6ced93e4487..8e7a0dc1f46 100644 --- a/vpr/src/place/timing/place_timing_update.h +++ b/vpr/src/place/timing/place_timing_update.h @@ -39,8 +39,7 @@ void update_timing_classes(const PlaceCritParams& crit_params, SetupTimingInfo* timing_info, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - NetPinTimingInvalidator* pin_timing_invalidator, - PlacerState& placer_state); + NetPinTimingInvalidator* pin_timing_invalidator); ///@brief Updates the timing driven (td) costs. 
void update_timing_cost(const PlaceDelayModel* delay_model, From 871b2891e3fd9eb35c597aa16014410de2a20b2a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 1 Dec 2024 19:02:53 -0500 Subject: [PATCH 20/39] last commit before I go home it doesn't compile --- vpr/src/place/timing/PlacerCriticalities.cpp | 4 +- vpr/src/place/timing/PlacerSetupSlacks.cpp | 33 +- vpr/src/place/timing/PlacerSetupSlacks.h | 16 +- vpr/src/place/timing/PlacerTimingCosts.cpp | 60 ++++ vpr/src/place/timing/PlacerTimingCosts.h | 303 ++++++++++++++++ vpr/src/place/timing/timing_place.h | 358 ------------------- 6 files changed, 387 insertions(+), 387 deletions(-) create mode 100644 vpr/src/place/timing/PlacerTimingCosts.cpp create mode 100644 vpr/src/place/timing/PlacerTimingCosts.h diff --git a/vpr/src/place/timing/PlacerCriticalities.cpp b/vpr/src/place/timing/PlacerCriticalities.cpp index ccf1028283c..1f2e4f518e9 100644 --- a/vpr/src/place/timing/PlacerCriticalities.cpp +++ b/vpr/src/place/timing/PlacerCriticalities.cpp @@ -77,8 +77,8 @@ void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_param timing_place_crit_[clb_net][pin_index_in_net] = new_crit; } - /* Criticalities updated. In sync with timing info. */ - /* Can be incrementally updated on the next iteration */ + /* Criticalities updated. In sync with timing info. + * Can be incrementally updated on the next iteration */ recompute_required = false; first_time_update_criticality = false; diff --git a/vpr/src/place/timing/PlacerSetupSlacks.cpp b/vpr/src/place/timing/PlacerSetupSlacks.cpp index ffc637f423b..3a097a582ff 100644 --- a/vpr/src/place/timing/PlacerSetupSlacks.cpp +++ b/vpr/src/place/timing/PlacerSetupSlacks.cpp @@ -4,7 +4,7 @@ #include "timing_util.h" #include "timing_info.h" -///@brief Allocates space for the timing_place_setup_slacks_ data structure. 
+ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info) @@ -25,21 +25,21 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, * from scratch. */ void PlacerSetupSlacks::update_setup_slacks() { - /* If update is not enabled, exit the routine. */ + // If update is not enabled, exit the routine. if (!update_enabled) { - /* re-computation is required on the next iteration */ + // re-computation is required on the next iteration recompute_required = true; return; } - /* Determine what pins need updating */ + // Determine what pins need updating if (!recompute_required) { incr_update_setup_slacks(); } else { recompute_setup_slacks(); } - /* Update the affected pins */ + // Update the affected pins for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); @@ -49,18 +49,11 @@ void PlacerSetupSlacks::update_setup_slacks() { timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; } - /* Setup slacks updated. In sync with timing info. */ - /* Can be incrementally updated on the next iteration. */ + /* Setup slacks updated. In sync with timing info. + * Can be incrementally updated on the next iteration. */ recompute_required = false; } -/** - * @brief Collect the cluster pins which need to be updated based on the latest timing - * analysis so that incremental updates to setup slacks can be performed. - * - * Note we use the set of pins reported by the *timing_info* as having modified - * setup slacks, rather than those marked as modified by the timing analyzer. 
- */ void PlacerSetupSlacks::incr_update_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); @@ -76,15 +69,10 @@ void PlacerSetupSlacks::incr_update_setup_slacks() { } } -/** - * @brief Collect all the sink pins in the netlist and prepare them update. - * - * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). - */ void PlacerSetupSlacks::recompute_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); - /* Non-incremental: all sink pins need updating */ + // Non-incremental: all sink pins need updating for (ClusterNetId net_id : clb_nlist_.nets()) { for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { cluster_pins_with_modified_setup_slack_.insert(pin_id); @@ -92,7 +80,6 @@ void PlacerSetupSlacks::recompute_setup_slacks() { } } -///@brief Override the setup slack of a particular connection. void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); @@ -100,10 +87,6 @@ void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float sla timing_place_setup_slacks_[net_id][ipin] = slack_val; } -/** - * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) - * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). 
- */ PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { return vtr::make_range(cluster_pins_with_modified_setup_slack_); } diff --git a/vpr/src/place/timing/PlacerSetupSlacks.h b/vpr/src/place/timing/PlacerSetupSlacks.h index 580a26db2c2..7ffc450e94b 100644 --- a/vpr/src/place/timing/PlacerSetupSlacks.h +++ b/vpr/src/place/timing/PlacerSetupSlacks.h @@ -33,9 +33,11 @@ class PlacerSetupSlacks { typedef vtr::Range net_range; public: //Lifetime + ///@brief Allocates space for the timing_place_setup_slacks_ data structure. PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info); + PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; @@ -88,10 +90,20 @@ class PlacerSetupSlacks { ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() vtr::vec_id_set cluster_pins_with_modified_setup_slack_; - ///@brief Incremental update. See timing_place.cpp for more. + /** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to setup slacks can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * setup slacks, rather than those marked as modified by the timing analyzer. + */ void incr_update_setup_slacks(); - ///@brief Incremental update. See timing_place.cpp for more. + /** + * @brief Collect all the sink pins in the netlist and prepare them for update. + * + * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). + */ void recompute_setup_slacks(); ///@brief Flag that turns on/off the update_setup_slacks() routine. 
diff --git a/vpr/src/place/timing/PlacerTimingCosts.cpp b/vpr/src/place/timing/PlacerTimingCosts.cpp new file mode 100644 index 00000000000..c7fe35fc020 --- /dev/null +++ b/vpr/src/place/timing/PlacerTimingCosts.cpp @@ -0,0 +1,60 @@ + +#include "PlacerTimingCosts.h" + + +PlacerTimingCosts::PlacerTimingCosts(const ClusteredNetlist& nlist) { + auto nets = nlist.nets(); + + net_start_indicies_.resize(nets.size()); + + // Walk through the netlist to determine how many connections there are. + size_t iconn = 0; + for (ClusterNetId net : nets) { + // The placer always skips 'ignored' nets, so they don't affect timing + // costs, so we also skip them here + if (nlist.net_is_ignored(net)) { + net_start_indicies_[net] = OPEN; + continue; + } + + // Save the starting index of the current net's connections. + // We use a -1 offset, since sinks indexed from [1..num_net_pins-1] + // (there is no timing cost associated with net drivers) + net_start_indicies_[net] = iconn - 1; + + // Reserve space for all this net's connections + iconn += nlist.net_sinks(net).size(); + } + + const size_t num_connections = iconn; + + // Determine how many binary tree levels we need to have a leaf for each connection cost + size_t ilevel = 0; + while (num_nodes_in_level(ilevel) < num_connections) { + ++ilevel; + } + num_levels_ = ilevel + 1; + + size_t num_leaves = num_nodes_in_level(ilevel); + size_t num_nodes_in_previous_level = num_nodes_in_level(ilevel - 1); + + VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections"); + VTR_ASSERT_MSG(num_connections == 0 || num_nodes_in_previous_level < num_connections, + "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); + + // We don't need to store all possible leaves if we have fewer connections (i.e. 
bottom-right of tree is empty) + size_t last_level_unused_nodes = num_nodes_in_level(ilevel) - num_connections; + size_t num_nodes = num_nodes_up_to_level(ilevel) - last_level_unused_nodes; + + // Reserve space for connection costs and intermediate node values + connection_costs_ = std::vector(num_nodes, std::numeric_limits::quiet_NaN()); + + // The net start indices we calculated earlier didn't account for intermediate binary tree nodes + // Shift the start indices after the intermediate nodes + size_t num_intermediate_nodes = num_nodes_up_to_level(ilevel - 1); + for (ClusterNetId net : nets) { + if (nlist.net_is_ignored(net)) continue; + + net_start_indicies_[net] = net_start_indicies_[net] + num_intermediate_nodes; + } +} diff --git a/vpr/src/place/timing/PlacerTimingCosts.h b/vpr/src/place/timing/PlacerTimingCosts.h new file mode 100644 index 00000000000..f84f4446466 --- /dev/null +++ b/vpr/src/place/timing/PlacerTimingCosts.h @@ -0,0 +1,303 @@ + +#pragma once +#include "vtr_vec_id_set.h" +#include "timing_info_fwd.h" +#include "clustered_netlist_utils.h" +#include "place_delay_model.h" +#include "vpr_net_pins_matrix.h" + +/** + * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from: + * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]. + * + * It can be used similar to: + * + * PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct + * + * //... + * + * //Modify a connection cost + * connection_timing_costs[net_id][ipin] = new_cost; + * + * //Potentially other modifications... + * + * //Calculate the updated timing cost, of all connections, + * //incrementally based on modifications + * float total_timing_cost = connection_timing_costs.total_cost(); + * + * However behind the scenes PlacerTimingCosts tracks when connection costs are modified, + * and efficiently re-calculates the total timing cost incrementally based on the connections + * which have had their cost modified. 
+ * + * Implementation + * ============== + * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part + * of connection_costs_. To mimic 2d-array like access PlacerTimingCosts also uses two proxy + * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy + * respectively). + * + * The first part of connection_costs_ stores intermediate sums of the connection costs for + * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary + * tree, where leaves correspond to individual connection costs and intermediate nodes the + * partial sums of the connection costs. (The binary tree is stored implicitly in the + * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary + * tree we calculate the total timing cost over all connections. + * + * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset + * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up + * to the root) which have descendants (leaves) with modified connection costs. When the + * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost. + * Only invalidated nodes are traversed, with valid nodes just returning their previously + * calculated (and unchanged) value. + * + * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can + * be done in O(k log K) time. + * + * It is important to note that due to limited floating point precision, floating point + * arithmetic has an order dependence (due to round-off). Using a binary tree to total + * the timing connection costs allows us to incrementally update the total timing cost while + * maintaining the *same order of operations* as if it was re-computed from scratch. This + * ensures we *always* get consistent results regardless of what/when connections are changed. 
+ * + * Proxy Classes + * ============= + * NetProxy is returned by PlacerTimingCosts's operator[], and stores a pointer to the start of + * internal storage of that net's connection costs. + * + * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular + * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy + * supports assignment, allowing clients to modify the connection cost. It also detects if the + * assigned value differs from the previous value and if so, calls PlacerTimingCosts's + * invalidate() method on that connection cost. + * + * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN) + * so they will be re-calculated by PlacerTimingCosts' total_cost() method. + */ +class PlacerTimingCosts { + public: + PlacerTimingCosts() = default; + + PlacerTimingCosts(const ClusteredNetlist& nlist); + + /** + * @brief Proxy class representing a connection cost. + * + * Supports modification of connection cost while detecting + * changes and reporting them up to PlacerTimingCosts. + */ + class ConnectionProxy { + public: + ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost) + : timing_costs_(timing_costs) + , connection_cost_(connection_cost) {} + + ///@brief Allow clients to modify the connection cost via assignment. + ConnectionProxy& operator=(double new_cost) { + if (new_cost != connection_cost_) { + //If connection cost changed, update it, and mark it + //as invalidated + connection_cost_ = new_cost; + timing_costs_->invalidate(&connection_cost_); + } + return *this; + } + + /** + * @brief Support getting the current connection cost as a double. + * + * Useful for client code operating on the cost values (e.g. difference between costs). 
+ */ + operator double() const { + return connection_cost_; + } + + private: + PlacerTimingCosts* timing_costs_; + double& connection_cost_; + }; + + /** + * @brief Proxy class representing the connection costs of a net. + * + * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection. + */ + class NetProxy { + public: + NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs) + : timing_costs_(timing_costs) + , net_sink_costs_(net_sink_costs) {} + + ///@brief Indexes into the specific net pin/connection. + ConnectionProxy operator[](size_t ipin) { + return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); + } + + const ConnectionProxy operator[](size_t ipin) const { + return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); + } + + private: + PlacerTimingCosts* timing_costs_; + double* net_sink_costs_; + }; + + ///@brief Indexes into the specific net. + NetProxy operator[](ClusterNetId net_id) { + VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); + + double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; + return NetProxy(this, net_connection_costs); + } + + NetProxy operator[](ClusterNetId net_id) const { + VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); + + const double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; + return NetProxy(const_cast(this), const_cast(net_connection_costs)); + } + + void clear() { + connection_costs_.clear(); + net_start_indicies_.clear(); + } + + void swap(PlacerTimingCosts& other) { + std::swap(connection_costs_, other.connection_costs_); + std::swap(net_start_indicies_, other.net_start_indicies_); + std::swap(num_levels_, other.num_levels_); + } + + /** + * @brief Calculates the total cost of all connections efficiently + * in the face of modified connection costs. 
+ */ + double total_cost() { + float cost = total_cost_recurr(0); //Root + + VTR_ASSERT_DEBUG_MSG(cost == total_cost_from_scratch(0), + "Expected incremental and from-scratch costs to be consistent"); + + return cost; + } + + private: + ///@brief Recursively calculate and update the timing cost rooted at inode. + double total_cost_recurr(size_t inode) { + //Prune out-of-tree + if (inode > connection_costs_.size() - 1) { + return 0.; + } + + //Valid pre-calculated intermediate result or valid leaf + if (!std::isnan(connection_costs_[inode])) { + return connection_costs_[inode]; + } + + //Recompute recursively + double node_cost = total_cost_recurr(left_child(inode)) + + total_cost_recurr(right_child(inode)); + + //Save intermediate cost at this node + connection_costs_[inode] = node_cost; + + return node_cost; + } + + double total_cost_from_scratch(size_t inode) const { + //Prune out-of-tree + if (inode > connection_costs_.size() - 1) { + return 0.; + } + + //Recompute recursively + double node_cost = total_cost_from_scratch(left_child(inode)) + + total_cost_from_scratch(right_child(inode)); + + return node_cost; + } + + ///@brief Friend-ed so it can call invalidate(). 
+ friend ConnectionProxy; + + void invalidate(const double* invalidated_cost) { + //Check pointer within range of internal storage + VTR_ASSERT_SAFE_MSG( + invalidated_cost >= &connection_costs_[0], + "Connection cost pointer should be after start of internal storage"); + + VTR_ASSERT_SAFE_MSG( + invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], + "Connection cost pointer should be before end of internal storage"); + + size_t icost = invalidated_cost - &connection_costs_[0]; + + VTR_ASSERT_SAFE(icost >= num_nodes_up_to_level(num_levels_ - 2)); + + //Invalidate parent intermediate costs up to root or first + //already-invalidated parent + size_t iparent = parent(icost); + + while (!std::isnan(connection_costs_[iparent])) { + //Invalidate + connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); + + if (iparent == 0) { + break; //At root + } else { + //Next parent + iparent = parent(iparent); + } + } + + VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root"); + } + + size_t left_child(size_t i) const { + return 2 * i + 1; + } + + size_t right_child(size_t i) const { + return 2 * i + 2; + } + + size_t parent(size_t i) const { + return (i - 1) / 2; + } + + /** + * @brief Returns the number of nodes in ilevel'th level. + * + * If ilevel is negative, return 0, since the root shouldn't + * be counted as a leaf node candidate. + */ + size_t num_nodes_in_level(int ilevel) const { + return ilevel < 0 ? 0 : (2 << (ilevel)); + } + + ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive). + size_t num_nodes_up_to_level(int ilevel) const { + return (2 << (ilevel + 1)) - 1; + } + + private: + /** + * @brief Vector storing the implicit binary tree of connection costs. + * + * The actual connections are stored at the end of the vector + * (last level of the binary tree). The earlier portions of + * the tree are the intermediate nodes. 
+ * + * The methods left_child()/right_child()/parent() can be used + * to traverse the tree by indices into this vector. + */ + std::vector connection_costs_; + + /** + * @brief Vector storing the indices of the first connection + * for each net in the netlist, used for indexing by net. + */ + vtr::vector net_start_indicies_; + + ///@brief Number of levels in the binary tree. + size_t num_levels_ = 0; +}; diff --git a/vpr/src/place/timing/timing_place.h b/vpr/src/place/timing/timing_place.h index bd85061065f..54641947803 100644 --- a/vpr/src/place/timing/timing_place.h +++ b/vpr/src/place/timing/timing_place.h @@ -34,363 +34,5 @@ * calc_relaxed_criticality() in `timing_util.cpp`. */ -#pragma once -#include "vtr_vec_id_set.h" -#include "timing_info_fwd.h" -#include "clustered_netlist_utils.h" -#include "place_delay_model.h" -#include "vpr_net_pins_matrix.h" -/** - * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from: - * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]. - * - * It can be used similar to: - * - * PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct - * - * //... - * - * //Modify a connection cost - * connection_timing_costs[net_id][ipin] = new_cost; - * - * //Potentially other modifications... - * - * //Calculate the updated timing cost, of all connections, - * //incrementally based on modifications - * float total_timing_cost = connection_timing_costs.total_cost(); - * - * However behind the scenes PlacerTimingCosts tracks when connection costs are modified, - * and efficiently re-calculates the total timing cost incrementally based on the connections - * which have had their cost modified. - * - * Implementation - * ============== - * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part - * of connection_costs_. 
To mimic 2d-array like access PlacerTimingCosts also uses two proxy - * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy - * respectively). - * - * The first part of connection_costs_ stores intermediate sums of the connection costs for - * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary - * tree, where leaves correspond to individual connection costs and intermediate nodes the - * partial sums of the connection costs. (The binary tree is stored implicitly in the - * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary - * tree we calculate the total timing cost over all connections. - * - * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset - * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up - * to the root) which have ancestors (leaves) with modified connection costs. When the - * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost. - * Only invalidated nodes are traversed, with valid nodes just returning their previously - * calculated (and unchanged) value. - * - * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can - * be done in O(k log K) time. - * - * It is important to note that due to limited floating point precision, floating point - * arithmetic has an order dependence (due to round-off). Using a binary tree to total - * the timing connection costs allows us to incrementally update the total timing cost while - * maintianing the *same order of operations* as if it was re-computed from scratch. This - * ensures we *always* get consistent results regardless of what/when connections are changed. 
- * - * Proxy Classes - * ============= - * NetProxy is returned by PlacerTimingCost's operator[], and stores a pointer to the start of - * internal storage of that net's connection costs. - * - * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular - * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy - * supports assignment, allowing clients to modify the connection cost. It also detects if the - * assigned value differs from the previous value and if so, calls PlacerTimingCosts's - * invalidate() method on that connection cost. - * - * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN) - * so they will be re-calculated by PlacerTimingCosts' total_cost() method. - */ -class PlacerTimingCosts { - public: - PlacerTimingCosts() = default; - - PlacerTimingCosts(const ClusteredNetlist& nlist) { - auto nets = nlist.nets(); - - net_start_indicies_.resize(nets.size()); - - //Walk through the netlist to determine how many connections there are. - size_t iconn = 0; - for (ClusterNetId net : nets) { - //The placer always skips 'ignored' nets, so they don't affect timing - //costs, so we also skip them here - if (nlist.net_is_ignored(net)) { - net_start_indicies_[net] = OPEN; - continue; - } - - //Save the startind index of the current net's connections. 
- // We use a -1 offset, since sinks indexed from [1..num_net_pins-1] - // (there is no timing cost associated with net drivers) - net_start_indicies_[net] = iconn - 1; - - //Reserve space for all this net's connections - iconn += nlist.net_sinks(net).size(); - } - - size_t num_connections = iconn; - - //Determine how many binary tree levels we need to have a leaf - //for each connection cost - size_t ilevel = 0; - while (num_nodes_in_level(ilevel) < num_connections) { - ++ilevel; - } - num_levels_ = ilevel + 1; - - size_t num_leaves = num_nodes_in_level(ilevel); - size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1); - - VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections"); - VTR_ASSERT_MSG( - num_connections == 0 || num_level_before_leaves < num_connections, - "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); - - //We don't need to store all possible leaves if we have fewer connections - //(i.e. bottom-right of tree is empty) - size_t last_level_unused_nodes = num_nodes_in_level(ilevel) - num_connections; - size_t num_nodes = num_nodes_up_to_level(ilevel) - last_level_unused_nodes; - - //Reserve space for connection costs and intermediate node values - connection_costs_ = std::vector(num_nodes, std::numeric_limits::quiet_NaN()); - - //The net start indicies we calculated earlier didn't account for intermediate binary tree nodes - //Shift the start indicies after the intermediate nodes - size_t num_intermediate_nodes = num_nodes_up_to_level(ilevel - 1); - for (ClusterNetId net : nets) { - if (nlist.net_is_ignored(net)) continue; - - net_start_indicies_[net] = net_start_indicies_[net] + num_intermediate_nodes; - } - } - - /** - * @brief Proxy class representing a connection cost. - * - * Supports modification of connection cost while detecting - * changes and reporting them up to PlacerTimingCosts. 
- */ - class ConnectionProxy { - public: - ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost) - : timing_costs_(timing_costs) - , connection_cost_(connection_cost) {} - - ///@brief Allow clients to modify the connection cost via assignment. - ConnectionProxy& operator=(double new_cost) { - if (new_cost != connection_cost_) { - //If connection cost changed, update it, and mark it - //as invalidated - connection_cost_ = new_cost; - timing_costs_->invalidate(&connection_cost_); - } - return *this; - } - - /** - * @brief Support getting the current connection cost as a double. - * - * Useful for client code operating on the cost values (e.g. difference between costs). - */ - operator double() const { - return connection_cost_; - } - - private: - PlacerTimingCosts* timing_costs_; - double& connection_cost_; - }; - - /** - * @brief Proxy class representing the connection costs of a net. - * - * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection. - */ - class NetProxy { - public: - NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs) - : timing_costs_(timing_costs) - , net_sink_costs_(net_sink_costs) {} - - ///@brief Indexes into the specific net pin/connection. - ConnectionProxy operator[](size_t ipin) { - return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); - } - - const ConnectionProxy operator[](size_t ipin) const { - return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); - } - - private: - PlacerTimingCosts* timing_costs_; - double* net_sink_costs_; - }; - - ///@brief Indexes into the specific net. 
- NetProxy operator[](ClusterNetId net_id) { - VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); - - double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; - return NetProxy(this, net_connection_costs); - } - - NetProxy operator[](ClusterNetId net_id) const { - VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); - - const double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; - return NetProxy(const_cast(this), const_cast(net_connection_costs)); - } - - void clear() { - connection_costs_.clear(); - net_start_indicies_.clear(); - } - - void swap(PlacerTimingCosts& other) { - std::swap(connection_costs_, other.connection_costs_); - std::swap(net_start_indicies_, other.net_start_indicies_); - std::swap(num_levels_, other.num_levels_); - } - - /** - * @brief Calculates the total cost of all connections efficiently - * in the face of modified connection costs. - */ - double total_cost() { - float cost = total_cost_recurr(0); //Root - - VTR_ASSERT_DEBUG_MSG(cost == total_cost_from_scratch(0), - "Expected incremental and from-scratch costs to be consistent"); - - return cost; - } - - private: - ///@brief Recursively calculate and update the timing cost rooted at inode. 
- double total_cost_recurr(size_t inode) { - //Prune out-of-tree - if (inode > connection_costs_.size() - 1) { - return 0.; - } - - //Valid pre-calculated intermediate result or valid leaf - if (!std::isnan(connection_costs_[inode])) { - return connection_costs_[inode]; - } - - //Recompute recursively - double node_cost = total_cost_recurr(left_child(inode)) - + total_cost_recurr(right_child(inode)); - - //Save intermediate cost at this node - connection_costs_[inode] = node_cost; - - return node_cost; - } - - double total_cost_from_scratch(size_t inode) const { - //Prune out-of-tree - if (inode > connection_costs_.size() - 1) { - return 0.; - } - - //Recompute recursively - double node_cost = total_cost_from_scratch(left_child(inode)) - + total_cost_from_scratch(right_child(inode)); - - return node_cost; - } - - ///@brief Friend-ed so it can call invalidate(). - friend ConnectionProxy; - - void invalidate(const double* invalidated_cost) { - //Check pointer within range of internal storage - VTR_ASSERT_SAFE_MSG( - invalidated_cost >= &connection_costs_[0], - "Connection cost pointer should be after start of internal storage"); - - VTR_ASSERT_SAFE_MSG( - invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], - "Connection cost pointer should be before end of internal storage"); - - size_t icost = invalidated_cost - &connection_costs_[0]; - - VTR_ASSERT_SAFE(icost >= num_nodes_up_to_level(num_levels_ - 2)); - - //Invalidate parent intermediate costs up to root or first - //already-invalidated parent - size_t iparent = parent(icost); - - while (!std::isnan(connection_costs_[iparent])) { - //Invalidate - connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); - - if (iparent == 0) { - break; //At root - } else { - //Next parent - iparent = parent(iparent); - } - } - - VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root"); - } - - size_t left_child(size_t i) const { - return 2 * i + 1; 
- } - - size_t right_child(size_t i) const { - return 2 * i + 2; - } - - size_t parent(size_t i) const { - return (i - 1) / 2; - } - - /** - * @brief Returns the number of nodes in ilevel'th level. - * - * If ilevel is negative, return 0, since the root shouldn't - * be counted as a leaf node candidate. - */ - size_t num_nodes_in_level(int ilevel) const { - return ilevel < 0 ? 0 : (2 << (ilevel)); - } - - ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive). - size_t num_nodes_up_to_level(int ilevel) const { - return (2 << (ilevel + 1)) - 1; - } - - private: - /** - * @brief Vector storing the implicit binary tree of connection costs. - * - * The actual connections are stored at the end of the vector - * (last level of the binary tree). The earlier portions of - * the tree are the intermediate nodes. - * - * The methods left_child()/right_child()/parent() can be used - * to traverse the tree by indices into this vector. - */ - std::vector connection_costs_; - - /** - * @brief Vector storing the indices of the first connection - * for each net in the netlist, used for indexing by net. - */ - vtr::vector net_start_indicies_; - ///@brief Number of levels in the binary tree. 
- size_t num_levels_ = 0; -}; From 7d7d488d24650447d921c12f9530ff32b354bf40 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 2 Dec 2024 12:07:18 -0500 Subject: [PATCH 21/39] remove timing_place.h --- vpr/src/base/place_and_route.cpp | 16 ---- vpr/src/base/read_route.cpp | 3 +- vpr/src/place/analytic_placer.h | 1 - .../critical_uniform_move_generator.cpp | 2 + .../critical_uniform_move_generator.h | 1 - .../feasible_region_move_generator.h | 3 +- .../weighted_median_move_generator.h | 1 - vpr/src/place/move_utils.cpp | 2 +- vpr/src/place/net_cost_handler.h | 1 - vpr/src/place/placer.h | 1 - vpr/src/place/placer_state.h | 2 +- vpr/src/place/timing/PlacerCriticalities.h | 17 ++++ vpr/src/place/timing/PlacerTimingCosts.cpp | 76 +++++++++++++++-- vpr/src/place/timing/PlacerTimingCosts.h | 81 +++---------------- vpr/src/place/timing/timing_place.h | 38 --------- 15 files changed, 104 insertions(+), 141 deletions(-) delete mode 100644 vpr/src/place/timing/timing_place.h diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index 2ffeb26c240..7074d34662a 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -1,14 +1,9 @@ -#include #include -#include -#include #include #include #include -#include "vtr_util.h" -#include "vtr_memory.h" #include "vtr_assert.h" #include "vtr_log.h" @@ -16,7 +11,6 @@ #include "vpr_utils.h" #include "vpr_error.h" #include "globals.h" -#include "atom_netlist.h" #include "place_and_route.h" #include "place.h" #include "read_place.h" @@ -24,21 +18,11 @@ #include "route.h" #include "route_export.h" #include "draw.h" -#include "stats.h" -#include "check_route.h" #include "rr_graph.h" -#include "net_delay.h" -#include "timing_place.h" #include "read_xml_arch_file.h" -#include "echo_files.h" #include "route_common.h" -#include "place_macro.h" -#include "power.h" -#include "place_util.h" #include "RoutingDelayCalculator.h" -#include "timing_info.h" -#include "tatum/echo_writer.hpp" 
/******************* Subroutines local to this module ************************/ diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index d2d3bc14d54..6ac9d099c4b 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -39,12 +39,12 @@ #include "vpr_utils.h" #include "vpr_error.h" #include "place_and_route.h" -#include "timing_place.h" #include "route_export.h" #include "echo_files.h" #include "route_common.h" #include "route_tree.h" #include "read_route.h" +#include "d_ary_heap.h" #include "old_traceback.h" @@ -212,7 +212,6 @@ static void process_nets(const Netlist<>& net_list, std::ifstream& fp, ClusterNe process_nodes(net_list, fp, inet, filename, lineno); } input_tokens.clear(); - return; } static void process_nodes(const Netlist<>& net_list, std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno) { diff --git a/vpr/src/place/analytic_placer.h b/vpr/src/place/analytic_placer.h index b73b3486f57..b279b82e058 100644 --- a/vpr/src/place/analytic_placer.h +++ b/vpr/src/place/analytic_placer.h @@ -83,7 +83,6 @@ */ # include "vpr_context.h" -# include "timing_place.h" # include "PlacementDelayCalculator.h" /* diff --git a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp index ab1039ae3d0..7d36889c2f6 100644 --- a/vpr/src/place/move_generators/critical_uniform_move_generator.cpp +++ b/vpr/src/place/move_generators/critical_uniform_move_generator.cpp @@ -1,4 +1,6 @@ + #include "critical_uniform_move_generator.h" + #include "globals.h" #include "place_constraints.h" #include "placer_state.h" diff --git a/vpr/src/place/move_generators/critical_uniform_move_generator.h b/vpr/src/place/move_generators/critical_uniform_move_generator.h index dd4e5391474..68358552668 100644 --- a/vpr/src/place/move_generators/critical_uniform_move_generator.h +++ b/vpr/src/place/move_generators/critical_uniform_move_generator.h @@ -1,7 +1,6 @@ #ifndef 
VPR_CRITICAL_UNIFORM_MOVE_GEN_H #define VPR_CRITICAL_UNIFORM_MOVE_GEN_H #include "move_generator.h" -#include "timing_place.h" /** * @file diff --git a/vpr/src/place/move_generators/feasible_region_move_generator.h b/vpr/src/place/move_generators/feasible_region_move_generator.h index 702f8bdd26c..75304a60fd6 100644 --- a/vpr/src/place/move_generators/feasible_region_move_generator.h +++ b/vpr/src/place/move_generators/feasible_region_move_generator.h @@ -1,10 +1,9 @@ #ifndef VPR_FEASIBLE_REGION_MOVE_GEN_H #define VPR_FEASIBLE_REGION_MOVE_GEN_H #include "move_generator.h" -#include "timing_place.h" /** - * @brief Feasible Reion (FR) move genrator + * @brief Feasible Region (FR) move generator * * This move was originally defined by Chen et al . in "Simultaneous timing-driven placement and duplication", FPGA 2005 * diff --git a/vpr/src/place/move_generators/weighted_median_move_generator.h b/vpr/src/place/move_generators/weighted_median_move_generator.h index a6041f13e87..7da4be46bf6 100644 --- a/vpr/src/place/move_generators/weighted_median_move_generator.h +++ b/vpr/src/place/move_generators/weighted_median_move_generator.h @@ -2,7 +2,6 @@ #define VPR_WEIGHTED_MEDIAN_MOVE_GEN_H #include "move_generator.h" -#include "timing_place.h" /** * @brief The weighted median move generator diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 78623200f42..bab61cd0f6d 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -712,7 +712,7 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, // //Note that the range limit (rlim) is applied in a logical sense (i.e. 'compressed' grid space consisting //of the same block types, and not the physical grid space). This means, for example, that columns of 'rare' - //blocks (e.g. DSPs/RAMs) which are physically far appart but logically adjacent will be swappable even + //blocks (e.g. 
DSPs/RAMs) which are physically far apart but logically adjacent will be swappable even //at an rlim fo 1. // //This ensures that such blocks don't get locked down too early during placement (as would be the diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 6436265dbda..9fad2757681 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -7,7 +7,6 @@ #pragma once #include "place_delay_model.h" -#include "timing_place.h" #include "move_transactions.h" #include "place_util.h" #include "vtr_ndoffsetmatrix.h" diff --git a/vpr/src/place/placer.h b/vpr/src/place/placer.h index 11924314c8b..3fb89fb20f3 100644 --- a/vpr/src/place/placer.h +++ b/vpr/src/place/placer.h @@ -20,7 +20,6 @@ #include #include -#include "timing_place.h" #include "place_checkpoint.h" #include "PlacementDelayCalculator.h" #include "placer_state.h" diff --git a/vpr/src/place/placer_state.h b/vpr/src/place/placer_state.h index 35f1ec73766..a6896a359e8 100644 --- a/vpr/src/place/placer_state.h +++ b/vpr/src/place/placer_state.h @@ -12,7 +12,7 @@ #include "vpr_context.h" #include "vpr_net_pins_matrix.h" #include "vpr_types.h" -#include "timing_place.h" +#include "PlacerTimingCosts.h" /** * @brief State relating to the timing driven data. diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index 4a6c5518eb2..161423dba6a 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -57,6 +57,23 @@ struct PlaceCritParams { * passed to PlacerCriticalities::update_criticalites(). If the next update uses the same * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated * from scratch, since a change in exponent changes *all* criticalities. 
+ * + * Calculating criticalities: + * All the raw setup slack values across a single clock domain are gathered + * and rated from the best to the worst in terms of criticalities. In order + * to calculate criticalities, all the slack values need to be non-negative. + * Hence, if the worst slack is negative, all the slack values are shifted + * by the value of the worst slack so that the value is at least 0. If the + * worst slack is positive, then no shift happens. + * + * The best (shifted) slack (the most positive one) will have a criticality of 0. + * The worst (shifted) slack value will have a criticality of 1. + * + * Criticalities are used to calculate timing costs for each connection. + * The formula is cost = delay * criticality. + * + * For a more detailed description on how criticalities are calculated, see + * calc_relaxed_criticality() in `timing_util.cpp`. */ class PlacerCriticalities { public: //Types diff --git a/vpr/src/place/timing/PlacerTimingCosts.cpp b/vpr/src/place/timing/PlacerTimingCosts.cpp index c7fe35fc020..d8ad6afafab 100644 --- a/vpr/src/place/timing/PlacerTimingCosts.cpp +++ b/vpr/src/place/timing/PlacerTimingCosts.cpp @@ -1,11 +1,10 @@ #include "PlacerTimingCosts.h" - PlacerTimingCosts::PlacerTimingCosts(const ClusteredNetlist& nlist) { auto nets = nlist.nets(); - net_start_indicies_.resize(nets.size()); + net_start_indices_.resize(nets.size()); // Walk through the netlist to determine how many connections there are. size_t iconn = 0; @@ -13,14 +12,14 @@ PlacerTimingCosts::PlacerTimingCosts(const ClusteredNetlist& nlist) { // The placer always skips 'ignored' nets, so they don't affect timing // costs, so we also skip them here if (nlist.net_is_ignored(net)) { - net_start_indicies_[net] = OPEN; + net_start_indices_[net] = OPEN; continue; } // Save the starting index of the current net's connections. 
// We use a -1 offset, since sinks indexed from [1..num_net_pins-1] // (there is no timing cost associated with net drivers) - net_start_indicies_[net] = iconn - 1; + net_start_indices_[net] = iconn - 1; // Reserve space for all this net's connections iconn += nlist.net_sinks(net).size(); @@ -55,6 +54,73 @@ PlacerTimingCosts::PlacerTimingCosts(const ClusteredNetlist& nlist) { for (ClusterNetId net : nets) { if (nlist.net_is_ignored(net)) continue; - net_start_indicies_[net] = net_start_indicies_[net] + num_intermediate_nodes; + net_start_indices_[net] = net_start_indices_[net] + num_intermediate_nodes; + } +} + +double PlacerTimingCosts::total_cost_recurr(size_t inode) { + // Prune out-of-tree + if (inode > connection_costs_.size() - 1) { + return 0.; + } + + //Valid pre-calculated intermediate result or valid leaf + if (!std::isnan(connection_costs_[inode])) { + return connection_costs_[inode]; + } + + //Recompute recursively + double node_cost = total_cost_recurr(left_child(inode)) + + total_cost_recurr(right_child(inode)); + + //Save intermediate cost at this node + connection_costs_[inode] = node_cost; + + return node_cost; +} + +double PlacerTimingCosts::total_cost_from_scratch(size_t inode) const { + // Prune out-of-tree + if (inode > connection_costs_.size() - 1) { + return 0.; } + + //Recompute recursively + double node_cost = total_cost_from_scratch(left_child(inode)) + + total_cost_from_scratch(right_child(inode)); + + return node_cost; } + +void PlacerTimingCosts::invalidate(const double* invalidated_cost) { + //Check pointer within range of internal storage + VTR_ASSERT_SAFE_MSG( + invalidated_cost >= &connection_costs_[0], + "Connection cost pointer should be after start of internal storage"); + + VTR_ASSERT_SAFE_MSG( + invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], + "Connection cost pointer should be before end of internal storage"); + + size_t icost = invalidated_cost - &connection_costs_[0]; + + VTR_ASSERT_SAFE(icost >= 
num_nodes_up_to_level(num_levels_ - 2)); + + //Invalidate parent intermediate costs up to root or first + //already-invalidated parent + size_t iparent = parent(icost); + + while (!std::isnan(connection_costs_[iparent])) { + //Invalidate + connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); + + if (iparent == 0) { + break; //At root + } else { + //Next parent + iparent = parent(iparent); + } + } + + VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root"); +} \ No newline at end of file diff --git a/vpr/src/place/timing/PlacerTimingCosts.h b/vpr/src/place/timing/PlacerTimingCosts.h index f84f4446466..67523b7de66 100644 --- a/vpr/src/place/timing/PlacerTimingCosts.h +++ b/vpr/src/place/timing/PlacerTimingCosts.h @@ -143,27 +143,27 @@ class PlacerTimingCosts { ///@brief Indexes into the specific net. NetProxy operator[](ClusterNetId net_id) { - VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); + VTR_ASSERT_SAFE(net_start_indices_[net_id] >= 0); - double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; + double* net_connection_costs = &connection_costs_[net_start_indices_[net_id]]; return NetProxy(this, net_connection_costs); } NetProxy operator[](ClusterNetId net_id) const { - VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); + VTR_ASSERT_SAFE(net_start_indices_[net_id] >= 0); - const double* net_connection_costs = &connection_costs_[net_start_indicies_[net_id]]; + const double* net_connection_costs = &connection_costs_[net_start_indices_[net_id]]; return NetProxy(const_cast(this), const_cast(net_connection_costs)); } void clear() { connection_costs_.clear(); - net_start_indicies_.clear(); + net_start_indices_.clear(); } void swap(PlacerTimingCosts& other) { std::swap(connection_costs_, other.connection_costs_); - std::swap(net_start_indicies_, other.net_start_indicies_); + std::swap(net_start_indices_, other.net_start_indices_); std::swap(num_levels_, 
other.num_levels_); } @@ -182,75 +182,14 @@ class PlacerTimingCosts { private: ///@brief Recursively calculate and update the timing cost rooted at inode. - double total_cost_recurr(size_t inode) { - //Prune out-of-tree - if (inode > connection_costs_.size() - 1) { - return 0.; - } - - //Valid pre-calculated intermediate result or valid leaf - if (!std::isnan(connection_costs_[inode])) { - return connection_costs_[inode]; - } + double total_cost_recurr(size_t inode); - //Recompute recursively - double node_cost = total_cost_recurr(left_child(inode)) - + total_cost_recurr(right_child(inode)); - - //Save intermediate cost at this node - connection_costs_[inode] = node_cost; - - return node_cost; - } - - double total_cost_from_scratch(size_t inode) const { - //Prune out-of-tree - if (inode > connection_costs_.size() - 1) { - return 0.; - } - - //Recompute recursively - double node_cost = total_cost_from_scratch(left_child(inode)) - + total_cost_from_scratch(right_child(inode)); - - return node_cost; - } + double total_cost_from_scratch(size_t inode) const; ///@brief Friend-ed so it can call invalidate(). 
friend ConnectionProxy; - void invalidate(const double* invalidated_cost) { - //Check pointer within range of internal storage - VTR_ASSERT_SAFE_MSG( - invalidated_cost >= &connection_costs_[0], - "Connection cost pointer should be after start of internal storage"); - - VTR_ASSERT_SAFE_MSG( - invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], - "Connection cost pointer should be before end of internal storage"); - - size_t icost = invalidated_cost - &connection_costs_[0]; - - VTR_ASSERT_SAFE(icost >= num_nodes_up_to_level(num_levels_ - 2)); - - //Invalidate parent intermediate costs up to root or first - //already-invalidated parent - size_t iparent = parent(icost); - - while (!std::isnan(connection_costs_[iparent])) { - //Invalidate - connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); - - if (iparent == 0) { - break; //At root - } else { - //Next parent - iparent = parent(iparent); - } - } - - VTR_ASSERT_SAFE_MSG(std::isnan(connection_costs_[0]), "Invalidating any connection should have invalidated the root"); - } + void invalidate(const double* invalidated_cost); size_t left_child(size_t i) const { return 2 * i + 1; @@ -296,7 +235,7 @@ class PlacerTimingCosts { * @brief Vector storing the indices of the first connection * for each net in the netlist, used for indexing by net. */ - vtr::vector net_start_indicies_; + vtr::vector net_start_indices_; ///@brief Number of levels in the binary tree. size_t num_levels_ = 0; diff --git a/vpr/src/place/timing/timing_place.h b/vpr/src/place/timing/timing_place.h deleted file mode 100644 index 54641947803..00000000000 --- a/vpr/src/place/timing/timing_place.h +++ /dev/null @@ -1,38 +0,0 @@ -/** - * @file timing_place.h - * @brief Interface used by the VPR placer to query information - * from the Tatum timing analyzer. - * - * @class PlacerSetupSlacks - * Queries connection **RAW** setup slacks, which can - * range from negative to positive values. 
Also maps - * atom pin setup slacks to clb pin setup slacks. - * @class PlacerCriticalities - * Query connection criticalities, which are calculated - * based on the raw setup slacks and ranges from 0 to 1. - * Also maps atom pin crit. to clb pin crit. - * @class PlacerTimingCosts - * Hierarchical structure used by update_td_costs() to - * maintain the order of addition operation of float values - * (to avoid round-offs) while doing incremental updates. - * - * Calculating criticalities: - * All the raw setup slack values across a single clock domain are gathered - * and rated from the best to the worst in terms of criticalities. In order - * to calculate criticalities, all the slack values need to be non-negative. - * Hence, if the worst slack is negative, all the slack values are shifted - * by the value of the worst slack so that the value is at least 0. If the - * worst slack is positive, then no shift happens. - * - * The best (shifted) slack (the most positive one) will have a criticality of 0. - * The worst (shifted) slack value will have a criticality of 1. - * - * Criticalities are used to calculated timing costs for each connection. - * The formula is cost = delay * criticality. - * - * For a more detailed description on how criticalities are calculated, see - * calc_relaxed_criticality() in `timing_util.cpp`. 
- */ - - - From 246498d610d065a4d7ce8fcf991dd77a225dfefa Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 5 Dec 2024 16:06:40 -0500 Subject: [PATCH 22/39] make some methods static in PlacerTimingCosts --- vpr/src/place/timing/PlacerTimingCosts.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/timing/PlacerTimingCosts.h b/vpr/src/place/timing/PlacerTimingCosts.h index 67523b7de66..5e1415581c3 100644 --- a/vpr/src/place/timing/PlacerTimingCosts.h +++ b/vpr/src/place/timing/PlacerTimingCosts.h @@ -191,15 +191,15 @@ class PlacerTimingCosts { void invalidate(const double* invalidated_cost); - size_t left_child(size_t i) const { + static size_t left_child(size_t i) { return 2 * i + 1; } - size_t right_child(size_t i) const { + static size_t right_child(size_t i) { return 2 * i + 2; } - size_t parent(size_t i) const { + static size_t parent(size_t i) { return (i - 1) / 2; } @@ -209,12 +209,12 @@ class PlacerTimingCosts { * If ilevel is negative, return 0, since the root shouldn't * be counted as a leaf node candidate. */ - size_t num_nodes_in_level(int ilevel) const { + static size_t num_nodes_in_level(int ilevel) { return ilevel < 0 ? 0 : (2 << (ilevel)); } ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive). 
- size_t num_nodes_up_to_level(int ilevel) const { + static size_t num_nodes_up_to_level(int ilevel) { return (2 << (ilevel + 1)) - 1; } From d579250abf88c0c7f4180decc045b0298b4e685a Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 5 Dec 2024 16:11:48 -0500 Subject: [PATCH 23/39] delete PlacementDelayModelCreator's constructor --- .../place/timing/delay_model/PlacementDelayModelCreator.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h index 37a8e0d51c8..c92b67d4854 100644 --- a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h +++ b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h @@ -16,9 +16,8 @@ struct t_direct_inf; class PlacementDelayModelCreator { public: - // nothing to do in the constructor and destructor - PlacementDelayModelCreator() = default; - ~PlacementDelayModelCreator() = default; + // nothing to do in the constructor + PlacementDelayModelCreator() = delete; static std::unique_ptr create_delay_model(const t_placer_opts& placer_opts, const t_router_opts& router_opts, From 864bd282676157a692bbdc1cd86c6629d04264f8 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 5 Dec 2024 16:58:20 -0500 Subject: [PATCH 24/39] remove one of the signatures of pick_from_block and pick_from_highly_critical_block --- libs/libarchfpga/src/physical_types_util.h | 2 +- vpr/src/place/move_utils.cpp | 93 ++++------------------ vpr/src/place/move_utils.h | 17 ---- 3 files changed, 18 insertions(+), 94 deletions(-) diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index d4d5dc55924..a081683faeb 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -212,7 +212,7 @@ int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_ t_logical_block_type_ptr logical_block); /** * 
@brief Returns the physical pin index (within 'physical_tile') corresponding to the - * logical index ('pin' of the first instance of 'logical_block' within the physcial tile. + * logical index ('pin' of the first instance of 'logical_block' within the physical tile. * * This function is called before/during placement, when a sub tile index was not yet assigned. * diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index bab61cd0f6d..601d2dea852 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -552,26 +552,19 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, int* pin_from, const PlacerState& placer_state, vtr::RngContainer& rng) { - ClusterBlockId b_from = ClusterBlockId::INVALID(); const auto& cluster_ctx = g_vpr_ctx.clustering(); - if (logical_blk_type_index == -1) { //If the block type is unspecified, choose any random block to be swapped with another random block - if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, placer_state, *placer_criticalities, rng); - } else { - b_from = pick_from_block(rng); - } + ClusterBlockId b_from = ClusterBlockId::INVALID(); - //if a movable block found, set the block type - if (b_from) { - logical_blk_type_index = cluster_ctx.clb_nlist.block_type(b_from)->index; - } - } else { //If the block type is specified, choose a random block with blk_type to be swapped with another random block - if (highly_crit_block) { - b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, *placer_criticalities, rng); - } else { - b_from = pick_from_block(logical_blk_type_index, rng); - } + if (highly_crit_block) { + b_from = pick_from_highly_critical_block(*net_from, *pin_from, logical_blk_type_index, placer_state, *placer_criticalities, rng); + } else { + b_from = pick_from_block(logical_blk_type_index, rng); + } + + //if a movable block found, set the block type + if (b_from) { + 
logical_blk_type_index = cluster_ctx.clb_nlist.block_type(b_from)->index; } if constexpr (VTR_ENABLE_DEBUG_LOGGING_CONST_EXPR) { @@ -590,71 +583,20 @@ const std::vector& movable_blocks_per_type(const t_logical_block return place_ctx.movable_blocks_per_type[blk_type.index]; } -ClusterBlockId pick_from_block(vtr::RngContainer& rng) { - auto& place_ctx = g_vpr_ctx.placement(); - - // get the number of movable clustered blocks - const size_t n_movable_blocks = place_ctx.movable_blocks.size(); - - if (n_movable_blocks > 0) { - //Pick a movable block at random and return it - auto b_from = ClusterBlockId(rng.irand((int)n_movable_blocks - 1)); - return b_from; - } else { - //No movable blocks found - return ClusterBlockId::INVALID(); - } -} - ClusterBlockId pick_from_block(const int logical_blk_type_index, vtr::RngContainer& rng) { - auto& place_ctx = g_vpr_ctx.placement(); - - const auto& movable_blocks_of_type = place_ctx.movable_blocks_per_type[logical_blk_type_index]; - - if (movable_blocks_of_type.empty()) { - return ClusterBlockId::INVALID(); - } - - auto b_from = ClusterBlockId(movable_blocks_of_type[rng.irand((int)movable_blocks_of_type.size() - 1)]); - - return b_from; -} - -//Pick a random highly critical block to be swapped with another random block. 
-//If none is found return ClusterBlockId::INVALID() -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, - int& pin_from, - const PlacerState& placer_state, - const PlacerCriticalities& placer_criticalities, - vtr::RngContainer& rng) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& block_locs = placer_state.block_locs(); - - //Initialize critical net and pin to be invalid - net_from = ClusterNetId::INVALID(); - pin_from = -1; + const auto& place_ctx = g_vpr_ctx.placement(); - const auto& highly_crit_pins = placer_criticalities.get_highly_critical_pins(); + // if logical block type is specified, pick the 'from' block from block of that type; otherwise, + // pick it from all blocks + const auto& movable_blocks = (logical_blk_type_index < 0 )? place_ctx.movable_blocks : place_ctx.movable_blocks_per_type[logical_blk_type_index]; - //check if any critical block is available - if (highly_crit_pins.empty()) { + if (movable_blocks.empty()) { return ClusterBlockId::INVALID(); } - //pick a random highly critical pin and find the nets driver block - std::pair crit_pin = highly_crit_pins[rng.irand(highly_crit_pins.size() - 1)]; - ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first); - - if (block_locs[b_from].is_fixed) { - return ClusterBlockId::INVALID(); //Block is fixed, cannot move - } + ClusterBlockId b_from = movable_blocks[rng.irand((int)movable_blocks.size() - 1)]; - net_from = crit_pin.first; - pin_from = crit_pin.second; return b_from; - - //Unreachable statement - return ClusterBlockId::INVALID(); } //Pick a random highly critical block with a specified block type to be swapped with another random block. 
@@ -686,7 +628,7 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, //Check if picked block type matches with the blk_type specified, and it is not fixed //blk_type from propose move doesn't account for the EMPTY type auto b_from_type = cluster_ctx.clb_nlist.block_type(b_from); - if (b_from_type->index == logical_blk_type_index) { + if (b_from_type->index == logical_blk_type_index || logical_blk_type_index < 0) { if (block_locs[b_from].is_fixed) { return ClusterBlockId::INVALID(); //Block is fixed, cannot move } @@ -697,7 +639,6 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, } //No critical block with 'blk_type' found - //Unreachable statement return ClusterBlockId::INVALID(); } diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 1aa5591f5c8..2b3f8de0ce1 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -185,12 +185,6 @@ ClusterBlockId propose_block_to_move(const t_placer_opts& placer_opts, */ const std::vector& movable_blocks_per_type(const t_logical_block_type& blk_type); -/** - * @brief Select a random block to be swapped with another block - * - * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found - */ -ClusterBlockId pick_from_block(vtr::RngContainer& rng); /** * @brief Find a block with a specific block type to be swapped with another block @@ -201,17 +195,6 @@ ClusterBlockId pick_from_block(vtr::RngContainer& rng); */ ClusterBlockId pick_from_block(int logical_blk_type_index, vtr::RngContainer& rng); -/** - * @brief Select a random highly critical block to be swapped with another block - * - * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found - */ -ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, - int& pin_from, - const PlacerState& placer_state, - const PlacerCriticalities& placer_criticalities, - vtr::RngContainer& rng); 
- /** * @brief Find a block with a specific block type to be swapped with another block * From e3cad45d6ff576432a3d21157da5f4d539022dc2 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 5 Dec 2024 17:12:52 -0500 Subject: [PATCH 25/39] update comments for pick_from_block and pick_from_highly_critical_block --- vpr/src/place/move_utils.cpp | 6 ++---- vpr/src/place/move_utils.h | 12 ++++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 601d2dea852..d44c3611eca 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -586,8 +586,8 @@ const std::vector& movable_blocks_per_type(const t_logical_block ClusterBlockId pick_from_block(const int logical_blk_type_index, vtr::RngContainer& rng) { const auto& place_ctx = g_vpr_ctx.placement(); - // if logical block type is specified, pick the 'from' block from block of that type; otherwise, - // pick it from all blocks + // if logical block type is specified, pick the 'from' block from blocks of that type; + // otherwise, select it randomly from all blocks const auto& movable_blocks = (logical_blk_type_index < 0 )? place_ctx.movable_blocks : place_ctx.movable_blocks_per_type[logical_blk_type_index]; if (movable_blocks.empty()) { @@ -599,8 +599,6 @@ ClusterBlockId pick_from_block(const int logical_blk_type_index, vtr::RngContain return b_from; } -//Pick a random highly critical block with a specified block type to be swapped with another random block. 
-//If none is found return ClusterBlockId::INVALID() ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, const int logical_blk_type_index, diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 2b3f8de0ce1..ea9a90cc18d 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -189,18 +189,22 @@ const std::vector& movable_blocks_per_type(const t_logical_block /** * @brief Find a block with a specific block type to be swapped with another block * - * @param logical_blk_type_index: the agent type of the moving block. + * @param logical_blk_type_index The logical type of the moving block. If a negative value is passed, + * the block is selected randomly from all movable blocks and not from a specific type. + * @param rng A random number generator used to select a random block. * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found */ ClusterBlockId pick_from_block(int logical_blk_type_index, vtr::RngContainer& rng); /** - * @brief Find a block with a specific block type to be swapped with another block + * @brief Find a highly critical block with a specific block type to be swapped with another block. * - * @param logical_blk_type_index: the agent type of the moving block. + * @param logical_blk_type_index The logical type of the moving block. If a negative value is passed, + * the block is selected randomly from all movable blocks and not from a specific type. + * @param rng A random number generator used to select a random highly critical block. * - * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found + * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found. 
*/ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, int& pin_from, From b76b41ec27101be25368a00bc5530bfc50e3ecdf Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 15 Jan 2025 17:41:21 -0500 Subject: [PATCH 26/39] move PlacerSetupSlacks::update_setup_slacks() doxygen comment from .cpp to .h file --- vpr/src/place/timing/PlacerSetupSlacks.cpp | 10 ---------- vpr/src/place/timing/PlacerSetupSlacks.h | 13 +++++++++++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/vpr/src/place/timing/PlacerSetupSlacks.cpp b/vpr/src/place/timing/PlacerSetupSlacks.cpp index 3a097a582ff..18df9ed66d2 100644 --- a/vpr/src/place/timing/PlacerSetupSlacks.cpp +++ b/vpr/src/place/timing/PlacerSetupSlacks.cpp @@ -14,16 +14,6 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -/** - * @brief Updated the setup slacks in the timing_place_setup_slacks_ data structure. - * - * If the setup slacks are not updated immediately after each time we call - * timing_info->update(), then timing_info->pins_with_modified_setup_slack() - * cannot accurately account for all the pins that need to be updated. - * - * In this case, `recompute_required` would be true, and we update all setup slacks - * from scratch. - */ void PlacerSetupSlacks::update_setup_slacks() { // If update is not enabled, exit the routine. if (!update_enabled) { diff --git a/vpr/src/place/timing/PlacerSetupSlacks.h b/vpr/src/place/timing/PlacerSetupSlacks.h index 7ffc450e94b..5248bdebc70 100644 --- a/vpr/src/place/timing/PlacerSetupSlacks.h +++ b/vpr/src/place/timing/PlacerSetupSlacks.h @@ -56,10 +56,19 @@ class PlacerSetupSlacks { * @brief Updates setup slacks based on the atom netlist setup slacks provided * by timing_info_. * + * @note This function updates the setup slacks in the timing_place_setup_slacks_ + * data structure. 
+ * * Should consistently call this method after the most recent timing analysis to * keep the setup slacks stored in this class in sync with the timing analyzer. - * If out of sync, then the setup slacks cannot be incrementally updated on - * during the next timing analysis iteration. + * If out of sync, then the setup slacks cannot be incrementally updated during + * the next timing analysis iteration. + * + * If the setup slacks are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_slack() + * cannot accurately account for all the pins that need to be updated. + * In this case, `recompute_required` would be true, and we update all setup slacks + * from scratch. */ void update_setup_slacks(); From ec00c1e03eeb4ec4cb6afe769689bf2d681abae6 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 15 Jan 2025 19:00:28 -0500 Subject: [PATCH 27/39] add comments to pick_from_highly_critical_block() --- vpr/src/place/move_utils.cpp | 8 +++++--- vpr/src/place/move_utils.h | 6 ++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index d44c3611eca..6e79bdaac4d 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -623,12 +623,14 @@ ClusterBlockId pick_from_highly_critical_block(ClusterNetId& net_from, std::pair crit_pin = highly_crit_pins[rng.irand(highly_crit_pins.size() - 1)]; ClusterBlockId b_from = cluster_ctx.clb_nlist.net_driver_block(crit_pin.first); - //Check if picked block type matches with the blk_type specified, and it is not fixed - //blk_type from propose move doesn't account for the EMPTY type auto b_from_type = cluster_ctx.clb_nlist.block_type(b_from); + + // check if the type of the picked block matches with the specified block type + // when a block type is specified, i.e. 
when logical_blk_type_index >= 0 if (b_from_type->index == logical_blk_type_index || logical_blk_type_index < 0) { + // ensure that the selected block is not fixed if (block_locs[b_from].is_fixed) { - return ClusterBlockId::INVALID(); //Block is fixed, cannot move + return ClusterBlockId::INVALID(); // a fixed block can't be moved } net_from = crit_pin.first; diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index ea9a90cc18d..ba93014297a 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -200,8 +200,14 @@ ClusterBlockId pick_from_block(int logical_blk_type_index, vtr::RngContainer& rn /** * @brief Find a highly critical block with a specific block type to be swapped with another block. * + * @param net_from The clustered net id of the critical connection of the selected block by this function. + * To be filled by this function. + * @param pin_from The pin id of the critical connection of the selected block by this function. + * To be filled by this function. * @param logical_blk_type_index The logical type of the moving block. If a negative value is passed, * the block is selected randomly from all movable blocks and not from a specific type. + * @param placer_state Used to access the current placement's info, e.g. block locations and if they are fixed. + * @param placer_criticalities Holds the clustered netlist connection criticalities. * @param rng A random number generator used to select a random highly critical block. * * @return BlockId of the selected block, ClusterBlockId::INVALID() if no block with specified block type found. 
From 9933fc6dfc3f3f0997c08f4387a3938b20817e84 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 16 Jan 2025 11:23:48 -0500 Subject: [PATCH 28/39] make a paragraph in the big picture comment of PlacerCriticalities more clear --- vpr/src/place/timing/PlacerCriticalities.cpp | 10 ---------- vpr/src/place/timing/PlacerCriticalities.h | 21 +++++++++++++------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/vpr/src/place/timing/PlacerCriticalities.cpp b/vpr/src/place/timing/PlacerCriticalities.cpp index 1f2e4f518e9..4cbf1ec66ec 100644 --- a/vpr/src/place/timing/PlacerCriticalities.cpp +++ b/vpr/src/place/timing/PlacerCriticalities.cpp @@ -13,16 +13,6 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -/** - * @brief Updated the criticalities in the timing_place_crit_ data structure. - * - * If the criticalities are not updated immediately after each time we call - * timing_info->update(), then timing_info->pins_with_modified_setup_criticality() - * cannot accurately account for all the pins that need to be updated. In this case, - * `recompute_required` would be true, and we update all criticalities from scratch. - * - * If the criticality exponent has changed, we also need to update from scratch. - */ void PlacerCriticalities::update_criticalities(const PlaceCritParams& crit_params) { // If update is not enabled, exit the routine. if (!update_enabled) { diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index 161423dba6a..c134f9af056 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -33,14 +33,15 @@ struct PlaceCritParams { * * This process can be done incrementally, based on the modified connections/AtomPinIds * returned by SetupTimingInfo. 
However, the set returned only reflects the connections - * changed by the last call to the timing info update. + * changed by the last call to the timing info update (update_setup() method of SetupTimingInfo). * - * Therefore, if SetupTimingInfo is updated twice in succession without criticalities - * getting updated (update_enabled = false), the returned set cannot account for all - * the connections that have been modified. In this case, we flag `recompute_required` - * as false, and we recompute the criticalities for every connection to ensure that - * they are all up to date. Hence, each time update_setup_slacks_and_criticalities() - * is called, we assign `recompute_required` the opposite value of `update_enabled`. + * Therefore, if SetupTimingInfo is updated twice in a row without criticalities + * getting updated after the first update of SetupTimingInfo (PlacerCriticalities::update_enabled = false), + * the returned set of modified connections/AtomPinIds by SetupTimingInfo after its second update does not + * account for all the connections that have been modified. + * To address this issue, whenever update_criticalities() is called with flag update_enabled = false, + * we don't update criticalities and set flag recompute_required to true to remember that criticalities + * need to be recomputed from scratch in the first call to update_criticalities() with update_enabled = true. * * This class also maps/transforms the modified atom connections/pins returned by the * timing info into modified clustered netlist connections/pins after calling @@ -115,6 +116,12 @@ class PlacerCriticalities { * keep the criticalities stored in this class in sync with the timing analyzer. * If out of sync, then the criticalities cannot be incrementally updated on * during the next timing analysis iteration. 
+ * + * If the criticalities are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_criticality() + * cannot accurately account for all the pins that need to be updated. In this case, + * `recompute_required` would be true, and we update all criticalities from scratch. + * If the criticality exponent has changed, we also need to update from scratch. */ void update_criticalities(const PlaceCritParams& crit_params); From 463dd2f4b97d9c5ab67af3a12ae720dd091de588 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 16 Jan 2025 11:39:17 -0500 Subject: [PATCH 29/39] added parameter list to the doxygen comment of PlacerCriticalities constructor --- vpr/src/place/timing/PlacerCriticalities.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index c134f9af056..b03bda4eb87 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -86,7 +86,16 @@ class PlacerCriticalities { public: //Lifetime - ///@brief Allocates space for the timing_place_crit_ data structure. + /** + * @brief Allocates space for the timing_place_crit_ data structure. + * @param clb_nlist Used to lookup and iterate clustered netlist connections. + * @param netlist_pin_lookup Used to lookup Atom/Clustered pins connected to a Clustered/Atom pin. + * @param timing_info Holds setup timing info. + * + * @note timing_info may be shared by multiple objects with different lifetimes. + * To ensure timing_info is destroyed only after all its user objects are destructed, + * each user object should hold a shared_ptr to it. 
+ */ PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup, std::shared_ptr timing_info); From 174b9a4a1e357e5db175e5bf57462ae3e8bd286b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 16 Jan 2025 12:47:58 -0500 Subject: [PATCH 30/39] move DeltaDelayModel::read and DeltaDelayModel::write to its own file --- .../timing/delay_model/delta_delay_model.cpp | 87 +++++++++++++++++++ .../delay_model/override_delay_model.cpp | 67 -------------- 2 files changed, 87 insertions(+), 67 deletions(-) diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp index f4e202e7106..e8d56b09516 100644 --- a/vpr/src/place/timing/delay_model/delta_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/delta_delay_model.cpp @@ -3,6 +3,14 @@ #include "compute_delta_delays_utils.h" +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -46,3 +54,82 @@ void DeltaDelayModel::dump_echo(std::string filepath) const { vtr::fclose(f); } +void DeltaDelayModel::read(const std::string& file) { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; +#else + + // MmapFile object creates an mmap of the specified path, and will munmap + // when the object leaves scope. + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + + // FlatArrayMessageReader is used to read the message from the data array + // provided by MmapFile. 
+ ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + // When reading capnproto files the Reader object to use is named + // ::Reader. + // + // Initially this object is an empty VprDeltaDelayModel. + VprDeltaDelayModel::Reader model; + + // The reader.getRoot performs a cast from the generic capnproto to fit + // with the specified schema. + // + // Note that capnproto does not validate that the incoming data matches the + // schema. If this property is required, some form of check would be + // required. + model = reader.getRoot(); + + auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { + *out = in.getValue(); + }; + + // ToNdMatrix is a generic function for converting a Matrix capnproto + // to a vtr::NdMatrix. + // + // The user must supply the matrix dimension (4 in this case), the source + // capnproto type (VprFloatEntry), + // target C++ type (float), and a function to convert from the source capnproto + // type to the target C++ type (toFloat). + // + // The second argument should be of type Matrix::Reader where X is the + // capnproto element type. + ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); +#endif +} + +void DeltaDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "DeltaDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; +#else + + // MallocMessageBuilder object is the generic capnproto message builder, + // using malloc for buffer allocation. + ::capnp::MallocMessageBuilder builder; + + // initRoot returns a X::Builder object that can be used to set the + // fields in the message. + auto model = builder.initRoot(); + + auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { + out->setValue(in); + }; + + // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a + // Matrix message. 
It is the mirror function of ToNdMatrix described in + // read above. + auto delay_values = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, fromFloat); + + // writeMessageToFile writes message to the specified file. + writeMessageToFile(file, &builder); +#endif +} diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp index ed1e53fc58a..1135f85533b 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -204,19 +204,10 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr b * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. */ #ifndef VTR_ENABLE_CAPNPROTO - # define DISABLE_ERROR \ "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." -void DeltaDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::read " DISABLE_ERROR); -} - -void DeltaDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write " DISABLE_ERROR); -} - void OverrideDelayModel::read(const std::string& /*file*/) { VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); } @@ -237,64 +228,6 @@ static void FromFloat(VprFloatEntry::Builder* out, const float& in) { out->setValue(in); } -void DeltaDelayModel::read(const std::string& file) { - // MmapFile object creates an mmap of the specified path, and will munmap - // when the object leaves scope. - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - - // FlatArrayMessageReader is used to read the message from the data array - // provided by MmapFile. 
- ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - // When reading capnproto files the Reader object to use is named - // ::Reader. - // - // Initially this object is an empty VprDeltaDelayModel. - VprDeltaDelayModel::Reader model; - - // The reader.getRoot performs a cast from the generic capnproto to fit - // with the specified schema. - // - // Note that capnproto does not validate that the incoming data matches the - // schema. If this property is required, some form of check would be - // required. - model = reader.getRoot(); - - // ToNdMatrix is a generic function for converting a Matrix capnproto - // to a vtr::NdMatrix. - // - // The user must supply the matrix dimension (2 in this case), the source - // capnproto type (VprFloatEntry), - // target C++ type (flat), and a function to convert from the source capnproto - // type to the target C++ type (ToFloat). - // - // The second argument should be of type Matrix::Reader where X is the - // capnproto element type. - ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); -} - -void DeltaDelayModel::write(const std::string& file) const { - // MallocMessageBuilder object is the generate capnproto message builder, - // using malloc for buffer allocation. - ::capnp::MallocMessageBuilder builder; - - // initRoot returns a X::Builder object that can be used to set the - // fields in the message. - auto model = builder.initRoot(); - - // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a - // Matrix message. It is the mirror function of ToNdMatrix described in - // read above. - auto delay_values = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, FromFloat); - - // writeMessageToFile writes message to the specified file. 
- writeMessageToFile(file, &builder); -} - void OverrideDelayModel::read(const std::string& file) { MmapFile f(file); From 5be891c4b65251f27e86f52d32196dc93821ee87 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 16 Jan 2025 12:54:59 -0500 Subject: [PATCH 31/39] move #ifndef VTR_ENABLE_CAPNPROTO to inside function defs instead of defining them multiple times --- .../delay_model/override_delay_model.cpp | 54 ++++++++----------- .../timing/delay_model/simple_delay_model.cpp | 31 ++++------- 2 files changed, 33 insertions(+), 52 deletions(-) diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp index 1135f85533b..61acd2937b5 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -199,45 +199,25 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr b base_delay_model_ = std::move(base_delay_model_obj); } -/** - * When writing capnp targetted serialization, always allow compilation when - * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. - */ -#ifndef VTR_ENABLE_CAPNPROTO -# define DISABLE_ERROR \ - "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." - -void OverrideDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read " DISABLE_ERROR); -} - -void OverrideDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write " DISABLE_ERROR); -} - -#else /* VTR_ENABLE_CAPNPROTO */ - -static void ToFloat(float* out, const VprFloatEntry::Reader& in) { - // Getting a scalar field is always "get()". - *out = in.getValue(); -} - -static void FromFloat(VprFloatEntry::Builder* out, const float& in) { - // Setting a scalar field is always "set(value)". 
- out->setValue(in); -} - void OverrideDelayModel::read(const std::string& file) { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); +#else MmapFile f(file); /* Increase reader limit to 1G words to allow for large files. */ ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { + *out = in.getValue(); + }; + vtr::NdMatrix delays; auto model = reader.getRoot(); - ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), toFloat); base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); @@ -258,14 +238,24 @@ void OverrideDelayModel::read(const std::string& file) { } delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); +#endif } void OverrideDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); +#else ::capnp::MallocMessageBuilder builder; auto model = builder.initRoot(); + auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { + out->setValue(in); + }; + auto delays = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), fromFloat); // Non-scalar capnproto fields should be first initialized with // init(count), and then accessed from the returned @@ -285,6 +275,6 @@ void OverrideDelayModel::write(const std::string& file) const { } writeMessageToFile(file, &builder); +#endif } -#endif \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp index 445c7e81847..1fcd86eca64 100644 --- a/vpr/src/place/timing/delay_model/simple_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/simple_delay_model.cpp @@ -51,26 +51,12 @@ float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pi return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; } -/** - * When writing capnp targetted serialization, always allow compilation when - * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead. - */ +void SimpleDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO - -# define DISABLE_ERROR \ - "is disable because VTR_ENABLE_CAPNPROTO=OFF." \ - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable." 
- -void SimpleDelayModel::read(const std::string& /*file*/) { - VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::read " DISABLE_ERROR); -} - -void SimpleDelayModel::write(const std::string& /*file*/) const { - VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::write " DISABLE_ERROR); -} + VPR_THROW(VPR_ERROR_PLACE, + "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); #else - -void SimpleDelayModel::read(const std::string& file) { // MmapFile object creates an mmap of the specified path, and will munmap // when the object leaves scope. MmapFile f(file); @@ -111,9 +97,15 @@ void SimpleDelayModel::read(const std::string& file) { // The second argument should be of type Matrix::Reader where X is the // capnproto element type. ToNdMatrix<5, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); +#endif } void SimpleDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); +#else // MallocMessageBuilder object generates capnproto message builder, // using malloc for buffer allocation. ::capnp::MallocMessageBuilder builder; @@ -134,6 +126,5 @@ void SimpleDelayModel::write(const std::string& file) const { // writeMessageToFile writes message to the specified file. 
writeMessageToFile(file, &builder); +#endif } - -#endif \ No newline at end of file From 7ef8c397115004adbbc2687891395f6d7e43d1ea Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 16 Jan 2025 13:08:45 -0500 Subject: [PATCH 32/39] move delay_model directory from place/timing to under place/ --- .../PlacementDelayModelCreator.cpp | 80 ++ .../delay_model/PlacementDelayModelCreator.h | 30 + .../compute_delta_delays_utils.cpp | 968 ++++++++++++++++++ .../delay_model/compute_delta_delays_utils.h | 56 + .../place/delay_model/delta_delay_model.cpp | 135 +++ vpr/src/place/delay_model/delta_delay_model.h | 47 + .../delay_model/override_delay_model.cpp | 280 +++++ .../place/delay_model/override_delay_model.h | 112 ++ .../place/delay_model/place_delay_model.cpp | 78 ++ vpr/src/place/delay_model/place_delay_model.h | 80 ++ .../place/delay_model/simple_delay_model.cpp | 130 +++ .../place/delay_model/simple_delay_model.h | 39 + .../delay_model/override_delay_model.cpp | 2 +- .../timing/delay_model/simple_delay_model.cpp | 4 +- 14 files changed, 2038 insertions(+), 3 deletions(-) create mode 100644 vpr/src/place/delay_model/PlacementDelayModelCreator.cpp create mode 100644 vpr/src/place/delay_model/PlacementDelayModelCreator.h create mode 100644 vpr/src/place/delay_model/compute_delta_delays_utils.cpp create mode 100644 vpr/src/place/delay_model/compute_delta_delays_utils.h create mode 100644 vpr/src/place/delay_model/delta_delay_model.cpp create mode 100644 vpr/src/place/delay_model/delta_delay_model.h create mode 100644 vpr/src/place/delay_model/override_delay_model.cpp create mode 100644 vpr/src/place/delay_model/override_delay_model.h create mode 100644 vpr/src/place/delay_model/place_delay_model.cpp create mode 100644 vpr/src/place/delay_model/place_delay_model.h create mode 100644 vpr/src/place/delay_model/simple_delay_model.cpp create mode 100644 vpr/src/place/delay_model/simple_delay_model.h diff --git a/vpr/src/place/delay_model/PlacementDelayModelCreator.cpp 
b/vpr/src/place/delay_model/PlacementDelayModelCreator.cpp new file mode 100644 index 00000000000..3482cd091e0 --- /dev/null +++ b/vpr/src/place/delay_model/PlacementDelayModelCreator.cpp @@ -0,0 +1,80 @@ + + +#include "PlacementDelayModelCreator.h" + +#include "place_delay_model.h" +#include "simple_delay_model.h" +#include "delta_delay_model.h" +#include "override_delay_model.h" + +#include "vtr_time.h" +#include "physical_types.h" +#include "place_and_route.h" + +static int get_longest_segment_length(std::vector& segment_inf) { + int length = 0; + + for (const t_segment_inf& seg_info : segment_inf) { + if (seg_info.length > length) { + length = seg_info.length; + } + } + + return length; +} + +std::unique_ptr +PlacementDelayModelCreator::create_delay_model(const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + const Netlist<>& net_list, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + const std::vector& directs, + bool is_flat) { + vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up"); + + t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); + + alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, is_flat); + + const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + + RouterDelayProfiler route_profiler(net_list, router_lookahead, is_flat); + + int longest_length = get_longest_segment_length(segment_inf); + + // now setup and compute the actual arrays + std::unique_ptr place_delay_model; + float min_cross_layer_delay = get_min_cross_layer_delay(); + + if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { + place_delay_model = std::make_unique(); + } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { + 
place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { + place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); + } else { + VTR_ASSERT_MSG(false, "Invalid placer delay model"); + } + + if (placer_opts.read_placement_delay_lookup.empty()) { + place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length); + } else { + place_delay_model->read(placer_opts.read_placement_delay_lookup); + } + + if (!placer_opts.write_placement_delay_lookup.empty()) { + place_delay_model->write(placer_opts.write_placement_delay_lookup); + } + + // free all data structures that are no longer needed + free_routing_structs(); + + return place_delay_model; +} \ No newline at end of file diff --git a/vpr/src/place/delay_model/PlacementDelayModelCreator.h b/vpr/src/place/delay_model/PlacementDelayModelCreator.h new file mode 100644 index 00000000000..c92b67d4854 --- /dev/null +++ b/vpr/src/place/delay_model/PlacementDelayModelCreator.h @@ -0,0 +1,30 @@ + +#pragma once + +#include +#include + +#include "netlist.h" + +class PlaceDelayModel; +struct t_placer_opts; +struct t_router_opts; +struct t_det_routing_arch; +struct t_segment_inf; +struct t_chan_width_dist; +struct t_direct_inf; + +class PlacementDelayModelCreator { + public: + // nothing to do in the constructor + PlacementDelayModelCreator() = delete; + + static std::unique_ptr create_delay_model(const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + const Netlist<>& net_list, + t_det_routing_arch* det_routing_arch, + std::vector& segment_inf, + t_chan_width_dist chan_width_dist, + const std::vector& directs, + bool is_flat); +}; diff --git a/vpr/src/place/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/delay_model/compute_delta_delays_utils.cpp new file mode 100644 index 00000000000..725159406c0 --- /dev/null +++ b/vpr/src/place/delay_model/compute_delta_delays_utils.cpp @@ 
-0,0 +1,968 @@ + +#include "compute_delta_delays_utils.h" + +#include "vtr_time.h" +#include "vtr_math.h" +#include "physical_types.h" +#include "globals.h" +#include "router_delay_profiling.h" + +/// Indicates the delta delay value has not been calculated +static constexpr float UNINITIALIZED_DELTA = -1; +/// Indicates delta delay from/to an EMPTY block +static constexpr float EMPTY_DELTA = -2; +/// Indicates there is no valid delta delay +static constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); + +static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, + const t_placer_opts& palcer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + size_t longest_length, + bool is_flat); + +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); + +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays); + +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); + +static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool /*is_flat*/); + +static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool is_flat); + +/** + * @brief Routes between a source and sink location to calculate the delay. + * + * This function computes the delay of a routed connection between a source and sink node + * specified by their coordinates and layers. 
It iterates over the best driver and sink pin + * classes to find a valid routing path and calculates the delay if a path exists. + * + * @param route_profiler Reference to the `RouterDelayProfiler` responsible for calculating routing delays. + * @param source_x The x-coordinate of the source location. + * @param source_y The y-coordinate of the source location. + * @param source_layer The layer index of the source node. + * @param sink_x The x-coordinate of the sink location. + * @param sink_y The y-coordinate of the sink location. + * @param sink_layer The layer index of the sink node. + * @param router_opts Routing options used for delay calculation. + * @param measure_directconnect If `true`, includes direct connect delays; otherwise, skips direct connections. + * + * @return The calculated routing delay. If routing fails, it returns `IMPOSSIBLE_DELTA`. + */ +static float route_connection_delay(RouterDelayProfiler& route_profiler, + int source_x, + int source_y, + int source_layer, + int sink_x, + int sink_y, + int sink_layer, + const t_router_opts& router_opts, + bool measure_directconnect); + +/** + * @brief Computes a reduced value from a vector of delay values using the specified reduction method. + * + * @param delays A reference to a vector of delay values. This vector may be modified + * (e.g., sorted) depending on the reducer used. + * @param reducer The reduction method to be applied. + * + * @return The reduced delay value. If the input vector is empty, the function + * returns `IMPOSSIBLE_DELTA`. + * + * @throws VPR_FATAL_ERROR if the reducer is unrecognized. + */ +static float delay_reduce(std::vector& delays, e_reducer reducer); + +/** + * @brief Adds a delay value to a 2D matrix of delay vectors. + * + * Updates the delay vector at position (`delta_x`, `delta_y`) in the matrix. + * If the element contains only `EMPTY_DELTA`, it is replaced with the new delay; + * otherwise, the delay is appended to the vector. 
+ * + * @param matrix A 2D matrix of delay vectors. + * @param delta_x The x-index in the matrix. + * @param delta_y The y-index in the matrix. + * @param delay The delay value to add. + */ +static void add_delay_to_matrix(vtr::Matrix>& matrix, + int delta_x, + int delta_y, + float delay); + +/** + * @brief Computes the average delay for a routing span. + * + * This function calculates the average placement delay for a routing span starting from a + * given layer and spanning a region defined by delta x and delta y. It iteratively searches + * for valid delay values within an expanding neighborhood (starting from a distance of 1) + * around the specified delta offsets and layer, until valid values are found or + * the maximum search distance (`max_distance`) is reached. + * + * @param matrix A 4D matrix of delay values indexed by `[from_layer][to_layer][delta_x][delta_y]`. + * @param from_layer The starting layer index of the routing span. + * @param to_tile_loc A structure holding the delta offsets (`x` and `y`) and the target layer index (`layer_num`). + * @param max_distance The maximum neighborhood distance to search for valid delay values. + * + * @return The average of valid delay values within the search range. If no valid delays + * are found up to the maximum distance, the function returns `IMPOSSIBLE_DELTA`. + * + * @note The function performs a Manhattan-distance-based neighborhood search around the target location. 
+ */ +static float find_neighboring_average(vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc to_tile_loc, + int max_distance); + +/***************************************************************************************/ + +static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + size_t longest_length, + bool is_flat) { + + + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + const size_t num_layers = grid.get_num_layers(); + const size_t device_width = grid.width(); + const size_t device_height = grid.height(); + + /* To avoid edge effects we place the source at least 'longest_length' away + * from the device edge and route from there for all possible delta values < dimension + */ + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . = (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + const size_t mid_x = vtr::nint(device_width / 2); + const size_t mid_y = vtr::nint(device_height / 2); + const size_t low_x = std::min(longest_length, mid_x); + const size_t low_y = std::min(longest_length, mid_y); + const size_t high_x = (longest_length <= device_width) ? std::max(device_width - longest_length, mid_x) : mid_x; + const size_t high_y = (longest_length <= device_height) ? 
std::max(device_width - longest_length, mid_y) : mid_y; + + vtr::NdMatrix delta_delays({num_layers, num_layers, device_width, device_height}); + + std::set allowed_types; + if (!placer_opts.allowed_tiles_for_delay_model.empty()) { + std::vector allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + allowed_types = std::set(allowed_types_vector.begin(), allowed_types_vector.end()); + } + + for (int from_layer_num = 0; from_layer_num < (int)num_layers; from_layer_num++) { + for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) { + vtr::NdMatrix, 2> sampled_delta_delays({device_width, device_height}); + + // Find the lowest y location on the left edge with a non-empty block + int y = 0; + int x = 0; + t_physical_tile_type_ptr src_type = nullptr; + for (x = 0; x < (int)device_width; ++x) { + for (y = 0; y < (int)device_height; ++y) { + t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + // check if the tile type is among the allowed types + if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { + continue; + } + src_type = type; + break; + } + } + if (src_type != nullptr) { + break; + } + } + VTR_ASSERT(src_type != nullptr); + + auto generic_compute_matrix = (placer_opts.place_delta_delay_matrix_calculation_method == e_place_delta_delay_algorithm::ASTAR_ROUTE) ? 
generic_compute_matrix_iterative_astar : generic_compute_matrix_dijkstra_expansion; + +#ifdef VERBOSE + VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + device_width - 1, device_height - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + // Find the lowest x location on the bottom edge with a non-empty block + src_type = nullptr; + for (y = 0; y < (int)device_height; ++y) { + for (x = 0; x < (int)device_width; ++x) { + t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + // check if the tile type is among the allowed types + if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { + continue; + } + src_type = type; + break; + } + } + if (src_type) { + break; + } + } + VTR_ASSERT(src_type != nullptr); +#ifdef VERBOSE + VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + x, y, + x, y, + device_width - 1, device_height - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions B, C, E, F +#ifdef VERBOSE + VTR_LOG("Computing from low/low:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, low_y, + low_x, low_y, + device_width - 1, device_height - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions D, E, G, H +#ifdef VERBOSE + VTR_LOG("Computing from high/high:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, 
to_layer_num, + high_x, high_y, + 0, 0, + high_x, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions A, B, D, E +#ifdef VERBOSE + VTR_LOG("Computing from high/low:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + high_x, low_y, + 0, low_y, + high_x, device_height - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions E, F, H, I +#ifdef VERBOSE + VTR_LOG("Computing from low/high:\n"); +#endif + generic_compute_matrix(route_profiler, sampled_delta_delays, + from_layer_num, to_layer_num, + low_x, high_y, + low_x, 0, + device_width - 1, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { + for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { + delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + } + } + } + } + + return delta_delays; +} + +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { + // Set any empty delta's to the average of its neighbours + // + // Empty coordinates may occur if the sampling location happens to not have + // a connection at that location. However, a more thorough sampling likely + // would return a result, so we fill in the empty holes with a small + // neighbour average. 
+ constexpr int kMaxAverageDistance = 2; + for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { + for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) { + delta_delays[from_layer][to_layer][delta_x][delta_y] = + find_neighboring_average(delta_delays, + from_layer, + {delta_x, delta_y, to_layer}, + kMaxAverageDistance); + } + } + } + } + } +} + +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { + // Set any impossible deltas to the average of their neighbours + // + // Impossible coordinates may occur if an IPIN cannot be reached from the + // sampling OPIN. This might occur if the IPIN or OPIN used for sampling + // is specialized, and therefore cannot be reached by the pins + // sampled. Leaving this value in the delay matrix will result in invalid + // slacks if the delay matrix uses this value. + // + // A max average distance of 5 is used to provide increased effort in + // filling these gaps. It is more important to have a poor prediction + // than an invalid value that causes a slack assertion. 
+ constexpr int kMaxAverageDistance = 5; + for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { + if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { + delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neighboring_average( + delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance); + } + } + } + } + } +} + +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { + for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { + for (size_t x = 0; x < grid.width(); ++x) { + for (size_t y = 0; y < grid.height(); ++y) { + float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y]; + + if (delta_delay < 0.) 
{ + VPR_ERROR(VPR_ERROR_PLACE, + "Found invalid negative delay %g for delta [%d,%d,%d,%d]", + delta_delay, from_layer_num, to_layer_num, x, y); + } + } + } + } + } + + return true; +} + +static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool /*is_flat*/) { + const auto& device_ctx = g_vpr_ctx.device(); + + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + const int delta_x = abs(sink_x - source_x); + const int delta_y = abs(sink_y - source_y); + + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); + + bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE + || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); + + bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); + + if (src_or_target_empty || !is_allowed_type) { + if (matrix[delta_x][delta_y].empty()) { + // Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + } + } else { + // Valid start/end + float delay = route_connection_delay(route_profiler, + source_x, + source_y, + from_layer_num, + sink_x, + sink_y, + to_layer_num, + router_opts, + measure_directconnect); + +#ifdef VERBOSE + VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", + delay, + delta_x, delta_y, + 
source_x, source_y, + sink_x, sink_y); +#endif + if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { + // Overwrite empty delta + matrix[delta_x][delta_y][0] = delay; + } else { + // Collect delta + matrix[delta_x][delta_y].push_back(delay); + } + } + } + } +} + +static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*route_profiler*/, + vtr::Matrix>& matrix, + int from_layer_num, + int to_layer_num, + int source_x, + int source_y, + int start_x, + int start_y, + int end_x, + int end_y, + const t_router_opts& router_opts, + bool measure_directconnect, + const std::set& allowed_types, + bool is_flat) { + const auto& device_ctx = g_vpr_ctx.device(); + + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); + bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); + if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + + if (matrix[delta_x][delta_y].empty()) { + //Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + } + } + } + + return; + } + + vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); + + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); + for (int driver_ptc : best_driver_ptcs) { + VTR_ASSERT(driver_ptc != OPEN); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); + + 
VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); + auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); + + bool path_to_all_sinks = true; + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + + if (found_matrix[delta_x][delta_y]) { + continue; + } + + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); + if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (matrix[delta_x][delta_y].empty()) { + // Only set empty target if we don't already have a valid delta delay + matrix[delta_x][delta_y].push_back(EMPTY_DELTA); +#ifdef VERBOSE + VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", + "EMPTY", + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + found_matrix[delta_x][delta_y] = true; + } + } else { + bool found_a_sink = false; + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); + for (int sink_ptc : best_sink_ptcs) { + VTR_ASSERT(sink_ptc != OPEN); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); + + if (sink_rr_node == RRNodeId::INVALID()) + continue; + + if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { + // Skip if we shouldn't measure direct connects and a direct connect exists + continue; + } + + if (std::isnan(delays[sink_rr_node])) { + // This sink was not found + continue; + } + +#ifdef VERBOSE + VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", + delays[size_t(sink_rr_node)], + delta_x, delta_y, + source_x, source_y, + sink_x, sink_y); +#endif + found_matrix[delta_x][delta_y] = true; + + add_delay_to_matrix(matrix, delta_x, delta_y, delays[sink_rr_node]); + + found_a_sink = true; + break; + 
} + + if (!found_a_sink) { + path_to_all_sinks = false; + } + } + } + } + + if (path_to_all_sinks) { + break; + } + } + + for (int sink_x = start_x; sink_x <= end_x; sink_x++) { + for (int sink_y = start_y; sink_y <= end_y; sink_y++) { + int delta_x = abs(sink_x - source_x); + int delta_y = abs(sink_y - source_y); + if (!found_matrix[delta_x][delta_y]) { + add_delay_to_matrix(matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + source_x, + source_y, + from_layer_num, + sink_x, + sink_y, + to_layer_num, + IMPOSSIBLE_DELTA); + } + } + } +} + +static float route_connection_delay(RouterDelayProfiler& route_profiler, + int source_x, + int source_y, + int source_layer, + int sink_x, + int sink_y, + int sink_layer, + const t_router_opts& router_opts, + bool measure_directconnect) { + //Routes between the source and sink locations and calculates the delay + + // set to known value for debug purposes + float net_delay_value = IMPOSSIBLE_DELTA; + + const auto& device_ctx = g_vpr_ctx.device(); + + bool successfully_routed = false; + + // Get the rr nodes to route between + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, source_layer})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, sink_layer})); + + for (int driver_ptc : best_driver_ptcs) { + VTR_ASSERT(driver_ptc != OPEN); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_layer, source_x, source_y, SOURCE, driver_ptc); + + VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); + + for (int sink_ptc : best_sink_ptcs) { + VTR_ASSERT(sink_ptc != OPEN); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_layer, sink_x, sink_y, SINK, sink_ptc); + + if (sink_rr_node == RRNodeId::INVALID()) + continue; + + if (!measure_directconnect && 
directconnect_exists(source_rr_node, sink_rr_node)) { + // Skip if we shouldn't measure direct connects and a direct connect exists + continue; + } + + successfully_routed = route_profiler.calculate_delay(source_rr_node, + sink_rr_node, + router_opts, + &net_delay_value); + + if (successfully_routed) break; + } + if (successfully_routed) break; + } + + if (!successfully_routed) { + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + source_x, source_y, source_layer, sink_x, sink_y, sink_layer, net_delay_value); + } + + return net_delay_value; +} + +static float delay_reduce(std::vector& delays, e_reducer reducer) { + if (delays.empty()) { + return IMPOSSIBLE_DELTA; + } + + if (delays.size() == 1) { + return delays[0]; + } + + VTR_ASSERT(delays.size() > 1); + + float delay; + + if (reducer == e_reducer::MIN) { + auto itr = std::min_element(delays.begin(), delays.end()); + delay = *itr; + } else if (reducer == e_reducer::MAX) { + auto itr = std::max_element(delays.begin(), delays.end()); + delay = *itr; + } else if (reducer == e_reducer::MEDIAN) { + std::stable_sort(delays.begin(), delays.end()); + delay = vtr::median(delays.begin(), delays.end()); + } else if (reducer == e_reducer::ARITHMEAN) { + delay = vtr::arithmean(delays.begin(), delays.end()); + } else if (reducer == e_reducer::GEOMEAN) { + delay = vtr::geomean(delays.begin(), delays.end()); + } else { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer"); + } + + return delay; +} + +static void add_delay_to_matrix(vtr::Matrix>& matrix, + int delta_x, + int delta_y, + float delay) { + if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { + // Overwrite empty delta + matrix[delta_x][delta_y][0] = delay; + } else { + // Collect delta + matrix[delta_x][delta_y].push_back(delay); + } +} + +static float find_neighboring_average(vtr::NdMatrix& matrix, + int from_layer, + t_physical_tile_loc 
to_tile_loc, + int max_distance) { + float sum = 0.f; + int num_samples = 0; + const int endx = matrix.end_index(2); + const int endy = matrix.end_index(3); + + const int x = to_tile_loc.x; + const int y = to_tile_loc.y; + const int to_layer = to_tile_loc.layer_num; + + for (int distance = 1; distance <= max_distance; ++distance) { + for (int delx = x - distance; delx <= x + distance; delx++) { + for (int dely = y - distance; dely <= y + distance; dely++) { + // Check distance constraint + if (abs(delx - x) + abs(dely - y) > distance) { + continue; + } + + //check out of bounds + if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) { + continue; + } + + if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { + continue; + } + + sum += matrix[from_layer][to_layer][delx][dely]; + num_samples++; + } + } + + if (num_samples != 0) { + return sum / (float)num_samples; + } + } + + return IMPOSSIBLE_DELTA; +} + +/***************************************************************************************/ + +vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + int longest_length, + bool is_flat) { + vtr::ScopedStartFinishTimer timer("Computing delta delays"); + vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, + placer_opts, + router_opts, + measure_directconnect, + longest_length, + is_flat); + + const size_t num_elements = delta_delays.size(); + + // set uninitialized elements to infinity + for (size_t i = 0; i < num_elements; i++) { + if (delta_delays.get(i) == UNINITIALIZED_DELTA) { + delta_delays.get(i) = IMPOSSIBLE_DELTA; + } + } + + fix_empty_coordinates(delta_delays); + + fill_impossible_coordinates(delta_delays); + + verify_delta_delays(delta_delays); + + return delta_delays; +} + +//Finds a src_rr and sink_rr appropriate for 
measuring the delay of the current direct specification +bool find_direct_connect_sample_locations(const t_direct_inf* direct, + t_physical_tile_type_ptr from_type, + int from_pin, + int from_pin_class, + t_physical_tile_type_ptr to_type, + int to_pin, + int to_pin_class, + RRNodeId& out_src_node, + RRNodeId& out_sink_node) { + VTR_ASSERT(from_type != nullptr); + VTR_ASSERT(to_type != nullptr); + + auto& device_ctx = g_vpr_ctx.device(); + auto& grid = device_ctx.grid; + const auto& node_lookup = device_ctx.rr_graph.node_lookup(); + + //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, + //and which has the appropriate pins + int from_x = -1; + int from_y = -1; + int from_sub_tile = -1; + int to_x = 0, to_y = 0, to_sub_tile = 0; + bool found = false; + int found_layer_num = -1; + //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums + for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { + for (int x = 0; x < (int)grid.width() && !found; ++x) { + to_x = x + direct->x_offset; + if (to_x < 0 || to_x >= (int)grid.width()) continue; + + for (int y = 0; y < (int)grid.height() && !found; ++y) { + if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; + + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool from_pin_found = false; + if (direct->from_side != NUM_2D_SIDES) { + RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); + from_pin_found = from_pin_rr.is_valid(); + } else { + from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); + } + if (!from_pin_found) continue; + + to_y = y + direct->y_offset; + + if (to_y < 0 || to_y >= (int)grid.height()) continue; + if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; + + //Check that the from pin exists at 
this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool to_pin_found = false; + if (direct->to_side != NUM_2D_SIDES) { + RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); + to_pin_found = (to_pin_rr != RRNodeId::INVALID()); + } else { + to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); + } + if (!to_pin_found) continue; + + for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) { + to_sub_tile = sub_tile_num + direct->sub_tile_offset; + + if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; + + found = true; + found_layer_num = layer_num; + from_x = x; + from_y = y; + from_sub_tile = sub_tile_num; + + break; + } + } + } + } + + if (!found) { + return false; + } + + //Now have a legal instance of this direct connect + VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); + VTR_ASSERT(from_sub_tile < from_type->capacity); + + VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); + VTR_ASSERT(to_sub_tile < to_type->capacity); + + VTR_ASSERT(from_x + direct->x_offset == to_x); + VTR_ASSERT(from_y + direct->y_offset == to_y); + VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile); + + // Find a source/sink RR node associated with the pins of the direct + { + RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); + VTR_ASSERT(src_rr_candidate); + out_src_node = src_rr_candidate; + } + + { + RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); + VTR_ASSERT(sink_rr_candidate); + out_sink_node = sink_rr_candidate; + } + + return true; +} + +std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { + std::vector best_classes; + + //Record any non-zero Fc pins + // + //Note that we track non-zero Fc pins, 
since certain Fc overrides + //may apply to only a subset of wire types. This ensures we record + //which pins can potentially connect to global routing. + std::unordered_set non_zero_fc_pins; + for (const t_fc_specification& fc_spec : type->fc_specs) { + if (fc_spec.fc_value == 0) continue; + + non_zero_fc_pins.insert(fc_spec.pins.begin(), fc_spec.pins.end()); + } + + // Collect all classes of matching type which connect to general routing + for (int i = 0; i < (int)type->class_inf.size(); i++) { + if (type->class_inf[i].type == pintype) { + //Check whether all pins in this class are ignored or have zero fc + bool any_pins_connect_to_general_routing = false; + for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) { + int pin = type->class_inf[i].pinlist[ipin]; + //If the pin isn't ignored, and has a non-zero Fc to some general + //routing the class is suitable for delay profiling + if (!type->is_ignored_pin[pin] && non_zero_fc_pins.count(pin)) { + any_pins_connect_to_general_routing = true; + break; + } + } + + // Skip if the pin class doesn't connect to general routing + if (!any_pins_connect_to_general_routing) continue; + + // Record candidate class + best_classes.push_back(i); + } + } + + // Sort classes so the largest pin class is first + auto cmp_class = [&](int lhs, int rhs) { + return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins; + }; + + std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class); + + return best_classes; +} \ No newline at end of file diff --git a/vpr/src/place/delay_model/compute_delta_delays_utils.h b/vpr/src/place/delay_model/compute_delta_delays_utils.h new file mode 100644 index 00000000000..71ac632b149 --- /dev/null +++ b/vpr/src/place/delay_model/compute_delta_delays_utils.h @@ -0,0 +1,56 @@ + +#pragma once + +#include "vtr_ndmatrix.h" +#include "physical_types.h" +#include "rr_graph_fwd.h" + +struct t_placer_opts; +struct t_router_opts; +class RouterDelayProfiler; + +vtr::NdMatrix 
compute_delta_delay_model(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + bool measure_directconnect, + int longest_length, + bool is_flat); + +bool find_direct_connect_sample_locations(const t_direct_inf* direct, + t_physical_tile_type_ptr from_type, + int from_pin, + int from_pin_class, + t_physical_tile_type_ptr to_type, + int to_pin, + int to_pin_class, + RRNodeId& out_src_node, + RRNodeId& out_sink_node); + +/** + * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity. + * + * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`) + * that are suitable for delay calculations. It prioritizes pin classes with the largest number of pins + * that connect to general routing, ensuring commonly used pins are chosen for delay profiling. + * + * @param pintype The type of pins to filter. + * @param type Pointer to the physical tile type containing pin and class information. + * + * @return A vector of indices representing the selected pin classes. The classes are sorted + * in descending order based on the number of pins they contain. + * + * @details + * - A pin class is eligible if its type matches `pintype` and it contains at least one pin + * that connects to general routing (non-zero Fc). + * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`. + * - Classes are sorted so that the class with the largest number of pins appears first. + * If multiple classes have the same pin count, their order depends on their initial appearance + * in the architecture file. + * + * @note + * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded. + * - The function ensures stability in sorting, preserving the input order for classes + * with the same number of pins. 
+ */ + +std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); \ No newline at end of file diff --git a/vpr/src/place/delay_model/delta_delay_model.cpp b/vpr/src/place/delay_model/delta_delay_model.cpp new file mode 100644 index 00000000000..e8d56b09516 --- /dev/null +++ b/vpr/src/place/delay_model/delta_delay_model.cpp @@ -0,0 +1,135 @@ + +#include "delta_delay_model.h" + +#include "compute_delta_delays_utils.h" + +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + +void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { + delays_ = compute_delta_delay_model(route_profiler, + placer_opts, + router_opts, + /*measure_directconnect=*/true, + longest_length, + is_flat_); +} + +float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, + const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} + +void DeltaDelayModel::dump_echo(std::string filepath) const { + FILE* f = vtr::fopen(filepath.c_str(), "w"); + fprintf(f, " "); + for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { + for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { + fprintf(f, " %9zu", from_layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9zu", dx); + } + fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { + fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { + fprintf(f, " %9.2e", delays_[from_layer_num][to_layer_num][dx][dy]); + } + 
fprintf(f, "\n"); + } + } + } + vtr::fclose(f); +} + +void DeltaDelayModel::read(const std::string& file) { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; +#else + + // MmapFile object creates an mmap of the specified path, and will munmap + // when the object leaves scope. + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + + // FlatArrayMessageReader is used to read the message from the data array + // provided by MmapFile. + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + // When reading capnproto files the Reader object to use is named + // ::Reader. + // + // Initially this object is an empty VprDeltaDelayModel. + VprDeltaDelayModel::Reader model; + + // The reader.getRoot performs a cast from the generic capnproto to fit + // with the specified schema. + // + // Note that capnproto does not validate that the incoming data matches the + // schema. If this property is required, some form of check would be + // required. + model = reader.getRoot(); + + auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { + *out = in.getValue(); + }; + + // ToNdMatrix is a generic function for converting a Matrix capnproto + // to a vtr::NdMatrix. + // + // The user must supply the matrix dimension (2 in this case), the source + // capnproto type (VprFloatEntry), + // target C++ type (flat), and a function to convert from the source capnproto + // type to the target C++ type (ToFloat). + // + // The second argument should be of type Matrix::Reader where X is the + // capnproto element type. 
+ ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); +#endif +} + +void DeltaDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "DeltaDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; +#else + + // MallocMessageBuilder object is the generate capnproto message builder, + // using malloc for buffer allocation. + ::capnp::MallocMessageBuilder builder; + + // initRoot returns a X::Builder object that can be used to set the + // fields in the message. + auto model = builder.initRoot(); + + auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { + out->setValue(in); + }; + + // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a + // Matrix message. It is the mirror function of ToNdMatrix described in + // read above. + auto delay_values = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, fromFloat); + + // writeMessageToFile writes message to the specified file. + writeMessageToFile(file, &builder); +#endif +} diff --git a/vpr/src/place/delay_model/delta_delay_model.h b/vpr/src/place/delay_model/delta_delay_model.h new file mode 100644 index 00000000000..c3ae0d83cf7 --- /dev/null +++ b/vpr/src/place/delay_model/delta_delay_model.h @@ -0,0 +1,47 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class DeltaDelayModel + * + * @brief A simple delay model based on the distance (delta) between block locations. 
+ */ +class DeltaDelayModel : public PlaceDelayModel { + public: + DeltaDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + DeltaDelayModel(float min_cross_layer_delay, + vtr::NdMatrix delta_delays, + bool is_flat) + : delays_(std::move(delta_delays)) + , cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + const vtr::NdMatrix& delays() const { + return delays_; + } + + private: + vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; +}; \ No newline at end of file diff --git a/vpr/src/place/delay_model/override_delay_model.cpp b/vpr/src/place/delay_model/override_delay_model.cpp new file mode 100644 index 00000000000..61acd2937b5 --- /dev/null +++ b/vpr/src/place/delay_model/override_delay_model.cpp @@ -0,0 +1,280 @@ + +#include "override_delay_model.h" + +#include "compute_delta_delays_utils.h" + +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + +void OverrideDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) { + auto delays = compute_delta_delay_model(route_profiler, + placer_opts, + router_opts, + /*measure_directconnect=*/false, + 
longest_length, + is_flat_); + + base_delay_model_ = std::make_unique(cross_layer_delay_, delays, false); + + compute_override_delay_model_(route_profiler, router_opts); +} + +void OverrideDelayModel::compute_override_delay_model_(RouterDelayProfiler& route_profiler, + const t_router_opts& router_opts) { + const auto& device_ctx = g_vpr_ctx.device(); + t_router_opts router_opts2 = router_opts; + router_opts2.astar_fac = 0.f; + router_opts2.astar_offset = 0.f; + + // Look at all the direct connections that exist, and add overrides to delay model + for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) { + const t_direct_inf* direct = &device_ctx.arch->directs[idirect]; + + InstPort from_port = parse_inst_port(direct->from_pin); + InstPort to_port = parse_inst_port(direct->to_pin); + + t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types); + t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types); + + int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1; + VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from"); + + //We now walk through all the connections associated with the current direct specification, measure + //their delay and specify that value as an override in the delay model. + // + //Note that we need to check every connection in the direct to cover the case where the pins are not + //equivalent. + // + //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK + //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in + //sampled_rr_pairs and skipping them if they occur multiple times. 
+ int missing_instances = 0; + int missing_paths = 0; + std::set> sampled_rr_pairs; + for (int iconn = 0; iconn < num_conns; ++iconn) { + //Find the associated pins + int from_pin = from_type->find_pin(from_port.port_name(), from_port.port_low_index() + iconn); + int to_pin = to_type->find_pin(to_port.port_name(), to_port.port_low_index() + iconn); + + VTR_ASSERT(from_pin != OPEN); + VTR_ASSERT(to_pin != OPEN); + + int from_pin_class = from_type->find_pin_class(from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); + VTR_ASSERT(from_pin_class != OPEN); + + int to_pin_class = to_type->find_pin_class(to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); + VTR_ASSERT(to_pin_class != OPEN); + + bool found_sample_points; + RRNodeId src_rr, sink_rr; + found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr); + + if (!found_sample_points) { + ++missing_instances; + continue; + } + + //If some of the source/sink ports are logically equivalent we may have already + //sampled the associated source/sink pair and don't need to do so again + if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; + + float direct_connect_delay = std::numeric_limits::quiet_NaN(); + bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); + + if (found_routing_path) { + set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay); + } else { + ++missing_paths; + } + + //Record that we've sampled this pair of source and sink nodes + sampled_rr_pairs.insert({src_rr, sink_rr}); + } + + VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str()); + VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of 
inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str()); + } +} + +const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { + return base_delay_model_.get(); +} + +float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { + // First check to if there is an override delay value + const auto& device_ctx = g_vpr_ctx.device(); + const auto& grid = device_ctx.grid; + + t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); + t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); + + t_override override_key; + override_key.from_type = from_type_ptr->index; + override_key.from_class = from_type_ptr->pin_class[from_pin]; + override_key.to_type = to_type_ptr->index; + override_key.to_class = to_type_ptr->pin_class[to_pin]; + + //Delay overrides may be different for +/- delta so do not use + //an absolute delta for the look-up + override_key.delta_x = to_loc.x - from_loc.x; + override_key.delta_y = to_loc.y - from_loc.y; + + float delay_val = std::numeric_limits::quiet_NaN(); + auto override_iter = delay_overrides_.find(override_key); + if (override_iter != delay_overrides_.end()) { + //Found an override + delay_val = override_iter->second; + } else { + //Fall back to the base delay model if no override was found + delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); + } + + return delay_val; +} + +void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { + t_override override_key; + override_key.from_type = from_type; + override_key.from_class = from_class; + override_key.to_type = to_type; + override_key.to_class = to_class; + override_key.delta_x = delta_x; + override_key.delta_y = delta_y; + + auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); + if (!res.second) { //Key 
already exists + res.first->second = delay_val; //Overwrite existing delay + } +} + +void OverrideDelayModel::dump_echo(std::string filepath) const { + base_delay_model_->dump_echo(filepath); + + FILE* f = vtr::fopen(filepath.c_str(), "a"); + + fprintf(f, "\n"); + fprintf(f, "# Delay Overrides\n"); + auto& device_ctx = g_vpr_ctx.device(); + for (auto kv : delay_overrides_) { + auto override_key = kv.first; + float delay_val = kv.second; + fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", + device_ctx.physical_tile_types[override_key.from_type].name.c_str(), + device_ctx.physical_tile_types[override_key.to_type].name.c_str(), + override_key.from_class, + override_key.to_class, + override_key.delta_x, + override_key.delta_y, + delay_val); + } + + vtr::fclose(f); +} + +float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { + t_override key; + key.from_type = from_type; + key.from_class = from_class; + key.to_type = to_type; + key.to_class = to_class; + key.delta_x = delta_x; + key.delta_y = delta_y; + + auto iter = delay_overrides_.find(key); + if (iter == delay_overrides_.end()) { + VPR_THROW(VPR_ERROR_PLACE, "Key not found."); + } + return iter->second; +} + +void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { + base_delay_model_ = std::move(base_delay_model_obj); +} + +void OverrideDelayModel::read(const std::string& file) { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); +#else + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. 
*/ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { + *out = in.getValue(); + }; + + vtr::NdMatrix delays; + auto model = reader.getRoot(); + ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), toFloat); + + base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); + + // Reading non-scalar capnproto fields is roughly equivilant to using + // a std::vector of the field type. Actual type is capnp::List::Reader. + auto overrides = model.getDelayOverrides(); + std::vector > overrides_arr(overrides.size()); + for (size_t i = 0; i < overrides.size(); ++i) { + const auto& elem = overrides[i]; + overrides_arr[i].first.from_type = elem.getFromType(); + overrides_arr[i].first.to_type = elem.getToType(); + overrides_arr[i].first.from_class = elem.getFromClass(); + overrides_arr[i].first.to_class = elem.getToClass(); + overrides_arr[i].first.delta_x = elem.getDeltaX(); + overrides_arr[i].first.delta_y = elem.getDeltaY(); + + overrides_arr[i].second = elem.getDelay(); + } + + delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); +#endif +} + +void OverrideDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); +#else + ::capnp::MallocMessageBuilder builder; + auto model = builder.initRoot(); + + auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { + out->setValue(in); + }; + + auto delays = model.getDelays(); + FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), fromFloat); + + // Non-scalar capnproto fields should be first initialized with + // init(count), and then accessed from the returned + // std::vector-like Builder object (specifically capnp::List::Builder). + auto overrides = model.initDelayOverrides(delay_overrides_.size()); + auto dst_iter = overrides.begin(); + for (const auto& src : delay_overrides_) { + auto elem = *dst_iter++; + elem.setFromType(src.first.from_type); + elem.setToType(src.first.to_type); + elem.setFromClass(src.first.from_class); + elem.setToClass(src.first.to_class); + elem.setDeltaX(src.first.delta_x); + elem.setDeltaY(src.first.delta_y); + + elem.setDelay(src.second); + } + + writeMessageToFile(file, &builder); +#endif +} + diff --git a/vpr/src/place/delay_model/override_delay_model.h b/vpr/src/place/delay_model/override_delay_model.h new file mode 100644 index 00000000000..5965261c272 --- /dev/null +++ b/vpr/src/place/delay_model/override_delay_model.h @@ -0,0 +1,112 @@ + +#pragma once + +#include "place_delay_model.h" +#include "delta_delay_model.h" + +class OverrideDelayModel : public PlaceDelayModel { + public: + OverrideDelayModel(float min_cross_layer_delay, + bool is_flat) + : cross_layer_delay_(min_cross_layer_delay) + , is_flat_(is_flat) {} + + void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + /** + * @brief returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the + * specified from and to pins + */ + float delay(const t_physical_tile_loc& from_loc, int 
from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; + + void dump_echo(std::string filepath) const override; + + void read(const std::string& file) override; + void write(const std::string& file) const override; + + public: //Mutators + void set_base_delay_model(std::unique_ptr base_delay_model); + const DeltaDelayModel* base_delay_model() const; + float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; + void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay); + + private: + std::unique_ptr base_delay_model_; + /// Minimum delay of cross-layer connections + float cross_layer_delay_; + + /// Indicates whether the router is a two-stage or run-flat + bool is_flat_; + + void compute_override_delay_model_(RouterDelayProfiler& router, + const t_router_opts& router_opts); + + /** + * @brief Structure that allows delays to be queried from the delay model. + * + * Delay is calculated given the origin physical tile, the origin + * pin, the destination physical tile, and the destination pin. + * This structure encapsulates all these information. + * + * @param from_type, to_type + * Physical tile index (for easy array access) + * @param from_class, to_class + * The class that the pins belongs to. + * @param to_x, to_y + * The horizontal and vertical displacement + * between two physical tiles. + */ + struct t_override { + short from_type; + short to_type; + short from_class; + short to_class; + short delta_x; + short delta_y; + + /** + * @brief Comparison operator designed for performance. + * + * Operator< is important since t_override serves as the key into the + * map structure delay_overrides_. A default comparison operator would + * not be inlined by the compiler. + * + * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare + * is required for operator< to be inlined by compiler. 
Proper inlining of + * the function reduces place time by around 5%. + * + * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 + */ + friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { + const short* left = reinterpret_cast(&lhs); + const short* right = reinterpret_cast(&rhs); + constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); + return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); + } + }; + + /** + * @brief Map data structure that returns delay values according to + * specific delay model queries. + * + * Delay model queries are provided by the t_override structure, which + * encapsulates the information regarding the origin and the destination. + */ + vtr::flat_map2 delay_overrides_; + + /** + * operator< treats memory layout of t_override as an array of short. + * This requires all members of t_override are shorts and there is no + * padding between members of t_override. 
+ */ + static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); + static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); + static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); +}; \ No newline at end of file diff --git a/vpr/src/place/delay_model/place_delay_model.cpp b/vpr/src/place/delay_model/place_delay_model.cpp new file mode 100644 index 00000000000..04267e0e5f1 --- /dev/null +++ b/vpr/src/place/delay_model/place_delay_model.cpp @@ -0,0 +1,78 @@ +/** + * @file place_delay_model.cpp + * @brief This file implements all the class methods and individual + * routines related to the placer delay model. + */ + +#include "place_delay_model.h" + +#include "globals.h" +#include "router_lookahead_map.h" +#include "placer_state.h" +#include "vpr_error.h" + +/** + * @brief Returns the delay of one point to point connection. + * + * Only estimate delay for signals routed through the inter-block routing network. + * TODO: Do how should we compute the delay for globals. "Global signals are assumed to have zero delay." 
+ */ +float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, + const vtr::vector_map& block_locs, + ClusterNetId net_id, + int ipin) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + float delay_source_to_sink = 0.; + + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); + ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net_id, ipin); + + ClusterBlockId source_block = cluster_ctx.clb_nlist.pin_block(source_pin); + ClusterBlockId sink_block = cluster_ctx.clb_nlist.pin_block(sink_pin); + + int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin); + int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin); + + t_pl_loc source_block_loc = block_locs[source_block].loc; + t_pl_loc sink_block_loc = block_locs[sink_block].loc; + + /** + * This heuristic only considers delta_x and delta_y, a much better + * heuristic would be to to create a more comprehensive lookup table. + * + * In particular this approach does not accurately capture the effect + * of fast carry-chain connections. 
+ */ + delay_source_to_sink = delay_model->delay({source_block_loc.x, source_block_loc.y, source_block_loc.layer}, source_block_ipin, + {sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer}, sink_block_ipin); + if (delay_source_to_sink < 0) { + VPR_ERROR(VPR_ERROR_PLACE, + "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d,%d) to %s (at %d,%d,%d)\n" + "in comp_td_single_connection_delay: Delay is less than 0\n", + block_type_pin_index_to_name(physical_tile_type(source_block_loc), source_block_ipin, false).c_str(), + source_block_loc.x, source_block_loc.y, source_block_loc.layer, + block_type_pin_index_to_name(physical_tile_type(sink_block_loc), sink_block_ipin, false).c_str(), + sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer, + delay_source_to_sink); + } + } + + return (delay_source_to_sink); +} + +///@brief Recompute all point to point delays, updating `connection_delay` matrix. +void comp_td_connection_delays(const PlaceDelayModel* delay_model, + PlacerState& placer_state) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& p_timing_ctx = placer_state.mutable_timing(); + auto& block_locs = placer_state.block_locs(); + auto& connection_delay = p_timing_ctx.connection_delay; + + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) { + connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, block_locs, net_id, ipin); + } + } +} diff --git a/vpr/src/place/delay_model/place_delay_model.h b/vpr/src/place/delay_model/place_delay_model.h new file mode 100644 index 00000000000..27c89591071 --- /dev/null +++ b/vpr/src/place/delay_model/place_delay_model.h @@ -0,0 +1,80 @@ +/** + * @file place_delay_model.h + * @brief This file contains all the class and function declarations related to + * the placer delay model. For implementations, see place_delay_model.cpp. 
+ */ + +#pragma once + +#include "vtr_ndmatrix.h" +#include "vtr_flat_map.h" +#include "vpr_types.h" +#include "router_delay_profiling.h" + +#ifndef __has_attribute +# define __has_attribute(x) 0 // Compatibility with non-clang compilers. +#endif + +#if defined(COMPILER_GCC) && defined(NDEBUG) +# define ALWAYS_INLINE inline __attribute__((__always_inline__)) +#elif defined(COMPILER_MSVC) && defined(NDEBUG) +# define ALWAYS_INLINE __forceinline +#elif __has_attribute(always_inline) +# define ALWAYS_INLINE __attribute__((always_inline)) // clang +#else +# define ALWAYS_INLINE inline +#endif + +///@brief Forward declarations. +class PlaceDelayModel; +class PlacerState; + +///@brief Returns the delay of one point to point connection. +float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, + const vtr::vector_map& block_locs, + ClusterNetId net_id, + int ipin); + +///@brief Recompute all point to point delays, updating `connection_delay` matrix. +void comp_td_connection_delays(const PlaceDelayModel* delay_model, + PlacerState& placer_state); + +///@brief Abstract interface to a placement delay model. +class PlaceDelayModel { + public: + virtual ~PlaceDelayModel() = default; + + ///@brief Computes place delay model. + virtual void compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) + = 0; + + /** + * @brief Returns the delay estimate between the specified block pins. + * + * Either compute or read methods must be invoked before invoking delay. + */ + virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0; + + ///@brief Dumps the delay model to an echo file. + virtual void dump_echo(std::string filename) const = 0; + + /** + * @brief Write place delay model to specified file. + * + * May be unimplemented, in which case method should throw an exception. 
+ */ + virtual void write(const std::string& file) const = 0; + + /** + * @brief Read place delay model from specified file. + * + * May be unimplemented, in which case method should throw an exception. + */ + virtual void read(const std::string& file) = 0; +}; + + + diff --git a/vpr/src/place/delay_model/simple_delay_model.cpp b/vpr/src/place/delay_model/simple_delay_model.cpp new file mode 100644 index 00000000000..1fcd86eca64 --- /dev/null +++ b/vpr/src/place/delay_model/simple_delay_model.cpp @@ -0,0 +1,130 @@ + +#include "simple_delay_model.h" + +#ifdef VTR_ENABLE_CAPNPROTO +# include "capnp/serialize.h" +# include "place_delay_model.capnp.h" +# include "ndmatrix_serdes.h" +# include "mmap_file.h" +# include "serdes_utils.h" +#endif // VTR_ENABLE_CAPNPROTO + +void SimpleDelayModel::compute(RouterDelayProfiler& route_profiler, + const t_placer_opts& /*placer_opts*/, + const t_router_opts& /*router_opts*/, + int /*longest_length*/) { + const auto& grid = g_vpr_ctx.device().grid; + const size_t num_physical_tile_types = g_vpr_ctx.device().physical_tile_types.size(); + const size_t num_layers = grid.get_num_layers(); + + // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height] + // The second index related to the layer that the source location is on and the third index is for the sink layer + delays_ = vtr::NdMatrix({num_physical_tile_types, + num_layers, + num_layers, + grid.width(), + grid.height()}); + + for (size_t physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { + for (size_t from_layer = 0; from_layer < num_layers; ++from_layer) { + for (size_t to_layer = 0; to_layer < num_layers; ++to_layer) { + for (size_t dx = 0; dx < grid.width(); ++dx) { + for (size_t dy = 0; dy < grid.height(); ++dy) { + float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, + from_layer, + to_layer, + dx, + dy); + delays_[physical_tile_type_idx][from_layer][to_layer][dx][dy] 
= min_delay; + } + } + } + } + } +} + +float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { + int delta_x = std::abs(from_loc.x - to_loc.x); + int delta_y = std::abs(from_loc.y - to_loc.y); + + int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; + return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; +} + +void SimpleDelayModel::read(const std::string& file) { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); +#else + // MmapFile object creates an mmap of the specified path, and will munmap + // when the object leaves scope. + MmapFile f(file); + + /* Increase reader limit to 1G words to allow for large files. */ + ::capnp::ReaderOptions opts = default_large_capnp_opts(); + + // FlatArrayMessageReader is used to read the message from the data array + // provided by MmapFile. + ::capnp::FlatArrayMessageReader reader(f.getData(), opts); + + // When reading capnproto files the Reader object to use is named + // ::Reader. + // + // Initially this object is an empty VprDeltaDelayModel. + VprDeltaDelayModel::Reader model; + + // The reader.getRoot performs a cast from the generic capnproto to fit + // with the specified schema. + // + // Note that capnproto does not validate that the incoming data matches the + // schema. If this property is required, some form of check would be + // required. + model = reader.getRoot(); + + auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { + *out = in.getValue(); + }; + + // ToNdMatrix is a generic function for converting a Matrix capnproto + // to a vtr::NdMatrix. 
+ // + // The user must supply the matrix dimension (5 in this case), the source + // capnproto type (VprFloatEntry), + // target C++ type (flat), and a function to convert from the source capnproto + // type to the target C++ type (ToFloat). + // + // The second argument should be of type Matrix::Reader where X is the + // capnproto element type. + ToNdMatrix<5, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); +#endif +} + +void SimpleDelayModel::write(const std::string& file) const { +#ifndef VTR_ENABLE_CAPNPROTO + VPR_THROW(VPR_ERROR_PLACE, + "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); +#else + // MallocMessageBuilder object generates capnproto message builder, + // using malloc for buffer allocation. + ::capnp::MallocMessageBuilder builder; + + // initRoot returns a X::Builder object that can be used to set the + // fields in the message. + auto model = builder.initRoot(); + + auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { + out->setValue(in); + }; + + // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a + // Matrix message. It is the mirror function of ToNdMatrix described in + // read above. + auto delay_values = model.getDelays(); + FromNdMatrix<5, VprFloatEntry, float>(&delay_values, delays_, fromFloat); + + // writeMessageToFile writes message to the specified file. 
+ writeMessageToFile(file, &builder); +#endif +} diff --git a/vpr/src/place/delay_model/simple_delay_model.h b/vpr/src/place/delay_model/simple_delay_model.h new file mode 100644 index 00000000000..25dce08c4fc --- /dev/null +++ b/vpr/src/place/delay_model/simple_delay_model.h @@ -0,0 +1,39 @@ + +#pragma once + +#include "place_delay_model.h" + +/** + * @class SimpleDelayModel + * @brief A simple delay model based on the information stored in router lookahead + * This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router + */ +class SimpleDelayModel : public PlaceDelayModel { + public: + SimpleDelayModel() {} + + /// @brief Use the information in the router lookahead to fill the delay matrix instead of running the router + void compute(RouterDelayProfiler& router, + const t_placer_opts& placer_opts, + const t_router_opts& router_opts, + int longest_length) override; + + float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; + + void dump_echo(std::string /*filepath*/) const override {} + + void read(const std::string& /*file*/) override; + void write(const std::string& /*file*/) const override; + + private: + /** + * @brief The matrix to store the minimum delay between different points on different layers. + * + *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers + *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs + *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers + *is not uniform. 
For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. + *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. + */ + vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] +}; \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp index 61acd2937b5..6cbb2c7f654 100644 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/override_delay_model.cpp @@ -245,7 +245,7 @@ void OverrideDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else ::capnp::MallocMessageBuilder builder; auto model = builder.initRoot(); diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp index 1fcd86eca64..dac18890366 100644 --- a/vpr/src/place/timing/delay_model/simple_delay_model.cpp +++ b/vpr/src/place/timing/delay_model/simple_delay_model.cpp @@ -55,7 +55,7 @@ void SimpleDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MmapFile object creates an mmap of the specified path, and will munmap // when the object leaves scope. 
@@ -104,7 +104,7 @@ void SimpleDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MallocMessageBuilder object generates capnproto message builder, // using malloc for buffer allocation. From 94cfd6ff2a1403e6315608cd405fe2973bc09820 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 19 Jan 2025 17:07:35 -0500 Subject: [PATCH 33/39] delete duplicate files --- .../PlacementDelayModelCreator.cpp | 80 -- .../delay_model/PlacementDelayModelCreator.h | 30 - .../compute_delta_delays_utils.cpp | 968 ------------------ .../delay_model/compute_delta_delays_utils.h | 56 - .../timing/delay_model/delta_delay_model.cpp | 135 --- .../timing/delay_model/delta_delay_model.h | 47 - .../delay_model/override_delay_model.cpp | 280 ----- .../timing/delay_model/override_delay_model.h | 112 -- .../timing/delay_model/place_delay_model.cpp | 78 -- .../timing/delay_model/place_delay_model.h | 80 -- .../timing/delay_model/simple_delay_model.cpp | 130 --- .../timing/delay_model/simple_delay_model.h | 39 - 12 files changed, 2035 deletions(-) delete mode 100644 vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp delete mode 100644 vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h delete mode 100644 vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp delete mode 100644 vpr/src/place/timing/delay_model/compute_delta_delays_utils.h delete mode 100644 vpr/src/place/timing/delay_model/delta_delay_model.cpp delete mode 100644 vpr/src/place/timing/delay_model/delta_delay_model.h delete mode 100644 vpr/src/place/timing/delay_model/override_delay_model.cpp delete mode 100644 vpr/src/place/timing/delay_model/override_delay_model.h delete mode 100644 
vpr/src/place/timing/delay_model/place_delay_model.cpp delete mode 100644 vpr/src/place/timing/delay_model/place_delay_model.h delete mode 100644 vpr/src/place/timing/delay_model/simple_delay_model.cpp delete mode 100644 vpr/src/place/timing/delay_model/simple_delay_model.h diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp deleted file mode 100644 index 3482cd091e0..00000000000 --- a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.cpp +++ /dev/null @@ -1,80 +0,0 @@ - - -#include "PlacementDelayModelCreator.h" - -#include "place_delay_model.h" -#include "simple_delay_model.h" -#include "delta_delay_model.h" -#include "override_delay_model.h" - -#include "vtr_time.h" -#include "physical_types.h" -#include "place_and_route.h" - -static int get_longest_segment_length(std::vector& segment_inf) { - int length = 0; - - for (const t_segment_inf& seg_info : segment_inf) { - if (seg_info.length > length) { - length = seg_info.length; - } - } - - return length; -} - -std::unique_ptr -PlacementDelayModelCreator::create_delay_model(const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - const Netlist<>& net_list, - t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - const std::vector& directs, - bool is_flat) { - vtr::ScopedStartFinishTimer timer("Computing placement delta delay look-up"); - - t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); - - alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, is_flat); - - const RouterLookahead* router_lookahead = get_cached_router_lookahead(*det_routing_arch, - router_opts.lookahead_type, - router_opts.write_router_lookahead, - router_opts.read_router_lookahead, - segment_inf, - is_flat); - - RouterDelayProfiler route_profiler(net_list, router_lookahead, is_flat); - - int longest_length = 
get_longest_segment_length(segment_inf); - - // now setup and compute the actual arrays - std::unique_ptr place_delay_model; - float min_cross_layer_delay = get_min_cross_layer_delay(); - - if (placer_opts.delay_model_type == PlaceDelayModelType::SIMPLE) { - place_delay_model = std::make_unique(); - } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); - } else if (placer_opts.delay_model_type == PlaceDelayModelType::DELTA_OVERRIDE) { - place_delay_model = std::make_unique(min_cross_layer_delay, is_flat); - } else { - VTR_ASSERT_MSG(false, "Invalid placer delay model"); - } - - if (placer_opts.read_placement_delay_lookup.empty()) { - place_delay_model->compute(route_profiler, placer_opts, router_opts, longest_length); - } else { - place_delay_model->read(placer_opts.read_placement_delay_lookup); - } - - if (!placer_opts.write_placement_delay_lookup.empty()) { - place_delay_model->write(placer_opts.write_placement_delay_lookup); - } - - // free all data structures that are no longer needed - free_routing_structs(); - - return place_delay_model; -} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h b/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h deleted file mode 100644 index c92b67d4854..00000000000 --- a/vpr/src/place/timing/delay_model/PlacementDelayModelCreator.h +++ /dev/null @@ -1,30 +0,0 @@ - -#pragma once - -#include -#include - -#include "netlist.h" - -class PlaceDelayModel; -struct t_placer_opts; -struct t_router_opts; -struct t_det_routing_arch; -struct t_segment_inf; -struct t_chan_width_dist; -struct t_direct_inf; - -class PlacementDelayModelCreator { - public: - // nothing to do in the constructor - PlacementDelayModelCreator() = delete; - - static std::unique_ptr create_delay_model(const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - const Netlist<>& net_list, - 
t_det_routing_arch* det_routing_arch, - std::vector& segment_inf, - t_chan_width_dist chan_width_dist, - const std::vector& directs, - bool is_flat); -}; diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp deleted file mode 100644 index 725159406c0..00000000000 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.cpp +++ /dev/null @@ -1,968 +0,0 @@ - -#include "compute_delta_delays_utils.h" - -#include "vtr_time.h" -#include "vtr_math.h" -#include "physical_types.h" -#include "globals.h" -#include "router_delay_profiling.h" - -/// Indicates the delta delay value has not been calculated -static constexpr float UNINITIALIZED_DELTA = -1; -/// Indicates delta delay from/to an EMPTY block -static constexpr float EMPTY_DELTA = -2; -/// Indicates there is no valid delta delay -static constexpr float IMPOSSIBLE_DELTA = std::numeric_limits::infinity(); - -static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, - const t_placer_opts& palcer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - size_t longest_length, - bool is_flat); - -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); - -static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays); - -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); - -static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool /*is_flat*/); - -static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - 
int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool is_flat); - -/** - * @brief Routes between a source and sink location to calculate the delay. - * - * This function computes the delay of a routed connection between a source and sink node - * specified by their coordinates and layers. It iterates over the best driver and sink pin - * classes to find a valid routing path and calculates the delay if a path exists. - * - * @param route_profiler Reference to the `RouterDelayProfiler` responsible for calculating routing delays. - * @param source_x The x-coordinate of the source location. - * @param source_y The y-coordinate of the source location. - * @param source_layer The layer index of the source node. - * @param sink_x The x-coordinate of the sink location. - * @param sink_y The y-coordinate of the sink location. - * @param sink_layer The layer index of the sink node. - * @param router_opts Routing options used for delay calculation. - * @param measure_directconnect If `true`, includes direct connect delays; otherwise, skips direct connections. - * - * @return The calculated routing delay. If routing fails, it returns `IMPOSSIBLE_DELTA`. - */ -static float route_connection_delay(RouterDelayProfiler& route_profiler, - int source_x, - int source_y, - int source_layer, - int sink_x, - int sink_y, - int sink_layer, - const t_router_opts& router_opts, - bool measure_directconnect); - -/** - * @brief Computes a reduced value from a vector of delay values using the specified reduction method. - * - * @param delays A reference to a vector of delay values. This vector may be modified - * (e.g., sorted) depending on the reducer used. - * @param reducer The reduction method to be applied. - * - * @return The reduced delay value. If the input vector is empty, the function - * returns `IMPOSSIBLE_DELTA`. - * - * @throws VPR_FATAL_ERROR if the reducer is unrecognized. 
- */ -static float delay_reduce(std::vector& delays, e_reducer reducer); - -/** - * @brief Adds a delay value to a 2D matrix of delay vectors. - * - * Updates the delay vector at position (`delta_x`, `delta_y`) in the matrix. - * If the element contains only `EMPTY_DELTA`, it is replaced with the new delay; - * otherwise, the delay is appended to the vector. - * - * @param matrix A 2D matrix of delay vectors. - * @param delta_x The x-index in the matrix. - * @param delta_y The y-index in the matrix. - * @param delay The delay value to add. - */ -static void add_delay_to_matrix(vtr::Matrix>& matrix, - int delta_x, - int delta_y, - float delay); - -/** - * @brief Computes the average delay for a routing span. - * - * This function calculates the average placement delay for a routing span starting from a - * given layer and spanning a region defined by delta x and delta y. It iteratively searches - * for valid delay values within an expanding neighborhood (starting from a distance of 1) - * around the specified delta offsets and layer, until valid values are found or - * the maximum search distance (`max_distance`) is reached. - * - * @param matrix A 4D matrix of delay values indexed by `[from_layer][to_layer][delta_x][delta_y]`. - * @param from_layer The starting layer index of the routing span. - * @param to_tile_loc A structure holding the delta offsets (`x` and `y`) and the target layer index (`layer_num`). - * @param max_distance The maximum neighborhood distance to search for valid delay values. - * - * @return The average of valid delay values within the search range. If no valid delays - * are found up to the maximum distance, the function returns `IMPOSSIBLE_DELTA`. - * - * @note The function performs a Manhattan-distance-based neighborhood search around the target location. 
- */ -static float find_neighboring_average(vtr::NdMatrix& matrix, - int from_layer, - t_physical_tile_loc to_tile_loc, - int max_distance); - -/***************************************************************************************/ - -static vtr::NdMatrix compute_delta_delays(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - size_t longest_length, - bool is_flat) { - - - const auto& device_ctx = g_vpr_ctx.device(); - const auto& grid = device_ctx.grid; - - const size_t num_layers = grid.get_num_layers(); - const size_t device_width = grid.width(); - const size_t device_height = grid.height(); - - /* To avoid edge effects we place the source at least 'longest_length' away - * from the device edge and route from there for all possible delta values < dimension - */ - - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . = (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - const size_t mid_x = vtr::nint(device_width / 2); - const size_t mid_y = vtr::nint(device_height / 2); - const size_t low_x = std::min(longest_length, mid_x); - const size_t low_y = std::min(longest_length, mid_y); - const size_t high_x = (longest_length <= device_width) ? std::max(device_width - longest_length, mid_x) : mid_x; - const size_t high_y = (longest_length <= device_height) ? 
std::max(device_width - longest_length, mid_y) : mid_y; - - vtr::NdMatrix delta_delays({num_layers, num_layers, device_width, device_height}); - - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - std::vector allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - allowed_types = std::set(allowed_types_vector.begin(), allowed_types_vector.end()); - } - - for (int from_layer_num = 0; from_layer_num < (int)num_layers; from_layer_num++) { - for (int to_layer_num = 0; to_layer_num < (int)num_layers; to_layer_num++) { - vtr::NdMatrix, 2> sampled_delta_delays({device_width, device_height}); - - // Find the lowest y location on the left edge with a non-empty block - int y = 0; - int x = 0; - t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < (int)device_width; ++x) { - for (y = 0; y < (int)device_height; ++y) { - t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - // check if the tile type is among the allowed types - if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { - continue; - } - src_type = type; - break; - } - } - if (src_type != nullptr) { - break; - } - } - VTR_ASSERT(src_type != nullptr); - - auto generic_compute_matrix = (placer_opts.place_delta_delay_matrix_calculation_method == e_place_delta_delay_algorithm::ASTAR_ROUTE) ? 
generic_compute_matrix_iterative_astar : generic_compute_matrix_dijkstra_expansion; - -#ifdef VERBOSE - VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - x, y, - x, y, - device_width - 1, device_height - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - // Find the lowest x location on the bottom edge with a non-empty block - src_type = nullptr; - for (y = 0; y < (int)device_height; ++y) { - for (x = 0; x < (int)device_width; ++x) { - t_physical_tile_type_ptr type = grid.get_physical_type({x, y, from_layer_num}); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - // check if the tile type is among the allowed types - if (!allowed_types.empty() && allowed_types.find(type->name) == allowed_types.end()) { - continue; - } - src_type = type; - break; - } - } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); -#ifdef VERBOSE - VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - x, y, - x, y, - device_width - 1, device_height - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions B, C, E, F -#ifdef VERBOSE - VTR_LOG("Computing from low/low:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - low_x, low_y, - low_x, low_y, - device_width - 1, device_height - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions D, E, G, H -#ifdef VERBOSE - VTR_LOG("Computing from high/high:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, 
to_layer_num, - high_x, high_y, - 0, 0, - high_x, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions A, B, D, E -#ifdef VERBOSE - VTR_LOG("Computing from high/low:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - high_x, low_y, - 0, low_y, - high_x, device_height - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions E, F, H, I -#ifdef VERBOSE - VTR_LOG("Computing from low/high:\n"); -#endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - from_layer_num, to_layer_num, - low_x, high_y, - low_x, 0, - device_width - 1, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { - for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { - delta_delays[from_layer_num][to_layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); - } - } - } - } - - return delta_delays; -} - -static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { - // Set any empty delta's to the average of its neighbours - // - // Empty coordinates may occur if the sampling location happens to not have - // a connection at that location. However, a more thorough sampling likely - // would return a result, so we fill in the empty holes with a small - // neighbour average. 
- constexpr int kMaxAverageDistance = 2; - for (int from_layer = 0; from_layer < (int)delta_delays.dim_size(0); ++from_layer) { - for (int to_layer = 0; to_layer < (int)delta_delays.dim_size(1); ++to_layer) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer][to_layer][delta_x][delta_y] == EMPTY_DELTA) { - delta_delays[from_layer][to_layer][delta_x][delta_y] = - find_neighboring_average(delta_delays, - from_layer, - {delta_x, delta_y, to_layer}, - kMaxAverageDistance); - } - } - } - } - } -} - -static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { - // Set any impossible delta's to the average of its neighbours - // - // Impossible coordinates may occur if an IPIN cannot be reached from the - // sampling OPIN. This might occur if the IPIN or OPIN used for sampling - // is specialized, and therefore cannot be reached via the by the pins - // sampled. Leaving this value in the delay matrix will result in invalid - // slacks if the delay matrix uses this value. - // - // A max average distance of 5 is used to provide increased effort in - // filling these gaps. It is more important to have a poor predication, - // than an invalid value and causing a slack assertion. 
- constexpr int kMaxAverageDistance = 5; - for (int from_layer_num = 0; from_layer_num < (int)delta_delays.dim_size(0); ++from_layer_num) { - for (int to_layer_num = 0; to_layer_num < (int)delta_delays.dim_size(1); ++to_layer_num) { - for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(2); ++delta_x) { - for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(3); ++delta_y) { - if (delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { - delta_delays[from_layer_num][to_layer_num][delta_x][delta_y] = find_neighboring_average( - delta_delays, from_layer_num, {delta_x, delta_y, to_layer_num}, kMaxAverageDistance); - } - } - } - } - } -} - -static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { - const auto& device_ctx = g_vpr_ctx.device(); - const auto& grid = device_ctx.grid; - - for (int from_layer_num = 0; from_layer_num < grid.get_num_layers(); ++from_layer_num) { - for (int to_layer_num = 0; to_layer_num < grid.get_num_layers(); ++to_layer_num) { - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - float delta_delay = delta_delays[from_layer_num][to_layer_num][x][y]; - - if (delta_delay < 0.) 
{ - VPR_ERROR(VPR_ERROR_PLACE, - "Found invalid negative delay %g for delta [%d,%d,%d,%d]", - delta_delay, from_layer_num, to_layer_num, x, y); - } - } - } - } - } - - return true; -} - -static void generic_compute_matrix_iterative_astar(RouterDelayProfiler& route_profiler, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool /*is_flat*/) { - const auto& device_ctx = g_vpr_ctx.device(); - - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - const int delta_x = abs(sink_x - source_x); - const int delta_y = abs(sink_y - source_y); - - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); - - bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE - || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); - - bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); - - if (src_or_target_empty || !is_allowed_type) { - if (matrix[delta_x][delta_y].empty()) { - // Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - } - } else { - // Valid start/end - float delay = route_connection_delay(route_profiler, - source_x, - source_y, - from_layer_num, - sink_x, - sink_y, - to_layer_num, - router_opts, - measure_directconnect); - -#ifdef VERBOSE - VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", - delay, - delta_x, delta_y, - 
source_x, source_y, - sink_x, sink_y); -#endif - if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { - // Overwrite empty delta - matrix[delta_x][delta_y][0] = delay; - } else { - // Collect delta - matrix[delta_x][delta_y].push_back(delay); - } - } - } - } -} - -static void generic_compute_matrix_dijkstra_expansion(RouterDelayProfiler& /*route_profiler*/, - vtr::Matrix>& matrix, - int from_layer_num, - int to_layer_num, - int source_x, - int source_y, - int start_x, - int start_y, - int end_x, - int end_y, - const t_router_opts& router_opts, - bool measure_directconnect, - const std::set& allowed_types, - bool is_flat) { - const auto& device_ctx = g_vpr_ctx.device(); - - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num}); - bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); - if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - - if (matrix[delta_x][delta_y].empty()) { - //Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - } - } - } - - return; - } - - vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); - - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, from_layer_num})); - for (int driver_ptc : best_driver_ptcs) { - VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(from_layer_num, source_x, source_y, SOURCE, driver_ptc); - - 
VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); - auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); - - bool path_to_all_sinks = true; - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - - if (found_matrix[delta_x][delta_y]) { - continue; - } - - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num}); - if (sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (matrix[delta_x][delta_y].empty()) { - // Only set empty target if we don't already have a valid delta delay - matrix[delta_x][delta_y].push_back(EMPTY_DELTA); -#ifdef VERBOSE - VTR_LOG("Computed delay: %12s delta: %d,%d (src: %d,%d sink: %d,%d)\n", - "EMPTY", - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - found_matrix[delta_x][delta_y] = true; - } - } else { - bool found_a_sink = false; - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, to_layer_num})); - for (int sink_ptc : best_sink_ptcs) { - VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(to_layer_num, sink_x, sink_y, SINK, sink_ptc); - - if (sink_rr_node == RRNodeId::INVALID()) - continue; - - if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { - // Skip if we shouldn't measure direct connects and a direct connect exists - continue; - } - - if (std::isnan(delays[sink_rr_node])) { - // This sink was not found - continue; - } - -#ifdef VERBOSE - VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", - delays[size_t(sink_rr_node)], - delta_x, delta_y, - source_x, source_y, - sink_x, sink_y); -#endif - found_matrix[delta_x][delta_y] = true; - - add_delay_to_matrix(matrix, delta_x, delta_y, delays[sink_rr_node]); - - found_a_sink = true; - break; - 
} - - if (!found_a_sink) { - path_to_all_sinks = false; - } - } - } - } - - if (path_to_all_sinks) { - break; - } - } - - for (int sink_x = start_x; sink_x <= end_x; sink_x++) { - for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - int delta_x = abs(sink_x - source_x); - int delta_y = abs(sink_y - source_y); - if (!found_matrix[delta_x][delta_y]) { - add_delay_to_matrix(matrix, delta_x, delta_y, IMPOSSIBLE_DELTA); - VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - source_x, - source_y, - from_layer_num, - sink_x, - sink_y, - to_layer_num, - IMPOSSIBLE_DELTA); - } - } - } -} - -static float route_connection_delay(RouterDelayProfiler& route_profiler, - int source_x, - int source_y, - int source_layer, - int sink_x, - int sink_y, - int sink_layer, - const t_router_opts& router_opts, - bool measure_directconnect) { - //Routes between the source and sink locations and calculates the delay - - // set to known value for debug purposes - float net_delay_value = IMPOSSIBLE_DELTA; - - const auto& device_ctx = g_vpr_ctx.device(); - - bool successfully_routed = false; - - // Get the rr nodes to route between - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, source_layer})); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, sink_layer})); - - for (int driver_ptc : best_driver_ptcs) { - VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_layer, source_x, source_y, SOURCE, driver_ptc); - - VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); - - for (int sink_ptc : best_sink_ptcs) { - VTR_ASSERT(sink_ptc != OPEN); - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_layer, sink_x, sink_y, SINK, sink_ptc); - - if (sink_rr_node == RRNodeId::INVALID()) - continue; - - if (!measure_directconnect && 
directconnect_exists(source_rr_node, sink_rr_node)) { - // Skip if we shouldn't measure direct connects and a direct connect exists - continue; - } - - successfully_routed = route_profiler.calculate_delay(source_rr_node, - sink_rr_node, - router_opts, - &net_delay_value); - - if (successfully_routed) break; - } - if (successfully_routed) break; - } - - if (!successfully_routed) { - VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, source_layer, sink_x, sink_y, sink_layer, net_delay_value); - } - - return net_delay_value; -} - -static float delay_reduce(std::vector& delays, e_reducer reducer) { - if (delays.empty()) { - return IMPOSSIBLE_DELTA; - } - - if (delays.size() == 1) { - return delays[0]; - } - - VTR_ASSERT(delays.size() > 1); - - float delay; - - if (reducer == e_reducer::MIN) { - auto itr = std::min_element(delays.begin(), delays.end()); - delay = *itr; - } else if (reducer == e_reducer::MAX) { - auto itr = std::max_element(delays.begin(), delays.end()); - delay = *itr; - } else if (reducer == e_reducer::MEDIAN) { - std::stable_sort(delays.begin(), delays.end()); - delay = vtr::median(delays.begin(), delays.end()); - } else if (reducer == e_reducer::ARITHMEAN) { - delay = vtr::arithmean(delays.begin(), delays.end()); - } else if (reducer == e_reducer::GEOMEAN) { - delay = vtr::geomean(delays.begin(), delays.end()); - } else { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unrecognized delta delay reducer"); - } - - return delay; -} - -static void add_delay_to_matrix(vtr::Matrix>& matrix, - int delta_x, - int delta_y, - float delay) { - if (matrix[delta_x][delta_y].size() == 1 && matrix[delta_x][delta_y][0] == EMPTY_DELTA) { - // Overwrite empty delta - matrix[delta_x][delta_y][0] = delay; - } else { - // Collect delta - matrix[delta_x][delta_y].push_back(delay); - } -} - -static float find_neighboring_average(vtr::NdMatrix& matrix, - int from_layer, - t_physical_tile_loc 
to_tile_loc, - int max_distance) { - float sum = 0.f; - int num_samples = 0; - const int endx = matrix.end_index(2); - const int endy = matrix.end_index(3); - - const int x = to_tile_loc.x; - const int y = to_tile_loc.y; - const int to_layer = to_tile_loc.layer_num; - - for (int distance = 1; distance <= max_distance; ++distance) { - for (int delx = x - distance; delx <= x + distance; delx++) { - for (int dely = y - distance; dely <= y + distance; dely++) { - // Check distance constraint - if (abs(delx - x) + abs(dely - y) > distance) { - continue; - } - - //check out of bounds - if (delx < 0 || dely < 0 || delx >= endx || dely >= endy || (delx == x && dely == y)) { - continue; - } - - if (matrix[from_layer][to_layer][delx][dely] == EMPTY_DELTA || matrix[from_layer][to_layer][delx][dely] == IMPOSSIBLE_DELTA) { - continue; - } - - sum += matrix[from_layer][to_layer][delx][dely]; - num_samples++; - } - } - - if (num_samples != 0) { - return sum / (float)num_samples; - } - } - - return IMPOSSIBLE_DELTA; -} - -/***************************************************************************************/ - -vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - int longest_length, - bool is_flat) { - vtr::ScopedStartFinishTimer timer("Computing delta delays"); - vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, - placer_opts, - router_opts, - measure_directconnect, - longest_length, - is_flat); - - const size_t num_elements = delta_delays.size(); - - // set uninitialized elements to infinity - for (size_t i = 0; i < num_elements; i++) { - if (delta_delays.get(i) == UNINITIALIZED_DELTA) { - delta_delays.get(i) = IMPOSSIBLE_DELTA; - } - } - - fix_empty_coordinates(delta_delays); - - fill_impossible_coordinates(delta_delays); - - verify_delta_delays(delta_delays); - - return delta_delays; -} - -//Finds a src_rr and sink_rr appropriate for 
measuring the delay of the current direct specification -bool find_direct_connect_sample_locations(const t_direct_inf* direct, - t_physical_tile_type_ptr from_type, - int from_pin, - int from_pin_class, - t_physical_tile_type_ptr to_type, - int to_pin, - int to_pin_class, - RRNodeId& out_src_node, - RRNodeId& out_sink_node) { - VTR_ASSERT(from_type != nullptr); - VTR_ASSERT(to_type != nullptr); - - auto& device_ctx = g_vpr_ctx.device(); - auto& grid = device_ctx.grid; - const auto& node_lookup = device_ctx.rr_graph.node_lookup(); - - //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, - //and which has the appropriate pins - int from_x = -1; - int from_y = -1; - int from_sub_tile = -1; - int to_x = 0, to_y = 0, to_sub_tile = 0; - bool found = false; - int found_layer_num = -1; - //TODO: Function *FOR NOW* assumes that from/to blocks are at same die and have a same layer nums - for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { - for (int x = 0; x < (int)grid.width() && !found; ++x) { - to_x = x + direct->x_offset; - if (to_x < 0 || to_x >= (int)grid.width()) continue; - - for (int y = 0; y < (int)grid.height() && !found; ++y) { - if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; - - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool from_pin_found = false; - if (direct->from_side != NUM_2D_SIDES) { - RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); - from_pin_found = from_pin_rr.is_valid(); - } else { - from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); - } - if (!from_pin_found) continue; - - to_y = y + direct->y_offset; - - if (to_y < 0 || to_y >= (int)grid.height()) continue; - if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; - - //Check that the from pin exists at 
this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool to_pin_found = false; - if (direct->to_side != NUM_2D_SIDES) { - RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); - to_pin_found = (to_pin_rr != RRNodeId::INVALID()); - } else { - to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); - } - if (!to_pin_found) continue; - - for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) { - to_sub_tile = sub_tile_num + direct->sub_tile_offset; - - if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; - - found = true; - found_layer_num = layer_num; - from_x = x; - from_y = y; - from_sub_tile = sub_tile_num; - - break; - } - } - } - } - - if (!found) { - return false; - } - - //Now have a legal instance of this direct connect - VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); - VTR_ASSERT(from_sub_tile < from_type->capacity); - - VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); - VTR_ASSERT(to_sub_tile < to_type->capacity); - - VTR_ASSERT(from_x + direct->x_offset == to_x); - VTR_ASSERT(from_y + direct->y_offset == to_y); - VTR_ASSERT(from_sub_tile + direct->sub_tile_offset == to_sub_tile); - - // Find a source/sink RR node associated with the pins of the direct - { - RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); - VTR_ASSERT(src_rr_candidate); - out_src_node = src_rr_candidate; - } - - { - RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); - VTR_ASSERT(sink_rr_candidate); - out_sink_node = sink_rr_candidate; - } - - return true; -} - -std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type) { - std::vector best_classes; - - //Record any non-zero Fc pins - // - //Note that we track non-zero Fc pins, 
since certain Fc overrides - //may apply to only a subset of wire types. This ensures we record - //which pins can potentially connect to global routing. - std::unordered_set non_zero_fc_pins; - for (const t_fc_specification& fc_spec : type->fc_specs) { - if (fc_spec.fc_value == 0) continue; - - non_zero_fc_pins.insert(fc_spec.pins.begin(), fc_spec.pins.end()); - } - - // Collect all classes of matching type which connect to general routing - for (int i = 0; i < (int)type->class_inf.size(); i++) { - if (type->class_inf[i].type == pintype) { - //Check whether all pins in this class are ignored or have zero fc - bool any_pins_connect_to_general_routing = false; - for (int ipin = 0; ipin < type->class_inf[i].num_pins; ++ipin) { - int pin = type->class_inf[i].pinlist[ipin]; - //If the pin isn't ignored, and has a non-zero Fc to some general - //routing the class is suitable for delay profiling - if (!type->is_ignored_pin[pin] && non_zero_fc_pins.count(pin)) { - any_pins_connect_to_general_routing = true; - break; - } - } - - // Skip if the pin class doesn't connect to general routing - if (!any_pins_connect_to_general_routing) continue; - - // Record candidate class - best_classes.push_back(i); - } - } - - // Sort classes so the largest pin class is first - auto cmp_class = [&](int lhs, int rhs) { - return type->class_inf[lhs].num_pins > type->class_inf[rhs].num_pins; - }; - - std::stable_sort(best_classes.begin(), best_classes.end(), cmp_class); - - return best_classes; -} \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h b/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h deleted file mode 100644 index 71ac632b149..00000000000 --- a/vpr/src/place/timing/delay_model/compute_delta_delays_utils.h +++ /dev/null @@ -1,56 +0,0 @@ - -#pragma once - -#include "vtr_ndmatrix.h" -#include "physical_types.h" -#include "rr_graph_fwd.h" - -struct t_placer_opts; -struct t_router_opts; -class RouterDelayProfiler; - 
-vtr::NdMatrix compute_delta_delay_model(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - bool measure_directconnect, - int longest_length, - bool is_flat); - -bool find_direct_connect_sample_locations(const t_direct_inf* direct, - t_physical_tile_type_ptr from_type, - int from_pin, - int from_pin_class, - t_physical_tile_type_ptr to_type, - int to_pin, - int to_pin_class, - RRNodeId& out_src_node, - RRNodeId& out_sink_node); - -/** - * @brief Identifies the best pin classes for delay calculation based on pin count and connectivity. - * - * This function selects pin classes of a specified type (`pintype`) from a physical tile type (`type`) - * that are suitable for delay calculations. It prioritizes pin classes with the largest number of pins - * that connect to general routing, ensuring commonly used pins are chosen for delay profiling. - * - * @param pintype The type of pins to filter. - * @param type Pointer to the physical tile type containing pin and class information. - * - * @return A vector of indices representing the selected pin classes. The classes are sorted - * in descending order based on the number of pins they contain. - * - * @details - * - A pin class is eligible if its type matches `pintype` and it contains at least one pin - * that connects to general routing (non-zero Fc). - * - Non-zero Fc pins are determined by inspecting the tile's `fc_specs`. - * - Classes are sorted so that the class with the largest number of pins appears first. - * If multiple classes have the same pin count, their order depends on their initial appearance - * in the architecture file. - * - * @note - * - Pins explicitly marked as ignored in `type->is_ignored_pin` are excluded. - * - The function ensures stability in sorting, preserving the input order for classes - * with the same number of pins. 
- */ - -std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.cpp b/vpr/src/place/timing/delay_model/delta_delay_model.cpp deleted file mode 100644 index e8d56b09516..00000000000 --- a/vpr/src/place/timing/delay_model/delta_delay_model.cpp +++ /dev/null @@ -1,135 +0,0 @@ - -#include "delta_delay_model.h" - -#include "compute_delta_delays_utils.h" - -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "place_delay_model.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif // VTR_ENABLE_CAPNPROTO - -void DeltaDelayModel::compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { - delays_ = compute_delta_delay_model(route_profiler, - placer_opts, - router_opts, - /*measure_directconnect=*/true, - longest_length, - is_flat_); -} - -float DeltaDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, - const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - return delays_[from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -void DeltaDelayModel::dump_echo(std::string filepath) const { - FILE* f = vtr::fopen(filepath.c_str(), "w"); - fprintf(f, " "); - for (size_t from_layer_num = 0; from_layer_num < delays_.dim_size(0); ++from_layer_num) { - for (size_t to_layer_num = 0; to_layer_num < delays_.dim_size(1); ++to_layer_num) { - fprintf(f, " %9zu", from_layer_num); - fprintf(f, "\n"); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(3); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(2); ++dx) { - fprintf(f, " %9.2e", 
delays_[from_layer_num][to_layer_num][dx][dy]); - } - fprintf(f, "\n"); - } - } - } - vtr::fclose(f); -} - -void DeltaDelayModel::read(const std::string& file) { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; -#else - - // MmapFile object creates an mmap of the specified path, and will munmap - // when the object leaves scope. - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - - // FlatArrayMessageReader is used to read the message from the data array - // provided by MmapFile. - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - // When reading capnproto files the Reader object to use is named - // ::Reader. - // - // Initially this object is an empty VprDeltaDelayModel. - VprDeltaDelayModel::Reader model; - - // The reader.getRoot performs a cast from the generic capnproto to fit - // with the specified schema. - // - // Note that capnproto does not validate that the incoming data matches the - // schema. If this property is required, some form of check would be - // required. - model = reader.getRoot(); - - auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { - *out = in.getValue(); - }; - - // ToNdMatrix is a generic function for converting a Matrix capnproto - // to a vtr::NdMatrix. - // - // The user must supply the matrix dimension (2 in this case), the source - // capnproto type (VprFloatEntry), - // target C++ type (flat), and a function to convert from the source capnproto - // type to the target C++ type (ToFloat). - // - // The second argument should be of type Matrix::Reader where X is the - // capnproto element type. 
- ToNdMatrix<4, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); -#endif -} - -void DeltaDelayModel::write(const std::string& file) const { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "DeltaDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; -#else - - // MallocMessageBuilder object is the generate capnproto message builder, - // using malloc for buffer allocation. - ::capnp::MallocMessageBuilder builder; - - // initRoot returns a X::Builder object that can be used to set the - // fields in the message. - auto model = builder.initRoot(); - - auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { - out->setValue(in); - }; - - // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a - // Matrix message. It is the mirror function of ToNdMatrix described in - // read above. - auto delay_values = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delay_values, delays_, fromFloat); - - // writeMessageToFile writes message to the specified file. - writeMessageToFile(file, &builder); -#endif -} diff --git a/vpr/src/place/timing/delay_model/delta_delay_model.h b/vpr/src/place/timing/delay_model/delta_delay_model.h deleted file mode 100644 index c3ae0d83cf7..00000000000 --- a/vpr/src/place/timing/delay_model/delta_delay_model.h +++ /dev/null @@ -1,47 +0,0 @@ - -#pragma once - -#include "place_delay_model.h" - -/** - * @class DeltaDelayModel - * - * @brief A simple delay model based on the distance (delta) between block locations. 
- */ -class DeltaDelayModel : public PlaceDelayModel { - public: - DeltaDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - - DeltaDelayModel(float min_cross_layer_delay, - vtr::NdMatrix delta_delays, - bool is_flat) - : delays_(std::move(delta_delays)) - , cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - - void compute(RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - - const vtr::NdMatrix& delays() const { - return delays_; - } - - private: - vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] - float cross_layer_delay_; - - /// Indicates whether the router is a two-stage or run-flat - bool is_flat_; -}; \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/override_delay_model.cpp b/vpr/src/place/timing/delay_model/override_delay_model.cpp deleted file mode 100644 index 6cbb2c7f654..00000000000 --- a/vpr/src/place/timing/delay_model/override_delay_model.cpp +++ /dev/null @@ -1,280 +0,0 @@ - -#include "override_delay_model.h" - -#include "compute_delta_delays_utils.h" - -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "place_delay_model.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif // VTR_ENABLE_CAPNPROTO - -void OverrideDelayModel::compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) { - auto delays = compute_delta_delay_model(route_profiler, - placer_opts, - router_opts, - 
/*measure_directconnect=*/false, - longest_length, - is_flat_); - - base_delay_model_ = std::make_unique(cross_layer_delay_, delays, false); - - compute_override_delay_model_(route_profiler, router_opts); -} - -void OverrideDelayModel::compute_override_delay_model_(RouterDelayProfiler& route_profiler, - const t_router_opts& router_opts) { - const auto& device_ctx = g_vpr_ctx.device(); - t_router_opts router_opts2 = router_opts; - router_opts2.astar_fac = 0.f; - router_opts2.astar_offset = 0.f; - - // Look at all the direct connections that exist, and add overrides to delay model - for (int idirect = 0; idirect < (int)device_ctx.arch->directs.size(); ++idirect) { - const t_direct_inf* direct = &device_ctx.arch->directs[idirect]; - - InstPort from_port = parse_inst_port(direct->from_pin); - InstPort to_port = parse_inst_port(direct->to_pin); - - t_physical_tile_type_ptr from_type = find_tile_type_by_name(from_port.instance_name(), device_ctx.physical_tile_types); - t_physical_tile_type_ptr to_type = find_tile_type_by_name(to_port.instance_name(), device_ctx.physical_tile_types); - - int num_conns = from_port.port_high_index() - from_port.port_low_index() + 1; - VTR_ASSERT_MSG(num_conns == to_port.port_high_index() - to_port.port_low_index() + 1, "Directs must have the same size to/from"); - - //We now walk through all the connections associated with the current direct specification, measure - //their delay and specify that value as an override in the delay model. - // - //Note that we need to check every connection in the direct to cover the case where the pins are not - //equivalent. - // - //However, if the from/to ports are equivalent we could end up sampling the same RR SOURCE/SINK - //paths multiple times (wasting CPU time) -- we avoid this by recording the sampled paths in - //sampled_rr_pairs and skipping them if they occur multiple times. 
- int missing_instances = 0; - int missing_paths = 0; - std::set> sampled_rr_pairs; - for (int iconn = 0; iconn < num_conns; ++iconn) { - //Find the associated pins - int from_pin = from_type->find_pin(from_port.port_name(), from_port.port_low_index() + iconn); - int to_pin = to_type->find_pin(to_port.port_name(), to_port.port_low_index() + iconn); - - VTR_ASSERT(from_pin != OPEN); - VTR_ASSERT(to_pin != OPEN); - - int from_pin_class = from_type->find_pin_class(from_port.port_name(), from_port.port_low_index() + iconn, DRIVER); - VTR_ASSERT(from_pin_class != OPEN); - - int to_pin_class = to_type->find_pin_class(to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); - VTR_ASSERT(to_pin_class != OPEN); - - bool found_sample_points; - RRNodeId src_rr, sink_rr; - found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr); - - if (!found_sample_points) { - ++missing_instances; - continue; - } - - //If some of the source/sink ports are logically equivalent we may have already - //sampled the associated source/sink pair and don't need to do so again - if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; - - float direct_connect_delay = std::numeric_limits::quiet_NaN(); - bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); - - if (found_routing_path) { - set_delay_override(from_type->index, from_pin_class, to_type->index, to_pin_class, direct->x_offset, direct->y_offset, direct_connect_delay); - } else { - ++missing_paths; - } - - //Record that we've sampled this pair of source and sink nodes - sampled_rr_pairs.insert({src_rr, sink_rr}); - } - - VTR_LOGV_WARN(missing_instances > 0, "Found no delta delay for %d bits of inter-block direct connect '%s' (no instances of this direct found)\n", missing_instances, direct->name.c_str()); - VTR_LOGV_WARN(missing_paths > 0, "Found no delta delay for %d bits of 
inter-block direct connect '%s' (no routing path found)\n", missing_paths, direct->name.c_str()); - } -} - -const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { - return base_delay_model_.get(); -} - -float OverrideDelayModel::delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const { - // First check to if there is an override delay value - const auto& device_ctx = g_vpr_ctx.device(); - const auto& grid = device_ctx.grid; - - t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_loc); - t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_loc); - - t_override override_key; - override_key.from_type = from_type_ptr->index; - override_key.from_class = from_type_ptr->pin_class[from_pin]; - override_key.to_type = to_type_ptr->index; - override_key.to_class = to_type_ptr->pin_class[to_pin]; - - //Delay overrides may be different for +/- delta so do not use - //an absolute delta for the look-up - override_key.delta_x = to_loc.x - from_loc.x; - override_key.delta_y = to_loc.y - from_loc.y; - - float delay_val = std::numeric_limits::quiet_NaN(); - auto override_iter = delay_overrides_.find(override_key); - if (override_iter != delay_overrides_.end()) { - //Found an override - delay_val = override_iter->second; - } else { - //Fall back to the base delay model if no override was found - delay_val = base_delay_model_->delay(from_loc, from_pin, to_loc, to_pin); - } - - return delay_val; -} - -void OverrideDelayModel::set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay_val) { - t_override override_key; - override_key.from_type = from_type; - override_key.from_class = from_class; - override_key.to_type = to_type; - override_key.to_class = to_class; - override_key.delta_x = delta_x; - override_key.delta_y = delta_y; - - auto res = delay_overrides_.insert(std::make_pair(override_key, delay_val)); - if (!res.second) { //Key 
already exists - res.first->second = delay_val; //Overwrite existing delay - } -} - -void OverrideDelayModel::dump_echo(std::string filepath) const { - base_delay_model_->dump_echo(filepath); - - FILE* f = vtr::fopen(filepath.c_str(), "a"); - - fprintf(f, "\n"); - fprintf(f, "# Delay Overrides\n"); - auto& device_ctx = g_vpr_ctx.device(); - for (auto kv : delay_overrides_) { - auto override_key = kv.first; - float delay_val = kv.second; - fprintf(f, "from_type: %s to_type: %s from_pin_class: %d to_pin_class: %d delta_x: %d delta_y: %d -> delay: %g\n", - device_ctx.physical_tile_types[override_key.from_type].name.c_str(), - device_ctx.physical_tile_types[override_key.to_type].name.c_str(), - override_key.from_class, - override_key.to_class, - override_key.delta_x, - override_key.delta_y, - delay_val); - } - - vtr::fclose(f); -} - -float OverrideDelayModel::get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const { - t_override key; - key.from_type = from_type; - key.from_class = from_class; - key.to_type = to_type; - key.to_class = to_class; - key.delta_x = delta_x; - key.delta_y = delta_y; - - auto iter = delay_overrides_.find(key); - if (iter == delay_overrides_.end()) { - VPR_THROW(VPR_ERROR_PLACE, "Key not found."); - } - return iter->second; -} - -void OverrideDelayModel::set_base_delay_model(std::unique_ptr base_delay_model_obj) { - base_delay_model_ = std::move(base_delay_model_obj); -} - -void OverrideDelayModel::read(const std::string& file) { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); -#else - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. 
*/ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { - *out = in.getValue(); - }; - - vtr::NdMatrix delays; - auto model = reader.getRoot(); - ToNdMatrix<4, VprFloatEntry, float>(&delays, model.getDelays(), toFloat); - - base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); - - // Reading non-scalar capnproto fields is roughly equivilant to using - // a std::vector of the field type. Actual type is capnp::List::Reader. - auto overrides = model.getDelayOverrides(); - std::vector > overrides_arr(overrides.size()); - for (size_t i = 0; i < overrides.size(); ++i) { - const auto& elem = overrides[i]; - overrides_arr[i].first.from_type = elem.getFromType(); - overrides_arr[i].first.to_type = elem.getToType(); - overrides_arr[i].first.from_class = elem.getFromClass(); - overrides_arr[i].first.to_class = elem.getToClass(); - overrides_arr[i].first.delta_x = elem.getDeltaX(); - overrides_arr[i].first.delta_y = elem.getDeltaY(); - - overrides_arr[i].second = elem.getDelay(); - } - - delay_overrides_ = vtr::make_flat_map2(std::move(overrides_arr)); -#endif -} - -void OverrideDelayModel::write(const std::string& file) const { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); -#else - ::capnp::MallocMessageBuilder builder; - auto model = builder.initRoot(); - - auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { - out->setValue(in); - }; - - auto delays = model.getDelays(); - FromNdMatrix<4, VprFloatEntry, float>(&delays, base_delay_model_->delays(), fromFloat); - - // Non-scalar capnproto fields should be first initialized with - // init(count), and then accessed from the returned - // std::vector-like Builder object (specifically capnp::List::Builder). - auto overrides = model.initDelayOverrides(delay_overrides_.size()); - auto dst_iter = overrides.begin(); - for (const auto& src : delay_overrides_) { - auto elem = *dst_iter++; - elem.setFromType(src.first.from_type); - elem.setToType(src.first.to_type); - elem.setFromClass(src.first.from_class); - elem.setToClass(src.first.to_class); - elem.setDeltaX(src.first.delta_x); - elem.setDeltaY(src.first.delta_y); - - elem.setDelay(src.second); - } - - writeMessageToFile(file, &builder); -#endif -} - diff --git a/vpr/src/place/timing/delay_model/override_delay_model.h b/vpr/src/place/timing/delay_model/override_delay_model.h deleted file mode 100644 index 5965261c272..00000000000 --- a/vpr/src/place/timing/delay_model/override_delay_model.h +++ /dev/null @@ -1,112 +0,0 @@ - -#pragma once - -#include "place_delay_model.h" -#include "delta_delay_model.h" - -class OverrideDelayModel : public PlaceDelayModel { - public: - OverrideDelayModel(float min_cross_layer_delay, - bool is_flat) - : cross_layer_delay_(min_cross_layer_delay) - , is_flat_(is_flat) {} - - void compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - - /** - * @brief returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the - * specified from and to pins - */ - float delay(const 
t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const override; - - void dump_echo(std::string filepath) const override; - - void read(const std::string& file) override; - void write(const std::string& file) const override; - - public: //Mutators - void set_base_delay_model(std::unique_ptr base_delay_model); - const DeltaDelayModel* base_delay_model() const; - float get_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y) const; - void set_delay_override(int from_type, int from_class, int to_type, int to_class, int delta_x, int delta_y, float delay); - - private: - std::unique_ptr base_delay_model_; - /// Minimum delay of cross-layer connections - float cross_layer_delay_; - - /// Indicates whether the router is a two-stage or run-flat - bool is_flat_; - - void compute_override_delay_model_(RouterDelayProfiler& router, - const t_router_opts& router_opts); - - /** - * @brief Structure that allows delays to be queried from the delay model. - * - * Delay is calculated given the origin physical tile, the origin - * pin, the destination physical tile, and the destination pin. - * This structure encapsulates all these information. - * - * @param from_type, to_type - * Physical tile index (for easy array access) - * @param from_class, to_class - * The class that the pins belongs to. - * @param to_x, to_y - * The horizontal and vertical displacement - * between two physical tiles. - */ - struct t_override { - short from_type; - short to_type; - short from_class; - short to_class; - short delta_x; - short delta_y; - - /** - * @brief Comparison operator designed for performance. - * - * Operator< is important since t_override serves as the key into the - * map structure delay_overrides_. A default comparison operator would - * not be inlined by the compiler. 
- * - * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare - * is required for operator< to be inlined by compiler. Proper inlining of - * the function reduces place time by around 5%. - * - * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225 - */ - friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) { - const short* left = reinterpret_cast(&lhs); - const short* right = reinterpret_cast(&rhs); - constexpr size_t NUM_T_OVERRIDE_MEMBERS = sizeof(t_override) / sizeof(short); - return std::lexicographical_compare(left, left + NUM_T_OVERRIDE_MEMBERS, right, right + NUM_T_OVERRIDE_MEMBERS); - } - }; - - /** - * @brief Map data structure that returns delay values according to - * specific delay model queries. - * - * Delay model queries are provided by the t_override structure, which - * encapsulates the information regarding the origin and the destination. - */ - vtr::flat_map2 delay_overrides_; - - /** - * operator< treats memory layout of t_override as an array of short. - * This requires all members of t_override are shorts and there is no - * padding between members of t_override. 
- */ - static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)"); - static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::from_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::to_class) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_x) == sizeof(short), "Expect all t_override data members to be shorts"); - static_assert(sizeof(t_override::delta_y) == sizeof(short), "Expect all t_override data members to be shorts"); -}; \ No newline at end of file diff --git a/vpr/src/place/timing/delay_model/place_delay_model.cpp b/vpr/src/place/timing/delay_model/place_delay_model.cpp deleted file mode 100644 index 04267e0e5f1..00000000000 --- a/vpr/src/place/timing/delay_model/place_delay_model.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/** - * @file place_delay_model.cpp - * @brief This file implements all the class methods and individual - * routines related to the placer delay model. - */ - -#include "place_delay_model.h" - -#include "globals.h" -#include "router_lookahead_map.h" -#include "placer_state.h" -#include "vpr_error.h" - -/** - * @brief Returns the delay of one point to point connection. - * - * Only estimate delay for signals routed through the inter-block routing network. - * TODO: Do how should we compute the delay for globals. "Global signals are assumed to have zero delay." 
- */ -float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, - const vtr::vector_map& block_locs, - ClusterNetId net_id, - int ipin) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - - float delay_source_to_sink = 0.; - - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); - ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net_id, ipin); - - ClusterBlockId source_block = cluster_ctx.clb_nlist.pin_block(source_pin); - ClusterBlockId sink_block = cluster_ctx.clb_nlist.pin_block(sink_pin); - - int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin); - int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin); - - t_pl_loc source_block_loc = block_locs[source_block].loc; - t_pl_loc sink_block_loc = block_locs[sink_block].loc; - - /** - * This heuristic only considers delta_x and delta_y, a much better - * heuristic would be to to create a more comprehensive lookup table. - * - * In particular this approach does not accurately capture the effect - * of fast carry-chain connections. 
- */ - delay_source_to_sink = delay_model->delay({source_block_loc.x, source_block_loc.y, source_block_loc.layer}, source_block_ipin, - {sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer}, sink_block_ipin); - if (delay_source_to_sink < 0) { - VPR_ERROR(VPR_ERROR_PLACE, - "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d,%d) to %s (at %d,%d,%d)\n" - "in comp_td_single_connection_delay: Delay is less than 0\n", - block_type_pin_index_to_name(physical_tile_type(source_block_loc), source_block_ipin, false).c_str(), - source_block_loc.x, source_block_loc.y, source_block_loc.layer, - block_type_pin_index_to_name(physical_tile_type(sink_block_loc), sink_block_ipin, false).c_str(), - sink_block_loc.x, sink_block_loc.y, sink_block_loc.layer, - delay_source_to_sink); - } - } - - return (delay_source_to_sink); -} - -///@brief Recompute all point to point delays, updating `connection_delay` matrix. -void comp_td_connection_delays(const PlaceDelayModel* delay_model, - PlacerState& placer_state) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& p_timing_ctx = placer_state.mutable_timing(); - auto& block_locs = placer_state.block_locs(); - auto& connection_delay = p_timing_ctx.connection_delay; - - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) { - connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, block_locs, net_id, ipin); - } - } -} diff --git a/vpr/src/place/timing/delay_model/place_delay_model.h b/vpr/src/place/timing/delay_model/place_delay_model.h deleted file mode 100644 index 27c89591071..00000000000 --- a/vpr/src/place/timing/delay_model/place_delay_model.h +++ /dev/null @@ -1,80 +0,0 @@ -/** - * @file place_delay_model.h - * @brief This file contains all the class and function declarations related to - * the placer delay model. For implementations, see place_delay_model.cpp. 
- */ - -#pragma once - -#include "vtr_ndmatrix.h" -#include "vtr_flat_map.h" -#include "vpr_types.h" -#include "router_delay_profiling.h" - -#ifndef __has_attribute -# define __has_attribute(x) 0 // Compatibility with non-clang compilers. -#endif - -#if defined(COMPILER_GCC) && defined(NDEBUG) -# define ALWAYS_INLINE inline __attribute__((__always_inline__)) -#elif defined(COMPILER_MSVC) && defined(NDEBUG) -# define ALWAYS_INLINE __forceinline -#elif __has_attribute(always_inline) -# define ALWAYS_INLINE __attribute__((always_inline)) // clang -#else -# define ALWAYS_INLINE inline -#endif - -///@brief Forward declarations. -class PlaceDelayModel; -class PlacerState; - -///@brief Returns the delay of one point to point connection. -float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, - const vtr::vector_map& block_locs, - ClusterNetId net_id, - int ipin); - -///@brief Recompute all point to point delays, updating `connection_delay` matrix. -void comp_td_connection_delays(const PlaceDelayModel* delay_model, - PlacerState& placer_state); - -///@brief Abstract interface to a placement delay model. -class PlaceDelayModel { - public: - virtual ~PlaceDelayModel() = default; - - ///@brief Computes place delay model. - virtual void compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) - = 0; - - /** - * @brief Returns the delay estimate between the specified block pins. - * - * Either compute or read methods must be invoked before invoking delay. - */ - virtual float delay(const t_physical_tile_loc& from_loc, int from_pin, const t_physical_tile_loc& to_loc, int to_pin) const = 0; - - ///@brief Dumps the delay model to an echo file. - virtual void dump_echo(std::string filename) const = 0; - - /** - * @brief Write place delay model to specified file. - * - * May be unimplemented, in which case method should throw an exception. 
- */ - virtual void write(const std::string& file) const = 0; - - /** - * @brief Read place delay model from specified file. - * - * May be unimplemented, in which case method should throw an exception. - */ - virtual void read(const std::string& file) = 0; -}; - - - diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.cpp b/vpr/src/place/timing/delay_model/simple_delay_model.cpp deleted file mode 100644 index dac18890366..00000000000 --- a/vpr/src/place/timing/delay_model/simple_delay_model.cpp +++ /dev/null @@ -1,130 +0,0 @@ - -#include "simple_delay_model.h" - -#ifdef VTR_ENABLE_CAPNPROTO -# include "capnp/serialize.h" -# include "place_delay_model.capnp.h" -# include "ndmatrix_serdes.h" -# include "mmap_file.h" -# include "serdes_utils.h" -#endif // VTR_ENABLE_CAPNPROTO - -void SimpleDelayModel::compute(RouterDelayProfiler& route_profiler, - const t_placer_opts& /*placer_opts*/, - const t_router_opts& /*router_opts*/, - int /*longest_length*/) { - const auto& grid = g_vpr_ctx.device().grid; - const size_t num_physical_tile_types = g_vpr_ctx.device().physical_tile_types.size(); - const size_t num_layers = grid.get_num_layers(); - - // Initializing the delay matrix to [num_physical_types][num_layers][num_layers][width][height] - // The second index related to the layer that the source location is on and the third index is for the sink layer - delays_ = vtr::NdMatrix({num_physical_tile_types, - num_layers, - num_layers, - grid.width(), - grid.height()}); - - for (size_t physical_tile_type_idx = 0; physical_tile_type_idx < num_physical_tile_types; ++physical_tile_type_idx) { - for (size_t from_layer = 0; from_layer < num_layers; ++from_layer) { - for (size_t to_layer = 0; to_layer < num_layers; ++to_layer) { - for (size_t dx = 0; dx < grid.width(); ++dx) { - for (size_t dy = 0; dy < grid.height(); ++dy) { - float min_delay = route_profiler.get_min_delay(physical_tile_type_idx, - from_layer, - to_layer, - dx, - dy); - 
delays_[physical_tile_type_idx][from_layer][to_layer][dx][dy] = min_delay; - } - } - } - } - } -} - -float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const { - int delta_x = std::abs(from_loc.x - to_loc.x); - int delta_y = std::abs(from_loc.y - to_loc.y); - - int from_tile_idx = g_vpr_ctx.device().grid.get_physical_type(from_loc)->index; - return delays_[from_tile_idx][from_loc.layer_num][to_loc.layer_num][delta_x][delta_y]; -} - -void SimpleDelayModel::read(const std::string& file) { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); -#else - // MmapFile object creates an mmap of the specified path, and will munmap - // when the object leaves scope. - MmapFile f(file); - - /* Increase reader limit to 1G words to allow for large files. */ - ::capnp::ReaderOptions opts = default_large_capnp_opts(); - - // FlatArrayMessageReader is used to read the message from the data array - // provided by MmapFile. - ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - - // When reading capnproto files the Reader object to use is named - // ::Reader. - // - // Initially this object is an empty VprDeltaDelayModel. - VprDeltaDelayModel::Reader model; - - // The reader.getRoot performs a cast from the generic capnproto to fit - // with the specified schema. - // - // Note that capnproto does not validate that the incoming data matches the - // schema. If this property is required, some form of check would be - // required. - model = reader.getRoot(); - - auto toFloat = [](float* out, const VprFloatEntry::Reader& in) -> void { - *out = in.getValue(); - }; - - // ToNdMatrix is a generic function for converting a Matrix capnproto - // to a vtr::NdMatrix. 
- // - // The user must supply the matrix dimension (5 in this case), the source - // capnproto type (VprFloatEntry), - // target C++ type (flat), and a function to convert from the source capnproto - // type to the target C++ type (ToFloat). - // - // The second argument should be of type Matrix::Reader where X is the - // capnproto element type. - ToNdMatrix<5, VprFloatEntry, float>(&delays_, model.getDelays(), toFloat); -#endif -} - -void SimpleDelayModel::write(const std::string& file) const { -#ifndef VTR_ENABLE_CAPNPROTO - VPR_THROW(VPR_ERROR_PLACE, - "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); -#else - // MallocMessageBuilder object generates capnproto message builder, - // using malloc for buffer allocation. - ::capnp::MallocMessageBuilder builder; - - // initRoot returns a X::Builder object that can be used to set the - // fields in the message. - auto model = builder.initRoot(); - - auto fromFloat = [](VprFloatEntry::Builder* out, const float& in) -> void { - out->setValue(in); - }; - - // FromNdMatrix is a generic function for converting a vtr::NdMatrix to a - // Matrix message. It is the mirror function of ToNdMatrix described in - // read above. - auto delay_values = model.getDelays(); - FromNdMatrix<5, VprFloatEntry, float>(&delay_values, delays_, fromFloat); - - // writeMessageToFile writes message to the specified file. 
- writeMessageToFile(file, &builder); -#endif -} diff --git a/vpr/src/place/timing/delay_model/simple_delay_model.h b/vpr/src/place/timing/delay_model/simple_delay_model.h deleted file mode 100644 index 25dce08c4fc..00000000000 --- a/vpr/src/place/timing/delay_model/simple_delay_model.h +++ /dev/null @@ -1,39 +0,0 @@ - -#pragma once - -#include "place_delay_model.h" - -/** - * @class SimpleDelayModel - * @brief A simple delay model based on the information stored in router lookahead - * This is in contrast to other placement delay models that get the cost of getting from one location to another by running the router - */ -class SimpleDelayModel : public PlaceDelayModel { - public: - SimpleDelayModel() {} - - /// @brief Use the information in the router lookahead to fill the delay matrix instead of running the router - void compute(RouterDelayProfiler& router, - const t_placer_opts& placer_opts, - const t_router_opts& router_opts, - int longest_length) override; - - float delay(const t_physical_tile_loc& from_loc, int /*from_pin*/, const t_physical_tile_loc& to_loc, int /*to_pin*/) const override; - - void dump_echo(std::string /*filepath*/) const override {} - - void read(const std::string& /*file*/) override; - void write(const std::string& /*file*/) const override; - - private: - /** - * @brief The matrix to store the minimum delay between different points on different layers. - * - *The matrix used to store delay information is a 5D matrix. This data structure stores the minimum delay for each tile type on each layer to other layers - *for each dx and dy. We decided to separate the delay for each physical type on each die to accommodate cases where the connectivity of a physical type differs - *on each layer. Additionally, instead of using d_layer, we distinguish between the destination layer to handle scenarios where connectivity between layers - *is not uniform. 
For example, if the number of inter-layer connections between layer 1 and 2 differs from the number of connections between layer 0 and 1. - *One might argue that this variability could also occur for dx and dy. However, we are operating under the assumption that the FPGA fabric architecture is regular. - */ - vtr::NdMatrix delays_; // [0..num_physical_type-1][0..num_layers-1][0..num_layers-1][0..max_dx][0..max_dy] -}; \ No newline at end of file From 40274a1feaa2e6adb1450b5cc4aa90b4b7cba42e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 20 Jan 2025 09:54:50 -0500 Subject: [PATCH 34/39] fix missing terminating " character error --- vpr/src/place/delay_model/override_delay_model.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/delay_model/override_delay_model.cpp b/vpr/src/place/delay_model/override_delay_model.cpp index 61acd2937b5..3d98fc56f3e 100644 --- a/vpr/src/place/delay_model/override_delay_model.cpp +++ b/vpr/src/place/delay_model/override_delay_model.cpp @@ -221,7 +221,7 @@ void OverrideDelayModel::read(const std::string& file) { base_delay_model_ = std::make_unique(cross_layer_delay_, delays, is_flat_); - // Reading non-scalar capnproto fields is roughly equivilant to using + // Reading non-scalar capnproto fields is roughly equivalent to using // a std::vector of the field type. Actual type is capnp::List::Reader. auto overrides = model.getDelayOverrides(); std::vector > overrides_arr(overrides.size()); @@ -245,7 +245,7 @@ void OverrideDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else ::capnp::MallocMessageBuilder builder; auto model = builder.initRoot(); From e383c988e1d5f7a5bb652a46ff76e397aa660268 Mon Sep 17 00:00:00 2001 From: soheil Date: Mon, 20 Jan 2025 11:44:13 -0500 Subject: [PATCH 35/39] fix two other missing terminating " character error when capnproto is disabled --- vpr/src/place/delay_model/override_delay_model.cpp | 4 ++-- vpr/src/place/delay_model/simple_delay_model.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/delay_model/override_delay_model.cpp b/vpr/src/place/delay_model/override_delay_model.cpp index 3d98fc56f3e..c7f8ac10e81 100644 --- a/vpr/src/place/delay_model/override_delay_model.cpp +++ b/vpr/src/place/delay_model/override_delay_model.cpp @@ -202,8 +202,8 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr b void OverrideDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, - "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); + "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else MmapFile f(file); diff --git a/vpr/src/place/delay_model/simple_delay_model.cpp b/vpr/src/place/delay_model/simple_delay_model.cpp index 1fcd86eca64..dac18890366 100644 --- a/vpr/src/place/delay_model/simple_delay_model.cpp +++ b/vpr/src/place/delay_model/simple_delay_model.cpp @@ -55,7 +55,7 @@ void SimpleDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MmapFile object creates an mmap of the specified path, and will munmap // when the object leaves scope. @@ -104,7 +104,7 @@ void SimpleDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable.\"); + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MallocMessageBuilder object generates capnproto message builder, // using malloc for buffer allocation. From 433dc96bcd00e418d972705a9a458463c36addbc Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 20 Jan 2025 13:41:16 -0500 Subject: [PATCH 36/39] =?UTF-8?q?fix=20invalid=20use=20of=20incomplete=20t?= =?UTF-8?q?ype=20=E2=80=98const=20class=20PlacerCriticalities=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vpr/src/place/delay_model/delta_delay_model.cpp | 2 +- vpr/src/place/timing/PlacerCriticalities.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/delay_model/delta_delay_model.cpp b/vpr/src/place/delay_model/delta_delay_model.cpp index e8d56b09516..f88e4a45003 100644 --- a/vpr/src/place/delay_model/delta_delay_model.cpp +++ b/vpr/src/place/delay_model/delta_delay_model.cpp @@ -58,7 +58,7 @@ void DeltaDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MmapFile object creates an mmap of the specified path, and will munmap diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index b03bda4eb87..b08499d6ac4 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -6,6 +6,7 @@ #include "clustered_netlist_utils.h" #include "place_delay_model.h" #include "vpr_net_pins_matrix.h" +#include "PlacerCriticalities.h" /** * @brief Saves the placement criticality parameters From 928ac041dd1eca9df1dd1d69c1afc20a700b74ac Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 20 Jan 2025 15:26:42 -0500 Subject: [PATCH 37/39] void cast file argument to fix unused parameter warning --- vpr/src/place/delay_model/delta_delay_model.cpp | 4 +++- vpr/src/place/delay_model/override_delay_model.cpp | 2 ++ vpr/src/place/delay_model/simple_delay_model.cpp | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/delay_model/delta_delay_model.cpp b/vpr/src/place/delay_model/delta_delay_model.cpp index f88e4a45003..b58dda8a453 100644 --- a/vpr/src/place/delay_model/delta_delay_model.cpp +++ b/vpr/src/place/delay_model/delta_delay_model.cpp @@ -56,6 +56,7 @@ void DeltaDelayModel::dump_echo(std::string filepath) const { void DeltaDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); @@ -106,9 +107,10 @@ void DeltaDelayModel::read(const std::string& file) { void DeltaDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "DeltaDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" - "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."; + "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); #else // MallocMessageBuilder object is the generate capnproto message builder, diff --git a/vpr/src/place/delay_model/override_delay_model.cpp b/vpr/src/place/delay_model/override_delay_model.cpp index c7f8ac10e81..83141fb4bad 100644 --- a/vpr/src/place/delay_model/override_delay_model.cpp +++ b/vpr/src/place/delay_model/override_delay_model.cpp @@ -201,6 +201,7 @@ void OverrideDelayModel::set_base_delay_model(std::unique_ptr b void OverrideDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. " "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); @@ -243,6 +244,7 @@ void OverrideDelayModel::read(const std::string& file) { void OverrideDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "OverrideDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); diff --git a/vpr/src/place/delay_model/simple_delay_model.cpp b/vpr/src/place/delay_model/simple_delay_model.cpp index dac18890366..72a0d017d1e 100644 --- a/vpr/src/place/delay_model/simple_delay_model.cpp +++ b/vpr/src/place/delay_model/simple_delay_model.cpp @@ -53,6 +53,7 @@ float SimpleDelayModel::delay(const t_physical_tile_loc& from_loc, int /*from_pi void SimpleDelayModel::read(const std::string& file) { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::read is disabled because VTR_ENABLE_CAPNPROTO=OFF. 
" "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); @@ -102,6 +103,7 @@ void SimpleDelayModel::read(const std::string& file) { void SimpleDelayModel::write(const std::string& file) const { #ifndef VTR_ENABLE_CAPNPROTO + (void)file; VPR_THROW(VPR_ERROR_PLACE, "SimpleDelayModel::write is disabled because VTR_ENABLE_CAPNPROTO=OFF. " "Re-compile with CMake option VTR_ENABLE_CAPNPROTO=ON to enable."); From cd7474ff11869290aa27912ba44f1ecd0d7d13f5 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 20 Jan 2025 16:41:02 -0500 Subject: [PATCH 38/39] =?UTF-8?q?fix=20error:=20invalid=20use=20of=20incom?= =?UTF-8?q?plete=20type=20=E2=80=98const=20class=20PlacerCriticalities?= =?UTF-8?q?=E2=80=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vpr/src/place/move_utils.cpp | 1 + vpr/src/place/timing/PlacerCriticalities.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 6e79bdaac4d..893e19243d8 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -10,6 +10,7 @@ #include "place_constraints.h" #include "placer_state.h" +#include "PlacerCriticalities.h" //f_placer_breakpoint_reached is used to stop the placer when a breakpoint is reached. // When this flag is true, it stops the placer after the current perturbation. Thus, when a breakpoint is reached, this flag is set to true. 
diff --git a/vpr/src/place/timing/PlacerCriticalities.h b/vpr/src/place/timing/PlacerCriticalities.h index b08499d6ac4..b03bda4eb87 100644 --- a/vpr/src/place/timing/PlacerCriticalities.h +++ b/vpr/src/place/timing/PlacerCriticalities.h @@ -6,7 +6,6 @@ #include "clustered_netlist_utils.h" #include "place_delay_model.h" #include "vpr_net_pins_matrix.h" -#include "PlacerCriticalities.h" /** * @brief Saves the placement criticality parameters From 56568302c145d3dbdf583390621e0e3d579078ee Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 21 Jan 2025 14:24:35 -0500 Subject: [PATCH 39/39] =?UTF-8?q?fix=20error:=20=E2=80=98ConnectionRouter?= =?UTF-8?q?=E2=80=99=20was=20not=20declared=20in=20this=20scope?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- vpr/test/test_connection_router.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 2b584daedc3..5319cc05818 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -8,6 +8,8 @@ #include "globals.h" #include "net_delay.h" #include "place_and_route.h" +#include "connection_router.h" +#include "router_delay_profiling.h" static constexpr const char kArchFile[] = "../../vtr_flow/arch/timing/k6_frac_N10_mem32K_40nm.xml"; static constexpr int kMaxHops = 10;