@@ -151,79 +151,48 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
-    const double place_cost_exp = static_cast<double>(placer_opts_.place_cost_exp);
-    auto& device_ctx = g_vpr_ctx.device();
-
-    const int grid_height = device_ctx.grid.height();
-    const int grid_width = device_ctx.grid.width();
-
-    /* Access arrays below as chan?_place_cost_fac_(subhigh, sublow). Since subhigh must be greater than or
-     * equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway
-     * for simplicity, so we can use the vtr utility matrix functions. */
-    chanx_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_height}, {-1, grid_height}}});
-    chany_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_width}, {-1, grid_width}}});
-
-    // First compute the number of tracks between channel high and channel low, inclusive.
-    chanx_place_cost_fac_[-1][-1] = 0;
-
-    for (int high = 0; high < grid_height; high++) {
-        chanx_place_cost_fac_[high][high] = (float)device_ctx.chan_width.x_list[high];
-        for (int low = -1; low < high; low++) {
-            chanx_place_cost_fac_[high][low] = chanx_place_cost_fac_[high - 1][low] + (float)device_ctx.chan_width.x_list[high];
-        }
-    }
-
-    /* Now compute the inverse of the average number of tracks per channel *
-     * between high and low. The cost function divides by the average      *
-     * number of tracks per channel, so by storing the inverse I convert   *
-     * this to a faster multiplication. Take this final number to the      *
-     * place_cost_exp power -- numbers other than one mean this is no      *
-     * longer a simple "average number of tracks"; it is some power of     *
-     * that, allowing greater penalization of narrow channels. */
-    for (int high = -1; high < grid_height; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chanx_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low);
-                chanx_place_cost_fac_[high][low] = 1.0f;
-            }
+    const auto& device_ctx = g_vpr_ctx.device();
 
-            chanx_place_cost_fac_[high][low] = (high - low + 1.) / chanx_place_cost_fac_[high][low];
-            chanx_place_cost_fac_[high][low] = pow((double)chanx_place_cost_fac_[high][low], place_cost_exp);
-        }
-    }
+    const int grid_height = (int)device_ctx.grid.height();
+    const int grid_width = (int)device_ctx.grid.width();
+
+    /* These arrays store the accumulated channel width between channel zero and
+     * the channel at the given index. The accumulated width is inclusive, meaning
+     * that it counts both channel zero and channel `idx`.
+     * To compute the total channel width between channels 'low' and 'high', use the
+     * following formula:
+     *      acc_chan?_width_[high] - acc_chan?_width_[low - 1]
+     * This gives the total number of tracks in channels 'low' through 'high',
+     * including the tracks in both of those channels.
+     *
+     * Channel -1 doesn't exist, so we treat it as having zero tracks. The arrays
+     * accept index -1 so that the formula above still works when the lower channel is 0.
+     */
+    acc_chanx_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_height}}});
+    acc_chany_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_width}}});
 
-    /* Now do the same thing for the y-directed channels. First get the
-     * number of tracks between channel high and channel low, inclusive. */
-    chany_place_cost_fac_[-1][-1] = 0;
+    // initialize the first element (index -1) with zero
+    acc_chanx_width_[-1] = 0;
+    for (int y = 0; y < grid_height; y++) {
+        acc_chanx_width_[y] = acc_chanx_width_[y - 1] + device_ctx.chan_width.x_list[y];
 
-    for (int high = 0; high < grid_width; high++) {
-        chany_place_cost_fac_[high][high] = device_ctx.chan_width.y_list[high];
-        for (int low = -1; low < high; low++) {
-            chany_place_cost_fac_[high][low] = chany_place_cost_fac_[high - 1][low] + device_ctx.chan_width.y_list[high];
+        /* If the number of tracks in a channel is zero, two consecutive elements take the same
+         * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
+         * potential issue, we assume that the channel width is at least 1.
+         */
+        if (acc_chanx_width_[y] == acc_chanx_width_[y - 1]) {
+            acc_chanx_width_[y]++;
         }
     }
 
-    /* Now compute the inverse of the average number of tracks per channel
-     * between high and low. Take to specified power. */
-    for (int high = -1; high < grid_width; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chany_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low);
-                chany_place_cost_fac_[high][low] = 1.0f;
-            }
+    // initialize the first element (index -1) with zero
+    acc_chany_width_[-1] = 0;
+    for (int x = 0; x < grid_width; x++) {
+        acc_chany_width_[x] = acc_chany_width_[x - 1] + device_ctx.chan_width.y_list[x];
 
-            chany_place_cost_fac_[high][low] = (high - low + 1.) / chany_place_cost_fac_[high][low];
-            chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], place_cost_exp);
+        // to avoid a division by zero
+        if (acc_chany_width_[x] == acc_chany_width_[x - 1]) {
+            acc_chany_width_[x]++;
         }
     }
 
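
The per-bounding-box factor computation that used to be precomputed in this function is now expected to live in get_chanxy_cost_fac_(), which is not shown in this diff. Below is a minimal sketch of what such a helper could look like, assuming it still takes the inverse of the average track count over the channels spanned by the bounding box and raises it to place_cost_exp, as the removed code did; the signature and all details are assumptions, not the commit's actual implementation.

```cpp
#include <cmath>    // std::pow
#include <utility>  // std::pair

// Hypothetical sketch, not code from this commit.
std::pair<double, double> NetCostHandler::get_chanxy_cost_fac_(const t_bb& bb) {
    const double place_cost_exp = static_cast<double>(placer_opts_.place_cost_exp);

    // Total number of tracks in the x-directed channels spanning rows [ymin, ymax]
    // and in the y-directed channels spanning columns [xmin, xmax]. Index -1 holds 0,
    // and the loading code above guarantees these differences are never zero.
    const int chanx_tracks = acc_chanx_width_[bb.ymax] - acc_chanx_width_[bb.ymin - 1];
    const int chany_tracks = acc_chany_width_[bb.xmax] - acc_chany_width_[bb.xmin - 1];

    // Inverse of the average number of tracks per channel, raised to place_cost_exp
    // so that narrow channels can be penalized more strongly (exponent > 1).
    double chanx_fac = (bb.ymax - bb.ymin + 1.0) / chanx_tracks;
    double chany_fac = (bb.xmax - bb.xmin + 1.0) / chany_tracks;

    chanx_fac = std::pow(chanx_fac, place_cost_exp);
    chany_fac = std::pow(chany_fac, place_cost_exp);

    return {chanx_fac, chany_fac};
}
```

This matches how the factors are consumed later in the diff, e.g. `const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb);`.
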
@@ -239,33 +208,32 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     const size_t grid_height = device_ctx.grid.height();
     const size_t grid_width = device_ctx.grid.width();
 
-
-    acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0.);
+    acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0);
 
     vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
 
     /*
-     * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
-     * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
-     * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
-     * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
-     * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
+     * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
+     * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
+     * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
+     * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
+     * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
     * if someday we have architectures with widely varying connectivity between different layers in a stack.
-     */
+     */
 
     /*
-     * To calculate the accumulative number of inter-die connections we first need to get the number of
-     * inter-die connection per location. To be able to work for the cases that RR Graph is read instead
-     * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once
-     * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
-     * we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
-     * inter-die connections by adding up the number of inter-die block at that location + the accumulation
-     * for the block below and left to it. Then, since the accumulated number of inter-die connection to
-     * the block on the lower left connection of the block is added twice, that part needs to be removed.
-     */
-    for (const auto& src_rr_node : rr_graph.nodes()) {
-        for (const auto& rr_edge_idx : rr_graph.edges(src_rr_node)) {
-            const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
+     * To calculate the accumulated number of inter-die connections, we first need the number of
+     * inter-die connections per location. So that this also works when the RR graph is read from a
+     * file instead of being built from the architecture description, we obtain these counts by
+     * iterating over the RR graph. Once tile_num_inter_die_conn is populated, we can fill in
+     * acc_tile_num_inter_die_conn_. First, we populate the first row and column. For every other block,
+     * the accumulated count is the number of inter-die connections at that location plus the
+     * accumulated counts of the blocks below it and to its left. Since the accumulated count of the
+     * block to the lower-left is included in both of those terms, it is subtracted once to avoid double counting.
+     */
+    for (const RRNodeId src_rr_node : rr_graph.nodes()) {
+        for (const t_edge_size rr_edge_idx : rr_graph.edges(src_rr_node)) {
+            const RRNodeId sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
             if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
                 // We assume that the nodes driving the inter-layer connection or being driven by it
                 // are not stretched across multiple tiles
@@ -290,20 +258,20 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     // Initialize the first row and column
     for (size_t x = 1; x < device_ctx.grid.width(); x++) {
         acc_tile_num_inter_die_conn_[x][0] = acc_tile_num_inter_die_conn_[x-1][0] +
-                                              tile_num_inter_die_conn[x][0];
+                                             tile_num_inter_die_conn[x][0];
     }
 
     for (size_t y = 1; y < device_ctx.grid.height(); y++) {
         acc_tile_num_inter_die_conn_[0][y] = acc_tile_num_inter_die_conn_[0][y-1] +
-                                              tile_num_inter_die_conn[0][y];
+                                             tile_num_inter_die_conn[0][y];
     }
 
     for (size_t x_high = 1; x_high < device_ctx.grid.width(); x_high++) {
         for (size_t y_high = 1; y_high < device_ctx.grid.height(); y_high++) {
             acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1][y_high] +
-                                                             acc_tile_num_inter_die_conn_[x_high][y_high-1] +
-                                                             tile_num_inter_die_conn[x_high][y_high] -
-                                                             acc_tile_num_inter_die_conn_[x_high-1][y_high-1];
+                                                            acc_tile_num_inter_die_conn_[x_high][y_high-1] +
+                                                            tile_num_inter_die_conn[x_high][y_high] -
+                                                            acc_tile_num_inter_die_conn_[x_high-1][y_high-1];
         }
     }
 }
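
With acc_tile_num_inter_die_conn_ filled in this way, the number of inter-die connections inside any bounding box can be recovered with a constant-time inclusion-exclusion query, which is presumably what get_chanz_cost_factor_() (further down in this diff) relies on. The helper below is a hypothetical illustration of that query, not code from this commit.

```cpp
// Hypothetical helper illustrating the 2-D prefix-sum (summed-area table) query.
// 'acc' is assumed to be acc_tile_num_inter_die_conn_ as built above.
static int inter_die_conn_in_bb(const vtr::NdMatrix<int, 2>& acc, const t_bb& bb) {
    int total = acc[bb.xmax][bb.ymax];
    if (bb.xmin > 0) {
        total -= acc[bb.xmin - 1][bb.ymax];          // remove the columns left of the box
    }
    if (bb.ymin > 0) {
        total -= acc[bb.xmax][bb.ymin - 1];          // remove the rows below the box
    }
    if (bb.xmin > 0 && bb.ymin > 0) {
        total += acc[bb.xmin - 1][bb.ymin - 1];      // this corner was subtracted twice
    }
    return total;
}
```

The guards mirror the `bb.xmin == 0 && bb.ymin == 0` special case visible in get_chanz_cost_factor_() below.
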
@@ -1421,7 +1389,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 
     const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
 
-    double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
+    const double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
 
     /* Could insert a check for xmin == xmax. In that case, assume   *
      * connection will be made with no bends and hence no x-cost.    *
@@ -1437,8 +1405,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
      */
 
     double ncost;
-    ncost = (bb.xmax - bb.xmin + 1) * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
-    ncost += (bb.ymax - bb.ymin + 1) * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
+    const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb);
+    ncost = (bb.xmax - bb.xmin + 1) * chanx_cost_fac;
+    ncost += (bb.ymax - bb.ymin + 1) * chany_cost_fac;
     if (is_multi_layer_) {
         ncost += (bb.layer_max - bb.layer_min) * get_chanz_cost_factor_(bb);
     }
@@ -1448,6 +1417,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
     return ncost;
 }
 
+
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {
     const auto& move_ctx = placer_state_.move();
 
@@ -1469,7 +1439,7 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id , bool use
         /* Adjust the bounding box half perimeter by the wirelength correction
          * factor based on terminal count, which is 1 for the source + the number
          * of sinks on this layer. */
-        double crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
+        const double crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
 
         /* Could insert a check for xmin == xmax. In that case, assume   *
          * connection will be made with no bends and hence no x-cost.    *
@@ -1484,11 +1454,10 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id , bool use
          * chan?_place_cost_fac_ objects can handle -1 indices internally.
          */
 
-        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing
-                 * chanx_place_cost_fac_[bb[layer_num].ymax][bb[layer_num].ymin - 1];
-
-        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing
-                 * chany_place_cost_fac_[bb[layer_num].xmax][bb[layer_num].xmin - 1];
+        const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb[layer_num]);
+        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * chanx_cost_fac;
+        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * chany_cost_fac;
+        ncost *= crossing;
     }
 
     return ncost;
@@ -1546,8 +1515,6 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) {
 }
 
 float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
-    float place_cost_exp = placer_opts_.place_cost_exp;
-
     int num_inter_dir_conn;
 
     if (bb.xmin == 0 && bb.ymin == 0) {
@@ -1571,7 +1538,6 @@ float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
     } else {
         int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1);
         z_cost_factor = bb_num_tiles / static_cast<float>(num_inter_dir_conn);
-        z_cost_factor = pow((double)z_cost_factor, (double)place_cost_exp);
     }
 
     return z_cost_factor;