@@ -244,61 +244,62 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
244244
245245 acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int , 2 >({grid_width, grid_height}, 0 .);
246246
247- vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
247+ vtr::NdMatrix<float , 2 > tile_num_inter_die_conn ({grid_width, grid_height}, 0 .);
248+
249+ /*
250+ * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
251+ * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
252+ * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
253+ * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
254+ * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
255+ * if someday we have architectures with widely varying connectivity between different layers in a stack.
256+ */
248257
249258 /*
250259 * To calculate the accumulative number of inter-die connections we first need to get the number of
251- * inter-die connection per loaction . To be able to work for the cases that RR Graph is read instead
252- * of being made from the architecture file, we calculate this number by iterating over RR graph. Once
260+ * inter-die connections per location. To handle the case where the RR graph is read from a file instead
261+ * of being built from the architecture file, we calculate this number by iterating over the RR graph. Once
253262 * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
254263 * we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
255- * inter-die connections by adding up the number of inter-die block at that location + the accumulative
256- * for the block below and left to it. Then, since the accumulative number of inter-die connection to
264+ * inter-die connections by adding up the number of inter-die connections at that location + the accumulated
265+ * counts for the blocks below and to the left of it. Since the accumulated count for the block diagonally
257266 * below and to the left is then included twice, it needs to be subtracted once.
258267 */
259268 for (const auto & src_rr_node : rr_graph.nodes ()) {
260- for (const auto & rr_edge_idx : rr_graph.configurable_edges (src_rr_node)) {
261- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
262- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
263- // We assume that the nodes driving the inter-layer connection or being driven by it
264- // are not streched across multiple tiles
265- int src_x = rr_graph.node_xhigh (src_rr_node);
266- int src_y = rr_graph.node_yhigh (src_rr_node);
267- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
268-
269- tile_num_inter_die_conn[src_x][src_y]++;
270- }
271- }
272-
273- for (const auto & rr_edge_idx : rr_graph.non_configurable_edges (src_rr_node)) {
274- const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
275- if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
276- int src_x = rr_graph.node_xhigh (src_rr_node);
277- VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_xlow (src_rr_node) == src_x);
278- int src_y = rr_graph.node_yhigh (src_rr_node);
279- VTR_ASSERT (rr_graph.node_ylow (src_rr_node) == src_y && rr_graph.node_ylow (src_rr_node) == src_y);
280- tile_num_inter_die_conn[src_x][src_y]++;
269+ for (auto edge_range: {rr_graph.configurable_edges (src_rr_node), rr_graph.non_configurable_edges (src_rr_node)}) {
270+ for (const auto & rr_edge_idx : edge_range) {
271+ const auto & sink_rr_node = rr_graph.edge_sink_node (src_rr_node, rr_edge_idx);
272+ if (rr_graph.node_layer (src_rr_node) != rr_graph.node_layer (sink_rr_node)) {
273+ // We assume that the nodes driving the inter-layer connection or being driven by it
274+ // are not stretched across multiple tiles
275+ int src_x = rr_graph.node_xhigh (src_rr_node);
276+ int src_y = rr_graph.node_yhigh (src_rr_node);
277+ VTR_ASSERT (rr_graph.node_xlow (src_rr_node) == src_x && rr_graph.node_ylow (src_rr_node) == src_y);
278+
279+ tile_num_inter_die_conn[src_x][src_y]++;
280+ }
281281 }
282282 }
283283 }
284284
285+ // Step 2: Calculate prefix sum of the inter-die connectivity up to and including the channel at (x, y).
285286 acc_tile_num_inter_die_conn_[0 ][0 ] = tile_num_inter_die_conn[0 ][0 ];
286287 // Initialize the first row and column
287288 for (size_t x = 1 ; x < device_ctx.grid .width (); x++) {
288- acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] + \
289+ acc_tile_num_inter_die_conn_[x][0 ] = acc_tile_num_inter_die_conn_[x-1 ][0 ] +
289290 tile_num_inter_die_conn[x][0 ];
290291 }
291292
292293 for (size_t y = 1 ; y < device_ctx.grid .height (); y++) {
293- acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] + \
294+ acc_tile_num_inter_die_conn_[0 ][y] = acc_tile_num_inter_die_conn_[0 ][y-1 ] +
294295 tile_num_inter_die_conn[0 ][y];
295296 }
296297
297298 for (size_t x_high = 1 ; x_high < device_ctx.grid .width (); x_high++) {
298299 for (size_t y_high = 1 ; y_high < device_ctx.grid .height (); y_high++) {
299- acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] + \
300- acc_tile_num_inter_die_conn_[x_high][y_high-1 ] + \
301- tile_num_inter_die_conn[x_high][y_high] - \
300+ acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1 ][y_high] +
301+ acc_tile_num_inter_die_conn_[x_high][y_high-1 ] +
302+ tile_num_inter_die_conn[x_high][y_high] -
302303 acc_tile_num_inter_die_conn_[x_high-1 ][y_high-1 ];
303304 }
304305 }
@@ -1604,15 +1605,15 @@ float NetCostHandler::get_chanz_cost_factor(const t_bb& bounding_box) {
16041605 if (x_low == 0 && y_low == 0 ) {
16051606 num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high];
16061607 } else if (x_low == 0 ) {
1607- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1608+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
16081609 acc_tile_num_inter_die_conn_[x_high][y_low-1 ];
16091610 } else if (y_low == 0 ) {
1610- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1611+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
16111612 acc_tile_num_inter_die_conn_[x_low-1 ][y_high];
16121613 } else {
1613- num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] - \
1614- acc_tile_num_inter_die_conn_[x_low-1 ][y_high] - \
1615- acc_tile_num_inter_die_conn_[x_high][y_low-1 ] + \
1614+ num_inter_dir_conn = acc_tile_num_inter_die_conn_[x_high][y_high] -
1615+ acc_tile_num_inter_die_conn_[x_low-1 ][y_high] -
1616+ acc_tile_num_inter_die_conn_[x_high][y_low-1 ] +
16161617 acc_tile_num_inter_die_conn_[x_low-1 ][y_low-1 ];
16171618 }
16181619
0 commit comments