diff --git a/parmys/parmys-plugin/core/multiplier.cc b/parmys/parmys-plugin/core/multiplier.cc index 3a411217f25..2876a2ab93a 100644 --- a/parmys/parmys-plugin/core/multiplier.cc +++ b/parmys/parmys-plugin/core/multiplier.cc @@ -937,7 +937,7 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b) *-----------------------------------------------------------------------*/ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist) { - nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig; + nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addsmall2, *addbig; int size; /* Check for a legitimate split */ @@ -976,50 +976,151 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t * init_split_multiplier(node, a1b0, a0, a1, 0, b0, a1b1, a0b0); mult_list = insert_in_vptr_list(mult_list, a1b0); - /* New node for the initial add */ - addsmall = allocate_nnode(node->loc); - addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); - strcpy(addsmall->name, node->name); - strcat(addsmall->name, "-add0"); - // this addition will have a carry out in the worst case, add to input pins and connect then to gnd - init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); - - /* New node for the BIG add */ - addbig = allocate_nnode(node->loc); - addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); - strcpy(addbig->name, node->name); - strcat(addbig->name, "-add1"); - init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); - - // connect inputs to port a of addsmall - for (int i = 0; i < a1b0->num_output_pins; i++) - connect_nodes(a1b0, i, addsmall, i); - add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); - // connect inputs to port b of addsmall - for (int i = 0; i < a0b1->num_output_pins; i++) - connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); - 
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); - - // connect inputs to port a of addbig - size = addsmall->num_output_pins; - for (int i = 0; i < size; i++) - connect_nodes(addsmall, i, addbig, i); - - // connect inputs to port b of addbig - for (int i = b0; i < a0b0->output_port_sizes[0]; i++) - connect_nodes(a0b0, i, addbig, i - b0 + size); - size = size + a0b0->output_port_sizes[0] - b0; - for (int i = 0; i < a1b1->output_port_sizes[0]; i++) - connect_nodes(a1b1, i, addbig, i + size); - - // remap the multiplier outputs coming directly from a0b0 - for (int i = 0; i < b0; i++) { - remap_pin_to_new_node(node->output_pins[i], a0b0, i); - } + // using the balanced addition method only works if a0 and b0 are the same size + // (i.e. if the input ports on the hardware multiplier are equal) + if (b0 == a0) { + /* New node for the initial add */ + addsmall = allocate_nnode(node->loc); + addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall->name, node->name); + strcat(addsmall->name, "-add0"); + // this addition will have a carry out in the worst case, add to input pins and connect them to gnd + init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); + + // connect inputs to port a of addsmall + for (int i = 0; i < a1b0->num_output_pins; i++) + connect_nodes(a1b0, i, addsmall, i); + + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); + // connect inputs to port b of addsmall + for (int i = 0; i < a0b1->num_output_pins; i++) + connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); + + /* New node for the BIG add */ + addbig = allocate_nnode(node->loc); + addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addbig->name, node->name); + strcat(addbig->name, "-add1"); + 
init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); + + // connect inputs to port a of addbig + size = addsmall->num_output_pins; + for (int i = 0; i < size; i++) + connect_nodes(addsmall, i, addbig, i); + + // connect inputs to port b of addbig + for (int i = b0; i < a0b0->output_port_sizes[0]; i++) + connect_nodes(a0b0, i, addbig, i - b0 + size); + size = size + a0b0->output_port_sizes[0] - b0; + for (int i = 0; i < a1b1->output_port_sizes[0]; i++) + connect_nodes(a1b1, i, addbig, i + size); + + // remap the multiplier outputs coming directly from a0b0 + for (int i = 0; i < b0; i++) { + remap_pin_to_new_node(node->output_pins[i], a0b0, i); + } + + // remap the multiplier outputs coming from addbig + for (int i = 0; i < addbig->num_output_pins; i++) { + remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); + } + } else { + /* Expounding upon the description for the method in this function. + if we have two numbers A and B and we have a hardware multiplier of size a0xb0, + we can split them into two parts: + A = (A1 << a0) + A0 + B = (B1 << b0) + B0 + where A1 and B1 are the high bits of A and B, and A0 and B0 are the low bits. + Note that len(A0) = a0 and len(B0) = b0 by definition. + The multiplication of A and B can be expressed as: + A * B = ((A1 << a0) + A0) * ((B1 << b0) + B0) + = {(A1 * B1) << (a0 + b0)} + {((A1 * B0) << a0) + ((A0 * B1) << b0)} + {A0 * B0} + we split the additions up like so: + addsmall = ((A1 * B0) << a0) + ((A0 * B1) << b0) // can have carry + addsmall2 = ((A1 * B1) << (a0 + b0)) + (A0 * B0) // Will not have carry + addbig = addsmall + addsmall2 + This is the four-product (schoolbook) split that the Karatsuba algorithm refines; all four partial products are computed here. 
+ */ + /////////////// Addsmall ///////////////////// + addsmall = allocate_nnode(node->loc); + addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall->name, node->name); + strcat(addsmall->name, "-add0"); + init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + a0 + 1, a0b1->num_output_pins + b0 + 1); + + // The first a0 pins of addsmall input connecting to a1b0 are connected to zero + for (int i = 0; i < a0; i++) { + add_input_pin_to_node(addsmall, get_zero_pin(netlist), i); + } + + // connect inputs to port a of addsmall + for (int i = 0; i < a1b0->num_output_pins; i++) { + connect_nodes(a1b0, i, addsmall, i + a0); + } + + // add zero pin for carry + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins + a0); + + // The first b0 pins of addsmall input connecting to a0b1 are connected to zero + for (int i = 0; i < b0; i++) { + add_input_pin_to_node(addsmall, get_zero_pin(netlist), i + addsmall->input_port_sizes[0]); + } + + // connect inputs to port b of addsmall + for (int i = 0; i < a0b1->num_output_pins; i++) { + connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0] + b0); + } + + // add zero pin for carry + add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0] + b0); + + /////////////// Addsmall2 ///////////////////// + addsmall2 = allocate_nnode(node->loc); + addsmall2->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addsmall2->name, node->name); + strcat(addsmall2->name, "-add1"); + init_multiplier_adder(addsmall2, a1b1, a1b1->num_output_pins + a0 + b0, a0b0->num_output_pins); - // remap the multiplier outputs coming from addbig - for (int i = 0; i < addbig->num_output_pins; i++) { - remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); + // connect first a0+ b0 pins of addsmall2 to zero + for (int i = 0; i < a0 + b0; i++) { + add_input_pin_to_node(addsmall2, get_zero_pin(netlist), i); + } + + // connect inputs to 
port a of addsmall2 + for (int i = 0; i < a1b1->num_output_pins; i++) { + connect_nodes(a1b1, i, addsmall2, i + a0 + b0); + } + + // connect inputs to port b of addsmall2 + for (int i = 0; i < a0b0->output_port_sizes[0]; i++) { + connect_nodes(a0b0, i, addsmall2, i + addsmall2->input_port_sizes[0]); + } + + /////////////// Addbig ///////////////////// + addbig = allocate_nnode(node->loc); + addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); + strcpy(addbig->name, node->name); + strcat(addbig->name, "-add2"); + init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, addsmall2->num_output_pins); + // Here the final addition can have a carry out in the worst case, however, + // our final product will always only be the length of the longest input port so regardless of the carry the + // final add's carry will always drop out. + + // connect inputs to port a of addbig + for (int i = 0; i < addsmall->num_output_pins; i++) { + connect_nodes(addsmall, i, addbig, i); + } + + // connect inputs to port b of addbig + for (int i = 0; i < addsmall2->num_output_pins; i++) { + connect_nodes(addsmall2, i, addbig, i + addbig->input_port_sizes[0]); + } + + // remap all of the multiplier outputs to come from addbig + for (int i = 0; i < addbig->num_output_pins; i++) { + remap_pin_to_new_node(node->output_pins[i], addbig, i); + } } // CLEAN UP @@ -1060,7 +1161,6 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b) strcat(a0b->name, "-0"); init_split_multiplier(node, a0b, 0, a0, 0, b, nullptr, nullptr); mult_list = insert_in_vptr_list(mult_list, a0b); - /* New node for a1b multiply */ a1b = allocate_nnode(node->loc); a1b->name = (char *)vtr::malloc(strlen(node->name) + 3); @@ -1184,7 +1284,6 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist) oassert(node->type == MULTIPLY); oassert(hard_multipliers != NULL); - sizea = node->input_port_sizes[0]; sizeb = node->input_port_sizes[1]; sizeout = node->output_port_sizes[0]; @@ -1199,6 +1298,13 @@ void 
pad_multiplier(nnode_t *node, netlist_t *netlist) } diffa = ina - sizea; diffb = inb - sizeb; + // input multiplier size on middle range of unequal Hard Block size(ex; mul_size>18 && mul_size<25) + if (diffb < 0) { + std::swap(ina, inb); + diffa = ina - sizea; + diffb = inb - sizeb; + } + diffout = hard_multipliers->outputs->size - sizeout; if (configuration.split_hard_multiplier == 1) { @@ -1281,11 +1387,9 @@ void iterate_multipliers(netlist_t *netlist) int mula, mulb; int a0, a1, b0, b1; nnode_t *node; - /* Can only perform the optimisation if hard multipliers exist! */ if (hard_multipliers == NULL) return; - sizea = hard_multipliers->inputs->size; sizeb = hard_multipliers->inputs->next->size; if (sizea < sizeb) { @@ -1313,7 +1417,6 @@ void iterate_multipliers(netlist_t *netlist) sizea = sizeb; sizeb = swap; } - /* Do I need to split the multiplier on both inputs? */ if ((mula > sizea) && (mulb > sizeb)) { a0 = sizea; @@ -1890,4 +1993,4 @@ void free_multipliers() hard_multipliers->instances = NULL; } -} +} \ No newline at end of file diff --git a/parmys/parmys-plugin/netlist/netlist_utils.cc b/parmys/parmys-plugin/netlist/netlist_utils.cc index 9c3fb060b4f..60fc7025f04 100644 --- a/parmys/parmys-plugin/netlist/netlist_utils.cc +++ b/parmys/parmys-plugin/netlist/netlist_utils.cc @@ -485,6 +485,7 @@ void remap_pin_to_new_net(npin_t *pin, nnet_t *new_net) *-----------------------------------------------------------------------*/ void remap_pin_to_new_node(npin_t *pin, nnode_t *new_node, int pin_idx) { + oassert(pin != NULL); if (pin->type == INPUT) { /* clean out the entry in the old net */ pin->node->input_pins[pin->pin_node_idx] = NULL; diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt index d1b5a73eb72..956f23e6634 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/config.txt @@ -12,6 +12,7 @@ circuits_dir=benchmarks/verilog arch_list_add=7series_BRAM_DSP_carry.xml # Add circuits to list to sweep +circuit_list_add=mcml.v circuit_list_add=LU32PEEng.v circuit_list_add=LU8PEEng.v circuit_list_add=bgm.v diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt index da5393b0e1c..b563c7587c4 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vtr_xilinx_qor/config/golden_results.txt @@ -1,7 +1,8 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed 
total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time -7series_BRAM_DSP_carry.xml LU32PEEng.v common 8462.06 vpr 3.48 GiB -1 -1 129.11 1222556 93 791.19 -1 -1 380736 -1 -1 -1 114 153 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 3646292 114 102 123003 108818 1 77498 10464 113 113 12769 CLB auto 1072.1 MiB 4396.87 7.43631e+06 1262567 15340198 6033356 8730977 575865 2684.0 MiB 2280.76 12.98 135.884 109.205 -232244 -109.205 109.205 0.16 0.360282 0.320818 54.09 46.7631 -1 -1 -1 -1 1465145 11.9176 653098 5.31233 473500 2451803 465565033 75467213 7.58182e+08 6.15544e+08 2.46355e+08 19293.2 19 5806063 87504646 2231668 86.4379 86.4379 -454383 -86.4379 0 0 128.89 59.66 17.27 3560.6 MiB 384.96 68.2088 58.22 2684.0 MiB 0.16 263.36 -7series_BRAM_DSP_carry.xml LU8PEEng.v common 1748.11 vpr 1.03 GiB -1 -1 43.47 387956 98 87.95 -1 -1 117496 -1 -1 -1 114 45 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 1076724 114 102 36473 32106 1 22515 3201 62 62 3844 CLB auto 334.7 MiB 1220.93 1.18078e+06 296971 2644273 850518 1614737 179018 814.9 MiB 219.67 2.12 125.894 111.93 -52668.6 -111.93 111.93 0.05 0.0878002 0.0782625 11.1992 9.49294 -1 -1 -1 -1 347231 
9.52753 164732 4.52002 139504 610360 78202730 8509396 2.21078e+08 1.79004e+08 7.33801e+07 19089.5 17 1711877 25804105 642295 88.66 88.66 -80517 -88.66 0 0 36.95 16.76 4.47 1051.3 MiB 59.59 14.4391 12.1487 814.9 MiB 0.15 56.68 -7series_BRAM_DSP_carry.xml bgm.v common 1453.26 vpr 1.09 GiB -1 -1 34.02 505276 14 87.55 -1 -1 124216 -1 -1 -1 257 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 1141844 257 32 36772 33710 1 23560 3586 66 66 4356 CLB auto 366.4 MiB 895.18 1.43373e+06 291085 2774622 943137 1789486 41999 912.5 MiB 219.36 2.13 38.4025 22.1265 -27810.4 -22.1265 22.1265 0.05 0.0889869 0.0799615 10.9486 9.40508 -1 -1 -1 -1 410749 11.1711 193767 5.26985 171335 749781 115947662 12762222 2.52497e+08 1.8145e+08 8.29171e+07 19035.1 18 1855857 29118461 737145 20.7189 20.7189 -28709.3 -20.7189 0 0 41.79 19.45 5.47 1108.0 MiB 80.62 14.4948 12.3194 912.5 MiB 0.15 67.83 -7series_BRAM_DSP_carry.xml stereovision0.v common 631.20 vpr 478.05 MiB -1 -1 8.43 103696 5 7.86 -1 -1 69424 -1 -1 -1 169 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 489528 169 197 23125 21329 1 9476 1664 42 42 1764 CLB auto 184.2 MiB 521.98 200804 67090 888704 241514 572204 74986 403.1 MiB 25.54 0.26 6.25602 4.24154 -16239.3 -4.24154 4.24154 0.02 0.0285299 0.0249303 2.90125 2.47762 -1 -1 -1 -1 64191 2.77799 33017 1.42887 50936 110793 12919651 1591963 9.88618e+07 6.99558e+07 3.28179e+07 18604.2 11 823277 11448038 269515 3.62506 3.62506 -19319.1 -3.62506 0 0 16.19 7.33 2.13 478.1 MiB 16.34 3.63149 3.10496 403.1 MiB 0.15 26.15 -7series_BRAM_DSP_carry.xml stereovision1.v common 657.92 vpr 836.50 MiB -1 -1 7.17 118048 3 11.50 -1 -1 72280 -1 -1 -1 115 0 -1 success 5160a12-dirty release 
IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 856580 115 145 22828 19265 1 10325 1531 66 66 4356 DSP auto 179.1 MiB 458.64 311225 103708 909991 287175 612059 10757 778.3 MiB 24.00 0.24 5.73398 5.10375 -20985.5 -5.10375 5.10375 0.05 0.0277094 0.0241873 3.12136 2.68518 -1 -1 -1 -1 100891 4.42136 40545 1.77681 54616 114442 15706832 1926588 2.52497e+08 9.58701e+07 8.29171e+07 19035.1 11 1520183 28962658 248173 4.70403 4.70403 -25526.2 -4.70403 0 0 41.56 14.80 2.43 836.5 MiB 33.01 3.84255 3.31109 778.3 MiB 0.15 72.12 -7series_BRAM_DSP_carry.xml stereovision2.v common 2311.65 vpr 2.05 GiB -1 -1 10.93 180464 3 5.19 -1 -1 140368 -1 -1 -1 149 0 -1 success 5160a12-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-47-generic x86_64 2025-06-17T12:01:36 agent-2 /home/pooladam/actions-runner/_work/vtr-verilog-to-routing/vtr-verilog-to-routing 2144512 149 182 55133 36882 1 33040 4000 106 106 11236 DSP auto 329.2 MiB 1607.37 1.70601e+06 435687 3463551 1091033 2221855 150663 1922.9 MiB 232.94 1.79 22.4663 15.6766 -47524.7 -15.6766 15.6766 0.14 0.0743063 0.066452 9.08666 7.81491 -1 -1 -1 -1 417311 7.57137 134875 2.44707 129629 213413 54356800 6676827 6.67318e+08 2.77775e+08 2.17352e+08 19344.2 15 3832009 76669346 592051 15.1625 15.1625 -61375.1 -15.1625 0 0 116.92 41.18 6.35 2094.2 MiB 105.70 11.7825 10.0862 1922.9 MiB 0.15 210.17 +7series_BRAM_DSP_carry.xml mcml.v common 6313.11 vpr 4.28 GiB -1 -1 87.19 1031944 26 1703.45 -1 -1 397084 -1 -1 -1 36 159 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 4487004 36 356 198637 165032 1 95923 14364 132 132 17424 CLB auto 1363.3 MiB 1601.63 7.24933e+06 1524256 30905820 11032851 11396471 8476498 3423.6 MiB 2480.46 8.69 108.93 
73.9809 -371259 -73.9809 73.9809 0.14 0.162937 0.141987 31.3309 26.6374 -1 -1 -1 -1 1035273 5.22707 458769 2.31631 480955 1548202 178093609 19783675 1.04106e+09 8.35036e+08 3.37280e+08 19357.2 16 7938917 120124248 2733689 60.9084 60.9084 -656666 -60.9084 0 0 78.01 40.16 10.79 4381.8 MiB 131.54 36.9474 31.2048 3423.6 MiB 0.08 161.67 +7series_BRAM_DSP_carry.xml LU32PEEng.v common 4141.71 vpr 3.47 GiB -1 -1 62.78 1217024 93 414.51 -1 -1 380360 -1 -1 -1 114 153 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 3640452 114 102 123003 108818 1 77498 10464 113 113 12769 CLB auto 1067.7 MiB 2291.63 7.43631e+06 1262567 15340198 6033356 8730977 575865 2576.1 MiB 1003.63 6.20 135.884 109.205 -232244 -109.205 109.205 0.10 0.162848 0.141804 25.0696 21.5063 -1 -1 -1 -1 1398688 11.3770 626094 5.09268 439657 2220380 322294781 37162681 7.58182e+08 6.15544e+08 2.46355e+08 19293.2 18 5806063 87504646 2231668 85.8248 85.8248 -451588 -85.8248 0 0 57.91 29.77 8.16 3555.0 MiB 139.50 32.2133 27.3232 2576.1 MiB 0.08 125.61 +7series_BRAM_DSP_carry.xml LU8PEEng.v common 861.93 vpr 1.03 GiB -1 -1 20.03 380748 97 40.89 -1 -1 116508 -1 -1 -1 114 45 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 1079496 114 102 36458 32091 1 22453 3197 62 62 3844 CLB auto 336.4 MiB 626.70 1.15759e+06 286430 2686985 897479 1605424 184082 785.5 MiB 91.37 0.82 127.808 111.347 -51526.1 -111.347 111.347 0.03 0.0403543 0.0350875 5.17473 4.40197 -1 -1 -1 -1 327752 8.99701 158009 4.33745 138790 590818 72646827 7812502 2.21078e+08 1.78788e+08 7.33801e+07 19089.5 19 1711674 25804385 642695 88.5126 88.5126 -79974.1 -88.5126 0 0 16.08 8.34 2.33 1054.0 MiB 27.92 6.71731 5.66531 
785.5 MiB 0.08 27.47 +7series_BRAM_DSP_carry.xml bgm.v common 753.57 vpr 1.09 GiB -1 -1 14.84 508304 14 40.42 -1 -1 125028 -1 -1 -1 257 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 1142528 257 32 37166 34104 1 23802 3586 66 66 4356 CLB auto 369.1 MiB 497.29 1.40047e+06 292740 2856930 988242 1818956 49732 879.1 MiB 98.21 0.99 36.7393 22.4885 -27648.5 -22.4885 22.4885 0.03 0.0421788 0.0370497 5.12815 4.34656 -1 -1 -1 -1 394981 10.6283 186576 5.02048 171693 731956 103907577 11160888 2.52497e+08 1.8145e+08 8.29171e+07 19035.1 20 1861896 29128908 739360 20.5236 20.5236 -28025.6 -20.5236 0 0 18.27 9.55 2.79 1115.8 MiB 38.76 6.88362 5.77265 879.1 MiB 0.08 32.15 +7series_BRAM_DSP_carry.xml stereovision0.v common 312.33 vpr 481.78 MiB -1 -1 3.37 103432 5 2.85 -1 -1 69068 -1 -1 -1 169 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 493340 169 197 23132 21336 1 9472 1670 42 42 1764 CLB auto 186.5 MiB 264.83 192189 68164 853414 231424 553421 68569 397.7 MiB 11.46 0.12 5.42523 4.02318 -16155.7 -4.02318 4.02318 0.01 0.0123845 0.0104627 1.26103 1.04979 -1 -1 -1 -1 65033 2.81358 33016 1.42840 50854 111172 13334467 1642868 9.88618e+07 7.02791e+07 3.28179e+07 18604.2 13 823689 11449301 270023 3.61197 3.61197 -19327.2 -3.61197 0 0 6.88 3.67 1.11 481.8 MiB 8.06 1.62306 1.34782 397.7 MiB 0.08 10.78 +7series_BRAM_DSP_carry.xml stereovision1.v common 334.87 vpr 838.88 MiB -1 -1 2.89 117460 3 4.60 -1 -1 71612 -1 -1 -1 115 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 
/home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 859008 115 145 22828 19265 1 10325 1531 66 66 4356 DSP auto 179.2 MiB 244.04 311225 103708 909991 287175 612059 10757 742.8 MiB 11.66 0.12 5.73398 5.10375 -20985.5 -5.10375 5.10375 0.03 0.0133465 0.0113694 1.53107 1.2955 -1 -1 -1 -1 100160 4.38932 39961 1.75122 54266 112180 15375570 1836846 2.52497e+08 9.58701e+07 8.29171e+07 19035.1 10 1520183 28962658 248173 4.56731 4.56731 -24939.2 -4.56731 0 0 18.29 7.24 1.23 838.9 MiB 15.85 1.83682 1.55126 742.8 MiB 0.08 32.59 +7series_BRAM_DSP_carry.xml stereovision2.v common 1112.29 vpr 2.05 GiB -1 -1 4.46 179764 3 2.16 -1 -1 140184 -1 -1 -1 149 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 2151592 149 182 55138 36887 1 33057 4001 106 106 11236 DSP auto 330.5 MiB 807.58 1.74045e+06 426423 3528140 1119893 2255709 152538 1831.9 MiB 86.41 0.74 24.8584 14.831 -47167.6 -14.831 14.831 0.09 0.0338907 0.0296075 4.25753 3.60632 -1 -1 -1 -1 407551 7.39362 135018 2.44944 129230 211734 56370804 6974901 6.67318e+08 2.77829e+08 2.17352e+08 19344.2 16 3832290 76670110 592393 14.5948 14.5948 -60596.1 -14.5948 0 0 50.05 19.78 3.06 2101.2 MiB 50.73 5.45946 4.61585 1831.9 MiB 0.08 99.78 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt index 67ebcf3f6cb..ccc1aea6050 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/config.txt @@ -6,6 +6,7 @@ arch_list_add=7series_BRAM_DSP_carry.xml # Add circuits to list to sweep circuit_list_add=stereovision3.v +circuit_list_add=diffeq2.v # Parse info and how to parse diff --git 
a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt index f1c30f92f81..76842a94d9c 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_xilinx_flagship/config/golden_results.txt @@ -1,2 +1,3 @@ arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time initial_placed_wirelength_est placed_wirelength_est total_swap accepted_swap rejected_swap aborted_swap place_mem place_time place_quench_time initial_placed_CPD_est placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time ap_mem ap_time ap_full_legalizer_mem ap_full_legalizer_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration 
crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time -7series_BRAM_DSP_carry.xml stereovision3.v common 3.90 vpr 71.88 MiB -1 -1 0.31 24572 5 0.11 -1 -1 32512 -1 -1 -1 11 0 -1 success v8.0.0-13084-g071ad3865 release IPO VTR_ASSERT_LEVEL=2 GNU 13.3.0 on Linux-6.8.0-60-generic x86_64 2025-06-17T09:37:40 betzgrp-wintermute /home/pooladam/vtr-verilog-to-routing 73604 11 2 303 283 2 114 35 7 7 49 CLB auto 32.5 MiB 1.52 569.025 404 890 113 545 232 71.9 MiB 0.02 0.00 3.1717 3.1717 -181.811 -3.1717 2.89952 0.16 0.000406774 0.000362597 0.00963025 0.00880151 -1 -1 -1 -1 44 491 11 1.34735e+06 1.18567e+06 177202. 3616.36 0.69 0.0981787 0.0839421 6848 92556 -1 373 9 263 823 105258 48434 2.83816 2.83244 -218.271 -2.83816 -2.452 -0.04 257836. 5261.96 0.04 0.03 0.07 -1 -1 0.04 0.0161707 0.0149638 +7series_BRAM_DSP_carry.xml stereovision3.v common 3.82 vpr 74.09 MiB -1 -1 0.22 31628 4 0.07 -1 -1 35776 -1 -1 -1 11 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 75864 11 2 305 285 2 111 34 7 7 49 CLB auto 34.8 MiB 1.51 540.712 424 584 93 389 102 74.1 MiB 0.01 0.00 3.28585 3.2897 -182.249 -3.2897 3.21493 0.12 0.000279618 0.000250801 0.00524824 0.00486604 -1 -1 -1 -1 44 571 17 1.34735e+06 1.13177e+06 177202. 
3616.36 1.29 0.0832215 0.0712857 6848 92556 -1 426 8 252 839 111127 49869 3.46476 3.12023 -219.956 -3.46476 -2.342 -0.04 257836. 5261.96 0.03 0.02 0.05 -1 -1 0.03 0.0108628 0.0101501 +7series_BRAM_DSP_carry.xml diffeq2.v common 49.95 vpr 119.88 MiB -1 -1 0.14 33104 5 0.08 -1 -1 38164 -1 -1 -1 66 0 -1 success v8.0.0-14124-g5725a225d release IPO VTR_ASSERT_LEVEL=2 GNU 11.4.0 on Linux-5.15.0-119-generic x86_64 2025-10-09T09:04:20 goeders10 /home/chem3000/GitClones/vtr_pulls/vtr_ccl/vtr-verilog-to-routing/vtr_flow/tasks 122760 66 96 1817 1078 1 1146 337 26 26 676 DSP auto 42.3 MiB 2.63 20173 9674 89409 23988 58684 6737 118.6 MiB 0.87 0.01 22.2237 19.8977 -1060.81 -19.8977 19.8977 3.42 0.00131729 0.00122751 0.109971 0.102313 -1 -1 -1 -1 68 12670 49 3.53732e+07 1.31946e+07 5.24855e+06 7764.12 34.52 0.723609 0.66052 131170 3079406 -1 11823 14 5719 9575 3216984 800670 21.5154 21.5154 -1313 -21.5154 -1.7 -0.034 6.96973e+06 10310.3 1.92 0.42 1.67 -1 -1 1.92 0.0555499 0.0519533