diff --git a/README.md b/README.md index 931cf31..b366e45 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,8 @@ fpnew_top #( .tag_o, .out_valid_o, .out_ready_i, - .busy_o + .busy_o, + .early_valid_o ); ``` diff --git a/docs/README.md b/docs/README.md index 529193e..48f9cd3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -70,6 +70,7 @@ As the width of some input/output signals is defined by the configuration, it is | `out_valid_o` | out | `logic` | Output data valid (see [Handshake](#handshake-interface)) | | `out_ready_i` | in | `logic` | Output interface ready (see [Handshake](#handshake-interface)) | | `busy_o` | out | `logic` | FPU operation in flight | +| `early_valid_o` | out | `logic` | Output data valid in the next cycle | #### Data Types diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 25e5bf5..d89f3db 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -60,7 +60,9 @@ module fpnew_cast_multi #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -803,4 +805,19 @@ module fpnew_cast_multi #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_MID_REGS > 0) begin // no output registers: apply the same check to the last mid stage + assign early_out_valid_o = |{mid_pipe_valid_q[NUM_MID_REGS] & ~mid_pipe_ready[NUM_MID_REGS], + mid_pipe_valid_q[NUM_MID_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // only input registers: apply the same check to the last input stage + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no pipeline registers at all: never asserted + assign early_out_valid_o = 1'b0; + end + endmodule diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index d47f71b..78eaf04 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -60,7 +60,9 @@ module fpnew_divsqrt_multi #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -381,4 +383,16 @@ module fpnew_divsqrt_multi #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // NOTE(review): no output registers, so only the input pipeline is observed; busy_o also tracks unit_busy - confirm this is still an accurate early valid + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no input or output registers: never asserted + assign early_out_valid_o = 1'b0; + end + endmodule diff --git a/src/fpnew_divsqrt_th_32.sv b/src/fpnew_divsqrt_th_32.sv index 8ddb80e..ddae0e3 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -57,7 +57,9 @@ module fpnew_divsqrt_th_32 #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -476,4 +478,15 @@ module fpnew_divsqrt_th_32 #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // NOTE(review): no output registers, so only the input pipeline is observed; busy_o also tracks unit_busy - confirm this is still an accurate early valid + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no input or output registers: never asserted + assign early_out_valid_o = 1'b0; + end endmodule diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index 2e66399..0c0807d 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -62,7 +62,9 @@ module fpnew_divsqrt_th_64_multi #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -496,5 +498,16 @@ module fpnew_divsqrt_th_64_multi #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // NOTE(review): no output registers, so only the input pipeline is observed; busy_o also tracks unit_busy - confirm this is still an accurate early valid + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no input or output registers: never asserted + assign early_out_valid_o = 1'b0; + end endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index 60097b1..d93d6e3 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -53,7 +53,9 @@ module fpnew_fma #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -620,7 +622,7 @@ module fpnew_fma #( // Classification after rounding assign uf_after_round = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal - || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_q == fpnew_pkg::RNE) || (rnd_mode_q == fpnew_pkg::RMM))))); assign of_after_round = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // exponent all ones @@ -698,4 +700,18 @@ module fpnew_fma #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_MID_REGS > 0) begin // no output registers: apply the same check to the last mid stage + assign early_out_valid_o = |{mid_pipe_valid_q[NUM_MID_REGS] & ~mid_pipe_ready[NUM_MID_REGS], + mid_pipe_valid_q[NUM_MID_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // only input registers: apply the same check to the last input stage + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no pipeline registers at all: never asserted + assign early_out_valid_o = 1'b0; + end endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index d5583d9..6f8c7bf 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -57,7 +57,9 @@ module fpnew_fma_multi #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -796,9 +798,9 @@ module fpnew_fma_multi #( if (FpFmtConfig[fmt]) begin : active_format always_comb begin : post_process - // detect of / uf + // detect of / uf fmt_uf_after_round[fmt] = (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) // denormal - || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && + || ((pre_round_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '0) && (rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == 1) && ((round_sticky_bits != 2'b11) || (!sum_sticky_bits[MAN_BITS*2 + 4] && ((rnd_mode_q == fpnew_pkg::RNE) || (rnd_mode_q == fpnew_pkg::RMM))))); fmt_of_after_round[fmt] = rounded_abs[EXP_BITS+MAN_BITS-1:MAN_BITS] == '1; // inf exp. 
@@ -892,4 +894,19 @@ module fpnew_fma_multi #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); + + // Early out_valid_o indication, used when dispatching instructions in a dual-issue processor. + if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_MID_REGS > 0) begin // no output registers: apply the same check to the last mid stage + assign early_out_valid_o = |{mid_pipe_valid_q[NUM_MID_REGS] & ~mid_pipe_ready[NUM_MID_REGS], + mid_pipe_valid_q[NUM_MID_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // only input registers: apply the same check to the last input stage + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no pipeline registers at all: never asserted + assign early_out_valid_o = 1'b0; + end + endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index a8cf765..3e41a97 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -55,7 +55,9 @@ module fpnew_noncomp #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); // ---------- @@ -417,4 +419,16 @@ module fpnew_noncomp #( assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); + + // Early valid_o signal. This is used for dispatching instructions for dual-issue processor. 
+ if (NUM_OUT_REGS > 0) begin // last output stage holds stalled valid data, or the stage before it is valid + assign early_out_valid_o = |{out_pipe_valid_q[NUM_OUT_REGS] & ~out_pipe_ready[NUM_OUT_REGS], + out_pipe_valid_q[NUM_OUT_REGS-1]}; + end else if (NUM_INP_REGS > 0) begin // no output registers: apply the same check to the last input stage + assign early_out_valid_o = |{inp_pipe_valid_q[NUM_INP_REGS] & ~inp_pipe_ready[NUM_INP_REGS], + inp_pipe_valid_q[NUM_INP_REGS-1]}; + end else begin // no input or output registers: never asserted + assign early_out_valid_o = 1'b0; + end + endmodule diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index e9e7f9f..021a378 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -59,7 +59,8 @@ module fpnew_opgroup_block #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + output logic early_valid_o // OR of the per-format early valids ); // ---------------- @@ -74,6 +75,7 @@ module fpnew_opgroup_block #( // Handshake signals for the slices logic [NUM_FORMATS-1:0] fmt_in_ready, fmt_out_valid, fmt_out_ready, fmt_busy; + logic [NUM_FORMATS-1:0] early_valid; output_t [NUM_FORMATS-1:0] fmt_outputs; // ----------- @@ -115,25 +117,26 @@ module fpnew_opgroup_block #( ) i_fmt_slice ( .clk_i, .rst_ni, - .operands_i ( operands_i ), - .is_boxed_i ( is_boxed_i[fmt] ), + .operands_i ( operands_i ), + .is_boxed_i ( is_boxed_i[fmt] ), .rnd_mode_i, .op_i, .op_mod_i, .vectorial_op_i, .tag_i, - .simd_mask_i ( mask_slice ), - .in_valid_i ( in_valid ), - .in_ready_o ( fmt_in_ready[fmt] ), + .simd_mask_i ( mask_slice ), + .in_valid_i ( in_valid ), + .in_ready_o ( fmt_in_ready[fmt] ), .flush_i, - .result_o ( fmt_outputs[fmt].result ), - .status_o ( fmt_outputs[fmt].status ), - .extension_bit_o( fmt_outputs[fmt].ext_bit ), - .tag_o ( fmt_outputs[fmt].tag ), - .out_valid_o ( fmt_out_valid[fmt] ), - .out_ready_i ( fmt_out_ready[fmt] ), - .busy_o ( fmt_busy[fmt] ), - .reg_ena_i ( '0 ) + .result_o ( fmt_outputs[fmt].result ), + .status_o ( fmt_outputs[fmt].status ), + .extension_bit_o ( fmt_outputs[fmt].ext_bit ), + .tag_o ( fmt_outputs[fmt].tag ), + 
.out_valid_o ( fmt_out_valid[fmt] ), + .out_ready_i ( fmt_out_ready[fmt] ), + .busy_o ( fmt_busy[fmt] ), + .reg_ena_i ( '0 ), + .early_out_valid_o( early_valid[fmt] ) ); // If the format wants to use merged ops, tie off the dangling ones not used here end else if (FpFmtMask[fmt] && ANY_MERGED && !IS_FIRST_MERGED) begin : merged_unused @@ -149,7 +152,7 @@ module fpnew_opgroup_block #( assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); - + assign early_valid[fmt] = 1'b0; // Tie off disabled formats end else if (!FpFmtMask[fmt] || (FmtUnitTypes[fmt] == fpnew_pkg::DISABLED)) begin : disable_fmt assign fmt_in_ready[fmt] = 1'b0; // don't accept operations @@ -160,6 +163,7 @@ module fpnew_opgroup_block #( assign fmt_outputs[fmt].status = '{default: fpnew_pkg::DONT_CARE}; assign fmt_outputs[fmt].ext_bit = fpnew_pkg::DONT_CARE; assign fmt_outputs[fmt].tag = TagType'(fpnew_pkg::DONT_CARE); + assign early_valid[fmt] = 1'b0; end end @@ -198,18 +202,19 @@ module fpnew_opgroup_block #( .int_fmt_i, .vectorial_op_i, .tag_i, - .simd_mask_i ( simd_mask_i ), - .in_valid_i ( in_valid ), - .in_ready_o ( fmt_in_ready[FMT] ), + .simd_mask_i ( simd_mask_i ), + .in_valid_i ( in_valid ), + .in_ready_o ( fmt_in_ready[FMT] ), .flush_i, - .result_o ( fmt_outputs[FMT].result ), - .status_o ( fmt_outputs[FMT].status ), - .extension_bit_o ( fmt_outputs[FMT].ext_bit ), - .tag_o ( fmt_outputs[FMT].tag ), - .out_valid_o ( fmt_out_valid[FMT] ), - .out_ready_i ( fmt_out_ready[FMT] ), - .busy_o ( fmt_busy[FMT] ), - .reg_ena_i ( '0 ) + .result_o ( fmt_outputs[FMT].result ), + .status_o ( fmt_outputs[FMT].status ), + .extension_bit_o ( fmt_outputs[FMT].ext_bit ), + .tag_o ( fmt_outputs[FMT].tag ), + .out_valid_o ( fmt_out_valid[FMT] ), + .out_ready_i ( fmt_out_ready[FMT] ), + .busy_o ( fmt_busy[FMT] ), + .reg_ena_i ( '0 ), + .early_out_valid_o( early_valid[FMT] ) ); end @@ 
-244,6 +249,8 @@ module fpnew_opgroup_block #( assign extension_bit_o = arbiter_output.ext_bit; assign tag_o = arbiter_output.tag; + assign early_valid_o = |early_valid; + assign busy_o = (| fmt_busy); endmodule diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 9aeb469..b2c9d9c 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -56,7 +56,9 @@ module fpnew_opgroup_fmt_slice #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); @@ -75,6 +77,7 @@ module fpnew_opgroup_fmt_slice #( TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // dito + logic [NUM_LANES-1:0] lane_early_out_valid; logic result_is_vector, result_is_class; @@ -118,27 +121,28 @@ module fpnew_opgroup_fmt_slice #( ) i_fma ( .clk_i, .rst_ni, - .operands_i ( local_operands ), - .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), + .operands_i ( local_operands ), + .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), .rnd_mode_i, .op_i, .op_mod_i, .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( vectorial_op ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), + .aux_i ( vectorial_op ), // Remember whether operation was vectorial + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_vectorial[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( 
out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_vectorial[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; @@ -189,23 +193,24 @@ module fpnew_opgroup_fmt_slice #( .op_i, .op_mod_i, .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( vectorial_op ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), + .aux_i ( vectorial_op ), // Remember whether operation was vectorial + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .class_mask_o ( lane_class_mask[lane] ), - .is_class_o ( lane_is_class[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_vectorial[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_vectorial[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i, + .early_out_valid_o ( lane_early_out_valid[lane] ) ); end // ADD OTHER OPTIONS HERE @@ -285,6 +290,7 @@ module fpnew_opgroup_fmt_slice #( assign tag_o = lane_tags[0]; // upper lanes unused assign busy_o = (| lane_busy); assign out_valid_o = lane_out_valid[0]; // upper lanes unused + assign early_out_valid_o 
= |lane_early_out_valid; // Collapse the lane status diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index 6114556..efe3088 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -63,7 +63,9 @@ module fpnew_opgroup_multifmt_slice #( // Indication of valid data in flight output logic busy_o, // External register enable override - input logic [ExtRegEnaWidth-1:0] reg_ena_i + input logic [ExtRegEnaWidth-1:0] reg_ena_i, + // Early valid for external structural hazard generation + output logic early_out_valid_o ); if ((OpGroup == fpnew_pkg::DIVSQRT)) begin @@ -108,6 +110,8 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used logic [NUM_LANES-1:0] lane_busy; // dito + logic [NUM_LANES-1:0] lane_early_out_valid; + logic result_is_vector; logic [FMT_BITS-1:0] result_fmt; @@ -233,30 +237,31 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 ) i_fpnew_fma_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands ), + .operands_i ( local_operands ), .is_boxed_i, .rnd_mode_i, .op_i, .op_mod_i, .src_fmt_i, - .src2_fmt_i ( op_i == fpnew_pkg::ADDS ? src_fmt_i : dst_fmt_i ), + .src2_fmt_i ( op_i == fpnew_pkg::ADDS ? 
src_fmt_i : dst_fmt_i ), .dst_fmt_i, .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance @@ -270,26 +275,27 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 ) i_fpnew_divsqrt_multi_th ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands .rnd_mode_i, .op_i, .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( @@ -326,7 +332,8 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ), - .reg_ena_i + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( @@ -363,9 +370,11 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), .busy_o ( lane_busy[lane] ), - .reg_ena_i + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); end + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance @@ -388,21 +397,22 @@ FP8. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 .dst_fmt_i, .int_fmt_i, .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), + .aux_i ( aux_data ), + .in_valid_i ( in_valid ), + .in_ready_o ( lane_in_ready[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ), - .reg_ena_i + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), + .aux_o ( lane_aux[lane] ), + .out_valid_o ( out_valid ), + .out_ready_i ( out_ready ), + .busy_o ( lane_busy[lane] ), + .reg_ena_i, + .early_out_valid_o( lane_early_out_valid[lane] ) ); end // ADD OTHER OPTIONS HERE @@ -548,6 +558,7 @@ FP8. 
Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8 assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones assign tag_o = lane_tags[0]; // don't care about upper ones assign busy_o = (| lane_busy); + assign early_out_valid_o = |lane_early_out_valid; // NOTE(review): lanes without an instance (inactive lanes, the empty NONCOMP branch) must tie lane_early_out_valid low or this reduction sees an undriven bit - confirm assign out_valid_o = lane_out_valid[0]; // don't care about upper ones diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index 1facc72..1d0c71a 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -53,7 +53,8 @@ module fpnew_top #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + output logic early_valid_o // OR of the per-opgroup early valids ); localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; @@ -70,6 +71,7 @@ module fpnew_top #( // Handshake signals for the blocks logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; + logic [NUM_OPGROUPS-1:0] opgrp_early_valid; output_t [NUM_OPGROUPS-1:0] opgrp_outputs; logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; @@ -157,7 +159,8 @@ module fpnew_top #( .tag_o ( opgrp_outputs[opgrp].tag ), .out_valid_o ( opgrp_out_valid[opgrp] ), .out_ready_i ( opgrp_out_ready[opgrp] ), - .busy_o ( opgrp_busy[opgrp] ) + .busy_o ( opgrp_busy[opgrp] ), + .early_valid_o ( opgrp_early_valid[opgrp] ) ); end @@ -190,6 +193,7 @@ module fpnew_top #( assign status_o = arbiter_output.status; assign tag_o = arbiter_output.tag; + assign early_valid_o = |opgrp_early_valid; // set when any operation group block reports early valid assign busy_o = (| opgrp_busy); endmodule