@@ -31,10 +31,11 @@ module VX_tcu_fedp_drl #(
3131);
3232
3333 localparam TCK = 2 * N ; // TCK + 1 is the lane count passed as N to mul_exp, sigs_aln and csa_acc
34- localparam FMUL_LATENCY = 2 ;
34+ localparam FMUL_LATENCY = 1 ; // DEPTH of the pipe_fmul register that follows mul_exp
35+ localparam ALN_LATENCY = 1 ; // DEPTH of the pipe_aln register that follows sigs_aln
3536 localparam ACC_LATENCY = 1 ;
3637 localparam FRND_LATENCY = 1 ;
37- localparam TOTAL_LATENCY = FMUL_LATENCY + ACC_LATENCY + FRND_LATENCY ;
38+ localparam TOTAL_LATENCY = FMUL_LATENCY + ALN_LATENCY + ACC_LATENCY + FRND_LATENCY ; // sum of the per-stage depths; the STATIC_ASSERT below requires LATENCY to be 0 or exactly this value
3839 `STATIC_ASSERT (LATENCY == 0 || LATENCY == TOTAL_LATENCY , (" invalid latency! expected=%0d , actual=%0d " , TOTAL_LATENCY , LATENCY ));
3940
4041 `UNUSED_VAR ({ fmt_d, c_val} );
@@ -50,52 +51,86 @@ module VX_tcu_fedp_drl #(
5051 assign b_col16[2 * i+ 1 ] = b_col[i][31 : 16 ];
5152 end
5253
53- // Transprecision Mul & Max Exp & Align Sigs
54- wire [7 : 0 ] raw_max_exp;
55- wire [TCK : 0 ][24 : 0 ] aln_sigs;
54+ // Transprecision Mul & Max Exp: mul_exp drives raw_max_exp, per-lane shift amounts and per-lane raw significands; alignment is done by sigs_aln after the pipe_fmul register
5655 wire [6 : 0 ] hi_c = c_val[31 : 25 ]; // c_val[24:0] acc is taken care of in acc stage
5756 wire fmt_sel = fmt_s[3 ]; // bit 3 of fmt_s; carried through both pipeline registers to sigs_aln and csa_acc
57+ wire [7 : 0 ] raw_max_exp; // 8-bit mul_exp output; after both pipeline registers it becomes acc_max_exp
58+ wire [TCK : 0 ][7 : 0 ] shift_amounts; // one 8-bit shift amount per lane (TCK + 1 lanes), driven by mul_exp
59+ wire [TCK : 0 ][24 : 0 ] raw_sigs; // one 25-bit significand per lane, driven by mul_exp; fed to sigs_aln as sigs_in after pipe_fmul
5860
5961 VX_tcu_drl_mul_exp # (
6062 .N (TCK + 1 )
6163 ) mul_exp (
62- .enable (enable),
63- .fmt_s (fmt_s),
64- .a_rows (a_row16),
65- .b_cols (b_col16),
66- .c_val (c_val[31 : 0 ]),
67- .raw_max_exp (raw_max_exp),
68- .sigs_out (aln_sigs)
64+ .enable (enable),
65+ .fmt_s (fmt_s),
66+ .a_rows (a_row16),
67+ .b_cols (b_col16),
68+ .c_val (c_val[31 : 0 ]),
69+ .raw_max_exp (raw_max_exp),
70+ .shift_amounts (shift_amounts),
71+ .raw_sigs (raw_sigs)
6972 );
7073
71- // Stage 1/2 pipeline reg
74+ // Stage 1 pipeline reg: registers the mul_exp outputs together with hi_c and fmt_sel (DEPTH = FMUL_LATENCY)
7275 wire [7 : 0 ] pipe_raw_max_exp;
73- wire [TCK : 0 ][24 : 0 ] pipe_aln_sigs;
76+ wire [TCK : 0 ][7 : 0 ] pipe_shift_amounts;
77+ wire [TCK : 0 ][24 : 0 ] pipe_raw_sigs;
7478 wire [6 : 0 ] pipe_hi_c;
7579 wire pipe_fmt_sel;
7680 VX_pipe_register # (
77- .DATAW (8 + ((TCK + 1 )* 25 )+ 7 + 1 ),
81+ .DATAW (8 + ((TCK + 1 )* 8 ) + (( TCK + 1 ) * 25 )+ 7 + 1 ), // 8 (raw_max_exp) + (TCK+1)*8 (shift_amounts) + (TCK+1)*25 (raw_sigs) + 7 (hi_c) + 1 (fmt_sel); must equal the width of the data_in concatenation
7882 .DEPTH (FMUL_LATENCY )
79- ) pipe_align (
83+ ) pipe_fmul (
84+ .clk (clk),
85+ .reset (reset),
86+ .enable (enable),
87+ .data_in ({ raw_max_exp, shift_amounts, raw_sigs, hi_c, fmt_sel} ), // field order must mirror data_out below
88+ .data_out ({ pipe_raw_max_exp, pipe_shift_amounts, pipe_raw_sigs, pipe_hi_c, pipe_fmt_sel} )
89+ );
90+
91+ // Significand Alignment: sigs_aln takes the registered shift amounts and raw significands and drives aln_sigs; no clk/reset is connected to it here
92+ wire [TCK : 0 ][24 : 0 ] aln_sigs; // sigs_out of sigs_aln, one 25-bit value per lane (TCK + 1 lanes)
93+ wire [7 : 0 ] aln_max_exp = pipe_raw_max_exp; // pass-through alias; unchanged by this stage
94+ wire [6 : 0 ] aln_hi_c = pipe_hi_c; // pass-through alias; unchanged by this stage
95+ wire aln_fmt_sel = pipe_fmt_sel; // pass-through alias; unchanged by this stage
96+
97+ VX_tcu_drl_align # (
98+ .N (TCK + 1 )
99+ ) sigs_aln (
100+ .shift_amounts (pipe_shift_amounts),
101+ .sigs_in (pipe_raw_sigs),
102+ .fmt_sel (pipe_fmt_sel), // same signal as the aln_fmt_sel alias declared above
103+ .sigs_out (aln_sigs)
104+ );
105+
106+ // Stage 2 pipeline reg: registers the sigs_aln output together with the passed-through max exp, hi_c and fmt_sel (DEPTH = ALN_LATENCY)
107+ wire [7 : 0 ] pipe_aln_max_exp;
108+ wire [TCK : 0 ][24 : 0 ] pipe_aln_sigs;
109+ wire [6 : 0 ] pipe_aln_hi_c;
110+ wire pipe_aln_fmt_sel;
111+ VX_pipe_register # (
112+ .DATAW (8 + ((TCK + 1 )* 25 )+ 7 + 1 ), // 8 (aln_max_exp) + (TCK+1)*25 (aln_sigs) + 7 (aln_hi_c) + 1 (aln_fmt_sel); must equal the width of the data_in concatenation
113+ .DEPTH (ALN_LATENCY )
114+ ) pipe_aln (
80115 .clk (clk),
81116 .reset (reset),
82117 .enable (enable),
83- .data_in ({ raw_max_exp , aln_sigs, hi_c, fmt_sel } ),
84- .data_out ({ pipe_raw_max_exp , pipe_aln_sigs, pipe_hi_c, pipe_fmt_sel } )
118+ .data_in ({ aln_max_exp , aln_sigs, aln_hi_c, aln_fmt_sel } ), // field order must mirror data_out below
119+ .data_out ({ pipe_aln_max_exp , pipe_aln_sigs, pipe_aln_hi_c, pipe_aln_fmt_sel } )
85120 );
86121
87122 // Accumulate CSA reduction tree: csa_acc takes the registered aligned significands and fmt_sel and drives acc_sig and sigs_sign
88- wire [7 : 0 ] acc_max_exp = pipe_raw_max_exp ;
89- wire [6 : 0 ] acc_hi_c = pipe_hi_c ;
90- wire acc_fmt_sel = pipe_fmt_sel ;
123+ wire [7 : 0 ] acc_max_exp = pipe_aln_max_exp ;
124+ wire [6 : 0 ] acc_hi_c = pipe_aln_hi_c ;
125+ wire acc_fmt_sel = pipe_aln_fmt_sel ;
91126 wire [25 + $clog2 (TCK + 1 ): 0 ] acc_sig; // 23 mantissa + 1 hidden + 1 sign + log2(N) bits; NOTE(review): the declared range is 26 + $clog2(TCK + 1) bits wide, one more than this sum - confirm what the extra bit is for
92127 wire [TCK - 1 : 0 ] sigs_sign; // sign bits of all operands (for int math); NOTE(review): TCK bits wide while csa_acc is instantiated with N = TCK + 1 lanes - presumably the c_val lane is excluded, confirm
93128
94129 VX_tcu_drl_acc # (
95130 .N (TCK + 1 )
96131 ) csa_acc (
97132 .sigsIn (pipe_aln_sigs),
98- .fmt_sel (pipe_fmt_sel ),
133+ .fmt_sel (pipe_aln_fmt_sel ),
99134 .sigOut (acc_sig),
100135 .signOuts (sigs_sign)
101136 );
0 commit comments