diff --git a/hw/ip/snitch/src/riscv_instr.sv b/hw/ip/snitch/src/riscv_instr.sv index 089181a1..16eda666 100644 --- a/hw/ip/snitch/src/riscv_instr.sv +++ b/hw/ip/snitch/src/riscv_instr.sv @@ -1338,4 +1338,5 @@ package riscv_instr; localparam logic [11:0] CSR_MHPMCOUNTER29H = 12'hb9d; localparam logic [11:0] CSR_MHPMCOUNTER30H = 12'hb9e; localparam logic [11:0] CSR_MHPMCOUNTER31H = 12'hb9f; + endpackage diff --git a/hw/ip/snitch/src/snitch.sv b/hw/ip/snitch/src/snitch.sv index 5e2d1a93..ecea6bd0 100644 --- a/hw/ip/snitch/src/snitch.sv +++ b/hw/ip/snitch/src/snitch.sv @@ -2335,6 +2335,16 @@ module snitch import snitch_pkg::*; import riscv_instr::*; #( riscv_instr::VREDMINU_VS, riscv_instr::VREDMAX_VS, riscv_instr::VREDMAXU_VS, + // CMY: add VMANDNOT VMAND VMOR VMXOR VMORNOT VMNAND VMNOR VMXNOR, 8 masking instructions + riscv_instr::VMANDN_MM, + riscv_instr::VMAND_MM, + riscv_instr::VMOR_MM, + riscv_instr::VMXOR_MM, + riscv_instr::VMORN_MM, + riscv_instr::VMNAND_MM, + riscv_instr::VMNOR_MM, + riscv_instr::VMXNOR_MM, + //---------------------------------------------------------- riscv_instr::VMSEQ_VV, riscv_instr::VMSEQ_VI, riscv_instr::VMSNE_VV, diff --git a/hw/ip/spatz/src/spatz.sv b/hw/ip/spatz/src/spatz.sv index cfe81ea2..dedc606b 100644 --- a/hw/ip/spatz/src/spatz.sv +++ b/hw/ip/spatz/src/spatz.sv @@ -289,7 +289,7 @@ module spatz import spatz_pkg::*; import rvv_pkg::*; import fpnew_pkg::*; #( .vfu_rsp_o (vfu_rsp ), // VRF .vrf_waddr_o (vrf_waddr[VFU_VD_WD] ), - .vrf_wdata_o (vrf_wdata[VFU_VD_WD] ), + .vrf_wdata_o (vrf_wdata[VFU_VD_WD] ), // N_FU*ELEN bits .vrf_we_o (sb_we[VFU_VD_WD] ), .vrf_wbe_o (vrf_wbe[VFU_VD_WD] ), .vrf_wvalid_i (vrf_wvalid[VFU_VD_WD] ), diff --git a/hw/ip/spatz/src/spatz_controller.sv b/hw/ip/spatz/src/spatz_controller.sv index 2e8837e0..e6ed0a36 100644 --- a/hw/ip/spatz/src/spatz_controller.sv +++ b/hw/ip/spatz/src/spatz_controller.sv @@ -199,7 +199,7 @@ module spatz_controller logic req_buffer_ready, req_buffer_valid, req_buffer_pop; // One element wide instruction buffer - fall_through_register #( + fall_through_register #( // a fifo. .T(spatz_req_t) ) i_req_buffer ( .clk_i (clk_i ), @@ -339,6 +339,12 @@ module spatz_controller scoreboard_d[spatz_req.id].deps[write_table_d[spatz_req.vd].id] |= write_table_d[spatz_req.vd].valid; read_table_d[spatz_req.vd] = {spatz_req.id, 1'b1}; end + // CMY: tackling v0 RAW hazard------------------------------------------------------- + if (!spatz_req.op_arith.vm) begin + scoreboard_d[spatz_req.id].deps[write_table_d[0].id] |= write_table_d[0].valid; + read_table_d[0] = {spatz_req.id, 1'b1}; + end + //-------------------------------------------------------------------------------------- // WAW and WAR hazards if (spatz_req.use_vd) begin @@ -454,7 +460,7 @@ module spatz_controller running_insn_d = running_insn_q; // New instruction! 
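// running_insn_q is a per-ID bitmask of the instructions still in flight, and
// next_insn_id is the free slot granted to the incoming request. With the v0
// hazard fix above, every masked request (vm == 0) records a dependency on the
// pending writer of v0 (RAW) and enters the read table for v0, so a later
// writer of v0 will in turn wait for it (WAR).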
-    if (spatz_req_valid && spatz_req.ex_unit != CON)
+    if (spatz_req_valid && spatz_req.ex_unit != CON) // register the new instruction as running
       running_insn_d[next_insn_id] = 1'b1;

     // Finished an instruction
diff --git a/hw/ip/spatz/src/spatz_decoder.sv b/hw/ip/spatz/src/spatz_decoder.sv
index a41c5436..955dc18e 100644
--- a/hw/ip/spatz/src/spatz_decoder.sv
+++ b/hw/ip/spatz/src/spatz_decoder.sv
@@ -274,6 +274,16 @@ module spatz_decoder
         riscv_instr::VREDMINU_VS,
         riscv_instr::VREDMAX_VS,
         riscv_instr::VREDMAXU_VS,
+// CMY: add VMANDNOT VMAND VMOR VMXOR VMORNOT VMNAND VMNOR VMXNOR, 8 masking instructions
+        riscv_instr::VMANDN_MM,
+        riscv_instr::VMAND_MM,
+        riscv_instr::VMOR_MM,
+        riscv_instr::VMXOR_MM,
+        riscv_instr::VMORN_MM,
+        riscv_instr::VMNAND_MM,
+        riscv_instr::VMNOR_MM,
+        riscv_instr::VMXNOR_MM,
+//-------------------------------------------------------------
         riscv_instr::VMSEQ_VV,
         riscv_instr::VMSEQ_VX,
         riscv_instr::VMSEQ_VI,
@@ -348,7 +358,7 @@ module spatz_decoder
         automatic vreg_t arith_s1 = decoder_req_i.instr[19:15];
         automatic vreg_t arith_s2 = decoder_req_i.instr[24:20];
         automatic vreg_t arith_d  = decoder_req_i.instr[11:7];
-        automatic logic  arith_vm = decoder_req_i.instr[25];
+        automatic logic  arith_vm = decoder_req_i.instr[25]; // vector arithmetic mask enable (vm) bit; vm = 0 means masked by v0.t

         spatz_req.op_arith.vm = arith_vm;
         spatz_req.op_sld.vm   = arith_vm;
@@ -827,6 +837,39 @@ module spatz_decoder
             end
           end

+          // CMY: Mask operations
+          riscv_instr::VMANDN_MM: begin
+            spatz_req.op = VMANDNOT;
+          end
+
+          riscv_instr::VMAND_MM: begin
+            spatz_req.op = VMAND;
+          end
+
+          riscv_instr::VMOR_MM: begin
+            spatz_req.op = VMOR;
+          end
+
+          riscv_instr::VMXOR_MM: begin
+            spatz_req.op = VMXOR;
+          end
+
+          riscv_instr::VMORN_MM: begin
+            spatz_req.op = VMORNOT;
+          end
+
+          riscv_instr::VMNAND_MM: begin
+            spatz_req.op = VMNAND;
+          end
+
+          riscv_instr::VMNOR_MM: begin
+            spatz_req.op = VMNOR;
+          end
+
+          riscv_instr::VMXNOR_MM: begin
+            spatz_req.op = VMXNOR;
+          end
+
           default: illegal_instr = 1'b1;
         endcase // Arithmetic Instruction Type
       end
diff --git a/hw/ip/spatz/src/spatz_ipu.sv b/hw/ip/spatz/src/spatz_ipu.sv
index 9d6f6485..2e4c1253 100644
--- a/hw/ip/spatz/src/spatz_ipu.sv
+++ b/hw/ip/spatz/src/spatz_ipu.sv
@@ -478,6 +478,7 @@ module spatz_ipu import spatz_pkg::*; import rvv_pkg::vew_e; #(
   ///////////////

   // Collect results from the SIMD lanes
+  // Each lane is responsible for calculating one element.
   always_comb begin : collector
     unique case (sew)
       rvv_pkg::EW_8 : begin
diff --git a/hw/ip/spatz/src/spatz_pkg.sv.tpl b/hw/ip/spatz/src/spatz_pkg.sv.tpl
index 7d565323..c3c1cd8c 100644
--- a/hw/ip/spatz/src/spatz_pkg.sv.tpl
+++ b/hw/ip/spatz/src/spatz_pkg.sv.tpl
@@ -108,6 +108,10 @@ package spatz_pkg;
   typedef logic [$clog2(NrVRFWords)-1:0] vrf_addr_t;
   typedef logic [N_FU*ELENB-1:0]         vrf_be_t;
   typedef logic [N_FU*ELEN-1:0]          vrf_data_t;
+  // ELEN = 64
+  // The VRF is centralized and serves all functional units.
+  // Each VRF port is 64F-bit wide. F denotes the number of FPUs.
+  // the FU here doesn't refer to Functional Units:
N_FU=max{N_IPU,N_FPU} // Instruction ID typedef logic [$clog2(NrParallelInstructions)-1:0] spatz_id_t; diff --git a/hw/ip/spatz/src/spatz_simd_lane.sv b/hw/ip/spatz/src/spatz_simd_lane.sv index bc5c684a..2fa4bb9b 100644 --- a/hw/ip/spatz/src/spatz_simd_lane.sv +++ b/hw/ip/spatz/src/spatz_simd_lane.sv @@ -220,9 +220,14 @@ module spatz_simd_lane import spatz_pkg::*; import rvv_pkg::vew_e; #( VSUB, VRSUB, VNMSAC, VNMSUB, VSBC: simd_result = subtractor_result[Width-1:0]; VMIN, VMINU : simd_result = $signed({op_s1_i[Width-1] & is_signed_i, op_s1_i}) <= $signed({op_s2_i[Width-1] & is_signed_i, op_s2_i}) ? op_s1_i : op_s2_i; VMAX, VMAXU : simd_result = $signed({op_s1_i[Width-1] & is_signed_i, op_s1_i}) > $signed({op_s2_i[Width-1] & is_signed_i, op_s2_i}) ? op_s1_i : op_s2_i; - VAND : simd_result = op_s1_i & op_s2_i; - VOR : simd_result = op_s1_i | op_s2_i; - VXOR : simd_result = op_s1_i ^ op_s2_i; + VAND, VMAND : simd_result = op_s1_i & op_s2_i; // CMY: add masking support + VOR , VMOR : simd_result = op_s1_i | op_s2_i; // like above + VXOR, VMXOR : simd_result = op_s1_i ^ op_s2_i; // like above + VMANDNOT : simd_result = ~op_s1_i & op_s2_i; // like above + VMORNOT : simd_result = ~op_s1_i | op_s2_i; // like above + VMNAND : simd_result = ~(op_s1_i & op_s2_i); // like above + VMNOR : simd_result = ~(op_s1_i | op_s2_i); // like above + VMXNOR : simd_result = ~(op_s1_i ^ op_s2_i); // like above VSLL : simd_result = shift_operand << shift_amount; VSRL : simd_result = shift_operand >> shift_amount; VSRA : simd_result = $signed(shift_operand) >>> shift_amount; diff --git a/hw/ip/spatz/src/spatz_vfu.sv b/hw/ip/spatz/src/spatz_vfu.sv index 01a73a56..1847e61d 100644 --- a/hw/ip/spatz/src/spatz_vfu.sv +++ b/hw/ip/spatz/src/spatz_vfu.sv @@ -125,12 +125,20 @@ module spatz_vfu // Do we have the reduction operand? logic reduction_operand_ready_d, reduction_operand_ready_q; +// CMY: Are we reading operands or v0.t? + typedef enum logic{ + READ_OPERANDS, READ_V0_t + } operand_state_t; + operand_state_t operand_state_d, operand_state_q; + `FF(operand_state_q, operand_state_d, READ_OPERANDS) + // Are the VFU operands ready? logic op1_is_ready, op2_is_ready, op3_is_ready, operands_ready; - assign op1_is_ready = spatz_req_valid && ((!spatz_req.op_arith.is_reduction && (!spatz_req.use_vs1 || vrf_rvalid_i[1])) || (spatz_req.op_arith.is_reduction && reduction_operand_ready_q)); - assign op2_is_ready = spatz_req_valid && ((!spatz_req.use_vs2 || vrf_rvalid_i[0]) || spatz_req.op_arith.is_reduction); - assign op3_is_ready = spatz_req_valid && (!spatz_req.vd_is_src || vrf_rvalid_i[2]); + assign op1_is_ready = spatz_req_valid && (operand_state_q == READ_OPERANDS) && ((!spatz_req.op_arith.is_reduction && (!spatz_req.use_vs1 || vrf_rvalid_i[1])) || (spatz_req.op_arith.is_reduction && reduction_operand_ready_q)); + assign op2_is_ready = spatz_req_valid && (operand_state_q == READ_OPERANDS) && ((!spatz_req.use_vs2 || vrf_rvalid_i[0]) || spatz_req.op_arith.is_reduction); + assign op3_is_ready = spatz_req_valid && (operand_state_q == READ_OPERANDS) && (!spatz_req.vd_is_src || vrf_rvalid_i[2]); assign operands_ready = op1_is_ready && op2_is_ready && op3_is_ready && (!spatz_req.op_arith.is_scalar || vfu_rsp_ready_i) && !stall; +// CMY: added (operand_state_q == READ_OPERANDS). 
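The added `(operand_state_q == READ_OPERANDS)` term is the crux of the VFU change: while the FSM is in `READ_V0_t`, all three operand-ready flags are forced low, so normal operand streaming stalls while the VRF read ports are borrowed to fetch the v0.t mask word. Below is a minimal, self-contained sketch of this two-state pattern. It is an illustration only; the port names (`req_is_masked_i`, `insn_retired_i`, ...) are invented for the sketch and are not the actual Spatz VFU interface.

module mask_fetch_fsm (
  input  logic clk_i,
  input  logic rst_ni,
  input  logic req_valid_i,     // a vector request is pending
  input  logic req_is_masked_i, // the request has vm == 0, i.e. uses v0.t
  input  logic vrf_rvalid_i,    // the VRF answers the v0 read this cycle
  input  logic insn_retired_i,  // the instruction has left the unit
  output logic reading_mask_o   // stall normal operand reads while high
);
  typedef enum logic { READ_OPERANDS, READ_V0 } state_e;
  state_e state_d, state_q;
  logic   mask_read_done_d, mask_read_done_q;

  always_comb begin
    state_d          = state_q;
    mask_read_done_d = mask_read_done_q;
    unique case (state_q)
      // Divert to the mask fetch once per masked instruction
      READ_OPERANDS: if (req_valid_i && req_is_masked_i && !mask_read_done_q)
        state_d = READ_V0;
      // Return as soon as the VRF delivers the mask word
      READ_V0: if (vrf_rvalid_i) begin
        state_d          = READ_OPERANDS;
        mask_read_done_d = 1'b1; // do not fetch v0 again for this instruction
      end
    endcase
    if (insn_retired_i)
      mask_read_done_d = 1'b0;   // re-arm for the next masked instruction
  end

  always_ff @(posedge clk_i or negedge rst_ni)
    if (!rst_ni) begin
      state_q          <= READ_OPERANDS;
      mask_read_done_q <= 1'b0;
    end else begin
      state_q          <= state_d;
      mask_read_done_q <= mask_read_done_d;
    end

  assign reading_mask_o = (state_q == READ_V0);
endmodule

In the actual code the "mask word delivered" condition checks both read ports (`vrf_rvalid_i[0] && vrf_rvalid_i[1]`), because the VLEN mask bits of v0 span two VRF words that are fetched through ports 0 and 1 together.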
// Valid operations logic [N_FU*ELENB-1:0] valid_operations; @@ -150,6 +158,7 @@ module spatz_vfu // Is this a FPU instruction logic is_fpu_insn; assign is_fpu_insn = FPU && spatz_req.op inside {[VFADD:VSDOTP]}; + // FPU is defined in spart_pkg , localparam bit FPU = N_FPU != 0; // Is the FPU busy? logic is_fpu_busy; @@ -167,6 +176,7 @@ module spatz_vfu typedef enum logic [2:0] { Reduction_NormalExecution, Reduction_Wait, + Reduction_Read_V0_t, // CMY added a state Reduction_Init, Reduction_Reduce, Reduction_WriteBack @@ -217,7 +227,7 @@ module spatz_vfu vl_d = vl_q + nr_elem_word; // Update narrowing information narrowing_upper_d = narrowing_upper_q ^ spatz_req.op_arith.is_narrowing; - widening_upper_d = widening_upper_q ^ (spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2); + widening_upper_d = widening_upper_q ^ (spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2); // toggle the signal if requires widening end // Current state of the VFU @@ -226,7 +236,7 @@ module spatz_vfu VFU_RunningIPU: begin // Only go to the FPU state once the IPUs are no longer busy if (is_fpu_insn) begin - if (is_ipu_busy) + if (is_ipu_busy) // CMY: why should we waid for ipu idle when it is a fpu_insn? stall = 1'b1; else begin state_d = VFU_RunningFPU; @@ -283,9 +293,126 @@ module spatz_vfu // Operands // ////////////// +//CMY: put the fpu_decoder in front of the reduction_useless_value selection. +// because the value selection depends on the FPU source format. + operation_e fpu_op; + fp_format_e fpu_src_fmt, fpu_dst_fmt; + int_format_e fpu_int_fmt; + logic fpu_op_mode; + logic fpu_vectorial_op; + + logic [N_FPU-1:0] fpu_busy_d, fpu_busy_q; + `FF(fpu_busy_q, fpu_busy_d, '0) + + status_t [N_FPU-1:0] fpu_status_d, fpu_status_q; + `FF(fpu_status_q, fpu_status_d, '0) + + always_comb begin: gen_decoder + fpu_op = fpnew_pkg::FMADD; + fpu_op_mode = 1'b0; + fpu_vectorial_op = 1'b0; + is_fpu_busy = |fpu_busy_q; + fpu_src_fmt = fpnew_pkg::FP32; + fpu_dst_fmt = fpnew_pkg::FP32; + fpu_int_fmt = fpnew_pkg::INT32; + + fpu_status_o = '0; + for (int fpu = 0; fpu < N_FPU; fpu++) + fpu_status_o |= fpu_status_q[fpu]; + + if (FPU) begin + unique case (spatz_req.vtype.vsew) + EW_64: begin + if (RVD) begin + fpu_src_fmt = fpnew_pkg::FP64; + fpu_dst_fmt = fpnew_pkg::FP64; + fpu_int_fmt = fpnew_pkg::INT64; + end + end + EW_32: begin + fpu_src_fmt = spatz_req.op_arith.is_narrowing || spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 ? fpnew_pkg::FP64 : fpnew_pkg::FP32; + fpu_dst_fmt = spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 || spatz_req.op == VSDOTP ? fpnew_pkg::FP64 : fpnew_pkg::FP32; + fpu_int_fmt = spatz_req.op_arith.is_narrowing && spatz_req.op inside {VI2F, VU2F} ? fpnew_pkg::INT64 : fpnew_pkg::INT32; + fpu_vectorial_op = FLEN > 32; + end + EW_16: begin + fpu_src_fmt = spatz_req.op_arith.is_narrowing || spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 ? fpnew_pkg::FP32 : (spatz_req.fm.src ? fpnew_pkg::FP16ALT : fpnew_pkg::FP16); + fpu_dst_fmt = spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 || spatz_req.op == VSDOTP ? fpnew_pkg::FP32 : (spatz_req.fm.dst ? fpnew_pkg::FP16ALT : fpnew_pkg::FP16); + fpu_int_fmt = spatz_req.op_arith.is_narrowing && spatz_req.op inside {VI2F, VU2F} ? fpnew_pkg::INT32 : fpnew_pkg::INT16; + fpu_vectorial_op = 1'b1; + end + EW_8: begin + fpu_src_fmt = spatz_req.op_arith.is_narrowing || spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 ? (spatz_req.fm.src ? fpnew_pkg::FP16ALT : fpnew_pkg::FP16) : (spatz_req.fm.src ? 
fpnew_pkg::FP8ALT : fpnew_pkg::FP8); + fpu_dst_fmt = spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2 || spatz_req.op == VSDOTP ? (spatz_req.fm.dst ? fpnew_pkg::FP16ALT : fpnew_pkg::FP16) : (spatz_req.fm.dst ? fpnew_pkg::FP8ALT : fpnew_pkg::FP8); + fpu_int_fmt = spatz_req.op_arith.is_narrowing && spatz_req.op inside {VI2F, VU2F} ? fpnew_pkg::INT16 : fpnew_pkg::INT8; + fpu_vectorial_op = 1'b1; + end + default:; + endcase + + unique case (spatz_req.op) + VFADD: fpu_op = fpnew_pkg::ADD; + VFSUB: begin + fpu_op = fpnew_pkg::ADD; + fpu_op_mode = 1'b1; + end + VFMUL : fpu_op = fpnew_pkg::MUL; + VFMADD : fpu_op = fpnew_pkg::FMADD; + VFMSUB : begin + fpu_op = fpnew_pkg::FMADD; + fpu_op_mode = 1'b1; + end + VFNMSUB: fpu_op = fpnew_pkg::FNMSUB; + VFNMADD: begin + fpu_op = fpnew_pkg::FNMSUB; + fpu_op_mode = 1'b1; + end + + VFMINMAX: begin + fpu_op = fpnew_pkg::MINMAX; + fpu_dst_fmt = fpu_src_fmt; + end + + + VFSGNJ : begin + fpu_op = fpnew_pkg::SGNJ; + fpu_dst_fmt = fpu_src_fmt; + end + VFCLASS: begin + fpu_op = fpnew_pkg::CLASSIFY; + fpu_dst_fmt = fpu_src_fmt; + end + VFCMP : begin + fpu_op = fpnew_pkg::CMP; + fpu_dst_fmt = fpu_src_fmt; + end + + VF2F: fpu_op = fpnew_pkg::F2F; + VF2I: fpu_op = fpnew_pkg::F2I; + VF2U: begin + fpu_op = fpnew_pkg::F2I; + fpu_op_mode = 1'b1; + end + VI2F: fpu_op = fpnew_pkg::I2F; + VU2F: begin + fpu_op = fpnew_pkg::I2F; + fpu_op_mode = 1'b1; + end + + VSDOTP: fpu_op = fpnew_pkg::SDOTP; + + default:; + endcase + end + end: gen_decoder +//--------------------------------------------------- + + + // Reduction registers elen_t [1:0] reduction_q, reduction_d; `FFL(reduction_q, reduction_d, reduction_operand_ready_d, '0) + elen_t reduction_useless_value; // IPU results logic [N_FU*ELEN-1:0] ipu_result; @@ -299,34 +426,101 @@ module spatz_vfu // Operands and result signals logic [N_FU*ELEN-1:0] operand1, operand2, operand3; + logic [N_FU*ELEN-1:0] operand_v0_t_lo,operand_v0_t_lo_q; // CMY: v0 should be read from vrf + logic [N_FU*ELEN-1:0] operand_v0_t_hi,operand_v0_t_hi_q; logic [N_FU*ELENB-1:0] in_ready; - always_comb begin: operand_proc - if (spatz_req.op_arith.is_scalar) - operand1 = {1*N_FU{spatz_req.rs1}}; - else if (spatz_req.use_vs1) - operand1 = spatz_req.op_arith.is_reduction ? $unsigned(reduction_q[1]) : vrf_rdata_i[1]; - else begin - // Replicate scalar operands - unique case (spatz_req.op == VSDOTP ? vew_e'(spatz_req.vtype.vsew + 1) : spatz_req.vtype.vsew) - EW_8 : operand1 = MAXEW == EW_32 ? {4*N_FU{spatz_req.rs1[7:0]}} : {8*N_FU{spatz_req.rs1[7:0]}}; - EW_16: operand1 = MAXEW == EW_32 ? {2*N_FU{spatz_req.rs1[15:0]}} : {4*N_FU{spatz_req.rs1[15:0]}}; - EW_32: operand1 = MAXEW == EW_32 ? {1*N_FU{spatz_req.rs1[31:0]}} : {2*N_FU{spatz_req.rs1[31:0]}}; - default: operand1 = {1*N_FU{spatz_req.rs1}}; - endcase - end - if ((!spatz_req.op_arith.is_scalar || spatz_req.op == VADD) && spatz_req.use_vs2) - operand2 = spatz_req.op_arith.is_reduction ? $unsigned(reduction_q[0]) : vrf_rdata_i[0]; - else - // Replicate scalar operands - unique case (spatz_req.op == VSDOTP ? vew_e'(spatz_req.vtype.vsew + 1) : spatz_req.vtype.vsew) - EW_8 : operand2 = MAXEW == EW_32 ? {4*N_FU{spatz_req.rs2[7:0]}} : {8*N_FU{spatz_req.rs2[7:0]}}; - EW_16: operand2 = MAXEW == EW_32 ? {2*N_FU{spatz_req.rs2[15:0]}} : {4*N_FU{spatz_req.rs2[15:0]}}; - EW_32: operand2 = MAXEW == EW_32 ? {1*N_FU{spatz_req.rs2[31:0]}} : {2*N_FU{spatz_req.rs2[31:0]}}; - default: operand2 = {1*N_FU{spatz_req.rs2}}; + //CMY: have we fetched the v0.t in reduction masking instructions. 
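The "read done" flags declared below are built with `FFLARNC from common_cells' registers.svh: a load-enable flip-flop with asynchronous reset and synchronous clear. As instantiated here, `FFLARNC(done, 1'b1, ready, rsp_valid, 1'b0, clk_i, rst_ni) behaves roughly like the sticky flag sketched next (a sketch under the assumption, as in the upstream macro, that clear takes priority over load):

module sticky_done_flag (
  input  logic clk_i,
  input  logic rst_ni,
  input  logic ready_i,     // set: the v0.t word was captured this cycle
  input  logic rsp_valid_i, // clear: the instruction retired
  output logic done_o
);
  always_ff @(posedge clk_i or negedge rst_ni)
    if (!rst_ni)          done_o <= 1'b0; // asynchronous reset
    else if (rsp_valid_i) done_o <= 1'b0; // synchronous clear wins
    else if (ready_i)     done_o <= 1'b1; // set and hold until cleared
endmodule

This flag is what lets the reduction path read v0.t exactly once per instruction before proceeding with the normal reduction sequencing.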
+ logic reduction_v0_t_is_ready; + assign reduction_v0_t_is_ready = (reduction_state_q == Reduction_Read_V0_t) && vrf_rvalid_i[0] && vrf_rvalid_i[1]; + logic reduction_v0_t_read_done; + `FFLARNC(reduction_v0_t_read_done,1'b1,reduction_v0_t_is_ready,vfu_rsp_valid_o,1'b0,clk_i,rst_ni); + //---------------------------------------------------------------- + + // CMY: back up v0.t for reduction instructions.----------------------- + logic [N_FU*ELEN-1:0] reduction_operand_v0_t_lo,reduction_operand_v0_t_lo_q; + logic [N_FU*ELEN-1:0] reduction_operand_v0_t_hi,reduction_operand_v0_t_hi_q; + `FFL(reduction_operand_v0_t_lo_q, reduction_operand_v0_t_lo, reduction_v0_t_is_ready, '0) + `FFL(reduction_operand_v0_t_hi_q, reduction_operand_v0_t_hi, reduction_v0_t_is_ready, '0) + logic [VLEN-1:0] reduction_operand_v0_t_q; + assign reduction_operand_v0_t_q = {reduction_operand_v0_t_hi_q, reduction_operand_v0_t_lo_q}; + //--------------------------------------------------------------------------------- + + // CMY:an FSM to manage operands between normal calculation and v0.t fetching----------------- + + logic v0_t_is_ready; + assign v0_t_is_ready = (operand_state_q == READ_V0_t) && vrf_rvalid_i[0] && vrf_rvalid_i[1]; + logic v0_t_read_done; + `FFLARNC(v0_t_read_done,1'b1,v0_t_is_ready,vfu_rsp_valid_o,1'b0,clk_i,rst_ni); + + always_comb begin: operand_selection + operand_state_d = operand_state_q; + // if(spatz_req_valid) begin + unique case(operand_state_q) + READ_V0_t: + if(v0_t_is_ready) operand_state_d = READ_OPERANDS; + else operand_state_d = operand_state_q; + READ_OPERANDS: + if(spatz_req_valid && !spatz_req.op_arith.is_scalar && !spatz_req.op_arith.vm && !v0_t_read_done && !spatz_req.op_arith.is_reduction) + operand_state_d = READ_V0_t; + else operand_state_d = READ_OPERANDS; + default: operand_state_d = operand_state_q; endcase + // end + end:operand_selection + + vlen_t vl_q_plus_nr_elem_word; + assign vl_q_plus_nr_elem_word = vl_q + nr_elem_word; // CMY: for monitoring. + + //-------------------------------------------- + + always_comb begin: operand_proc // CMY: turn it into a FSM + reduction_operand_v0_t_lo = '0; + reduction_operand_v0_t_hi = '0; + operand_v0_t_lo = '0; + operand_v0_t_hi = '0; + operand1 = '0; + operand2 = '0; + case (operand_state_q) + READ_OPERANDS: begin + if(reduction_state_q == Reduction_Read_V0_t) begin + reduction_operand_v0_t_lo = vrf_rdata_i[0]; + reduction_operand_v0_t_hi = vrf_rdata_i[1]; + end + else begin + if (spatz_req.op_arith.is_scalar) + operand1 = {1*N_FU{spatz_req.rs1}}; + else if (spatz_req.use_vs1) + operand1 = spatz_req.op_arith.is_reduction ? $unsigned(reduction_q[1]) : vrf_rdata_i[1]; + else begin + // Replicate scalar operands + unique case (spatz_req.op == VSDOTP ? vew_e'(spatz_req.vtype.vsew + 1) : spatz_req.vtype.vsew) + EW_8 : operand1 = MAXEW == EW_32 ? {4*N_FU{spatz_req.rs1[7:0]}} : {8*N_FU{spatz_req.rs1[7:0]}}; + EW_16: operand1 = MAXEW == EW_32 ? {2*N_FU{spatz_req.rs1[15:0]}} : {4*N_FU{spatz_req.rs1[15:0]}}; + EW_32: operand1 = MAXEW == EW_32 ? {1*N_FU{spatz_req.rs1[31:0]}} : {2*N_FU{spatz_req.rs1[31:0]}}; + default: operand1 = {1*N_FU{spatz_req.rs1}}; + endcase + end - operand3 = spatz_req.op_arith.is_scalar ? {1*N_FU{spatz_req.rsd}} : vrf_rdata_i[2]; + if ((!spatz_req.op_arith.is_scalar || spatz_req.op == VADD) && spatz_req.use_vs2) + operand2 = spatz_req.op_arith.is_reduction ? $unsigned(reduction_q[0]) : vrf_rdata_i[0]; + else + // Replicate scalar operands + unique case (spatz_req.op == VSDOTP ? 
vew_e'(spatz_req.vtype.vsew + 1) : spatz_req.vtype.vsew) + EW_8 : operand2 = MAXEW == EW_32 ? {4*N_FU{spatz_req.rs2[7:0]}} : {8*N_FU{spatz_req.rs2[7:0]}}; + EW_16: operand2 = MAXEW == EW_32 ? {2*N_FU{spatz_req.rs2[15:0]}} : {4*N_FU{spatz_req.rs2[15:0]}}; + EW_32: operand2 = MAXEW == EW_32 ? {1*N_FU{spatz_req.rs2[31:0]}} : {2*N_FU{spatz_req.rs2[31:0]}}; + default: operand2 = {1*N_FU{spatz_req.rs2}}; + endcase + end + end + READ_V0_t: begin + operand_v0_t_lo = vrf_rdata_i[0]; + operand_v0_t_hi = vrf_rdata_i[1]; + end + default:; + endcase + operand3 = spatz_req.op_arith.is_scalar ? {1*N_FU{spatz_req.rsd}} : vrf_rdata_i[2]; // VFU_VD_RD // operand3 is used in MAC computation, like VMADD end: operand_proc assign in_ready = state_q == VFU_RunningIPU ? ipu_in_ready : fpu_in_ready; @@ -335,6 +529,12 @@ module spatz_vfu assign scalar_result = result[ELEN-1:0]; + `FFL(operand_v0_t_lo_q, operand_v0_t_lo, v0_t_is_ready, '0) // CMY: backup v0.t + `FFL(operand_v0_t_hi_q, operand_v0_t_hi, v0_t_is_ready, '0) + + logic [VLEN-1:0] operand_v0_t_q; + assign operand_v0_t_q = {operand_v0_t_hi_q,operand_v0_t_lo_q}; + /////////////////////// // Reduction logic // /////////////////////// @@ -349,6 +549,77 @@ module spatz_vfu // Do we need to request reduction operands? logic [1:0] reduction_operand_request; + // CMY: reduction_useless_value selection----------------------- + always_comb begin: reduction_useless_value_selection + reduction_useless_value = '0; + if(spatz_req.op_arith.is_reduction == 1'b1) begin + case(spatz_req.op) + VADD: // riscv_instr::VREDSUM_VS,riscv_instr::VFREDUSUM_VS,riscv_instr::VFREDOSUM_VS + reduction_useless_value = '0; + VAND: //riscv_instr::VREDAND_VS: + reduction_useless_value = '1; + VOR, //riscv_instr::VREDOR_VS, + VXOR: //riscv_instr::VREDXOR_VS: + reduction_useless_value = '0; + VMINU: //riscv_instr::VREDMINU_VS: + reduction_useless_value = '1; + VMIN: //riscv_instr::VREDMIN_VS: + unique case(spatz_req.vtype.vsew) + EW_8:reduction_useless_value = {1'b0,7'h7f}; + EW_16:reduction_useless_value = {1'b0,15'h7fff}; + EW_32:reduction_useless_value = {1'b0,31'h7fffffff}; + default: + if(MAXEW == EW_64) reduction_useless_value = {1'b0,63'h7fffffffffffffff}; + endcase + VMAXU: //riscv_instr::VREDMAXU_VS: + reduction_useless_value = '0; + VMAX: //riscv_instr::VREDMAX_VS: //complement code of -infinity + unique case(spatz_req.vtype.vsew) + EW_8:reduction_useless_value = {1'b1,7'h0}; + EW_16:reduction_useless_value = {1'b1,15'h0}; + EW_32:reduction_useless_value = {1'b1,31'h0}; + default: + if(MAXEW == EW_64) reduction_useless_value = {1'b1,63'h0}; + endcase + VFMINMAX: begin + if(spatz_req.rm == fpnew_pkg::RNE) begin //riscv_instr::VFREDMIN_VS: + unique case(fpu_src_fmt) // fpu_src_fmt is synchronous with spatz_req.op, while fpu_src_fmt_q is synchronous with op_q + // + infinity + fpnew_pkg::FP64:reduction_useless_value = {1'b0,11'h7ff,52'h0}; + fpnew_pkg::FP32:reduction_useless_value = {1'b0,8'hff,23'h0}; + fpnew_pkg::FP16:reduction_useless_value = {1'b0,5'h1f,10'h0}; + fpnew_pkg::FP16ALT:reduction_useless_value = {1'b0,8'hff,7'h0}; + fpnew_pkg::FP8:reduction_useless_value = {1'b0,5'h1f,2'h0}; + fpnew_pkg::FP8ALT:reduction_useless_value = {1'b0,4'hf,3'h0}; + endcase + end + if (spatz_req.rm == fpnew_pkg::RTZ) begin //riscv_instr::VFREDMAX_VS: + unique case(fpu_src_fmt) + // - infinity + fpnew_pkg::FP64:reduction_useless_value = {1'b1,11'h7ff,52'h0}; + fpnew_pkg::FP32:reduction_useless_value = {1'b1,8'hff,23'h0}; + fpnew_pkg::FP16:reduction_useless_value = {1'b1,5'h1f,10'h0}; + 
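                // The remaining entries use the same -infinity pattern:
                // sign = 1, exponent all ones, mantissa = 0. FP16ALT is the
                // bfloat16-style format (8-bit exponent, 7-bit mantissa),
                // FP8 is E5M2, and FP8ALT is E4M3, which explains the field
                // widths on the next three lines.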
fpnew_pkg::FP16ALT:reduction_useless_value = {1'b1,8'hff,7'h0}; + fpnew_pkg::FP8:reduction_useless_value = {1'b1,5'h1f,2'h0}; + fpnew_pkg::FP8ALT:reduction_useless_value = {1'b1,4'hf,3'h0}; + endcase + end + end + default: reduction_useless_value='0; + endcase + end + end + + // ----------------------------------------------------------- + + // CMY: add monitor signals for reduction_pointer_q and reduction_opreand_v0_t_q[reduction_pointer_q] + logic [idx_width(N_FU*ELENB)-1:0] reduction_pointer_q_idx_width_N_FU_ELENB_0; + assign reduction_pointer_q_idx_width_N_FU_ELENB_0 = reduction_pointer_q[idx_width(N_FU*ELENB):0]; + + logic v0_t_reduction_pointer_q_idx_width_N_FU_ELENB_0; + assign v0_t_reduction_pointer_q_idx_width_N_FU_ELENB_0 = reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]; + //------------------------------------------------------------------------------ + always_comb begin: proc_reduction // Maintain state reduction_state_d = reduction_state_q; @@ -372,7 +643,7 @@ module spatz_vfu reduction_operand_request[1] = (reduction_state_q inside {Reduction_Init, Reduction_Reduce}) || !spatz_req.op_arith.is_reduction; unique case (reduction_state_q) - Reduction_NormalExecution: begin + Reduction_NormalExecution: begin // not a reduction instruction // Did we issue a word to the FUs? word_issued = spatz_req_valid && &(in_ready | ~valid_operations) && operands_ready && !stall; @@ -384,7 +655,7 @@ module spatz_vfu // Do we have a new reduction instruction? if (spatz_req_valid && !running_q[spatz_req.id] && spatz_req.op_arith.is_reduction) - reduction_state_d = is_fpu_busy ? Reduction_Wait : Reduction_Init; + reduction_state_d = (!spatz_req.op_arith.vm) ? Reduction_Read_V0_t : is_fpu_busy ? Reduction_Wait : Reduction_Init; // CMY: added Reduction_Read_V0_t state end Reduction_Wait: begin @@ -395,21 +666,38 @@ module spatz_vfu reduction_state_d = Reduction_Init; end + Reduction_Read_V0_t:begin + if(reduction_v0_t_is_ready) + if (!is_fpu_busy) + reduction_state_d = Reduction_Init; + else reduction_state_d = Reduction_Wait; + else reduction_state_d = Reduction_Read_V0_t; + end + Reduction_Init: begin // Initialize the reduction // verilator lint_off SELRANGE unique case (spatz_req.vtype.vsew) EW_8 : begin reduction_d[0] = $unsigned(vrf_rdata_i[0][7:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][8*reduction_pointer_q[idx_width(N_FU*ELENB)-1:0] +: 8]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][8*reduction_pointer_q[idx_width(N_FU*ELENB)-1:0] +: 8]); end EW_16: begin reduction_d[0] = $unsigned(vrf_rdata_i[0][15:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][16*reduction_pointer_q[idx_width(N_FU*ELENB)-2:0] +: 16]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][16*reduction_pointer_q[idx_width(N_FU*ELENB)-2:0] +: 16]); end EW_32: begin reduction_d[0] = $unsigned(vrf_rdata_i[0][31:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); end default: begin `ifdef 
MEMPOOL_SPATZ @@ -417,7 +705,10 @@ module spatz_vfu `else if (MAXEW == EW_64) begin reduction_d[0] = $unsigned(vrf_rdata_i[0][63:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); end `endif end @@ -444,15 +735,24 @@ module spatz_vfu unique case (spatz_req.vtype.vsew) EW_8 : begin reduction_d[0] = $unsigned(result[7:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][8*reduction_pointer_q[idx_width(N_FU*ELENB)-1:0] +: 8]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][8*reduction_pointer_q[idx_width(N_FU*ELENB)-1:0] +: 8]); end EW_16: begin reduction_d[0] = $unsigned(result[15:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][16*reduction_pointer_q[idx_width(N_FU*ELENB)-2:0] +: 16]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][16*reduction_pointer_q[idx_width(N_FU*ELENB)-2:0] +: 16]); end EW_32: begin reduction_d[0] = $unsigned(result[31:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][32*reduction_pointer_q[idx_width(N_FU*ELENB)-3:0] +: 32]); end default: begin `ifdef MEMPOOL_SPATZ @@ -460,7 +760,10 @@ module spatz_vfu `else if (MAXEW == EW_64) begin reduction_d[0] = $unsigned(result[63:0]); - reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); + if(!spatz_req.op_arith.vm && !reduction_operand_v0_t_q[reduction_pointer_q[idx_width(N_FU*ELENB):0]]) + reduction_d[1] = reduction_useless_value; + else + reduction_d[1] = $unsigned(vrf_rdata_i[1][64*reduction_pointer_q[idx_width(N_FU*ELENB)-4:0] +: 64]); end `endif end @@ -546,52 +849,135 @@ module spatz_vfu reduction : spatz_req.op_arith.is_reduction }; - if (spatz_req_valid && vl_q == '0) begin - vreg_addr_d[0] = (spatz_req.vs2 + vstart) << $clog2(NrWordsPerVector); - vreg_addr_d[1] = (spatz_req.vs1 + vstart) << $clog2(NrWordsPerVector); - vreg_addr_d[2] = (spatz_req.vd + vstart) << $clog2(NrWordsPerVector); - - // Direct feedthrough - vrf_raddr_o = vreg_addr_d; - if (!spatz_req.op_arith.is_scalar) - input_tag.vd_addr = vreg_addr_d[2]; - - // Did we commit a word already? 
- if (word_issued) begin - vreg_addr_d[0] = vreg_addr_d[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q); - vreg_addr_d[1] = vreg_addr_d[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q); - vreg_addr_d[2] = vreg_addr_d[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q)); - end - end else if (spatz_req_valid && vl_q < spatz_req.vl && word_issued) begin - vreg_addr_d[0] = vreg_addr_q[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q); - vreg_addr_d[1] = vreg_addr_q[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q); - vreg_addr_d[2] = vreg_addr_q[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q)); - end + case(operand_state_q)// CMY modified + READ_OPERANDS:begin + if(reduction_state_q == Reduction_Read_V0_t) begin + vreg_addr_d[0] = 0 << $clog2(NrWordsPerVector); + vreg_addr_d[1] = 1 << $clog2(NrWordsPerVector); + vrf_raddr_o = vreg_addr_d; + end + else begin + + if (spatz_req_valid && vl_q == '0) begin + vreg_addr_d[0] = (spatz_req.vs2 + vstart) << $clog2(NrWordsPerVector); + vreg_addr_d[1] = (spatz_req.vs1 + vstart) << $clog2(NrWordsPerVector); + vreg_addr_d[2] = (spatz_req.vd + vstart) << $clog2(NrWordsPerVector); + + // Direct feedthrough + vrf_raddr_o = vreg_addr_d; + if (!spatz_req.op_arith.is_scalar) + input_tag.vd_addr = vreg_addr_d[2]; + + // Did we commit a word already? + if (word_issued) begin + vreg_addr_d[0] = vreg_addr_d[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q); + vreg_addr_d[1] = vreg_addr_d[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q); // if it is a widening operands, addr shouldn't add when reading the upper part. + vreg_addr_d[2] = vreg_addr_d[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q)); + end + end else if (spatz_req_valid && vl_q < spatz_req.vl && word_issued) begin + vreg_addr_d[0] = vreg_addr_q[0] + (!spatz_req.op_arith.widen_vs2 || widening_upper_q); + vreg_addr_d[1] = vreg_addr_q[1] + (!spatz_req.op_arith.widen_vs1 || widening_upper_q); + vreg_addr_d[2] = vreg_addr_q[2] + (!spatz_req.op_arith.is_reduction && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q)); + end + end + end + READ_V0_t: begin + vreg_addr_d[0] = ( 0 + vstart) << $clog2(NrWordsPerVector); + vreg_addr_d[1] = ( 1 + vstart) << $clog2(NrWordsPerVector); + vrf_raddr_o = vreg_addr_d; + end + default:; + endcase end: vreg_addr_proc always_comb begin : operand_req_proc vreg_r_req = '0; vreg_we = '0; - vreg_wbe = '0; - - if (spatz_req_valid && vl_q < spatz_req.vl) - // Request operands - vreg_r_req = {spatz_req.vd_is_src, spatz_req.use_vs1 && reduction_operand_request[1], spatz_req.use_vs2 && reduction_operand_request[0]}; + unique case(operand_state_q) // CMY: turn it into FSM logic + READ_V0_t: vreg_r_req = 3'b011; + READ_OPERANDS: begin + if(reduction_state_q == Reduction_Read_V0_t) vreg_r_req = 3'b011; + else + if (spatz_req_valid && vl_q < spatz_req.vl) + // Request operands + vreg_r_req = {spatz_req.vd_is_src, spatz_req.use_vs1 && reduction_operand_request[1], spatz_req.use_vs2 && reduction_operand_request[0]}; + end + default:; + endcase // Got a new result if (&(result_valid | ~pending_results) && !result_tag.reduction) begin vreg_we = !result_tag.wb; - vreg_wbe = '1; + end + + // Reduction finished execution + if (reduction_state_q == Reduction_WriteBack && result_valid[0]) begin + vreg_we = 1'b1; + end + end : operand_req_proc + + // CMY: vreg_wbe logic---------------------- + vlen_t 
vreg_wb_word_cnt_q, vreg_wb_word_cnt_d; + `FF(vreg_wb_word_cnt_q, vreg_wb_word_cnt_d, '0) + vew_e sew_wb; + logic widening_wb; + assign widening_wb = spatz_req.op_arith.widen_vs1 || spatz_req.op_arith.widen_vs2; + assign sew_wb = vew_e'(int'(spatz_req.vtype.vsew) + widening_wb); - if (result_tag.narrowing) begin - // Only write half of the elements - vreg_wbe = result_tag.narrowing_upper ? {{(N_FU*ELENB/2){1'b1}}, {(N_FU*ELENB/2){1'b0}}} : {{(N_FU*ELENB/2){1'b0}}, {(N_FU*ELENB/2){1'b1}}}; + vrf_be_t vreg_wbe_pre; + +always_comb begin : vreg_wbe_proc + vreg_wbe = '0; + vreg_wb_word_cnt_d = vreg_wb_word_cnt_q; + if ((result_tag.last && &(result_valid | ~pending_results) && reduction_state_q inside {Reduction_NormalExecution, Reduction_Wait}) || reduction_done) + vreg_wb_word_cnt_d = 0; + else if (&(result_valid | ~pending_results) /*&& !result_tag.reduction*/ && (!spatz_req.op_arith.is_narrowing || narrowing_upper_q)) vreg_wb_word_cnt_d = vreg_wb_word_cnt_q + 1; + // Got a new result + if (&(result_valid | ~pending_results) && !result_tag.reduction) begin + // vreg_we = !result_tag.wb; + //vreg_wbe = '1; + // vreg_wb_word_cnt_d = vreg_wb_word_cnt_q + 1; + if(!spatz_req.op_arith.vm && !spatz_req.op_arith.is_scalar && !result_tag.narrowing)// CMY: masking the wb results + // unique case (spatz_req.vtype.vsew) + unique case (sew_wb) // CMY: add widening support + EW_8:for(int i=0;i LSU -> VRF +// store: VRF -> LSU -> Memory + module spatz_vlsu import spatz_pkg::*; import rvv_pkg::*; @@ -67,7 +70,7 @@ module spatz_vlsu ////////////// typedef logic [IdWidth-1:0] id_t; - typedef logic [$clog2(NrWordsPerVector*8)-1:0] vreg_elem_t; + typedef logic [$clog2(NrWordsPerVector*8)-1:0] vreg_elem_t; // element index. a word is 256bit and an element is 64bit /////////////////////// // Operation queue // @@ -93,6 +96,8 @@ module spatz_vlsu ); // Convert the vl to number of bytes for all element widths + // CMY: spatz_req_i.vl: the number of elements for this instruction + // CMY: spatz_req_d.vl: the number of bytes for this instruction always_comb begin: proc_spatz_req spatz_req_d = spatz_req_i; @@ -118,7 +123,7 @@ module spatz_vlsu // Do we have a strided memory access logic mem_is_strided; - assign mem_is_strided = (mem_spatz_req.op == VLSE) || (mem_spatz_req.op == VSSE); + assign mem_is_strided = (mem_spatz_req.op == VLSE) || (mem_spatz_req.op == VSSE); // CMY: Vector Load/Store Strided Elements // Do we have an indexed memory access logic mem_is_indexed; @@ -128,15 +133,15 @@ module spatz_vlsu // State // ///////////// - typedef enum logic { - VLSU_RunningLoad, VLSU_RunningStore + typedef enum logic [1:0] { + VLSU_RunningLoad, VLSU_RunningStore, VLSU_ReadingV0_t } state_t; state_t state_d, state_q; `FF(state_q, state_d, VLSU_RunningLoad) - id_t [NrMemPorts-1:0] store_count_q; - id_t [NrMemPorts-1:0] store_count_d; + id_t [NrMemPorts-1:0] store_count_q; // id_t = 3: width of NrPendingLoads + id_t [NrMemPorts-1:0] store_count_d; // NrMemPorts = N_FU = 4 for (genvar port = 0; port < NrMemPorts; port++) begin: gen_store_count_q `FF(store_count_q[port], store_count_d[port], '0) @@ -149,7 +154,7 @@ module spatz_vlsu for (int port = 0; port < NrMemPorts; port++) begin if (spatz_mem_req_o[port].write && spatz_mem_req_valid_o[port] && spatz_mem_req_ready_i[port]) // Did we send a store? - store_count_d[port]++; + store_count_d[port]++; // number of outstanding store // Did we get the ack of a store? 
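// store_count_q[port] therefore acts as a per-port credit counter of in-flight
// stores: it is incremented when a store request is accepted, and decremented
// by the platform-specific acknowledgement path behind the `ifdef below.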
`ifdef MEMPOOL_SPATZ @@ -301,6 +306,7 @@ module spatz_vlsu vlen_t vstart; logic [2:0] rs1; + logic vm; // CMY: if it is a maskede memory instruction logic is_load; logic is_strided; logic is_indexed; @@ -327,7 +333,7 @@ module spatz_vlsu .full_o (/* Unused */ ), .data_o (commit_insn_q ), .empty_o (commit_insn_empty), - .pop_i (commit_insn_pop ), + .pop_i (commit_insn_pop ), // finish the execution .usage_o (/* Unused */ ) ); @@ -339,6 +345,7 @@ module spatz_vlsu vl : mem_spatz_req.vl, vstart : mem_spatz_req.vstart, rs1 : mem_spatz_req.rs1[2:0], + vm : mem_spatz_req.op_mem.vm, is_load : mem_spatz_req.op_mem.is_load, is_strided: mem_is_strided, is_indexed: mem_is_indexed @@ -385,7 +392,7 @@ module spatz_vlsu logic [NrMemPorts-1:0] commit_finished_q; logic [NrMemPorts-1:0] commit_finished_d; - for (genvar fu = 0; fu < N_FU; fu++) begin: gen_vreg_counters + for (genvar fu = 0; fu < N_FU; fu++) begin: gen_vreg_counters // N_FU: number of FPUs delta_counter #( .WIDTH($bits(vlen_t)) ) i_delta_counter_vreg ( @@ -407,15 +414,16 @@ module spatz_vlsu //////////////////////// // Address Generation // - //////////////////////// + //////////////////////// // CMY: VRF address generation - elen_t [NrMemPorts-1:0] mem_req_addr; + elen_t [NrMemPorts-1:0] mem_req_addr; // CMY: why elen_t? (64bits/hardware element) vrf_addr_t vd_vreg_addr; vrf_addr_t vs2_vreg_addr; + vrf_addr_t v0_t_vreg_addr_lo, v0_t_vreg_addr_hi; // Current element index and byte index that are being accessed at the register file - vreg_elem_t vd_elem_id; + vreg_elem_t vd_elem_id; // 256/64=4 [3:0] vreg_elem_t vs2_elem_id_d, vs2_elem_id_q; `FF(vs2_elem_id_q, vs2_elem_id_d, '0) @@ -435,25 +443,33 @@ module spatz_vlsu assign idx_offset = mem_idx_counter_q[port]; always_comb begin + addr = '0; + stride ='0; + offset ='0; stride = mem_is_strided ? mem_spatz_req.rs2 >> mem_spatz_req.vtype.vsew : 'd1; - + // stride here(HW) is in element, compared to Byte-based in SW if (mem_is_indexed) begin // What is the relationship between data and index width? automatic logic [1:0] data_index_width_diff = int'(mem_spatz_req.vtype.vsew) - int'(mem_spatz_req.op_mem.ew); - + // // op_mem.ew encodes the index element width (EW of VS2) // Pointer to index automatic logic [idx_width(N_FU*ELENB)-1:0] word_index = (port << (MAXEW - data_index_width_diff)) + (maxew_t'(idx_offset << data_index_width_diff) >> data_index_width_diff) + (maxew_t'(idx_offset >> (MAXEW - data_index_width_diff)) << (MAXEW - data_index_width_diff)) * NrMemPorts; - - // Index - unique case (mem_spatz_req.op_mem.ew) - EW_8 : offset = $signed(vrf_rdata_i[1][8 * word_index +: 8]); - EW_16: offset = $signed(vrf_rdata_i[1][8 * word_index +: 16]); - default: offset = $signed(vrf_rdata_i[1][8 * word_index +: 32]); - endcase - end else begin + // CMY: word_index: the index of byte in vs2 // starting point of a VRF port + lower bits of idx_offset, the index of bytes inside one port + higher bits of idx_offset, the starting point of a VRF word + // word_index = (start of this port’s slice) + (byte offset within the slice) + (how many whole slices we’ve advanced). 
+          // It computes the byte address inside the VS2 (index) vector
+          if (state_d == VLSU_RunningLoad || state_d == VLSU_RunningStore) begin
+            // Index
+            unique case (mem_spatz_req.op_mem.ew) // op_mem.ew encodes the index element width (EW of VS2)
+              EW_8 : offset = $signed(vrf_rdata_i[1][8 * word_index +: 8]);
+              EW_16: offset = $signed(vrf_rdata_i[1][8 * word_index +: 16]);
+              default: offset = $signed(vrf_rdata_i[1][8 * word_index +: 32]);
+            endcase
+          end
+        end else begin // strided or unit-stride (stride = 1)
           offset = ({mem_counter_q[port][$bits(vlen_t)-1:MAXEW] << $clog2(NrMemPorts), mem_counter_q[port][int'(MAXEW)-1:0]} + (port << MAXEW)) * stride;
         end
-
+        // CMY: start of the 32B block + in-port offset; the (port << MAXEW) term adds a port base offset so each port starts at a different initial byte position
+        // mem_counter_q: how many elements this port has issued/consumed
         addr = mem_spatz_req.rs1 + offset;
         mem_req_addr[port] = (addr >> MAXEW) << MAXEW;
         mem_req_addr_offset[port] = addr[int'(MAXEW)-1:0];
@@ -462,10 +478,20 @@ module spatz_vlsu
     end
   end: gen_mem_req_addr

+  logic v0_t_is_ready;
+  assign v0_t_is_ready = (state_q == VLSU_ReadingV0_t) && (&vrf_rvalid_i); // reuse vrf_read[1] for V0 reading
+  logic [VLEN-1:0] operand_v0_t, operand_v0_t_q; // CMY: v0 should be read from vrf
+  assign operand_v0_t = (state_q == VLSU_ReadingV0_t) ? {vrf_rdata_i[1], vrf_rdata_i[0]} : '0;
+
+  `FFL(operand_v0_t_q, operand_v0_t, v0_t_is_ready, '0) // CMY: backup v0.t
+
+
   // Calculate the register file address
   always_comb begin : gen_vreg_addr
     vd_vreg_addr  = (commit_insn_q.vd << $clog2(NrWordsPerVector)) + $unsigned(vd_elem_id);
     vs2_vreg_addr = (mem_spatz_req.vs2 << $clog2(NrWordsPerVector)) + $unsigned(vs2_elem_id_q);
+    v0_t_vreg_addr_lo = 0 << $clog2(NrWordsPerVector); // CMY: align prestart elements inside VLSU
+    v0_t_vreg_addr_hi = 1 << $clog2(NrWordsPerVector);
   end

   ///////////////
@@ -651,7 +677,7 @@ module spatz_vlsu
           commit_counter_d[fu] += ELENB;
         else if (commit_insn_q.vstart[idx_width(N_FU*ELENB)-1:$clog2(ELENB)] == fu)
           commit_counter_d[fu] += commit_insn_q.vstart[$clog2(ELENB)-1:0];
-      commit_operation_valid[fu] = commit_insn_valid && (commit_counter_q[fu] != max_elements) && (catchup[fu] || (!catchup[fu] && ~|catchup));
+      commit_operation_valid[fu] = (state_q == VLSU_RunningLoad || state_q == VLSU_RunningStore) && commit_insn_valid && (commit_counter_q[fu] != max_elements) && (catchup[fu] || (!catchup[fu] && ~|catchup)); // CMY: added a check of the current state
       commit_operation_last[fu] = commit_operation_valid[fu] && ((max_elements - commit_counter_q[fu]) <= (commit_is_single_element_operation ? commit_single_element_size : ELENB));
       commit_counter_delta[fu] = !commit_operation_valid[fu] ? vlen_t'('d0) : commit_is_single_element_operation ? vlen_t'(commit_single_element_size) : commit_operation_last[fu] ? 
(max_elements - commit_counter_q[fu]) : vlen_t'(ELENB); commit_counter_en[fu] = commit_operation_valid[fu] && (commit_insn_q.is_load && vrf_req_valid_d && vrf_req_ready_d) || (!commit_insn_q.is_load && vrf_rvalid_i[0] && vrf_re_o[0] && (!mem_is_indexed || vrf_rvalid_i[1])); @@ -702,18 +728,39 @@ module spatz_vlsu // State // /////////// + logic vlsu_rsp_valid_q; // register the instruction finish signal + logic v0_t_is_ready_q; + logic v0_t_read_done; + `FFLARNC(v0_t_read_done,1'b1,v0_t_is_ready,vlsu_rsp_valid_o,1'b0,clk_i,rst_ni); + `FF(v0_t_is_ready_q,v0_t_is_ready,'0); + always_comb begin: p_state // Maintain state state_d = state_q; unique case (state_q) VLSU_RunningLoad: begin + // if(mem_spatz_req_valid && !mem_spatz_req.op_mem.vm && !v0_t_read_done) + if(commit_insn_valid && !commit_insn_q.vm && !v0_t_read_done) + state_d = VLSU_ReadingV0_t; if (commit_insn_valid && !commit_insn_q.is_load) if (&rob_empty) state_d = VLSU_RunningStore; end + VLSU_ReadingV0_t: + if(/*v0_t_is_ready*/v0_t_is_ready & ~v0_t_is_ready_q) begin + state_d = VLSU_RunningLoad; + if (commit_insn_valid && !commit_insn_q.is_load) + // if (&rob_empty) // CMY: we don't need to wait rob_empty because read_v0_t doesn't go through rob. + state_d = VLSU_RunningStore; + // else state_d = VLSU_RunningLoad; + end + else state_d = state_q; + VLSU_RunningStore: begin + if(commit_insn_valid && !commit_insn_q.vm && !v0_t_read_done) + state_d = VLSU_ReadingV0_t; if (commit_insn_valid && commit_insn_q.is_load) if (&rob_empty) state_d = VLSU_RunningLoad; @@ -756,9 +803,64 @@ module spatz_vlsu end end + // CMY: generate masking based on V0.t----------------------------------- + logic [VLEN-1:0] vm_masking; + always_comb begin + vm_masking = '1; // to avoid latch + if(!commit_insn_q.vm) begin + case (commit_insn_q.vsew) + EW_8:for(int i=0;i> 5; + // Are we on the first/last VRF operation? logic vreg_operation_first; logic vreg_operation_last; - // FSM to decide whether we are on the first operation or not - typedef enum logic { + // FSM to decide whether we are on the first operation + /*typedef enum logic[1:0] { + VREG_READ_V0_t, // CMY: added a state to read v0.t VREG_IDLE, VREG_WAIT_FIRST_WRITE } vreg_operation_first_t; vreg_operation_first_t vreg_operation_first_q, vreg_operation_first_d; - `FF(vreg_operation_first_q, vreg_operation_first_d, VREG_IDLE) + `FF(vreg_operation_first_q, vreg_operation_first_d, VREG_IDLE)*/ + + logic v0_t_lo_is_ready,v0_t_hi_is_ready; + assign v0_t_lo_is_ready = (vreg_operation_first_q == VREG_READ_V0_t_lo) && vrf_rvalid_i; + assign v0_t_hi_is_ready = (vreg_operation_first_q == VREG_READ_V0_t_hi) && vrf_rvalid_i; + logic v0_t_lo_read_done,v0_t_hi_read_done; + `FFLARNC(v0_t_lo_read_done,1'b1,v0_t_lo_is_ready,vsldu_rsp_valid_o,1'b0,clk_i,rst_ni); + `FFLARNC(v0_t_hi_read_done,1'b1,v0_t_hi_is_ready,vsldu_rsp_valid_o,1'b0,clk_i,rst_ni); + + vrf_data_t operand_v0_t_lo,operand_v0_t_lo_q; // CMY: v0 should be read from vrf + vrf_data_t operand_v0_t_hi,operand_v0_t_hi_q; + assign operand_v0_t_lo = (vreg_operation_first_q == VREG_READ_V0_t_lo)? vrf_rdata_i:'0; + assign operand_v0_t_hi = (vreg_operation_first_q == VREG_READ_V0_t_hi)? 
vrf_rdata_i:'0; + `FFL(operand_v0_t_lo_q, operand_v0_t_lo, v0_t_lo_is_ready, '0) // CMY: backup v0.t + `FFL(operand_v0_t_hi_q, operand_v0_t_hi, v0_t_hi_is_ready, '0) + + logic [VLEN-1:0] operand_v0_t_q; + assign operand_v0_t_q = {operand_v0_t_hi_q,operand_v0_t_lo_q}; + + // CMY: generate masking based on V0.t----------------------------------- + logic [VLEN-1:0] vm_masking; + always_comb begin + vm_masking = '1; + if(!spatz_req.op_sld.vm) begin + case (spatz_req.vtype.vsew) + EW_8:for(int i=0;i vrf_req_valid_q -> vrf_we_o + vreg_operation_first_d = VREG_IDLE; // vrf_req_ready_q = vrf_wvalid_i end default:; endcase @@ -280,7 +362,7 @@ module spatz_vsldu end // Do we have to increment the counter? - vreg_counter_en = ((spatz_req.use_vs2 && vrf_re_o && vrf_rvalid_i) || !spatz_req.use_vs2) && ((spatz_req.use_vd && vrf_req_valid_d && vrf_req_ready_d) || !spatz_req.use_vd); + vreg_counter_en = (vreg_operation_first_q!=VREG_READ_V0_t_lo) && (vreg_operation_first_q!=VREG_READ_V0_t_hi) && ((spatz_req.use_vs2 && vrf_re_o && vrf_rvalid_i) || !spatz_req.use_vs2) && ((spatz_req.use_vd && vrf_req_valid_d && vrf_req_ready_d) || !spatz_req.use_vd); if (vreg_counter_en) begin if (vreg_operation_last) // Reset the counter @@ -351,6 +433,7 @@ module spatz_vsldu // Data signals for different stages of the shift vrf_data_t data_in, data_out, data_low, data_high; + vrf_be_t slide_wbe; // CMY: used for monitor wbe signals before vm_masking always_comb begin shift_overflow_d = shift_overflow_q; @@ -363,17 +446,20 @@ module spatz_vsldu vrf_req_d.wbe = '0; vrf_req_d.wdata = '0; + slide_wbe = '0; + // Is there a vector instruction executing now? if (!is_vl_zero) begin - if (is_slide_up && spatz_req.op_sld.insert && spatz_req.op_sld.vmv) begin - for (int b_src = 0; b_src < VRFWordBWidth; b_src++) - data_in[(VRFWordBWidth-b_src-1)*8 +: 8] = spatz_req.rs1[b_src*8%ELEN +: 8]; - end else if (is_slide_up) begin - // If we have a slide up operation, flip all bytes around (d[-i] = d[i]) - for (int b_src = 0; b_src < VRFWordBWidth; b_src++) - data_in[(VRFWordBWidth-b_src-1)*8 +: 8] = vrf_rdata_i[b_src*8 +: 8]; - end else begin - data_in = vrf_rdata_i; + if (is_slide_up && spatz_req.op_sld.insert && spatz_req.op_sld.vmv) begin + for (int b_src = 0; b_src < VRFWordBWidth; b_src++) + data_in[(VRFWordBWidth-b_src-1)*8 +: 8] = spatz_req.rs1[b_src*8%ELEN +: 8]; // CMY: rs1: value in the x[rs1] + end + else if (is_slide_up) begin + // If we have a slide up operation, flip all bytes around (d[-i] = d[i]) + for (int b_src = 0; b_src < VRFWordBWidth; b_src++) + data_in[(VRFWordBWidth-b_src-1)*8 +: 8] = (vreg_operation_first_q == VREG_READ_V0_t_lo || vreg_operation_first_q == VREG_READ_V0_t_hi )? data_in[(VRFWordBWidth-b_src-1)*8 +: 8] : vrf_rdata_i[b_src*8 +: 8]; + end else begin + data_in = (vreg_operation_first_q == VREG_READ_V0_t_lo || vreg_operation_first_q == VREG_READ_V0_t_hi)? 
data_in : vrf_rdata_i; // If we are already over the MAXVL, all continuing elements are zero if ((vreg_counter_q >= MAXVL - slide_amount_q) || (vreg_operation_last && spatz_req.op_sld.insert)) @@ -420,29 +506,33 @@ module spatz_vsldu // Insert rs1 element at the first position if (spatz_req.op_sld.insert && !spatz_req.op_sld.vmv && vreg_operation_first && spatz_req.vstart == 'd0) - vrf_req_d.wdata = vrf_req_d.wdata | vrf_data_t'(spatz_req.rs1); + vrf_req_d.wdata = vrf_req_d.wdata | vrf_data_t'(spatz_req.rs1); // CMY: fill the LSB with spatz_req.rs1 end else begin vrf_req_d.wdata = data_out; end // Create byte enable mask for (int i = 0; i < VRFWordBWidth; i++) - vrf_req_d.wbe[i] = i < vreg_counter_delta; + // vrf_req_d.wbe[i] = i < vreg_counter_delta; + slide_wbe[i] = i < vreg_counter_delta; // Special byte enable mask case when we are operating on the first register element. if (vreg_operation_first && is_slide_up) for (int i = 0; i < VRFWordBWidth; i++) - vrf_req_d.wbe[i] = (spatz_req.op_sld.insert || (i >= slide_amount_d[$clog2(VRFWordBWidth)-1:0])) & (i < (vreg_counter_q[$clog2(VRFWordBWidth)-1:0] + vreg_counter_delta)); + // vrf_req_d.wbe[i] = (spatz_req.op_sld.insert || (i >= slide_amount_d[$clog2(VRFWordBWidth)-1:0])) & (i < (vreg_counter_q[$clog2(VRFWordBWidth)-1:0] + vreg_counter_delta)); + slide_wbe[i] = (spatz_req.op_sld.insert || (i >= slide_amount_d[$clog2(VRFWordBWidth)-1:0])) & (i < (vreg_counter_q[$clog2(VRFWordBWidth)-1:0] + vreg_counter_delta)); end // Reset overflow register when finished if (vreg_operations_finished) shift_overflow_d = '0; + + vrf_req_d.wbe = slide_wbe & vm_masking[vreg_counter_mod32*32 +:32]; end // VRF signals - assign vrf_re_o = spatz_req.use_vs2 && (spatz_req_valid || prefetch_q) && running_q[spatz_req.id]; - assign vrf_req_valid_d = spatz_req_valid && spatz_req.use_vd && (vrf_re_o || !spatz_req.use_vs2) && (vrf_rvalid_i || !spatz_req.use_vs2) && !prefetch_q; + assign vrf_re_o = (vreg_operation_first_q == VREG_READ_V0_t_lo)||(vreg_operation_first_q == VREG_READ_V0_t_hi)||(spatz_req.use_vs2 && (spatz_req_valid || prefetch_q) && running_q[spatz_req.id]); + assign vrf_req_valid_d = (vreg_operation_first_q != VREG_READ_V0_t_lo)&&(vreg_operation_first_q != VREG_READ_V0_t_hi)&& spatz_req_valid && spatz_req.use_vd && (vrf_re_o || !spatz_req.use_vs2) && (vrf_rvalid_i || !spatz_req.use_vs2) && !prefetch_q; //////////////////////// // Address Generation // @@ -452,8 +542,14 @@ module spatz_vsldu always_comb begin sld_offset_rd = is_slide_up ? (prefetch_q ? -slide_amount_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] - 1 : -slide_amount_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)]) : prefetch_q ? slide_amount_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] : slide_amount_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] + 1; - vrf_raddr_o = {spatz_req.vs2, $clog2(NrWordsPerVector)'(1'b0)} + vreg_counter_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] + sld_offset_rd; + vrf_raddr_o = (vreg_operation_first_q == VREG_READ_V0_t_lo) ? + {0, $clog2(NrWordsPerVector)'(1'b0)} : + ((vreg_operation_first_q == VREG_READ_V0_t_hi) ? + {1, $clog2(NrWordsPerVector)'(1'b0)} : + ({spatz_req.vs2, $clog2(NrWordsPerVector)'(1'b0)} + vreg_counter_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] + sld_offset_rd)); + // vs2 base in VRF + the number of Word under operation + number of elements to slide vrf_req_d.waddr = {spatz_req.vd, $clog2(NrWordsPerVector)'(1'b0)} + vreg_counter_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)]; end - +// CMY: the number of elements to slide. 
slide_amount_q[$bits(vlen_t)-1:$clog2(VRFWordBWidth)] : word offset +// CMY: endmodule : spatz_vsldu diff --git a/hw/system/spatz_cluster/Makefile b/hw/system/spatz_cluster/Makefile index 5c5a4234..6f906bec 100644 --- a/hw/system/spatz_cluster/Makefile +++ b/hw/system/spatz_cluster/Makefile @@ -26,9 +26,14 @@ SPATZ_CLUSTER_CFG_DEFINES += -DSNRT_NFPU_PER_CORE=$(shell python3 -c "import jst include $(ROOT)/util/Makefrag # QuestaSim -VSIM = questa-2021.3-kgf vsim -VLOG = questa-2021.3-kgf vlog -VSIM_HOME = /usr/pack/questa-2021.3-kgf/questasim +#VSIM = questa-2021.3-kgf vsim +#VLOG = questa-2021.3-kgf vlog +#VSIM_HOME = /usr/pack/questa-2021.3-kgf/questasim +VSIM = vsim +VLOG = vlog +VSIM_HOME = /sw/CAD/Siemens/questa/2024.3/questasim +#CMY modified + VSIM_FLAGS += -t 1ps VSIM_FLAGS += -do "log -r /*; source ${SPATZ_CLUSTER_DIR}/script/vsim/wave.tcl; run -a" @@ -164,11 +169,14 @@ clean.vcs: # SPYGLASS # ############ -SNPS_SG ?= spyglass-2022.06 +#SNPS_SG ?= spyglass-2022.06 +SNPS_SG ?= spyglass #Mamothones .PHONY: lint lint/tmp/files lint: generate lint/tmp/files lint/sdc/func.sdc lint/script/lint.tcl - cd lint && $(SNPS_SG) sg_shell -tcl script/lint.tcl +# cd lint && $(SNPS_SG) sg_shell -tcl script/lint.tcl + cd lint && sg_shell -tcl script/lint.tcl + lint/tmp/files: ${BENDER} mkdir -p lint/tmp diff --git a/hw/system/spatz_cluster/test/bootrom.elf b/hw/system/spatz_cluster/test/bootrom.elf index 22191858..179b2e60 100755 Binary files a/hw/system/spatz_cluster/test/bootrom.elf and b/hw/system/spatz_cluster/test/bootrom.elf differ diff --git a/sw/riscvTests/CMakeLists.txt b/sw/riscvTests/CMakeLists.txt index d5838211..7d17468f 100644 --- a/sw/riscvTests/CMakeLists.txt +++ b/sw/riscvTests/CMakeLists.txt @@ -85,6 +85,9 @@ add_snitch_test(vslide1up isa/rv64uv/vslide1up.c) add_snitch_test(vslideup isa/rv64uv/vslideup.c) add_snitch_test(vslide1down isa/rv64uv/vslide1down.c) add_snitch_test(vslidedown isa/rv64uv/vslidedown.c) +#CMY: float slide +#add_snitch_test(vfslide1down isa/rv64uv/vfslide1down.c) +#add_snitch_test(vfslide1up isa/rv64uv/vfslide1up.c) add_snitch_test(vdiv isa/rv64uv/vdiv.c) add_snitch_test(vdivu isa/rv64uv/vdivu.c) @@ -128,3 +131,26 @@ add_snitch_test(vfcvt isa/rv64uv/vfcvt.c) add_snitch_test(vfncvt isa/rv64uv/vfncvt.c) add_snitch_test(vfmv isa/rv64uv/vfmv.c) + +# CMY: masking logic instructions +add_snitch_test(vmand isa/rv64uv/vmand.c) +add_snitch_test(vmor isa/rv64uv/vmor.c) +add_snitch_test(vmandnot isa/rv64uv/vmandnot.c) +add_snitch_test(vmnand isa/rv64uv/vmnand.c) +add_snitch_test(vmnor isa/rv64uv/vmnor.c) +add_snitch_test(vmornot isa/rv64uv/vmornot.c) +add_snitch_test(vmxnor isa/rv64uv/vmxnor.c) +add_snitch_test(vmxor isa/rv64uv/vmxor.c) + +# CMY: Load/Store instructions +add_snitch_test(vle8 isa/rv64uv/vle8.c) +add_snitch_test(vle16 isa/rv64uv/vle16.c) +add_snitch_test(vle32 isa/rv64uv/vle32.c) +add_snitch_test(vle64 isa/rv64uv/vle64.c) +#add_snitch_test(vluxei isa/rv64uv/vluxei.c) # not supported by the original design. 
+add_snitch_test(vse8 isa/rv64uv/vse8.c) +add_snitch_test(vse16 isa/rv64uv/vse16.c) +add_snitch_test(vse32 isa/rv64uv/vse32.c) +add_snitch_test(vse64 isa/rv64uv/vse64.c) +add_snitch_test(vss isa/rv64uv/vss.c) # on fixing + diff --git a/sw/riscvTests/isa/rv64uv/vadd.c b/sw/riscvTests/isa/rv64uv/vadd.c index cfe9eac0..722fa5b7 100644 --- a/sw/riscvTests/isa/rv64uv/vadd.c +++ b/sw/riscvTests/isa/rv64uv/vadd.c @@ -36,7 +36,7 @@ void TEST_CASE1(void) { } void TEST_CASE2(void) { - VSET(16, e8, m8); + VSET(16, e8, m8); // #define VSET(VLEN, VTYPE, LMUL) VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); VLOAD_8(v0, 0xAA, 0xAA); @@ -187,16 +187,52 @@ void TEST_CASE6(void) { #endif } +void TEST_CASE7(void) { + const uint32_t scalar = 5; + + VSET(16, e8, m8); + VLOAD_8(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAB); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U8(21, v24, 0, 7, 0, 9, 0, 11, 0, 13, 6, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e16, m8); + VLOAD_16(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAB); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U16(22, v24, 0, 7, 0, 9, 0, 11, 0, 13, 6, 7, 0, 9, 0, 11, 0, 13); + + VSET(16, e32, m8); + VLOAD_32(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAB); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U32(23, v24, 0, 7, 0, 9, 0, 11, 0, 13, 6, 7, 0, 9, 0, 11, 0, 13); + +#if ELEN == 64 + VSET(16, e64, m8); + VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); + VLOAD_8(v0, 0xAA, 0xAB); + VCLEAR(v24); + asm volatile("vadd.vx v24, v8, %[A], v0.t" ::[A] "r"(scalar)); + VCMP_U64(24, v24, 0, 7, 0, 9, 0, 11, 0, 13, 6, 7, 0, 9, 0, 11, 0, 13); +#endif +} + int main(void) { INIT_CHECK(); enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); TEST_CASE5(); - // TEST_CASE6(); + TEST_CASE6(); + TEST_CASE7(); - EXIT_CHECK(); + EXIT_CHECK(); } + \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vand.c b/sw/riscvTests/isa/rv64uv/vand.c index 99c76169..55c0c73e 100644 --- a/sw/riscvTests/isa/rv64uv/vand.c +++ b/sw/riscvTests/isa/rv64uv/vand.c @@ -161,7 +161,8 @@ void TEST_CASE3() { } void TEST_CASE4() { - const uint32_t scalar = 0x0ff00ff0; + const uint32_t scalar = 0x0ff00ff0; // snitch is 32-bit scalar core +// const uint64_t scalar = 0x0ff00ff00ff00ff0; VSET(12, e8, m8); VLOAD_8(v16, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, 0xf0, 0xff, 0x01, @@ -208,10 +209,14 @@ void TEST_CASE4() { 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef, 0xdeadbeefdeadbeef); asm volatile("vand.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar)); - VCMP_U64(16, v8, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, + /*VCMP_U64(16, v8, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0, - 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0); + 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x00f000f000f000f0);*/ + VCMP_U64(16, v8, 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x0000000000f000f0, + 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x0000000000f000f0, + 0x000000000ff00ff0, 0xdeadbeefdeadbeef, 0x0000000000f000f0, + 0x000000000ff00ff0, 
0xdeadbeefdeadbeef, 0x0000000000f000f0); #endif } @@ -310,12 +315,12 @@ int main(void) { INIT_CHECK(); enable_vec(); - TEST_CASE1(); - // TEST_CASE2(); +// TEST_CASE1(); +// TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); - TEST_CASE5(); - // TEST_CASE6(); + TEST_CASE4(); +// TEST_CASE5(); +// TEST_CASE6(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vdiv.c b/sw/riscvTests/isa/rv64uv/vdiv.c index 03012cd9..eae8dea2 100644 --- a/sw/riscvTests/isa/rv64uv/vdiv.c +++ b/sw/riscvTests/isa/rv64uv/vdiv.c @@ -237,9 +237,9 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfadd.c b/sw/riscvTests/isa/rv64uv/vfadd.c index 0cfac68a..1ab37a31 100644 --- a/sw/riscvTests/isa/rv64uv/vfadd.c +++ b/sw/riscvTests/isa/rv64uv/vfadd.c @@ -445,14 +445,14 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); TEST_CASE5(); TEST_CASE6(); - // TEST_CASE7(); - // TEST_CASE8(); + TEST_CASE7(); + TEST_CASE8(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfmadd.c b/sw/riscvTests/isa/rv64uv/vfmadd.c index 3f83835e..0dc01387 100644 --- a/sw/riscvTests/isa/rv64uv/vfmadd.c +++ b/sw/riscvTests/isa/rv64uv/vfmadd.c @@ -434,9 +434,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfncvt.c b/sw/riscvTests/isa/rv64uv/vfncvt.c index 1e7f7874..80c5ab93 100644 --- a/sw/riscvTests/isa/rv64uv/vfncvt.c +++ b/sw/riscvTests/isa/rv64uv/vfncvt.c @@ -779,25 +779,25 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); TEST_CASE5(); - // TEST_CASE6(); + TEST_CASE6(); TEST_CASE7(); - // TEST_CASE8(); + TEST_CASE8(); TEST_CASE9(); - // TEST_CASE10(); + TEST_CASE10(); TEST_CASE11(); - // TEST_CASE12(); + TEST_CASE12(); TEST_CASE13(); - // TEST_CASE14(); + TEST_CASE14(); /* vfncvt.rod.f.f is not supported yet diff --git a/sw/riscvTests/isa/rv64uv/vfnmacc.c b/sw/riscvTests/isa/rv64uv/vfnmacc.c index 9d0e0b94..8ca73502 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfnmacc.c @@ -457,9 +457,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfnmadd.c b/sw/riscvTests/isa/rv64uv/vfnmadd.c index 9bc23a86..a40cb3e1 100644 --- a/sw/riscvTests/isa/rv64uv/vfnmadd.c +++ b/sw/riscvTests/isa/rv64uv/vfnmadd.c @@ -459,9 +459,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfredmax.c b/sw/riscvTests/isa/rv64uv/vfredmax.c index b1d26274..6d4e193e 100644 --- a/sw/riscvTests/isa/rv64uv/vfredmax.c +++ b/sw/riscvTests/isa/rv64uv/vfredmax.c @@ -345,10 +345,10 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); - // TEST_CASE5(); + TEST_CASE5(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfredmin.c b/sw/riscvTests/isa/rv64uv/vfredmin.c index e776b433..be21683a 100644 --- a/sw/riscvTests/isa/rv64uv/vfredmin.c +++ b/sw/riscvTests/isa/rv64uv/vfredmin.c @@ -345,10 +345,10 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); - // 
TEST_CASE5(); + TEST_CASE5(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfredosum.c b/sw/riscvTests/isa/rv64uv/vfredosum.c index a1976ed4..97a2e1af 100644 --- a/sw/riscvTests/isa/rv64uv/vfredosum.c +++ b/sw/riscvTests/isa/rv64uv/vfredosum.c @@ -345,10 +345,10 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); - // TEST_CASE5(); + TEST_CASE5(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfredusum.c b/sw/riscvTests/isa/rv64uv/vfredusum.c index 88e81a58..e6480169 100644 --- a/sw/riscvTests/isa/rv64uv/vfredusum.c +++ b/sw/riscvTests/isa/rv64uv/vfredusum.c @@ -345,10 +345,10 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); - // TEST_CASE5(); + TEST_CASE5(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfslide1down.c b/sw/riscvTests/isa/rv64uv/vfslide1down.c index 7c7d5342..a07ebc31 100644 --- a/sw/riscvTests/isa/rv64uv/vfslide1down.c +++ b/sw/riscvTests/isa/rv64uv/vfslide1down.c @@ -101,7 +101,7 @@ int main(void) { enable_fp(); TEST_CASE1(); - TEST_CASE2(); + // TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfslide1up.c b/sw/riscvTests/isa/rv64uv/vfslide1up.c index 4acd161b..e647380a 100644 --- a/sw/riscvTests/isa/rv64uv/vfslide1up.c +++ b/sw/riscvTests/isa/rv64uv/vfslide1up.c @@ -84,7 +84,7 @@ int main(void) { enable_fp(); TEST_CASE1(); - TEST_CASE2(); + // TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfwmacc.c b/sw/riscvTests/isa/rv64uv/vfwmacc.c index 3306c7da..35768871 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmacc.c +++ b/sw/riscvTests/isa/rv64uv/vfwmacc.c @@ -352,9 +352,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfwmsac.c b/sw/riscvTests/isa/rv64uv/vfwmsac.c index 492273d0..75f56346 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmsac.c +++ b/sw/riscvTests/isa/rv64uv/vfwmsac.c @@ -354,9 +354,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vfwmul.c b/sw/riscvTests/isa/rv64uv/vfwmul.c index 3ea2f5d9..c7651f61 100644 --- a/sw/riscvTests/isa/rv64uv/vfwmul.c +++ b/sw/riscvTests/isa/rv64uv/vfwmul.c @@ -255,9 +255,9 @@ int main(void) { enable_fp(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vle16.c b/sw/riscvTests/isa/rv64uv/vle16.c new file mode 100644 index 00000000..99b9a87d --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vle16.c @@ -0,0 +1,67 @@ +// Author: CMY + +#include "vector_macros.h" + +void TEST_CASE0(void) { // test vm signal + VSET(16, e16, m1); + VLOAD_16(v1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + VLOAD_8(v0, 0xAA, 0xAA); + volatile uint16_t INP1[] = {0xaabb,0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, + 0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123}; + asm volatile("vle16.v v1, (%0),v0.t" ::"r"(INP1)); + VCMP_U16(0, v1, 0x1,0x0123, 0x3, 0x89ab, 0x5, 0xcdef, 0x7, 0x4567, + 0x9, 0x4567, 11, 0xcdef, 13, 0x89ab, 15, 0x0123); +} + +void TEST_CASE1(void) { + VSET(16, e16, m1); + volatile uint16_t INP1[] = {0xaabb,0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, + 0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123}; + asm volatile("vle16.v v1, (%0)" 
::"r"(INP1)); + VCMP_U16(1, v1, 0xaabb,0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, + 0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123); +} + +// Positive-stride tests +void TEST_CASE2(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123, + 0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123}; + uint64_t stride = 6; // stride unit is BYTE + asm volatile("vlse16.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U16(2, v1, 0x0123, 0xcdef, 0x4567, 0x4567); +} + +void TEST_CASE3(void) { + VSET(16, e16, m1); // SET the VLEN to 16 to use the 4 memory ports + volatile uint16_t INP1[] = {0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123, + 0x0123, 0x4567, 0x89ab, 0xcdef, 0xcdef, 0x89ab, 0x4567, 0x0123}; + uint64_t stride = 6; // stride unit is BYTE + asm volatile("vlse16.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U16(3, v1, 0x0123, 0xcdef, 0x4567, 0x4567,0xcdef,0x0123); +} + +void TEST_CASE4(void) { + VSET(4, e16, m1); + volatile uint16_t INP1[] = {0x9fe4, 0x1920, 0x8f2e, 0x05e0, + 0xf9aa, 0x71f0, 0xc394, 0xbbd3}; + uint64_t stride = 4; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse16.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U16(4, v1, 0, 0x8f2e, 0, 0xc394); +} + + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE0(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vle32.c b/sw/riscvTests/isa/rv64uv/vle32.c new file mode 100644 index 00000000..6d5fa70c --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vle32.c @@ -0,0 +1,181 @@ +// Author: CMY + +#include "vector_macros.h" + + +void TEST_CASE1(void) { + VSET(16, e32, m1); + volatile uint32_t ALIGNED_I32[16] = { + 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + asm volatile("vle32.v v0, (%0)" ::"r"(ALIGNED_I32)); + VCMP_U32(1, v0, 0x9fe41920,0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, + 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +// Positive-stride tests +void TEST_CASE2(void) { + VSET(32, e32, m8); + volatile uint32_t INP1[] = {1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 0xa, 0xb, 0xc, + 0xd, 0xe, 0xf, + 0x10, + 0x10,0xf,0xe,0xd,0xc,0xb,0xa,9,8,7,6,5,4,3,2,1}; + VCLEAR(v2); + asm volatile("vle32.v v2, (%0)" ::"r"(INP1)); + VCMP_U32(32, v2, 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 0xa, 0xb, 0xc, + 0xd, 0xe, 0xf, + 0x10, + 0x10,0xf,0xe,0xd,0xc,0xb,0xa,9,8,7,6,5,4,3,2,1); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse32.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U32(3, v1, 0, 0xf9aa71f0, 0, 0x99991348); +} + +void TEST_CASE10(void) { + VSET(8, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + VCLEAR(v1); + asm volatile("vle32.v v1, (%0)" ::"r"(INP1)); + VCMP_U32(10, v1, 0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1); +} + +void TEST_CASE11(void) { + VSET(16, e32, m2); + volatile uint32_t INP1[] = {0x9fe41920, 
0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + VCLEAR(v1); + asm volatile("vle32.v v1, (%0)" ::"r"(INP1)); + VCMP_U32(11, v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +void TEST_CASE12(void) { + VSET(16, e64, m8); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + // VCLEAR(v8); + VLOAD_8(v0, 0xFF, 0xFF); + VCLEAR(v8); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(INP1)); + VCMP_U64(12, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(4, v8, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0x9fa831c7a11a9384, 0x1893179501093489, 0x1874754791888188, + 0x9013930148815808, 0x9031850931584902, 0x8319599991911111); +} + +void TEST_CASE5(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(5, v8, 0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0,0x8319599991911111); +} + +void TEST_CASE6(void) { + VSET(4, e64, m1); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1}; + uint64_t stride = 8; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(6, v1, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1); +} + +void TEST_CASE7(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x01015ac1309bb678}; + uint64_t stride = 40; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(9, v1, 0x99991348a9f38cd1, 0x01015ac1309bb678); +} + +void TEST_CASE8(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 
0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAB); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(8, v8, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0,0x8319599991911111); +} + + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); +// TEST_CASE10(); +// TEST_CASE11(); +// TEST_CASE12(); +// // TEST_CASE4(); +// TEST_CASE5(); +// // TEST_CASE6(); +// // TEST_CASE7(); +// TEST_CASE8(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vle64.c b/sw/riscvTests/isa/rv64uv/vle64.c new file mode 100644 index 00000000..be5002cb --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vle64.c @@ -0,0 +1,220 @@ +// Author: CMY + +#include "vector_macros.h" + +// void TEST_CASE0(void) { // test vm signal +// VSET(16, e64, m8); +// volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, +// 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, +// 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, +// 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, +// 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, +// 0x8913984898951989}; +// asm volatile("vle64.v v8, (%0)" ::"r"(INP1)); +// VCMP_U64(0, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, +// 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, +// 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, +// 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, +// 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, +// 0x8913984898951989); +// } + +void TEST_CASE0(void) { // test vm signal + VSET(16, e64, m8); + // VLOAD_64(v8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + volatile uint64_t INP1[] = {1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 0xa, 0xb, 0xc, + 0xd, 0xe, 0xf, + 0x10}; + asm volatile("vle64.v v8, (%0)" ::"r"(INP1)); + VCMP_U64(0, v8, 1, 2, 3, + 4, 5, 6, + 7, 8, 9, + 0xa, 0xb, 0xc, + 0xd, 0xe, 0xf, + 0x10); +} + +void TEST_CASE1(void) { + VSET(16, e64, m2); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + asm volatile("vle64.v v1, (%0)" ::"r"(INP1)); + VCMP_U64(1, v1, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +// Positive-stride tests +void TEST_CASE2(void) { + VSET(16, e64, m2); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 
0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + VCLEAR(v2); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vle64.v v2, (%0)" ::"r"(INP1)); + VCMP_U64(2, v2, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE3(void) { + VSET(4, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + uint64_t stride = 8; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse32.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U32(3, v1, 0, 0xf9aa71f0, 0, 0x99991348); +} + +void TEST_CASE10(void) { + VSET(8, e32, m1); + volatile uint32_t INP1[] = {0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1}; + VCLEAR(v1); + asm volatile("vle32.v v1, (%0)" ::"r"(INP1)); + VCMP_U32(10, v1, 0x9fe41920, 0x8f2e05e0, 0xf9aa71f0, 0xc394bbd3, + 0xa11a9384, 0xa7163840, 0x99991348, 0xa9f38cd1); +} + +void TEST_CASE11(void) { + VSET(16, e32, m2); + volatile uint32_t INP1[] = {0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848}; + VCLEAR(v1); + asm volatile("vle32.v v1, (%0)" ::"r"(INP1)); + VCMP_U32(11, v1, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, 0x38197598, + 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, 0x90139301, 0xab8b9148, + 0x90318509, 0x31897598, 0x83195999, 0x89139848); +} + +void TEST_CASE12(void) { + VSET(16, e64, m8); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + // VCLEAR(v8); + VLOAD_8(v0, 0xFF, 0xFF); + VCLEAR(v8); + asm volatile("vle64.v v8, (%0), v0.t" ::"r"(INP1)); + VCMP_U64(12, v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); +} + +void TEST_CASE4(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(4, v8, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0x9fa831c7a11a9384, 0x1893179501093489, 0x1874754791888188, + 0x9013930148815808, 0x9031850931584902, 0x8319599991911111); +} + +void TEST_CASE5(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 
0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAA); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(5, v8, 0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0,0x8319599991911111); +} + +void TEST_CASE6(void) { + VSET(4, e64, m1); + volatile uint64_t INP1[] = {0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1}; + uint64_t stride = 8; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(6, v1, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1); +} + +void TEST_CASE7(void) { + VSET(2, e64, m1); + volatile uint64_t INP1[] = {0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x9fa831c7a11a9384, + 0x9fa831c7a11a9384, 0x01015ac1309bb678}; + uint64_t stride = 40; + asm volatile("vlse64.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U64(9, v1, 0x99991348a9f38cd1, 0x01015ac1309bb678); +} + +void TEST_CASE8(void) { + VSET(8, e64, m2); + volatile uint64_t INP1[] = { + 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989}; + uint64_t stride = 16; + VLOAD_8(v0, 0xAB); + VCLEAR(v8); + asm volatile("vlse64.v v8, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U64(8, v8, 0x9fe419208f2e05e0, 0xa11a9384a7163840, 0, 0x1893179501093489, 0, + 0x9013930148815808, 0,0x8319599991911111); +} + + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE0(); + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE10(); + TEST_CASE11(); + TEST_CASE12(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + TEST_CASE7(); + TEST_CASE8(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vle8.c b/sw/riscvTests/isa/rv64uv/vle8.c new file mode 100644 index 00000000..7f80ddb7 --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vle8.c @@ -0,0 +1,52 @@ +// Author: CMY + +#include "vector_macros.h" + +// Positive-stride tests +void TEST_CASE1(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + asm volatile("vlse8.v v1, (%0), %1" ::"r"(INP1), "r"(stride)); + VCMP_U8(1, v1, 0x9f, 0x20, 0x05, 0xaa); +} + +void TEST_CASE2(void) { + VSET(4, e8, m1); + VLOAD_8(v2, 0, 1, 2, 3); + volatile uint8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + asm volatile("vluxei8.v v1, (%0), v2" ::"r"(INP)); + VCMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +void TEST_CASE3(void) { + VSET(4, e8, m1); + VLOAD_8(v2, 0, 1, 2, 3); + volatile uint8_t INP[] = {0xff, 0x00, 0x0f, 0xf0}; + asm volatile("vloxei8.v v1, (%0), v2" ::"r"(INP)); + VCMP_U8(1, v1, 0xff, 0x00, 0x0f, 0xf0); +} + +void TEST_CASE4(void) { + VSET(4, e8, m1); + volatile uint8_t INP1[] = {0x9f, 0xe4, 0x19, 0x20, 0x8f, 0x2e, 0x05, 0xe0, + 0xf9, 0xaa, 0x71, 0xf0, 0xc3, 0x94, 0xbb, 0xd3}; + uint64_t stride = 3; + VLOAD_8(v0, 0xAA); + VCLEAR(v1); + asm volatile("vlse8.v v1, (%0), %1, v0.t" ::"r"(INP1), "r"(stride)); + VCMP_U8(4, v1, 0x00, 0x20,0x00, 0xaa); +} + +int 
main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + // TEST_CASE2(); + // TEST_CASE3(); + TEST_CASE4(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vmacc.c b/sw/riscvTests/isa/rv64uv/vmacc.c index e24d7323..8688f62b 100644 --- a/sw/riscvTests/isa/rv64uv/vmacc.c +++ b/sw/riscvTests/isa/rv64uv/vmacc.c @@ -82,6 +82,7 @@ void TEST_CASE1() { void TEST_CASE2() { VSET(16, e8, m8); +// VLOAD_8(v0, 0xAA, 0xAA); // just for test VLOAD_8(v24, 0x21, 0x75, 0x7f, 0x3a, 0x50, 0x6d, 0x3f, 0x3e, 0x74, 0x11, 0x29, 0xea, 0x14, 0xce, 0xb0, 0x37); VLOAD_8(v16, 0xfe, 0xa7, 0x06, 0xaa, 0x35, 0x3c, 0x2c, 0x58, 0xa1, 0xc4, 0x40, @@ -226,6 +227,7 @@ void TEST_CASE3() { void TEST_CASE4() { VSET(16, e8, m8); int64_t scalar = 5; +// VLOAD_8(v0, 0xAA, 0xAA); VLOAD_8(v16, 0x60, 0xe3, 0xa0, 0xb7, 0x35, 0x23, 0xa3, 0xf4, 0x5f, 0x6e, 0x07, 0x01, 0xe7, 0x51, 0x53, 0x29); VLOAD_8(v8, 0xfb, 0x1b, 0xc0, 0x36, 0xa7, 0xe0, 0xc8, 0x47, 0x57, 0xe0, 0x51, @@ -298,9 +300,9 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vmand.c b/sw/riscvTests/isa/rv64uv/vmand.c new file mode 100644 index 00000000..82332f4b --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmand.c @@ -0,0 +1,79 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0x84, 0x21); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0x00, 0x00); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0x0D, 0xE0); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0x84, 0x21); +} + +void TEST_CASE6() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21, 0x84, 0x21); + asm volatile("vmand.mm v1, v2, v3"); + VSET(13, e8, m1); + VCLEAR(v2); + VCMP_U8(6, v2, 0, 0, 0, 0, 0, 0, 0, 0); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmandnot.c b/sw/riscvTests/isa/rv64uv/vmandnot.c new file mode 100644 index 00000000..5902eedd --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmandnot.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0x49, 0xCE); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0x00, 0x00); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0xC0, 0x0F); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmandnot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0x49, 0xCE); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmnand.c b/sw/riscvTests/isa/rv64uv/vmnand.c new file mode 100644 index 00000000..543fb28b --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmnand.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0x7B, 0xDE); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0x32, 0x10); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0xFF, 0xFF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0xF2, 0x1F); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmnand.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0x7B, 0xDE); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmnor.c b/sw/riscvTests/isa/rv64uv/vmnor.c new file mode 100644 index 00000000..61a2f81f --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmnor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0x32, 0x10); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0x00, 0x00); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0x32, 0x10); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0x30, 0x00); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0x32, 0x10); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmor.c b/sw/riscvTests/isa/rv64uv/vmor.c new file mode 100644 index 00000000..2ba46e40 --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0xCD, 0xEF); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0xFF, 0xFF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0xCF, 0xFF); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0xCD, 0xEF); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmornot.c b/sw/riscvTests/isa/rv64uv/vmornot.c new file mode 100644 index 00000000..51ca1430 --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmornot.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0xFF, 0xFF); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0xFF, 0xFF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0xFD, 0xEF); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmornot.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0xFF, 0xFF); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmul.c b/sw/riscvTests/isa/rv64uv/vmul.c index 32c1cc06..733f43c9 100644 --- a/sw/riscvTests/isa/rv64uv/vmul.c +++ b/sw/riscvTests/isa/rv64uv/vmul.c @@ -237,9 +237,9 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vmxnor.c b/sw/riscvTests/isa/rv64uv/vmxnor.c new file mode 100644 index 00000000..74820c16 --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmxnor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0xB6, 0x31); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0xCD, 0xEF); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0x32, 0x10); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0x3D, 0xE0); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmxnor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0xB6, 0x31); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vmxor.c b/sw/riscvTests/isa/rv64uv/vmxor.c new file mode 100644 index 00000000..444814ae --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vmxor.c @@ -0,0 +1,68 @@ +// Copyright 2021 ETH Zurich and University of Bologna. 
+// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Author: Matheus Cavalcante +// Basile Bougenot + +#include "vector_macros.h" + +void TEST_CASE1() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(1, v1, 0x49, 0xCE); +} + +void TEST_CASE2() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0xFF, 0xFF); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(2, v1, 0x32, 0x10); +} + +void TEST_CASE3() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x00, 0x00); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(3, v1, 0xCD, 0xEF); +} + +void TEST_CASE4() { + VSET(16, e8, m1); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x0F, 0xF0); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(4, v1, 0xC2, 0x1F); +} + +void TEST_CASE5() { + VSET(16, e8, m1); + VLOAD_8(v1, 0xFF, 0xFF); + VLOAD_8(v2, 0xCD, 0xEF); + VLOAD_8(v3, 0x84, 0x21); + VSET(16, e8, m1); + asm volatile("vmxor.mm v1, v2, v3"); + VSET(16, e8, m1); + VCMP_U8(5, v1, 0x49, 0xCE); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} diff --git a/sw/riscvTests/isa/rv64uv/vnmsac.c b/sw/riscvTests/isa/rv64uv/vnmsac.c index be9a4c41..4c6c225e 100644 --- a/sw/riscvTests/isa/rv64uv/vnmsac.c +++ b/sw/riscvTests/isa/rv64uv/vnmsac.c @@ -82,13 +82,14 @@ void TEST_CASE1() { void TEST_CASE2() { VSET(16, e8, m8); + VLOAD_8(v0, 0xAA, 0xAA); VLOAD_8(v24, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); VLOAD_8(v16, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); VLOAD_8(v8, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 0x5a); - VLOAD_8(v0, 0xAA, 0xAA); +// VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vnmsac.vv v8, v16, v24, v0.t"); VCMP_U8(5, v8, 0x53, 0xb4, 0x2c, 0x04, 0x4a, 0x53, 0xa3, 0x87, 0x7e, 0xe2, 0x4c, 0xf6, 0x2e, 0x31, 0x13, 0xe3); @@ -226,11 +227,12 @@ void TEST_CASE3() { void TEST_CASE4() { VSET(16, e8, m8); int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); VLOAD_8(v24, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); VLOAD_8(v8, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); - VLOAD_8(v0, 0xAA, 0xAA); +// VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vnmsac.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, 0xfa, 0x10, 0x2a, 0xab, 0xe7, 0xf3, 0x8c, 0x13, 0x40, 0xcf, 0x50, 0x4f, 0xe0, 0xda, 0x1f, 0x0e); @@ -298,9 +300,9 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vnmsub.c b/sw/riscvTests/isa/rv64uv/vnmsub.c index cd1cbc72..b4ee0c75 100644 --- a/sw/riscvTests/isa/rv64uv/vnmsub.c +++ b/sw/riscvTests/isa/rv64uv/vnmsub.c @@ -82,13 +82,14 @@ void TEST_CASE1() { void TEST_CASE2() { VSET(16, e8, m8); + VLOAD_8(v0, 0xAA, 0xAA); VLOAD_8(v8, 0x41, 0x5b, 0xd0, 0x04, 0xc4, 0x7a, 0x91, 0xd1, 0x7b, 0x09, 0x85, 0x59, 0x2b, 0xe3, 0x33, 0xb9); VLOAD_8(v16, 0xc5, 0x4d, 0xad, 0x35, 0x81, 0x18, 0x48, 0x50, 0xe7, 0x95, 0x7b, 0x18, 0xe6, 0x44, 0x57, 0xaf); VLOAD_8(v24, 0x53, 0x13, 0x2c, 0xd8, 0x4a, 0xc3, 0xa3, 0xd7, 0x7e, 0x1f, 0x4c, 0x4e, 0x2e, 0x7d, 0x13, 
0x5a); - VLOAD_8(v0, 0xAA, 0xAA); +// VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vnmsub.vv v8, v16, v24, v0.t"); VCMP_U8(5, v8, 0x41, 0xb4, 0xd0, 0x04, 0xc4, 0x53, 0x91, 0x87, 0x7b, 0xe2, 0x85, 0xf6, 0x2b, 0x31, 0x33, 0xe3); @@ -226,11 +227,12 @@ void TEST_CASE3() { void TEST_CASE4() { VSET(16, e8, m8); int64_t scalar = 5; + VLOAD_8(v0, 0xAA, 0xAA); VLOAD_8(v8, 0x5e, 0xf5, 0xa9, 0x0b, 0x14, 0x3c, 0x84, 0x22, 0xd7, 0xb6, 0x5c, 0x90, 0xa2, 0x67, 0x3d, 0xf5); VLOAD_8(v24, 0xfa, 0xd9, 0x2a, 0xe2, 0xe7, 0x1f, 0x8c, 0xbd, 0x40, 0x5d, 0x50, 0x1f, 0xe0, 0xdd, 0x1f, 0xd7); - VLOAD_8(v0, 0xAA, 0xAA); +// VLOAD_8(v0, 0xAA, 0xAA); asm volatile("vnmsub.vx v8, %[A], v24, v0.t" ::[A] "r"(scalar)); VCMP_U8(13, v8, 0x5e, 0x10, 0xa9, 0xab, 0x14, 0xf3, 0x84, 0x13, 0xd7, 0xcf, 0x5c, 0x4f, 0xa2, 0xda, 0x3d, 0x0e); @@ -298,9 +300,9 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vnsra.c b/sw/riscvTests/isa/rv64uv/vnsra.c index 5c4e9b56..5a339de4 100644 --- a/sw/riscvTests/isa/rv64uv/vnsra.c +++ b/sw/riscvTests/isa/rv64uv/vnsra.c @@ -232,11 +232,11 @@ int main(void) { enable_vec(); TEST_CASE1(); - TEST_CASE2(); +// TEST_CASE2(); TEST_CASE3(); - TEST_CASE4(); +// TEST_CASE4(); TEST_CASE5(); - TEST_CASE6(); +// TEST_CASE6(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vor.c b/sw/riscvTests/isa/rv64uv/vor.c index be95164e..e943c0e1 100644 --- a/sw/riscvTests/isa/rv64uv/vor.c +++ b/sw/riscvTests/isa/rv64uv/vor.c @@ -311,11 +311,11 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); - // TEST_CASE4(); + TEST_CASE4(); TEST_CASE5(); - // TEST_CASE6(); + TEST_CASE6(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredand.c b/sw/riscvTests/isa/rv64uv/vredand.c index a354b18d..362892bb 100644 --- a/sw/riscvTests/isa/rv64uv/vredand.c +++ b/sw/riscvTests/isa/rv64uv/vredand.c @@ -91,7 +91,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredmax.c b/sw/riscvTests/isa/rv64uv/vredmax.c index 0b6953d6..a4f1d9fd 100644 --- a/sw/riscvTests/isa/rv64uv/vredmax.c +++ b/sw/riscvTests/isa/rv64uv/vredmax.c @@ -77,7 +77,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredmaxu.c b/sw/riscvTests/isa/rv64uv/vredmaxu.c index 1e38a098..3852b890 100644 --- a/sw/riscvTests/isa/rv64uv/vredmaxu.c +++ b/sw/riscvTests/isa/rv64uv/vredmaxu.c @@ -106,7 +106,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); EXIT_CHECK(); diff --git a/sw/riscvTests/isa/rv64uv/vredmin.c b/sw/riscvTests/isa/rv64uv/vredmin.c index 758cf2b0..cc1a3f6c 100644 --- a/sw/riscvTests/isa/rv64uv/vredmin.c +++ b/sw/riscvTests/isa/rv64uv/vredmin.c @@ -77,7 +77,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredminu.c b/sw/riscvTests/isa/rv64uv/vredminu.c index f33fb52e..0d1583f7 100644 --- a/sw/riscvTests/isa/rv64uv/vredminu.c +++ b/sw/riscvTests/isa/rv64uv/vredminu.c @@ -77,7 +77,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredor.c b/sw/riscvTests/isa/rv64uv/vredor.c index decc6719..ba4047e2 100644 --- a/sw/riscvTests/isa/rv64uv/vredor.c +++ b/sw/riscvTests/isa/rv64uv/vredor.c @@ 
-91,7 +91,7 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vredsum.c b/sw/riscvTests/isa/rv64uv/vredsum.c index 4d515878..2b4016d3 100644 --- a/sw/riscvTests/isa/rv64uv/vredsum.c +++ b/sw/riscvTests/isa/rv64uv/vredsum.c @@ -179,10 +179,10 @@ int main(void) { enable_vec(); TEST_CASE1(); - // TEST_CASE2(); + TEST_CASE2(); TEST_CASE3(); TEST_CASE4(); - // TEST_CASE5(); + TEST_CASE5(); EXIT_CHECK(); } diff --git a/sw/riscvTests/isa/rv64uv/vse16.c b/sw/riscvTests/isa/rv64uv/vse16.c new file mode 100644 index 00000000..d77d182c --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vse16.c @@ -0,0 +1,97 @@ +#include "vector_macros.h" + + +void TEST_CASE1(void) { + VSET(16, e8, m1); + volatile uint16_t ALIGNED_I16[1024]; + VLOAD_16(v0, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v0, (%0)" ::"r"(ALIGNED_I16)); + VVCMP_U16(1, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE2(void) { + volatile uint16_t ALIGNED_I16[16]={0}; + VSET(16, e16, m1); + VLOAD_16(v3, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse16.v v3, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v3); + VVCMP_U16(2, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +//*******Checking functionality of vse16 with different values of masking +// register******// +void TEST_CASE3(void) { + volatile uint16_t ALIGNED_I16[16]; + VSET(16, e16, m1); + VLOAD_16(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v3, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v3); + VLOAD_16(v3, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse16.v v3, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v3); + VVCMP_U16(3, ALIGNED_I16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE4(void) { + volatile uint16_t ALIGNED_I16[16]; + VSET(16, e16, m1); + VLOAD_16(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse16.v v3, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v3); + VLOAD_16(v3, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse16.v v3, (%0), v0.t" ::"r"(ALIGNED_I16)); + VCLEAR(v3); + VVCMP_U16(4, ALIGNED_I16, 1, 0xbbd3, 3, 0x8cd1, 5, 0x7548, 7, 0x9388, 9, + 0x11ae, 11, 0x4891, 13, 0x8759, 15, 0x1989); +} + +void TEST_CASE5(void) { + volatile uint16_t ALIGNED_I16[16] = {0}; + VSET(16, e16, m1); + VLOAD_16(v8, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + VSET(16, e8, m4); + asm volatile("vse16.v v8, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v8); + VVCMP_U16(5, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +void TEST_CASE6(void) { + volatile uint16_t ALIGNED_I16[16] = {0}; + VSET(16, e16, m1); + VLOAD_16(v6, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 
0x9384, 0x7548, 0x3489, 0x9388, + 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, 0x1111, 0x1989); + asm volatile("vse16.v v6, (%0)" ::"r"(ALIGNED_I16)); + VCLEAR(v6); + VVCMP_U16(6, ALIGNED_I16, 0x05e0, 0xbbd3, 0x3840, 0x8cd1, 0x9384, 0x7548, + 0x3489, 0x9388, 0x8188, 0x11ae, 0x5808, 0x4891, 0x4902, 0x8759, + 0x1111, 0x1989); +} + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + TEST_CASE6(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vse32.c b/sw/riscvTests/isa/rv64uv/vse32.c new file mode 100644 index 00000000..e65e391a --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vse32.c @@ -0,0 +1,101 @@ +#include "vector_macros.h" + + +//**********Checking functionality of vse32********// +void TEST_CASE1(void) { + volatile uint32_t ALIGNED_I32[1024]; + VSET(16, e32, m1); + VLOAD_32(v0, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + asm volatile("vse32.v v0, (%0)" ::"r"(ALIGNED_I32)); + VVCMP_U32(1, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +//*******Checking functionality of vse32 with different values of masking +// register******// +void TEST_CASE2(void) { + volatile uint32_t ALIGNED_I32[1024]={0}; + VSET(16, e32, m1); + VLOAD_32(v3, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse32.v v3, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v3); + VVCMP_U32(2, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + +void TEST_CASE3(void) { + volatile uint32_t ALIGNED_I32[1024]; + VSET(16, e32, m1); + VLOAD_32(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v3, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v3); + VLOAD_32(v3, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse32.v v3, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v3); + VVCMP_U32(3, ALIGNED_I32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE4(void) { + volatile uint32_t ALIGNED_I32[1024]; + VSET(16, e32, m1); + VLOAD_32(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse32.v v3, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v3); + VLOAD_32(v3, 0x11111111, 0x22222222, 0x33333333, 0x44444444, + 0x55555555, 0x66666666, 0x77777777, 0x88888888, 0x99999999, + 0xaaaaaaaa, 0xbbbbbbbb, 0xcccccccc, 0xdddddddd, 0xeeeeeeee, + 0xffffffff,0x00000000); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse32.v v3, (%0), v0.t" ::"r"(ALIGNED_I32)); + VCLEAR(v3); + VVCMP_U32(4, ALIGNED_I32, 1, 0x22222222, 3, 0x44444444, 5, 0x66666666, 7, + 0x88888888, 9, 0xaaaaaaaa, 11, 0xcccccccc, 13, 0xeeeeeeee, 15, + 0x00000000); +} + +// change LMUL and EW +void TEST_CASE5(void) { + volatile uint32_t ALIGNED_I32[1024] 
= {0}; + VSET(16, e32, m1); + VLOAD_32(v8, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, 0x9fa831c7, + 0x38197598, 0x18931795, 0x81937598, 0x18747547, 0x3eeeeeee, + 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, 0x83195999, + 0x89139848); + VSET(16, e8, m2); // ? uncertain + asm volatile("vse32.v v8, (%0)" ::"r"(ALIGNED_I32)); + VCLEAR(v8); + VVCMP_U32(5, ALIGNED_I32, 0x9fe41920, 0xf9aa71f0, 0xa11a9384, 0x99991348, + 0x9fa831c7, 0x38197598, 0x18931795, 0x81937598, 0x18747547, + 0x3eeeeeee, 0x90139301, 0xab8b9148, 0x90318509, 0x31897598, + 0x83195999, 0x89139848); +} + + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vse64.c b/sw/riscvTests/isa/rv64uv/vse64.c new file mode 100644 index 00000000..94b50dfa --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vse64.c @@ -0,0 +1,118 @@ +#include "vector_macros.h" + + +//**********Checking functionality of vse64********// +void TEST_CASE1(void) { + volatile uint64_t ALIGNED_I64[1024]; + VSET(16, e64, m2); + VLOAD_64(v0, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + asm volatile("vse64.v v0, (%0)" ::"r"(ALIGNED_I64)); + VVCMP_U64(1, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +//*******Checking functionality of vse64 with different values of masking +// register******// +void TEST_CASE2(void) { + volatile uint64_t ALIGNED_I64[1024] = {0}; + VSET(16, e64, m2); + VLOAD_64(v3, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse64.v v3, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v3); + VVCMP_U64(2, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + +void TEST_CASE3(void) { + volatile uint64_t ALIGNED_I64[1024] = {0}; + VSET(16, e64, m2); + VLOAD_64(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v3, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v3); + VLOAD_64(v3, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0x00, 0x00); + asm volatile("vse64.v v3, (%0), v0.t" 
::"r"(ALIGNED_I64)); + VCLEAR(v3); + VVCMP_U64(3, ALIGNED_I64, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16); +} + +void TEST_CASE4(void) { + volatile uint64_t ALIGNED_I64[1024] = {0}; + VSET(16, e64, m2); + VLOAD_64(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + asm volatile("vse64.v v3, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v3); + VLOAD_64(v3, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VLOAD_8(v0, 0xAA, 0xAA); + asm volatile("vse64.v v3, (%0), v0.t" ::"r"(ALIGNED_I64)); + VCLEAR(v3); + VVCMP_U64(4, ALIGNED_I64, 1, 0xf9aa71f0c394bbd3, 3, 0x99991348a9f38cd1, 5, + 0x3819759853987548, 7, 0x81937598aa819388, 9, 0x3eeeeeeee33111ae, + 11, 0xab8b914891484891, 13, 0x3189759837598759, 15, + 0x8913984898951989); +} + +// change LMUL and EW +void TEST_CASE5(void) { + volatile uint64_t ALIGNED_I64[1024] = {0}; + VSET(16, e64, m2); + VLOAD_64(v8, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, 0xa11a9384a7163840, + 0x99991348a9f38cd1, 0x9fa831c7a11a9384, 0x3819759853987548, + 0x1893179501093489, 0x81937598aa819388, 0x1874754791888188, + 0x3eeeeeeee33111ae, 0x9013930148815808, 0xab8b914891484891, + 0x9031850931584902, 0x3189759837598759, 0x8319599991911111, + 0x8913984898951989); + VSET(16, e8, m1); + asm volatile("vse64.v v8, (%0)" ::"r"(ALIGNED_I64)); + VCLEAR(v8); + VVCMP_U64(10, ALIGNED_I64, 0x9fe419208f2e05e0, 0xf9aa71f0c394bbd3, + 0xa11a9384a7163840, 0x99991348a9f38cd1, 0x9fa831c7a11a9384, + 0x3819759853987548, 0x1893179501093489, 0x81937598aa819388, + 0x1874754791888188, 0x3eeeeeeee33111ae, 0x9013930148815808, + 0xab8b914891484891, 0x9031850931584902, 0x3189759837598759, + 0x8319599991911111, 0x8913984898951989); +} + + +int main(void) { + INIT_CHECK(); + enable_vec(); + + TEST_CASE1(); + TEST_CASE2(); + TEST_CASE3(); + TEST_CASE4(); + TEST_CASE5(); + + EXIT_CHECK(); +} \ No newline at end of file diff --git a/sw/riscvTests/isa/rv64uv/vse8.c b/sw/riscvTests/isa/rv64uv/vse8.c new file mode 100644 index 00000000..60d24bfd --- /dev/null +++ b/sw/riscvTests/isa/rv64uv/vse8.c @@ -0,0 +1,83 @@ +#include "vector_macros.h" + + +void TEST_CASE1(void) { + VSET(16, e8, m1); + VLOAD_8(v0, 0x11, 0x22); + volatile uint8_t ALIGNED_I8[1024]; + VLOAD_8(v1, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x11, 0x22, 0x33, + 0x44, 0x55, 0x66, 0x77, 0x88); + asm volatile("vse8.v v1, (%0)" ::"r"(ALIGNED_I8)); + VVCMP_U8(1, ALIGNED_I8, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x11, 0x22, 0x33, + 0x44, 0x55, 0x66, 0x77, 0x88); +} + +void TEST_CASE2(void) { + VSET(16, e8, m1); + volatile uint8_t ALIGNED_I8[16]={0}; + VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); + asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8)); + VCLEAR(v6); + VVCMP_U8(2, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, + 0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89); +} + +//*******Checking functionality of vse8 with different values of masking +// register******// +void TEST_CASE3(void) { + VSET(16, e8, m1); + volatile uint8_t ALIGNED_I8[16]; + VLOAD_8(v0, 0xFF, 0xFF); + VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08, + 0x91, 0x02, 0x59, 0x11, 0x89); +// VLOAD_8(v0, 0xFF, 0xFF); + asm volatile("vse8.v v3, (%0), v0.t" 
diff --git a/sw/riscvTests/isa/rv64uv/vse8.c b/sw/riscvTests/isa/rv64uv/vse8.c
new file mode 100644
index 00000000..60d24bfd
--- /dev/null
+++ b/sw/riscvTests/isa/rv64uv/vse8.c
@@ -0,0 +1,80 @@
+#include "vector_macros.h"
+
+
+void TEST_CASE1(void) {
+  VSET(16, e8, m1);
+  volatile uint8_t ALIGNED_I8[1024];
+  VLOAD_8(v1, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x11, 0x22, 0x33,
+          0x44, 0x55, 0x66, 0x77, 0x88);
+  asm volatile("vse8.v v1, (%0)" ::"r"(ALIGNED_I8));
+  VVCMP_U8(1, ALIGNED_I8, 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0x11,
+           0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88);
+}
+
+void TEST_CASE2(void) {
+  VSET(16, e8, m1);
+  volatile uint8_t ALIGNED_I8[16] = {0};
+  VLOAD_8(v6, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08,
+          0x91, 0x02, 0x59, 0x11, 0x89);
+  asm volatile("vse8.v v6, (%0)" ::"r"(ALIGNED_I8));
+  VCLEAR(v6);
+  VVCMP_U8(2, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88,
+           0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89);
+}
+
+//*******Checking functionality of vse8 with different values of the mask
+// register******//
+void TEST_CASE3(void) {
+  VSET(16, e8, m1);
+  volatile uint8_t ALIGNED_I8[16];
+  VLOAD_8(v0, 0xFF, 0xFF);
+  VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08,
+          0x91, 0x02, 0x59, 0x11, 0x89);
+  asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8));
+  VCLEAR(v3);
+  VVCMP_U8(3, ALIGNED_I8, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88,
+           0xae, 0x08, 0x91, 0x02, 0x59, 0x11, 0x89);
+}
+
+void TEST_CASE4(void) {
+  VSET(16, e8, m1);
+  volatile uint8_t ALIGNED_I8[16];
+  VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8));
+  VCLEAR(v3);
+  VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08,
+          0x91, 0x02, 0x59, 0x11, 0x89);
+  VLOAD_8(v0, 0x00, 0x00);
+  asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8));
+  VCLEAR(v3);
+  VVCMP_U8(4, ALIGNED_I8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+           16);
+}
+
+void TEST_CASE5(void) {
+  VSET(16, e8, m1);
+  volatile uint8_t ALIGNED_I8[16];
+  VLOAD_8(v3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+  asm volatile("vse8.v v3, (%0)" ::"r"(ALIGNED_I8));
+  VCLEAR(v3);
+  VLOAD_8(v3, 0xe0, 0xd3, 0x40, 0xd1, 0x84, 0x48, 0x89, 0x88, 0x88, 0xae, 0x08,
+          0x91, 0x02, 0x59, 0x11, 0x89);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vse8.v v3, (%0), v0.t" ::"r"(ALIGNED_I8));
+  VCLEAR(v3);
+  VVCMP_U8(5, ALIGNED_I8, 1, 0xd3, 3, 0xd1, 5, 0x48, 7, 0x88, 9, 0xae, 11, 0x91,
+           13, 0x59, 15, 0x89);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+  TEST_CASE5();
+
+  EXIT_CHECK();
+}
\ No newline at end of file
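The same 0xAA mask recurs in every masked case, so the bit ordering is worth spelling out once. A small self-check, assuming a plain-C mirror of how VLOAD_8 fills v0 byte by byte (hypothetical helper, not part of the harness):

    #include <assert.h>
    #include <stdint.h>

    /* Mask bits are numbered by element index, LSB-first inside each byte:
     * 0xAA = 0b10101010 makes elements 1, 3, 5, ... active. */
    static void check_0xaa_pattern(void) {
      const uint8_t mask[2] = {0xAA, 0xAA}; /* as in VLOAD_8(v0, 0xAA, 0xAA) */
      for (int i = 0; i < 16; ++i)
        assert(((mask[i / 8] >> (i % 8)) & 1) == (i & 1)); /* active iff odd */
    }

This is why TEST_CASE5 above expects the pre-stored values 1, 3, 5, ... to survive at even indices and the new bytes to appear at odd ones.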
diff --git a/sw/riscvTests/isa/rv64uv/vslide1down.c b/sw/riscvTests/isa/rv64uv/vslide1down.c
index 91b5b38a..71589a15 100644
--- a/sw/riscvTests/isa/rv64uv/vslide1down.c
+++ b/sw/riscvTests/isa/rv64uv/vslide1down.c
@@ -52,11 +52,8 @@ void TEST_CASE2() {
   uint64_t scalar = 99;
-  VSET(32, e8, m8);
-  VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
-      20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e8, m8);
   VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
   VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
   asm volatile("vslide1down.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
@@ -100,7 +100,7 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
 
   EXIT_CHECK();
 }
diff --git a/sw/riscvTests/isa/rv64uv/vslide1up.c b/sw/riscvTests/isa/rv64uv/vslide1up.c
index ffe48221..b0ddc882 100644
--- a/sw/riscvTests/isa/rv64uv/vslide1up.c
+++ b/sw/riscvTests/isa/rv64uv/vslide1up.c
@@ -76,7 +76,7 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
 
   EXIT_CHECK();
 }
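TEST_CASE2, re-enabled above, combines a slide with masking. As a reading aid, a C sketch of what masked vslide1down.vx computes (illustrative; tail-policy details are ignored and ref_vslide1down is not a harness function):

    #include <stddef.h>
    #include <stdint.h>

    /* Reference model of "vslide1down.vx vd, vs2, rs1, v0.t": each active
     * element takes its upper neighbour, and the last element takes the
     * scalar. Inactive elements keep the old destination value, which is
     * the -1 fill loaded into v8 by the test. */
    static void ref_vslide1down(uint8_t *vd, const uint8_t *vs2, uint8_t rs1,
                                const uint8_t *v0, size_t vl) {
      for (size_t i = 0; i < vl; ++i)
        if ((v0[i / 8] >> (i % 8)) & 1)
          vd[i] = (i == vl - 1) ? rs1 : vs2[i + 1];
    }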
diff --git a/sw/riscvTests/isa/rv64uv/vslidedown.c b/sw/riscvTests/isa/rv64uv/vslidedown.c
index 092e8ea3..51b94587 100644
--- a/sw/riscvTests/isa/rv64uv/vslidedown.c
+++ b/sw/riscvTests/isa/rv64uv/vslidedown.c
@@ -45,37 +45,37 @@ void TEST_CASE1() {
 
 void TEST_CASE2() {
   VSET(32, e8, m8);
-  VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+  VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
           20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e8, m8);
   VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
-  asm volatile("vslidedown.vi v8, v0, 3, v0.t");
+  asm volatile("vslidedown.vi v8, v16, 3, v0.t");
   VCMP_U8(5, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19);
 
   VSET(32, e16, m8);
-  VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e16, m8);
   VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
-  asm volatile("vslidedown.vi v8, v0, 4, v0.t");
+  asm volatile("vslidedown.vi v8, v16, 4, v0.t");
   VCMP_U16(6, v8, -1, 6, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1, 18, -1, 20);
 
   VSET(32, e32, m8);
-  VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e32, m8);
   VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
-  asm volatile("vslidedown.vi v8, v0, 5, v0.t");
+  asm volatile("vslidedown.vi v8, v16, 5, v0.t");
   VCMP_U32(7, v8, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19, -1, 21);
 
 #if ELEN == 64
   VSET(32, e64, m8);
-  VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e64, m8);
   VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
-  asm volatile("vslidedown.vi v8, v0, 6, v0.t");
+  asm volatile("vslidedown.vi v8, v16, 6, v0.t");
   VCMP_U64(8, v8, -1, 8, -1, 10, -1, 12, -1, 14, -1, 16, -1, 18, -1, 20, -1, 22);
 #endif
@@ -123,40 +123,40 @@ void TEST_CASE4() {
   uint64_t scalar = 3;
 
   VSET(32, e8, m8);
-  VLOAD_8(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
+  VLOAD_8(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
           20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e8, m8);
   VLOAD_8(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
-  asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar));
+  asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
   VCMP_U8(13, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19);
 
   VSET(32, e16, m8);
-  VLOAD_16(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_16(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e16, m8);
   VLOAD_16(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
-  asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar));
+  asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
   VCMP_U16(14, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19);
 
   VSET(32, e32, m8);
-  VLOAD_32(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_32(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
           19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e32, m8);
   VLOAD_32(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
-  asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar));
+  asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
   VCMP_U32(15, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19);
 
 #if ELEN == 64
   VSET(32, e64, m8);
-  VLOAD_64(v0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+  VLOAD_64(v16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
           19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32);
   VSET(16, e64, m8);
   VLOAD_64(v8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
   VLOAD_8(v0, 0xAA, 0xAA);
-  asm volatile("vslidedown.vx v8, v0, %[A], v0.t" ::[A] "r"(scalar));
+  asm volatile("vslidedown.vx v8, v16, %[A], v0.t" ::[A] "r"(scalar));
   VCMP_U64(16, v8, -1, 5, -1, 7, -1, 9, -1, 11, -1, 13, -1, 15, -1, 17, -1, 19);
 #endif
 }
@@ -166,9 +166,9 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
 
   EXIT_CHECK();
 }
diff --git a/sw/riscvTests/isa/rv64uv/vslideup.c b/sw/riscvTests/isa/rv64uv/vslideup.c
index e37dff71..d81b90f5 100644
--- a/sw/riscvTests/isa/rv64uv/vslideup.c
+++ b/sw/riscvTests/isa/rv64uv/vslideup.c
@@ -133,9 +133,9 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
 
   EXIT_CHECK();
 }
diff --git a/sw/riscvTests/isa/rv64uv/vsra.c b/sw/riscvTests/isa/rv64uv/vsra.c
index 4901555f..9d8a9cfb 100644
--- a/sw/riscvTests/isa/rv64uv/vsra.c
+++ b/sw/riscvTests/isa/rv64uv/vsra.c
@@ -318,11 +318,11 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
   TEST_CASE5();
-  // TEST_CASE6();
+  TEST_CASE6();
 
   EXIT_CHECK();
 }
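The v0-to-v16 renames in the vslidedown hunks above are the substance of the fix: with v0.t masking, v0 is architecturally the mask register, so the old code loaded the slide source into v0 and then clobbered it with the 0xAA mask load. Keeping the data in v16 removes the conflict. The expected vectors follow this model (a sketch; ref_vslidedown is an illustrative name and out-of-range handling is simplified):

    #include <stddef.h>
    #include <stdint.h>

    /* Reference model of masked vslidedown: active element i reads
     * vs2[i + offset]. With offset 3, mask 0xAA and src = 1..32, the odd
     * elements become 5, 7, 9, ... while even ones keep the -1 fill,
     * matching the VCMP_U8(13, ...) row above. */
    static void ref_vslidedown(uint8_t *vd, const uint8_t *vs2, size_t offset,
                               const uint8_t *v0, size_t vl) {
      for (size_t i = 0; i < vl; ++i)
        if ((v0[i / 8] >> (i % 8)) & 1)
          vd[i] = vs2[i + offset]; /* assumes i + offset is within the source */
    }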
diff --git a/sw/riscvTests/isa/rv64uv/vss.c b/sw/riscvTests/isa/rv64uv/vss.c
new file mode 100644
index 00000000..14f0e355
--- /dev/null
+++ b/sw/riscvTests/isa/rv64uv/vss.c
@@ -0,0 +1,141 @@
+#include "vector_macros.h"
+
+// Positive-stride tests
+void TEST_CASE1(void) {
+  VSET(4, e8, m1);
+  volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+  uint64_t stride = 3;
+  VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20);
+  asm volatile("vsse8.v v1, (%0), %1" ::"r"(OUT1), "r"(stride));
+  VVCMP_U8(1, OUT1, 0x9f, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x19, 0x00, 0x00, 0x20,
+           0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+}
+
+void TEST_CASE2(void) {
+  VSET(8, e16, m1);
+  volatile uint16_t OUT1[] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+                              0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+                              0x0000, 0x0000, 0x0000, 0x0000};
+  uint64_t stride = 4;
+  VLOAD_16(v1, 0x9f11, 0xe478, 0x1549, 0x3240, 0x2f11, 0xe448, 0x1546, 0x3220);
+  asm volatile("vsse16.v v1, (%0), %1" ::"r"(OUT1), "r"(stride));
+  VVCMP_U16(2, OUT1, 0x9f11, 0x0000, 0xe478, 0x0000, 0x1549, 0x0000, 0x3240,
+            0x0000, 0x2f11, 0x0000, 0xe448, 0x0000, 0x1546, 0x0000, 0x3220,
+            0x0000);
+}
+
+void TEST_CASE3(void) {
+  VSET(4, e32, m1);
+  volatile uint32_t OUT1[] = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                              0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                              0x00000000, 0x00000000, 0x00000000, 0x00000000,
+                              0x00000000, 0x00000000, 0x00000000, 0x00000000};
+  uint64_t stride = 8;
+  VLOAD_32(v1, 0x9f872456, 0xe1356784, 0x13241139, 0x20862497);
+  asm volatile("vsse32.v v1, (%0), %1" ::"r"(OUT1), "r"(stride));
+  VVCMP_U32(3, OUT1, 0x9f872456, 0x00000000, 0xe1356784, 0x00000000,
+            0x13241139, 0x00000000, 0x20862497, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000);
+}
+
+void TEST_CASE4(void) {
+  VSET(16, e64, m2);
+  volatile uint64_t OUT1[] = {
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000};
+  uint64_t stride = 16;
+  VLOAD_64(v2, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139,
+           0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff,
+           0xaf87245315434136, 0xa135578794246784, 0x2315345345241139,
+           0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4,
+           0x9315345345241139, 0x9086252110062497, 0x9100229933847134,
+           0x9affaaffaaffaaf4);
+  asm volatile("vsse64.v v2, (%0), %1" ::"r"(OUT1), "r"(stride));
+  VVCMP_U64(4, OUT1, 0x9f87245315434136, 0x0000000000000000,
+            0xe135578794246784, 0x0000000000000000, 0x1315345345241139,
+            0x0000000000000000, 0x2086252110062497, 0x0000000000000000,
+            0x1100229933847136, 0x0000000000000000, 0xaaffaaffaaffaaff,
+            0x0000000000000000, 0xaf87245315434136, 0x0000000000000000,
+            0xa135578794246784, 0x0000000000000000, 0x2315345345241139,
+            0x0000000000000000, 0x1086252110062497, 0x0000000000000000,
+            0x1100229933847134, 0x0000000000000000, 0xaaffaaffaaffaaf4,
+            0x0000000000000000, 0x9315345345241139, 0x0000000000000000,
+            0x9086252110062497, 0x0000000000000000, 0x9100229933847134,
+            0x0000000000000000, 0x9affaaffaaffaaf4, 0x0000000000000000);
+}
+
+// Masked strided store
+void TEST_CASE5(void) {
+  VSET(4, e8, m1);
+  volatile uint8_t OUT1[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+  uint64_t stride = 3;
+  // vl = 4, so only the low four mask bits matter: 0xA = 0b1010
+  VLOAD_8(v0, 0xA);
+  VLOAD_8(v1, 0x9f, 0xe4, 0x19, 0x20);
+  asm volatile("vsse8.v v1, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride));
+  VVCMP_U8(5, OUT1, 0x00, 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20,
+           0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+}
+
+void TEST_CASE6(void) {
+  VSET(16, e64, m2);
+  volatile uint64_t OUT1[] = {
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+      0x0000000000000000, 0x0000000000000000};
+  uint64_t stride = 16;
+  VLOAD_64(v2, 0x9f87245315434136, 0xe135578794246784, 0x1315345345241139,
+           0x2086252110062497, 0x1100229933847136, 0xaaffaaffaaffaaff,
+           0xaf87245315434136, 0xa135578794246784, 0x2315345345241139,
+           0x1086252110062497, 0x1100229933847134, 0xaaffaaffaaffaaf4,
+           0x9315345345241139, 0x9086252110062497, 0x9100229933847134,
+           0x9affaaffaaffaaf4);
+  VLOAD_8(v0, 0xAA, 0xAA);
+  asm volatile("vsse64.v v2, (%0), %1, v0.t" ::"r"(OUT1), "r"(stride));
+  VVCMP_U64(6, OUT1, 0x0000000000000000, 0x0000000000000000,
+            0xe135578794246784, 0x0000000000000000, 0x0000000000000000,
+            0x0000000000000000, 0x2086252110062497, 0x0000000000000000,
+            0x0000000000000000, 0x0000000000000000, 0xaaffaaffaaffaaff,
+            0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+            0xa135578794246784, 0x0000000000000000, 0x0000000000000000,
+            0x0000000000000000, 0x1086252110062497, 0x0000000000000000,
+            0x0000000000000000, 0x0000000000000000, 0xaaffaaffaaffaaf4,
+            0x0000000000000000, 0x0000000000000000, 0x0000000000000000,
+            0x9086252110062497, 0x0000000000000000, 0x0000000000000000,
+            0x0000000000000000, 0x9affaaffaaffaaf4, 0x0000000000000000);
+}
+
+int main(void) {
+  INIT_CHECK();
+  enable_vec();
+
+  TEST_CASE1();
+  TEST_CASE2();
+  TEST_CASE3();
+  TEST_CASE4();
+
+  TEST_CASE5();
+  TEST_CASE6();
+
+  EXIT_CHECK();
+}
\ No newline at end of file
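vss.c exercises vsse8/16/32/64, whose only difference from the unit-stride stores is the byte stride taken from rs2. A sketch of the addressing the expected arrays encode (ref_vsse8 is a hypothetical helper; element type uint8_t corresponds to vsse8):

    #include <stddef.h>
    #include <stdint.h>

    /* Reference model of "vsse8.v vs3, (rs1), rs2": element i is stored at
     * base + i * stride bytes. TEST_CASE1 (stride 3) thus writes offsets
     * 0, 3, 6, 9 and leaves the gap bytes at their initial zero. */
    static void ref_vsse8(uint8_t *base, uint64_t stride, const uint8_t *vs3,
                          size_t vl) {
      for (size_t i = 0; i < vl; ++i)
        base[i * stride] = vs3[i];
    }

The 64-bit cases use stride 16 with 8-byte elements, so consecutive elements land at every other array slot, which is why the references interleave data with zeros.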
diff --git a/sw/riscvTests/isa/rv64uv/vsub.c b/sw/riscvTests/isa/rv64uv/vsub.c
index 177910fd..500518ad 100644
--- a/sw/riscvTests/isa/rv64uv/vsub.c
+++ b/sw/riscvTests/isa/rv64uv/vsub.c
@@ -136,9 +136,9 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
 
   EXIT_CHECK();
 }
diff --git a/sw/riscvTests/isa/rv64uv/vwadd.c b/sw/riscvTests/isa/rv64uv/vwadd.c
index d98e1bd4..da623bfb 100644
--- a/sw/riscvTests/isa/rv64uv/vwadd.c
+++ b/sw/riscvTests/isa/rv64uv/vwadd.c
@@ -217,9 +217,8 @@ int main(void) {
 
-  // SKIP 2,4: masking not supported
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
   // SKIP 5-8: vwadd.wv and vwadd.wx not supported
   // TEST_CASE5();
   // TEST_CASE6();
diff --git a/sw/riscvTests/isa/rv64uv/vwmacc.c b/sw/riscvTests/isa/rv64uv/vwmacc.c
index d1118af8..2e78bd6c 100644
--- a/sw/riscvTests/isa/rv64uv/vwmacc.c
+++ b/sw/riscvTests/isa/rv64uv/vwmacc.c
@@ -248,9 +248,9 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
 
   EXIT_CHECK();
 }
diff --git a/sw/riscvTests/isa/rv64uv/vxor.c b/sw/riscvTests/isa/rv64uv/vxor.c
index 6e4bb684..c7b41f02 100644
--- a/sw/riscvTests/isa/rv64uv/vxor.c
+++ b/sw/riscvTests/isa/rv64uv/vxor.c
@@ -311,11 +311,11 @@ int main(void) {
   enable_vec();
 
   TEST_CASE1();
-  // TEST_CASE2();
+  TEST_CASE2();
   TEST_CASE3();
-  // TEST_CASE4();
+  TEST_CASE4();
   TEST_CASE5();
-  // TEST_CASE6();
+  TEST_CASE6();
 
   EXIT_CHECK();
 }
diff --git a/util/Makefrag b/util/Makefrag
index 3eaffd5d..cb940baf 100644
--- a/util/Makefrag
+++ b/util/Makefrag
@@ -24,7 +24,7 @@ VERILATOR_INSTALL_DIR ?= ${INSTALL_DIR}/verilator
 BENDER ?= ${BENDER_INSTALL_DIR}/bender
 DASM ?= ${SPIKE_INSTALL_DIR}/bin/spike-dasm
 VLT ?= ${VERILATOR_INSTALL_DIR}/bin/verilator_bin
-CMAKE ?= cmake-3.18.1
+CMAKE ?= cmake # CMY: use the system cmake instead of the pinned cmake-3.18.1
 PYTHON ?= python3.6
 CC ?= gcc-11.2.0
 CXX ?= g++-11.2.0