From bb8dc7fac78671d5835e1a1f8511c1b792843036 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 10:06:10 +0200 Subject: [PATCH 01/39] deps: Update common_cells to v1.38.0 --- Bender.lock | 36 ++++++++++++++++++------------------ Bender.yml | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Bender.lock b/Bender.lock index 1a9a5c98..6bf3ab46 100644 --- a/Bender.lock +++ b/Bender.lock @@ -15,8 +15,8 @@ packages: - apb - register_interface axi: - revision: 39f5f2d51c5e524f6fc5cf8b6e901f7dcc5622d7 - version: 0.39.6 + revision: f07498d53ecd5518b277c7d213ec3b71ca4df93c + version: 0.39.7 source: Git: https://github.com/pulp-platform/axi.git dependencies: @@ -24,8 +24,8 @@ packages: - common_verification - tech_cells_generic axi_llc: - revision: 559bcbd09a5a884dbe31e2d72fd95d024e357f39 - version: 0.2.1 + revision: 59bb8a681347e1133f11a82190fbf4bc11900d9e + version: 0.2.2 source: Git: https://github.com/pulp-platform/axi_llc.git dependencies: @@ -60,8 +60,8 @@ packages: dependencies: - common_cells axi_vga: - revision: 3718b9930f94a9eaad8ee50b4bccc71df0403084 - version: 0.1.3 + revision: 4d3e70d4f47bb74edc1ab68d99ffc02382e0fb9e + version: 0.1.4 source: Git: https://github.com/pulp-platform/axi_vga.git dependencies: @@ -85,16 +85,16 @@ packages: - common_cells - register_interface common_cells: - revision: 13f28aa0021fc22c0d01a12d618fda58d2c93239 - version: 1.33.0 + revision: 9afda9abb565971649c2aa0985639c096f351171 + version: 1.38.0 source: Git: https://github.com/pulp-platform/common_cells.git dependencies: - common_verification - tech_cells_generic common_verification: - revision: 9c07fa860593b2caabd9b5681740c25fac04b878 - version: 0.2.3 + revision: fb1885f48ea46164a10568aeff51884389f67ae3 + version: 0.2.5 source: Git: https://github.com/pulp-platform/common_verification.git dependencies: [] @@ -131,8 +131,8 @@ packages: dependencies: - common_cells idma: - revision: c12caf59bb482fe44b27361f6924ad346b2d22fe - version: 0.6.3 + revision: 
ff5d56fffb3767814db88d6bf8f381974ea33aa5 + version: 0.6.4 source: Git: https://github.com/pulp-platform/iDMA.git dependencies: @@ -152,8 +152,8 @@ packages: - common_cells - register_interface obi: - revision: 5321106817e177d6c16ecc4daa922b96b1bc946b - version: 0.1.5 + revision: 0155fc34e900c7c884e081c0a1114a247937ff69 + version: 0.1.7 source: Git: https://github.com/pulp-platform/obi.git dependencies: @@ -169,8 +169,8 @@ packages: - register_interface - tech_cells_generic register_interface: - revision: ae616e5a1ec2b41e72d200e5ab09c65e94aebd3d - version: 0.4.4 + revision: 5daa85d164cf6b54ad061ea1e4c6f3624556e467 + version: 0.4.5 source: Git: https://github.com/pulp-platform/register_interface.git dependencies: @@ -187,8 +187,8 @@ packages: - common_cells - tech_cells_generic serial_link: - revision: 5a25f5a71074f1ebb6de7b5280f2b16924bcc666 - version: 1.1.1 + revision: c55df03a1da06b00e567cf968b1b1a5f40c9f802 + version: 1.1.2 source: Git: https://github.com/pulp-platform/serial_link.git dependencies: diff --git a/Bender.yml b/Bender.yml index 677449d5..212053f5 100644 --- a/Bender.yml +++ b/Bender.yml @@ -20,7 +20,7 @@ dependencies: axi_vga: { git: "https://github.com/pulp-platform/axi_vga.git", version: 0.1.3 } clic: { git: "https://github.com/pulp-platform/clic.git", version: 2.0.0 } clint: { git: "https://github.com/pulp-platform/clint.git", version: 0.2.0 } - common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.33.0 } + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 } common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.0 } cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: pulp-v1.0.0 } iDMA: { git: "https://github.com/pulp-platform/iDMA.git", version: 0.6.3 } From 850970f1b87ac96ddba119366411e30b2d7fc277 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 10:08:03 +0200 Subject: [PATCH 02/39] target/sim/verilator: 
Initial verilator support --- .gitignore | 8 + Bender.yml | 4 + cheshire.mk | 2 + .../sim/verilator/src/cheshire_soc_wrapper.sv | 174 ++++++++++++++++++ target/sim/verilator/src/sim_main.cpp | 118 ++++++++++++ target/sim/verilator/verilator.mk | 35 ++++ 6 files changed, 341 insertions(+) create mode 100644 target/sim/verilator/src/cheshire_soc_wrapper.sv create mode 100644 target/sim/verilator/src/sim_main.cpp create mode 100644 target/sim/verilator/verilator.mk diff --git a/.gitignore b/.gitignore index 4c8e9b87..8ea089b3 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,14 @@ target/sim/vcs/ucli.key target/sim/vcs/work* target/sim/vcs/vc_hdrs.h +# Verilator generated files +target/sim/verilator/obj_dir/ +target/sim/verilator/*.flist +target/sim/verilator/*.vlt +target/sim/verilator/*.vcd +target/sim/verilator/logs/ +target/sim/verilator/trace_hart_*.dasm + # Xilinx generated files target/xilinx/build target/xilinx/out diff --git a/Bender.yml b/Bender.yml index 212053f5..00c7acce 100644 --- a/Bender.yml +++ b/Bender.yml @@ -54,6 +54,10 @@ sources: - target/sim/src/fixture_cheshire_soc.sv - target/sim/src/tb_cheshire_soc.sv + - target: any(verilator) + files: + - target/sim/verilator/src/cheshire_soc_wrapper.sv + - target: all(fpga, xilinx) files: - target/xilinx/src/phy_definitions.svh diff --git a/cheshire.mk b/cheshire.mk index f838911b..799833d9 100644 --- a/cheshire.mk +++ b/cheshire.mk @@ -175,6 +175,8 @@ CHS_SIM_ALL += $(CHS_ROOT)/target/sim/models/24FC1025.v CHS_SIM_ALL += $(CHS_ROOT)/target/sim/vsim/compile.cheshire_soc.tcl CHS_SIM_ALL += $(CHS_ROOT)/target/sim/vcs/compile.cheshire_soc.sh +include $(CHS_ROOT)/target/sim/verilator/verilator.mk + ########### # DRAMSys # ########### diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv new file mode 100644 index 00000000..badc2ff4 --- /dev/null +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -0,0 +1,174 @@ +// Copyright 2025 ETH Zurich 
and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Max Wipfli + +module cheshire_soc_wrapper #( + parameter time ClkPeriodSys = 5ns, + parameter time ClkPeriodRtc = 30518ns, + parameter int unsigned RstClkCycles = 5 +) ( + output logic clk_o, + output logic rst_no +); + + `include "cheshire/typedef.svh" + + import cheshire_pkg::*; + + localparam cheshire_cfg_t DutCfg = DefaultCfg; + + `CHESHIRE_TYPEDEF_ALL(, DutCfg) + + /////////// + // DUT // + /////////// + + logic clk; + logic rst_n; + logic test_mode; + logic [1:0] boot_mode; + logic rtc; + + axi_llc_req_t axi_llc_mst_req; + axi_llc_rsp_t axi_llc_mst_rsp; + + logic jtag_tck; + logic jtag_trst_n; + logic jtag_tms; + logic jtag_tdi; + logic jtag_tdo; + + logic uart_tx; + logic uart_rx; + + logic i2c_sda_o; + logic i2c_sda_i; + logic i2c_sda_en; + logic i2c_scl_o; + logic i2c_scl_i; + logic i2c_scl_en; + + logic spih_sck_o; + logic spih_sck_en; + logic [SpihNumCs-1:0] spih_csb_o; + logic [SpihNumCs-1:0] spih_csb_en; + logic [ 3:0] spih_sd_o; + logic [ 3:0] spih_sd_i; + logic [ 3:0] spih_sd_en; + + logic [SlinkNumChan-1:0] slink_rcv_clk_i; + logic [SlinkNumChan-1:0] slink_rcv_clk_o; + logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_i; + logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_o; + + cheshire_soc #( + .Cfg ( DutCfg ), + .ExtHartinfo ( '0 ), + .axi_ext_llc_req_t ( axi_llc_req_t ), + .axi_ext_llc_rsp_t ( axi_llc_rsp_t ), + .axi_ext_mst_req_t ( axi_mst_req_t ), + .axi_ext_mst_rsp_t ( axi_mst_rsp_t ), + .axi_ext_slv_req_t ( axi_slv_req_t ), + .axi_ext_slv_rsp_t ( axi_slv_rsp_t ), + .reg_ext_req_t ( reg_req_t ), + .reg_ext_rsp_t ( reg_rsp_t ) + ) dut ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .test_mode_i ( test_mode ), + .boot_mode_i ( boot_mode ), + .rtc_i ( rtc ), + .axi_llc_mst_req_o ( axi_llc_mst_req ), + .axi_llc_mst_rsp_i ( axi_llc_mst_rsp ), + .axi_ext_mst_req_i ( '0 ), + .axi_ext_mst_rsp_o ( ), + 
.axi_ext_slv_req_o ( ), + .axi_ext_slv_rsp_i ( '0 ), + .reg_ext_slv_req_o ( ), + .reg_ext_slv_rsp_i ( '0 ), + .intr_ext_i ( '0 ), + .intr_ext_o ( ), + .xeip_ext_o ( ), + .mtip_ext_o ( ), + .msip_ext_o ( ), + .dbg_active_o ( ), + .dbg_ext_req_o ( ), + .dbg_ext_unavail_i ( '0 ), + .jtag_tck_i ( jtag_tck ), + .jtag_trst_ni ( jtag_trst_n ), + .jtag_tms_i ( jtag_tms ), + .jtag_tdi_i ( jtag_tdi ), + .jtag_tdo_o ( jtag_tdo ), + .jtag_tdo_oe_o ( ), + .uart_tx_o ( uart_tx ), + .uart_rx_i ( uart_rx ), + .uart_rts_no ( ), + .uart_dtr_no ( ), + .uart_cts_ni ( 1'b0 ), + .uart_dsr_ni ( 1'b0 ), + .uart_dcd_ni ( 1'b0 ), + .uart_rin_ni ( 1'b0 ), + .i2c_sda_o ( i2c_sda_o ), + .i2c_sda_i ( i2c_sda_i ), + .i2c_sda_en_o ( i2c_sda_en ), + .i2c_scl_o ( i2c_scl_o ), + .i2c_scl_i ( i2c_scl_i ), + .i2c_scl_en_o ( i2c_scl_en ), + .spih_sck_o ( spih_sck_o ), + .spih_sck_en_o ( spih_sck_en ), + .spih_csb_o ( spih_csb_o ), + .spih_csb_en_o ( spih_csb_en ), + .spih_sd_o ( spih_sd_o ), + .spih_sd_en_o ( spih_sd_en ), + .spih_sd_i ( spih_sd_i ), + .gpio_i ( '0 ), + .gpio_o ( ), + .gpio_en_o ( ), + .slink_rcv_clk_i ( slink_rcv_clk_i ), + .slink_rcv_clk_o ( slink_rcv_clk_o ), + .slink_i ( slink_i ), + .slink_o ( slink_o ), + .vga_hsync_o ( ), + .vga_vsync_o ( ), + .vga_red_o ( ), + .vga_green_o ( ), + .vga_blue_o ( ), + .usb_clk_i ( 1'b0 ), + .usb_rst_ni ( 1'b1 ), + .usb_dm_i ( '0 ), + .usb_dm_o ( ), + .usb_dm_oe_o ( ), + .usb_dp_i ( '0 ), + .usb_dp_o ( ), + .usb_dp_oe_o ( ) + ); + + ///////////////////////////// + // Clock/Reset Generation // + ///////////////////////////// + clk_rst_gen #( + .ClkPeriod ( ClkPeriodSys ), + .RstClkCycles ( RstClkCycles ) + ) i_clk_rst_sys ( + .clk_o ( clk ), + .rst_no ( rst_n ) + ); + + clk_rst_gen #( + .ClkPeriod ( ClkPeriodRtc ), + .RstClkCycles ( RstClkCycles ) + ) i_clk_rst_rtc ( + .clk_o ( rtc ), + .rst_no ( ) + ); + + ///////////////// + // I/O to C++ // + ///////////////// + + assign clk_o = clk; + assign rst_no = rst_n; + +endmodule diff --git 
a/target/sim/verilator/src/sim_main.cpp b/target/sim/verilator/src/sim_main.cpp new file mode 100644 index 00000000..c8290b5a --- /dev/null +++ b/target/sim/verilator/src/sim_main.cpp @@ -0,0 +1,118 @@ +#include // std::unique_ptr + +#include // common Verilator routines +#include // trace to VCD + +#include "Vcheshire_soc_wrapper.h" // Verilated model + +#define TRACE + +int main(int argc, char** argv) { + // This is a more complicated example, please also see the simpler examples/make_hello_c. + + // Create logs/ directory in case we have traces to put under it + Verilated::mkdir("logs"); + + // Construct a VerilatedContext to hold simulation time, etc. + // Multiple modules (made later below with Vtop) may share the same + // context to share time, or modules may have different contexts if + // they should be independent from each other. + + // Using unique_ptr is similar to + // "VerilatedContext* contextp = new VerilatedContext" then deleting at end. + const auto contextp = std::make_unique(); + // const std::unique_ptr contextp{new VerilatedContext}; + // Do not instead make Vtop as a file-scope static variable, as the + // "C++ static initialization order fiasco" may cause a crash + + // Set debug level, 0 is off, 9 is highest presently used + // May be overridden by commandArgs argument parsing + contextp->debug(0); + + // Randomization reset policy + // May be overridden by commandArgs argument parsing + contextp->randReset(2); + + // Verilator must compute traced signals + contextp->traceEverOn(true); + + // Pass arguments so Verilated code can see them, e.g. 
$value$plusargs + // This needs to be called before you create any model + contextp->commandArgs(argc, argv); + + // "WRAPPER" will be the hierarchical name of the module + const auto top = std::make_unique(contextp.get(), "TOP"); + +#ifdef TRACE + Verilated::traceEverOn(true); + const auto trace = std::make_unique(); + top->trace(trace.get(), 2); + trace->open("dump.vcd"); +#endif + + // Set Vtop's input signals + // top->reset_l = !0; + // top->clk = 0; + // top->in_small = 1; + // top->in_quad = 0x1234; + // top->in_wide[0] = 0x11111111; + // top->in_wide[1] = 0x22222222; + // top->in_wide[2] = 0x3; + + // Simulate until $finish + while (!contextp->gotFinish()) { + contextp->timeInc(1); // 1 timeprecision period passes... + + // VL_PRINTF("toggle...\n"); + + // Toggle control signals on an edge that doesn't correspond + // to where the controls are sampled; in this example we do + // this only on a negedge of clk, because we know + // reset is not sampled there. + // if (!top->clk) { + // if (contextp->time() > 1 && contextp->time() < 10) { + // top->reset_l = !1; // Assert reset + // } else { + // top->reset_l = !0; // Deassert reset + // } + // // Assign some other inputs + // top->in_quad += 0x12; + // } + + // Evaluate model + // (If you have multiple models being simulated in the same + // timestep then instead of eval(), call eval_step() on each, then + // eval_end_step() on each. See the manual.) 
+ top->eval(); + +#ifdef TRACE + trace->dump(contextp->time()); +#endif + + // // Read outputs + // VL_PRINTF("[%" PRId64 "] clk=%x rstl=%x iquad=%" PRIx64 " -> oquad=%" PRIx64 + // " owide=%x_%08x_%08x\n", + // contextp->time(), top->clk, top->reset_l, top->in_quad, top->out_quad, + // top->out_wide[2], top->out_wide[1], top->out_wide[0]); + } + + // Final model cleanup + top->final(); + +#ifdef TRACE + trace->close(); +#endif + + // Coverage analysis (calling write only after the test is known to pass) +#if VM_COVERAGE + Verilated::mkdir("logs"); + contextp->coveragep()->write("logs/coverage.dat"); +#endif + + // Final simulation summary + contextp->statsPrintSummary(); + + // Return good completion status + // Don't use exit() or destructor won't get called + return 0; +} diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk new file mode 100644 index 00000000..2b0f5bc6 --- /dev/null +++ b/target/sim/verilator/verilator.mk @@ -0,0 +1,35 @@ +# Copyright 2025 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 +# +# Max Wipfli +# Paul Scheffler + +CHS_VERILATOR_DIR ?= $(CHS_ROOT)/target/sim/verilator + +VERILATOR = oseda verilator + +VERILATOR_CFLAGS = -O3 +VERILATOR_OPT = -march=native -mtune=native -Wno-deprecated-experimental-coroutine +# Silly Verilator warnings: these are perfectly valid and should not be warnings +VERILATOR_WNO = -Wno-fatal -Wno-style -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-ASCRANGE +VERILATOR_FIX = --unroll-count 51 --unroll-stmts 1 +VERILATOR_ARGS ?= -j 0 -Wall --timing -timescale 1ns/1ns $(VERILATOR_WNO) $(VERILATOR_FIX) -O3 \ + --trace --trace-depth 2 + +VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/src/sim_main.cpp + +$(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml + $(BENDER) script verilator $(CHS_BENDER_RTL_FLAGS) > $@ + +$(CHS_ROOT)/target/sim/verilator/obj_dir/cheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) + +cd $(dir $@) && $(VERILATOR) $(VERILATOR_ARGS) -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ + --cc --exe --build --top-module cheshire_soc_wrapper + +$(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper + @echo "#!/bin/sh" > $@ + @echo 'set -eu' >> $@ + @echo 'cd $$(dirname "$$0")' >> $@ + @echo 'oseda ./obj_dir/Vcheshire_soc_wrapper' >> $@ + @chmod +x $@ + From 9dd2c19269f0e3ff30c57e30dd86d0605ff95f35 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 18:10:24 +0200 Subject: [PATCH 03/39] target/sim/verilator: Add JTAG support using OpenOCD remote_bitbang --- .gitignore | 1 + .../{src/sim_main.cpp => sim/main.cpp} | 37 +++++++++++++++++-- .../sim/verilator/src/cheshire_soc_wrapper.sv | 29 ++++++++++----- target/sim/verilator/verilator.mk | 15 +++++--- util/openocd.verilator.tcl | 15 ++++++++ 5 files changed, 78 insertions(+), 19 deletions(-) rename target/sim/verilator/{src/sim_main.cpp => sim/main.cpp} (81%) create mode 100644 
util/openocd.verilator.tcl diff --git a/.gitignore b/.gitignore index 8ea089b3..80ef51f9 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ target/sim/vcs/vc_hdrs.h # Verilator generated files target/sim/verilator/obj_dir/ target/sim/verilator/*.flist +target/sim/verilator/*.fst target/sim/verilator/*.vlt target/sim/verilator/*.vcd target/sim/verilator/logs/ diff --git a/target/sim/verilator/src/sim_main.cpp b/target/sim/verilator/sim/main.cpp similarity index 81% rename from target/sim/verilator/src/sim_main.cpp rename to target/sim/verilator/sim/main.cpp index c8290b5a..2fb09a33 100644 --- a/target/sim/verilator/src/sim_main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -1,12 +1,35 @@ #include // std::unique_ptr #include // common Verilator routines -#include // trace to VCD +#include // trace to VCD #include "Vcheshire_soc_wrapper.h" // Verilated model #define TRACE +extern int jtag_tick(int port, unsigned char *jtag_TCK, unsigned char *jtag_TMS, + unsigned char *jtag_TDI, unsigned char *jtag_TRSTn, unsigned char jtag_TDO); + + +static void jtag_tick_io(Vcheshire_soc_wrapper& top) { + static int count = 0; + if (count < 5) { + count++; + return; + } + count = 0; + + unsigned char tck, tms, tdi, trst_n; + int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, top.jtag_tdo_o); + if (ret) + VL_PRINTF("ret = 0x%08x\n", ret); + top.jtag_tck_i = tck; + top.jtag_tms_i = tms; + top.jtag_tdi_i = tdi; + top.jtag_trst_ni = trst_n; +} + + int main(int argc, char** argv) { // This is a more complicated example, please also see the simpler examples/make_hello_c. 
@@ -45,9 +68,9 @@ int main(int argc, char** argv) { #ifdef TRACE Verilated::traceEverOn(true); - const auto trace = std::make_unique(); - top->trace(trace.get(), 2); - trace->open("dump.vcd"); + const auto trace = std::make_unique(); + top->trace(trace.get(), 5); + trace->open("dump.fst"); #endif // Set Vtop's input signals @@ -61,6 +84,8 @@ int main(int argc, char** argv) { // Simulate until $finish while (!contextp->gotFinish()) { + unsigned char clk_prev = top->clk_o; + contextp->timeInc(1); // 1 timeprecision period passes... // VL_PRINTF("toggle...\n"); @@ -89,6 +114,10 @@ int main(int argc, char** argv) { trace->dump(contextp->time()); #endif + if (top->rst_no && !top->clk_o && clk_prev) { + jtag_tick_io(*top); + } + // // Read outputs // VL_PRINTF("[%" PRId64 "] clk=%x rstl=%x iquad=%" PRIx64 " -> oquad=%" PRIx64 // " owide=%x_%08x_%08x\n", diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index badc2ff4..d528bb5e 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -7,10 +7,20 @@ module cheshire_soc_wrapper #( parameter time ClkPeriodSys = 5ns, parameter time ClkPeriodRtc = 30518ns, - parameter int unsigned RstClkCycles = 5 + parameter int unsigned RstClkCycles = 5, + + parameter int unsigned RemoteBitbangPort = 3335 ) ( output logic clk_o, - output logic rst_no + output logic rst_no, + + // JTAG + input logic jtag_tck_i, + input logic jtag_trst_ni, + input logic jtag_tms_i, + input logic jtag_tdi_i, + output logic jtag_tdo_o, + output logic jtag_tdo_oe_o ); `include "cheshire/typedef.svh" @@ -39,6 +49,7 @@ module cheshire_soc_wrapper #( logic jtag_tms; logic jtag_tdi; logic jtag_tdo; + logic jtag_tdo_oe; logic uart_tx; logic uart_rx; @@ -74,7 +85,7 @@ module cheshire_soc_wrapper #( .axi_ext_slv_rsp_t ( axi_slv_rsp_t ), .reg_ext_req_t ( reg_req_t ), .reg_ext_rsp_t ( reg_rsp_t ) - ) dut ( + ) i_dut ( .clk_i ( clk ), .rst_ni ( 
rst_n ), .test_mode_i ( test_mode ), @@ -96,12 +107,12 @@ module cheshire_soc_wrapper #( .dbg_active_o ( ), .dbg_ext_req_o ( ), .dbg_ext_unavail_i ( '0 ), - .jtag_tck_i ( jtag_tck ), - .jtag_trst_ni ( jtag_trst_n ), - .jtag_tms_i ( jtag_tms ), - .jtag_tdi_i ( jtag_tdi ), - .jtag_tdo_o ( jtag_tdo ), - .jtag_tdo_oe_o ( ), + .jtag_tck_i ( jtag_tck_i ), + .jtag_trst_ni ( jtag_trst_ni ), + .jtag_tms_i ( jtag_tms_i ), + .jtag_tdi_i ( jtag_tdi_i ), + .jtag_tdo_o ( jtag_tdo_o ), + .jtag_tdo_oe_o ( jtag_tdo_oe_o ), .uart_tx_o ( uart_tx ), .uart_rx_i ( uart_rx ), .uart_rts_no ( ), diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 2b0f5bc6..3bf72818 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -6,24 +6,27 @@ # Paul Scheffler CHS_VERILATOR_DIR ?= $(CHS_ROOT)/target/sim/verilator +RISCV_DBG_DIR = $(shell bender path riscv-dbg) VERILATOR = oseda verilator -VERILATOR_CFLAGS = -O3 VERILATOR_OPT = -march=native -mtune=native -Wno-deprecated-experimental-coroutine # Silly Verilator warnings: these are perfectly valid and should not be warnings -VERILATOR_WNO = -Wno-fatal -Wno-style -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-ASCRANGE +VERILATOR_WNO = -Wno-fatal -Wno-style \ + -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT -Wno-ASCRANGE VERILATOR_FIX = --unroll-count 51 --unroll-stmts 1 VERILATOR_ARGS ?= -j 0 -Wall --timing -timescale 1ns/1ns $(VERILATOR_WNO) $(VERILATOR_FIX) -O3 \ - --trace --trace-depth 2 + --trace-fst --trace-structs --trace-threads 1 --no-trace-top --trace-depth 5 -VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/src/sim_main.cpp +VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ + $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ + $(RISCV_DBG_DIR)/tb/remote_bitbang/sim_jtag.c $(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml $(BENDER) script verilator $(CHS_BENDER_RTL_FLAGS) > $@ 
-$(CHS_ROOT)/target/sim/verilator/obj_dir/cheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) - +cd $(dir $@) && $(VERILATOR) $(VERILATOR_ARGS) -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ +$(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) + +cd $(CHS_VERILATOR_DIR) && $(VERILATOR) $(VERILATOR_ARGS) -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ --cc --exe --build --top-module cheshire_soc_wrapper $(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper diff --git a/util/openocd.verilator.tcl b/util/openocd.verilator.tcl new file mode 100644 index 00000000..105af840 --- /dev/null +++ b/util/openocd.verilator.tcl @@ -0,0 +1,15 @@ +# Copyright 2024 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. +# SPDX-License-Identifier: Apache-2.0 +# +# OpenOCD script for Cheshire on Verilator (using remote_bitbang). 
+ +adapter_khz 8000 + +interface remote_bitbang +remote_bitbang_host localhost +remote_bitbang_port 3335 + +set irlen 5 + +source [file dirname [info script]]/openocd.common.tcl From 3aeb6e0c719fe59ed597981443689fd499b5eb40 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 18:11:50 +0200 Subject: [PATCH 04/39] target/sim/verilator: Tie off unused signals --- .../sim/verilator/src/cheshire_soc_wrapper.sv | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index d528bb5e..1f819020 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -37,10 +37,14 @@ module cheshire_soc_wrapper #( logic clk; logic rst_n; + logic test_mode; logic [1:0] boot_mode; logic rtc; + assign test_mode = 1'b0; + assign boot_mode = 2'b00; // passive + axi_llc_req_t axi_llc_mst_req; axi_llc_rsp_t axi_llc_mst_rsp; @@ -60,6 +64,8 @@ module cheshire_soc_wrapper #( logic i2c_scl_o; logic i2c_scl_i; logic i2c_scl_en; + assign i2c_sda_i = 1'b0; + assign i2c_scl_i = 1'b0; logic spih_sck_o; logic spih_sck_en; @@ -68,11 +74,14 @@ module cheshire_soc_wrapper #( logic [ 3:0] spih_sd_o; logic [ 3:0] spih_sd_i; logic [ 3:0] spih_sd_en; + assign spih_sd_i = 1'b0; logic [SlinkNumChan-1:0] slink_rcv_clk_i; logic [SlinkNumChan-1:0] slink_rcv_clk_o; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_i; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_o; + assign slink_rcv_clk_i = '0; + assign slink_i = '0; cheshire_soc #( .Cfg ( DutCfg ), @@ -156,9 +165,10 @@ module cheshire_soc_wrapper #( .usb_dp_oe_o ( ) ); - ///////////////////////////// - // Clock/Reset Generation // - ///////////////////////////// + ////////////////////////////// + // Clock/Reset Generation // + ////////////////////////////// + clk_rst_gen #( .ClkPeriod ( ClkPeriodSys ), .RstClkCycles ( RstClkCycles ) @@ -175,9 +185,9 @@ module 
cheshire_soc_wrapper #( .rst_no ( ) ); - ///////////////// - // I/O to C++ // - ///////////////// + ////////////////// + // I/O to C++ // + ////////////////// assign clk_o = clk; assign rst_no = rst_n; From a3f961111a2d7da65f056f886ea9c6950cc18e5b Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 20:35:51 +0200 Subject: [PATCH 05/39] target/sim/verilator: Disable serial link --- target/sim/verilator/src/cheshire_soc_wrapper.sv | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 1f819020..29f11ac3 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -27,7 +27,13 @@ module cheshire_soc_wrapper #( import cheshire_pkg::*; - localparam cheshire_cfg_t DutCfg = DefaultCfg; + function automatic cheshire_pkg::cheshire_cfg_t gen_cheshire_cfg(); + cheshire_pkg::cheshire_cfg_t ret = cheshire_pkg::DefaultCfg; + ret.SerialLink = 1'b0; + return ret; + endfunction + + localparam cheshire_cfg_t DutCfg = gen_cheshire_cfg(); `CHESHIRE_TYPEDEF_ALL(, DutCfg) @@ -64,8 +70,8 @@ module cheshire_soc_wrapper #( logic i2c_scl_o; logic i2c_scl_i; logic i2c_scl_en; - assign i2c_sda_i = 1'b0; - assign i2c_scl_i = 1'b0; + assign i2c_sda_i = 1'b1; + assign i2c_scl_i = 1'b1; logic spih_sck_o; logic spih_sck_en; @@ -80,8 +86,8 @@ module cheshire_soc_wrapper #( logic [SlinkNumChan-1:0] slink_rcv_clk_o; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_i; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_o; - assign slink_rcv_clk_i = '0; - assign slink_i = '0; + assign slink_rcv_clk_i = '1; + assign slink_i = '1; cheshire_soc #( .Cfg ( DutCfg ), From 2c8c87b8b3fe62957d40cc67a8251e0c8be0b460 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 19 Jun 2025 20:37:25 +0200 Subject: [PATCH 06/39] target/sim/verilator: Add simulated DRAM memory We have to be careful where to place this, as 
its reset logic is very flaky with respect to event order. This should be fixed within axi_sim_mem itself. --- .../sim/verilator/src/cheshire_soc_wrapper.sv | 36 +++++++++++++++++++ target/sim/verilator/verilator.mk | 2 ++ 2 files changed, 38 insertions(+) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 29f11ac3..09f57fba 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -191,6 +191,42 @@ module cheshire_soc_wrapper #( .rst_no ( ) ); + //////////// + // DRAM // + //////////// + + axi_sim_mem #( + .AddrWidth ( DutCfg.AddrWidth ), + .DataWidth ( DutCfg.AxiDataWidth ), + .IdWidth ( $bits(axi_llc_id_t) ), + .UserWidth ( DutCfg.AxiUserWidth ), + .axi_req_t ( axi_llc_req_t ), + .axi_rsp_t ( axi_llc_rsp_t ), + .WarnUninitialized ( 0 ), + .ClearErrOnAccess ( 1 ), + .ApplDelay ( 0ps ), + .AcqDelay ( 0ps ) + ) i_dram_sim_mem ( + .clk_i ( clk ), + .rst_ni ( rst_n ), + .axi_req_i ( axi_llc_mst_req ), + .axi_rsp_o ( axi_llc_mst_rsp ), + .mon_w_valid_o ( ), + .mon_w_addr_o ( ), + .mon_w_data_o ( ), + .mon_w_id_o ( ), + .mon_w_user_o ( ), + .mon_w_beat_count_o ( ), + .mon_w_last_o ( ), + .mon_r_valid_o ( ), + .mon_r_addr_o ( ), + .mon_r_data_o ( ), + .mon_r_id_o ( ), + .mon_r_user_o ( ), + .mon_r_beat_count_o ( ), + .mon_r_last_o ( ) + ); + ////////////////// // I/O to C++ // ////////////////// diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 3bf72818..e526d4a3 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -24,6 +24,8 @@ VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ $(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml $(BENDER) script verilator $(CHS_BENDER_RTL_FLAGS) > $@ + # TODO: Add verilator target for these upstream to avoid patch-in + echo '$(shell $(BENDER) path axi)/src/axi_sim_mem.sv' >> $@ 
$(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) +cd $(CHS_VERILATOR_DIR) && $(VERILATOR) $(VERILATOR_ARGS) -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ From 0bf844836f9d79afae0627343ea9f11e152645d4 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 11:53:48 +0200 Subject: [PATCH 07/39] target/sim/verilator: Exit when JTAG client disconnects --- target/sim/verilator/sim/main.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 2fb09a33..9ce9498e 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -5,24 +5,23 @@ #include "Vcheshire_soc_wrapper.h" // Verilated model -#define TRACE + +bool do_exit = false; +int exit_code = 0; extern int jtag_tick(int port, unsigned char *jtag_TCK, unsigned char *jtag_TMS, unsigned char *jtag_TDI, unsigned char *jtag_TRSTn, unsigned char jtag_TDO); static void jtag_tick_io(Vcheshire_soc_wrapper& top) { - static int count = 0; - if (count < 5) { - count++; + unsigned char tck, tms, tdi, trst_n; + int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, top.jtag_tdo_o); + if (ret) { + do_exit = true; + exit_code = ret >> 1; return; } - count = 0; - unsigned char tck, tms, tdi, trst_n; - int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, top.jtag_tdo_o); - if (ret) - VL_PRINTF("ret = 0x%08x\n", ret); top.jtag_tck_i = tck; top.jtag_tms_i = tms; top.jtag_tdi_i = tdi; @@ -83,7 +82,7 @@ int main(int argc, char** argv) { // top->in_wide[2] = 0x3; // Simulate until $finish - while (!contextp->gotFinish()) { + while (!contextp->gotFinish() && !do_exit) { unsigned char clk_prev = top->clk_o; contextp->timeInc(1); // 1 timeprecision period passes... 
@@ -143,5 +142,5 @@ int main(int argc, char** argv) { // Return good completion status // Don't use exit() or destructor won't get called - return 0; + return exit_code; } From c59d9bff581e5c37b84cddab3183ed679a5c9449 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 12:39:32 +0200 Subject: [PATCH 08/39] target/sim/verilator: Disable tracing --- target/sim/verilator/sim/main.cpp | 1 + target/sim/verilator/verilator.mk | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 9ce9498e..b8587db1 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -5,6 +5,7 @@ #include "Vcheshire_soc_wrapper.h" // Verilated model +// #define TRACE bool do_exit = false; int exit_code = 0; diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index e526d4a3..6b7529f8 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -16,7 +16,7 @@ VERILATOR_WNO = -Wno-fatal -Wno-style \ -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT -Wno-ASCRANGE VERILATOR_FIX = --unroll-count 51 --unroll-stmts 1 VERILATOR_ARGS ?= -j 0 -Wall --timing -timescale 1ns/1ns $(VERILATOR_WNO) $(VERILATOR_FIX) -O3 \ - --trace-fst --trace-structs --trace-threads 1 --no-trace-top --trace-depth 5 + # --trace --trace-structs --no-trace-top --trace-depth 5 VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ From a19e7681663f211ce9e0ded442f38111637bb55e Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 12:42:48 +0200 Subject: [PATCH 09/39] target/sim/verilator: Track simulation rate regularly --- target/sim/verilator/sim/main.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index b8587db1..39a0cc38 100644 --- 
a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -1,3 +1,4 @@ +#include // timers #include // std::unique_ptr #include // common Verilator routines @@ -5,6 +6,8 @@ #include "Vcheshire_soc_wrapper.h" // Verilated model +#define SIMULATION_RATE_CHUNK 1024 + // #define TRACE bool do_exit = false; @@ -82,7 +85,11 @@ int main(int argc, char** argv) { // top->in_wide[1] = 0x22222222; // top->in_wide[2] = 0x3; + // Simulate until $finish + auto start = std::chrono::high_resolution_clock::now(); + auto last = start; + uint64_t cycle = 0; while (!contextp->gotFinish() && !do_exit) { unsigned char clk_prev = top->clk_o; @@ -118,6 +125,21 @@ int main(int argc, char** argv) { jtag_tick_io(*top); } + if (!clk_prev && top->clk_o) { + // posedge clk + cycle++; + if (cycle % SIMULATION_RATE_CHUNK == 0) { + auto current = std::chrono::high_resolution_clock::now(); + auto total_elapsed_us = std::chrono::duration_cast(current - start).count(); + auto last_elapsed_us = std::chrono::duration_cast(current - last).count(); + last = current; + auto total_cycles_per_sec = 1000000.0 * cycle / total_elapsed_us; + auto last_cycles_per_sec = 1000000.0 * SIMULATION_RATE_CHUNK / last_elapsed_us; + VL_PRINTF("elapsed: %lu us, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", + total_elapsed_us, total_cycles_per_sec, last_cycles_per_sec); + } + } + // // Read outputs // VL_PRINTF("[%" PRId64 "] clk=%x rstl=%x iquad=%" PRIx64 " -> oquad=%" PRIx64 // " owide=%x_%08x_%08x\n", From 64d8a3804ffd76bc54b426e23f61da21713e9c49 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Fri, 20 Jun 2025 14:11:42 +0200 Subject: [PATCH 10/39] target/sim/verilator: Drive clock/reset from C++ --- target/sim/verilator/sim/main.cpp | 89 +++++++------- .../sim/verilator/src/cheshire_soc_wrapper.sv | 110 ++++++------------ 2 files changed, 81 insertions(+), 118 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 39a0cc38..76a24e7a 100644 --- 
a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -6,6 +6,13 @@ #include "Vcheshire_soc_wrapper.h" // Verilated model +// clock periods and time step in pico seconds +#define CLK_PERIOD_PS 5000 +#define RTC_PERIOD_PS 30520000 +#define TIME_STEP_PS 2500 + +#define RST_CYCLES 5 + #define SIMULATION_RATE_CHUNK 1024 // #define TRACE @@ -66,7 +73,7 @@ int main(int argc, char** argv) { // This needs to be called before you create any model contextp->commandArgs(argc, argv); - // "WRAPPER" will be the hierarchical name of the module + // "TOP" will be the hierarchical name of the module const auto top = std::make_unique(contextp.get(), "TOP"); #ifdef TRACE @@ -76,58 +83,52 @@ int main(int argc, char** argv) { trace->open("dump.fst"); #endif - // Set Vtop's input signals - // top->reset_l = !0; - // top->clk = 0; - // top->in_small = 1; - // top->in_quad = 0x1234; - // top->in_wide[0] = 0x11111111; - // top->in_wide[1] = 0x22222222; - // top->in_wide[2] = 0x3; - + // Initial Inputs + top->clk_i = 1; + top->rtc_i = 1; + top->rst_ni = 0; - // Simulate until $finish auto start = std::chrono::high_resolution_clock::now(); auto last = start; uint64_t cycle = 0; + uint64_t next_rtc_toggle_ps = 0; + + // Simulate until $finish while (!contextp->gotFinish() && !do_exit) { - unsigned char clk_prev = top->clk_o; - - contextp->timeInc(1); // 1 timeprecision period passes... - - // VL_PRINTF("toggle...\n"); - - // Toggle control signals on an edge that doesn't correspond - // to where the controls are sampled; in this example we do - // this only on a negedge of clk, because we know - // reset is not sampled there. 
- // if (!top->clk) { - // if (contextp->time() > 1 && contextp->time() < 10) { - // top->reset_l = !1; // Assert reset - // } else { - // top->reset_l = !0; // Deassert reset - // } - // // Assign some other inputs - // top->in_quad += 0x12; - // } + // Toggle Clock + top->clk_i = !top->clk_i; + + // Apply Inputs (negedge clk_i) + if (!top->clk_i) { + cycle++; + + // Release Reset + if (cycle == RST_CYCLES) + top->rst_ni = 1; + + // Toggle Real Time Clock + if (contextp->time() < next_rtc_toggle_ps) { + top->rtc_i = !top->rtc_i; + next_rtc_toggle_ps += RTC_PERIOD_PS / 2; + } + + // JTAG I/O + if (top->rst_ni) { + jtag_tick_io(*top); + } + } + + contextp->timeInc(TIME_STEP_PS); // Evaluate model - // (If you have multiple models being simulated in the same - // timestep then instead of eval(), call eval_step() on each, then - // eval_end_step() on each. See the manual.) top->eval(); #ifdef TRACE trace->dump(contextp->time()); #endif - if (top->rst_no && !top->clk_o && clk_prev) { - jtag_tick_io(*top); - } - - if (!clk_prev && top->clk_o) { - // posedge clk - cycle++; + // Monitoring (posedge clk_i) + if (top->clk_i) { if (cycle % SIMULATION_RATE_CHUNK == 0) { auto current = std::chrono::high_resolution_clock::now(); auto total_elapsed_us = std::chrono::duration_cast(current - start).count(); @@ -138,13 +139,9 @@ int main(int argc, char** argv) { VL_PRINTF("elapsed: %lu us, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", total_elapsed_us, total_cycles_per_sec, last_cycles_per_sec); } + if (cycle == 100000) + break; } - - // // Read outputs - // VL_PRINTF("[%" PRId64 "] clk=%x rstl=%x iquad=%" PRIx64 " -> oquad=%" PRIx64 - // " owide=%x_%08x_%08x\n", - // contextp->time(), top->clk, top->reset_l, top->in_quad, top->out_quad, - // top->out_wide[2], top->out_wide[1], top->out_wide[0]); } // Final model cleanup diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 09f57fba..9c2a217e 100644 --- 
a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -4,15 +4,10 @@ // // Max Wipfli -module cheshire_soc_wrapper #( - parameter time ClkPeriodSys = 5ns, - parameter time ClkPeriodRtc = 30518ns, - parameter int unsigned RstClkCycles = 5, - - parameter int unsigned RemoteBitbangPort = 3335 -) ( - output logic clk_o, - output logic rst_no, +module cheshire_soc_wrapper ( + input logic clk_i, + input logic rtc_i, + input logic rst_ni, // JTAG input logic jtag_tck_i, @@ -41,9 +36,6 @@ module cheshire_soc_wrapper #( // DUT // /////////// - logic clk; - logic rst_n; - logic test_mode; logic [1:0] boot_mode; logic rtc; @@ -101,8 +93,8 @@ module cheshire_soc_wrapper #( .reg_ext_req_t ( reg_req_t ), .reg_ext_rsp_t ( reg_rsp_t ) ) i_dut ( - .clk_i ( clk ), - .rst_ni ( rst_n ), + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), .test_mode_i ( test_mode ), .boot_mode_i ( boot_mode ), .rtc_i ( rtc ), @@ -171,67 +163,41 @@ module cheshire_soc_wrapper #( .usb_dp_oe_o ( ) ); - ////////////////////////////// - // Clock/Reset Generation // - ////////////////////////////// - - clk_rst_gen #( - .ClkPeriod ( ClkPeriodSys ), - .RstClkCycles ( RstClkCycles ) - ) i_clk_rst_sys ( - .clk_o ( clk ), - .rst_no ( rst_n ) - ); - - clk_rst_gen #( - .ClkPeriod ( ClkPeriodRtc ), - .RstClkCycles ( RstClkCycles ) - ) i_clk_rst_rtc ( - .clk_o ( rtc ), - .rst_no ( ) - ); - //////////// // DRAM // //////////// - axi_sim_mem #( - .AddrWidth ( DutCfg.AddrWidth ), - .DataWidth ( DutCfg.AxiDataWidth ), - .IdWidth ( $bits(axi_llc_id_t) ), - .UserWidth ( DutCfg.AxiUserWidth ), - .axi_req_t ( axi_llc_req_t ), - .axi_rsp_t ( axi_llc_rsp_t ), - .WarnUninitialized ( 0 ), - .ClearErrOnAccess ( 1 ), - .ApplDelay ( 0ps ), - .AcqDelay ( 0ps ) - ) i_dram_sim_mem ( - .clk_i ( clk ), - .rst_ni ( rst_n ), - .axi_req_i ( axi_llc_mst_req ), - .axi_rsp_o ( axi_llc_mst_rsp ), - .mon_w_valid_o ( ), - .mon_w_addr_o ( ), - .mon_w_data_o ( ), - .mon_w_id_o ( ), - .mon_w_user_o ( ), 
- .mon_w_beat_count_o ( ), - .mon_w_last_o ( ), - .mon_r_valid_o ( ), - .mon_r_addr_o ( ), - .mon_r_data_o ( ), - .mon_r_id_o ( ), - .mon_r_user_o ( ), - .mon_r_beat_count_o ( ), - .mon_r_last_o ( ) - ); - - ////////////////// - // I/O to C++ // - ////////////////// - - assign clk_o = clk; - assign rst_no = rst_n; + // axi_sim_mem #( + // .AddrWidth ( DutCfg.AddrWidth ), + // .DataWidth ( DutCfg.AxiDataWidth ), + // .IdWidth ( $bits(axi_llc_id_t) ), + // .UserWidth ( DutCfg.AxiUserWidth ), + // .axi_req_t ( axi_llc_req_t ), + // .axi_rsp_t ( axi_llc_rsp_t ), + // .WarnUninitialized ( 0 ), + // .ClearErrOnAccess ( 1 ), + // .ApplDelay ( 0ps ), + // .AcqDelay ( 0ps ) + // ) i_dram_sim_mem ( + // .clk_i ( clk_i ), + // .rst_ni ( rst_ni ), + // .axi_req_i ( axi_llc_mst_req ), + // .axi_rsp_o ( axi_llc_mst_rsp ), + // .mon_w_valid_o ( ), + // .mon_w_addr_o ( ), + // .mon_w_data_o ( ), + // .mon_w_id_o ( ), + // .mon_w_user_o ( ), + // .mon_w_beat_count_o ( ), + // .mon_w_last_o ( ), + // .mon_r_valid_o ( ), + // .mon_r_addr_o ( ), + // .mon_r_data_o ( ), + // .mon_r_id_o ( ), + // .mon_r_user_o ( ), + // .mon_r_beat_count_o ( ), + // .mon_r_last_o ( ) + // ); + assign axi_llc_mst_rsp = '0; endmodule From 27aba0ac4bed59c8b39ad774939d0c57ed5794f2 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Sat, 21 Jun 2025 12:47:31 +0200 Subject: [PATCH 11/39] target/sim/verilator: Clean up Makefile and sim/main.cpp --- target/sim/verilator/sim/main.cpp | 18 ++++++---- .../sim/verilator/src/cheshire_soc_wrapper.sv | 10 +----- target/sim/verilator/verilator.mk | 36 +++++++++++++------ 3 files changed, 38 insertions(+), 26 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 76a24e7a..c64d12ce 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -2,7 +2,7 @@ #include // std::unique_ptr #include // common Verilator routines -#include // trace to VCD +#include // trace to VCD #include 
"Vcheshire_soc_wrapper.h" // Verilated model @@ -15,7 +15,7 @@ #define SIMULATION_RATE_CHUNK 1024 -// #define TRACE +// #define BENCHMARK bool do_exit = false; int exit_code = 0; @@ -76,11 +76,11 @@ int main(int argc, char** argv) { // "TOP" will be the hierarchical name of the module const auto top = std::make_unique(contextp.get(), "TOP"); -#ifdef TRACE +#if CHS_TRACE_VCD Verilated::traceEverOn(true); - const auto trace = std::make_unique(); + const auto trace = std::make_unique(); top->trace(trace.get(), 5); - trace->open("dump.fst"); + trace->open("dump.vcd"); #endif // Initial Inputs @@ -107,14 +107,16 @@ int main(int argc, char** argv) { top->rst_ni = 1; // Toggle Real Time Clock - if (contextp->time() < next_rtc_toggle_ps) { + if (contextp->time() >= next_rtc_toggle_ps) { top->rtc_i = !top->rtc_i; next_rtc_toggle_ps += RTC_PERIOD_PS / 2; } // JTAG I/O if (top->rst_ni) { +#ifndef BENCHMARK jtag_tick_io(*top); +#endif } } @@ -123,7 +125,7 @@ int main(int argc, char** argv) { // Evaluate model top->eval(); -#ifdef TRACE +#if CHS_TRACE_VCD trace->dump(contextp->time()); #endif @@ -139,8 +141,10 @@ int main(int argc, char** argv) { VL_PRINTF("elapsed: %lu us, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", total_elapsed_us, total_cycles_per_sec, last_cycles_per_sec); } +#ifdef BENCHMARK if (cycle == 100000) break; +#endif } } diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 9c2a217e..4ca712d9 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -38,7 +38,6 @@ module cheshire_soc_wrapper ( logic test_mode; logic [1:0] boot_mode; - logic rtc; assign test_mode = 1'b0; assign boot_mode = 2'b00; // passive @@ -46,13 +45,6 @@ module cheshire_soc_wrapper ( axi_llc_req_t axi_llc_mst_req; axi_llc_rsp_t axi_llc_mst_rsp; - logic jtag_tck; - logic jtag_trst_n; - logic jtag_tms; - logic jtag_tdi; - logic jtag_tdo; - logic jtag_tdo_oe; - 
logic uart_tx; logic uart_rx; @@ -97,7 +89,7 @@ module cheshire_soc_wrapper ( .rst_ni ( rst_ni ), .test_mode_i ( test_mode ), .boot_mode_i ( boot_mode ), - .rtc_i ( rtc ), + .rtc_i ( rtc_i ), .axi_llc_mst_req_o ( axi_llc_mst_req ), .axi_llc_mst_rsp_i ( axi_llc_mst_rsp ), .axi_ext_mst_req_i ( '0 ), diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 6b7529f8..0ea7ddf0 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -8,19 +8,34 @@ CHS_VERILATOR_DIR ?= $(CHS_ROOT)/target/sim/verilator RISCV_DBG_DIR = $(shell bender path riscv-dbg) -VERILATOR = oseda verilator +VERILATOR_PREFIX ?= oseda +VERILATOR ?= verilator -VERILATOR_OPT = -march=native -mtune=native -Wno-deprecated-experimental-coroutine # Silly Verilator warnings: these are perfectly valid and should not be warnings VERILATOR_WNO = -Wno-fatal -Wno-style \ - -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT -Wno-ASCRANGE -VERILATOR_FIX = --unroll-count 51 --unroll-stmts 1 -VERILATOR_ARGS ?= -j 0 -Wall --timing -timescale 1ns/1ns $(VERILATOR_WNO) $(VERILATOR_FIX) -O3 \ - # --trace --trace-structs --no-trace-top --trace-depth 5 + -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT -Wno-ASCRANGE +VERILATOR_ARGS ?= -j 0 -Wall $(VERILATOR_WNO) -timescale 1ns/1ps +# Verilation optimizations +VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert +# Disable common_cells assertions +VERILATOR_ARGS += -DASSERTS_OFF +# C++ Compiler Optimization + +# VERILATOR_ARGS += -CFLAGS "-march=native" +# VERILATOR_ARGS += -CFLAGS "-mtune=native" +# VERILATOR_ARGS += -CFLAGS "-O2" + +# Profiling +# generates `gmon.out` that can be processed by `gprof` and then `verilator_profcfunc` +# VERILATOR_ARGS += --prof-cfuncs --report-unoptflat + +# Tracing +# enables VCD tracing of the topmost 5 layers +VERILATOR_ARGS += --trace --trace-structs --no-trace-top --trace-depth 5 -CFLAGS "-DCHS_TRACE_VCD=1" VERILATOR_CXX_SRCS = 
$(CHS_VERILATOR_DIR)/sim/main.cpp \ - $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ - $(RISCV_DBG_DIR)/tb/remote_bitbang/sim_jtag.c + $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ + $(RISCV_DBG_DIR)/tb/remote_bitbang/sim_jtag.c $(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml $(BENDER) script verilator $(CHS_BENDER_RTL_FLAGS) > $@ @@ -28,13 +43,14 @@ $(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml echo '$(shell $(BENDER) path axi)/src/axi_sim_mem.sv' >> $@ $(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) - +cd $(CHS_VERILATOR_DIR) && $(VERILATOR) $(VERILATOR_ARGS) -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ + +cd $(CHS_VERILATOR_DIR) && $(VERILATOR_PREFIX) $(VERILATOR) $(VERILATOR_ARGS) \ + -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ --cc --exe --build --top-module cheshire_soc_wrapper $(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper @echo "#!/bin/sh" > $@ @echo 'set -eu' >> $@ @echo 'cd $$(dirname "$$0")' >> $@ - @echo 'oseda ./obj_dir/Vcheshire_soc_wrapper' >> $@ + @echo '$(VERILATOR_PREFIX) ./obj_dir/Vcheshire_soc_wrapper' >> $@ @chmod +x $@ From c31c1e2ff21f7323f73be8be8ffccbd4f96aec87 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Sat, 21 Jun 2025 13:44:15 +0200 Subject: [PATCH 12/39] target/sim/verilator: Add UART receiver --- Bender.yml | 1 + target/sim/verilator/sim/main.cpp | 17 ++- .../sim/verilator/src/cheshire_soc_wrapper.sv | 23 +++- target/sim/verilator/src/verilator_uart_rx.sv | 122 ++++++++++++++++++ target/sim/verilator/verilator.mk | 2 +- 5 files changed, 161 insertions(+), 4 deletions(-) create mode 100644 target/sim/verilator/src/verilator_uart_rx.sv diff --git a/Bender.yml b/Bender.yml index 00c7acce..fd5bf3f0 100644 --- a/Bender.yml +++ b/Bender.yml @@ -56,6 +56,7 @@ sources: - target: any(verilator) files: + - 
target/sim/verilator/src/verilator_uart_rx.sv - target/sim/verilator/src/cheshire_soc_wrapper.sv - target: all(fpga, xilinx) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index c64d12ce..3a186444 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -13,7 +13,7 @@ #define RST_CYCLES 5 -#define SIMULATION_RATE_CHUNK 1024 +#define SIMULATION_RATE_CHUNK 10000 // #define BENCHMARK @@ -39,6 +39,15 @@ static void jtag_tick_io(Vcheshire_soc_wrapper& top) { top.jtag_trst_ni = trst_n; } +static void handle_uart(char data) { + static std::string uart_buffer; + uart_buffer.push_back(data); + + if (data == '\r' || data == '\n') { + VL_PRINTF("[UART] %s", uart_buffer.c_str()); + uart_buffer.clear(); + } +} int main(int argc, char** argv) { // This is a more complicated example, please also see the simpler examples/make_hello_c. @@ -131,6 +140,10 @@ int main(int argc, char** argv) { // Monitoring (posedge clk_i) if (top->clk_i) { + if (top->uart_data_valid_o) { + handle_uart(top->uart_data_o); + } + if (cycle % SIMULATION_RATE_CHUNK == 0) { auto current = std::chrono::high_resolution_clock::now(); auto total_elapsed_us = std::chrono::duration_cast(current - start).count(); @@ -142,7 +155,7 @@ int main(int argc, char** argv) { total_elapsed_us, total_cycles_per_sec, last_cycles_per_sec); } #ifdef BENCHMARK - if (cycle == 100000) + if (cycle == 1000000) break; #endif } diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 4ca712d9..3fd06e64 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -15,7 +15,11 @@ module cheshire_soc_wrapper ( input logic jtag_tms_i, input logic jtag_tdi_i, output logic jtag_tdo_o, - output logic jtag_tdo_oe_o + output logic jtag_tdo_oe_o, + + // UART + output logic uart_data_valid_o, + output logic [7:0] uart_data_o ); `include "cheshire/typedef.svh" 
@@ -192,4 +196,21 @@ module cheshire_soc_wrapper ( // ); assign axi_llc_mst_rsp = '0; + //////////// + // UART // + //////////// + + verilator_uart_rx #( + .BaudPeriodCycles(1000 * 1000 * 1000 / 115200 / 5) // 1 second / baud rate / clock period + ) i_uart_rx ( + .clk_i, + .rst_ni, + .uart_rx_i ( uart_tx ), + .data_valid_o ( uart_data_valid_o ), + .data_o ( uart_data_o ) + ); + + // no UART input into DUT + assign uart_rx = 1'b0; + endmodule diff --git a/target/sim/verilator/src/verilator_uart_rx.sv b/target/sim/verilator/src/verilator_uart_rx.sv new file mode 100644 index 00000000..16e47633 --- /dev/null +++ b/target/sim/verilator/src/verilator_uart_rx.sv @@ -0,0 +1,122 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 +// +// Max Wipfli + +`include "common_cells/registers.svh" + +module verilator_uart_rx #( + parameter int unsigned BaudPeriodCycles = 1736, + localparam int unsigned DataBits = 8 +) ( + input logic clk_i, + input logic rst_ni, + input logic uart_rx_i, + + output logic data_valid_o, + output logic [DataBits-1:0] data_o +); + + localparam int unsigned CounterWidth = $clog2(BaudPeriodCycles) + 1; + + logic start; + logic sample; + + ////////////////// + // baud clock // + ////////////////// + + + logic [CounterWidth-1:0] baud_counter_q, baud_counter_d; + `FF(baud_counter_q, baud_counter_d, '0); + + always_comb begin + baud_counter_d = baud_counter_q + 1'b1; + sample = 1'b0; + + if (start) begin + baud_counter_d = BaudPeriodCycles / 2; + end else if (baud_counter_d == BaudPeriodCycles) begin + baud_counter_d = '0; + sample = 1'b1; + end + end + + ////////////////////// + // shift register // + ////////////////////// + + logic [DataBits-1:0] data_sr_q, data_sr_d; + `FF(data_sr_q, data_sr_d, '0); + + logic data_sr_push; + + always_comb begin + data_sr_d = data_sr_q; + if (data_sr_push) begin + data_sr_d = {uart_rx_i, 
data_sr_q[DataBits-1:1]}; + end + end + + assign data_o = data_sr_q; + + ///////////////////// + // state machine // + ///////////////////// + + typedef enum logic [1:0] { + StIdle, + StStartBit, + StDataBit, + StStopBit + } state_e; + + state_e state_q, state_d; + `FF(state_q, state_d, StIdle); + + logic [$clog2(DataBits):0] bit_counter_q, bit_counter_d; + `FF(bit_counter_q, bit_counter_d, '0); + + always_comb begin + state_d = state_q; + start = 1'b0; + data_sr_push = 1'b0; + bit_counter_d = bit_counter_q; + data_valid_o = 1'b0; + + unique case (state_q) + StIdle: begin + if (uart_rx_i == 1'b0) begin + state_d = StStartBit; + start = 1'b1; + bit_counter_d = 1'b0; + end + end + StStartBit: begin + if (sample) begin + state_d = StDataBit; + end + end + StDataBit: begin + if (sample) begin + data_sr_push = 1'b1; + bit_counter_d = bit_counter_q + 1'b1; + if (bit_counter_d == DataBits) begin + state_d = StStopBit; + end + end + end + StStopBit: begin + if (sample) begin + state_d = StIdle; + data_valid_o = 1'b1; + end + end + default: begin + state_d = StIdle; + end + endcase + end + +endmodule diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 0ea7ddf0..3f3da45b 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -31,7 +31,7 @@ VERILATOR_ARGS += -DASSERTS_OFF # Tracing # enables VCD tracing of the topmost 5 layers -VERILATOR_ARGS += --trace --trace-structs --no-trace-top --trace-depth 5 -CFLAGS "-DCHS_TRACE_VCD=1" +# VERILATOR_ARGS += --trace --trace-structs --no-trace-top --trace-depth 5 -CFLAGS "-DCHS_TRACE_VCD=1" VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ From 649e64c40e65034840b2426e1d91826bec32bbba Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Sat, 21 Jun 2025 13:44:59 +0200 Subject: [PATCH 13/39] target/sim/verilator: Minor C++ cleanup --- target/sim/verilator/sim/main.cpp | 9 ++++++++- 1 file changed, 8 
insertions(+), 1 deletion(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 3a186444..51a126c8 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -25,6 +25,13 @@ extern int jtag_tick(int port, unsigned char *jtag_TCK, unsigned char *jtag_TMS, static void jtag_tick_io(Vcheshire_soc_wrapper& top) { + static int count = 0; + if (count < 10) { + count++; + return; + } + count = 0; + unsigned char tck, tms, tdi, trst_n; int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, top.jtag_tdo_o); if (ret) { @@ -123,7 +130,7 @@ int main(int argc, char** argv) { // JTAG I/O if (top->rst_ni) { -#ifndef BENCHMARK +#if 0 jtag_tick_io(*top); #endif } From f021065ed68819787dc63309e0164eb4e2e63da4 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Mon, 23 Jun 2025 11:06:29 +0200 Subject: [PATCH 14/39] target/sim/verilator: Add working serial link preloading --- target/sim/src/elfloader.cpp | 8 +- target/sim/verilator/sim/Mem64Master.h | 100 +++++++++++ target/sim/verilator/sim/main.cpp | 156 +++++++++++++----- .../sim/verilator/src/cheshire_soc_wrapper.sv | 106 ++++++++++-- target/sim/verilator/verilator.mk | 4 +- 5 files changed, 317 insertions(+), 57 deletions(-) create mode 100644 target/sim/verilator/sim/Mem64Master.h diff --git a/target/sim/src/elfloader.cpp b/target/sim/src/elfloader.cpp index b02860c9..b273f566 100644 --- a/target/sim/src/elfloader.cpp +++ b/target/sim/src/elfloader.cpp @@ -158,6 +158,7 @@ int section_index = 0; extern "C" { char get_entry(long long *entry_ret); char get_section(long long *address_ret, long long *len_ret); + char read_section_raw(long long address, char *buf, long long len); char read_section(long long address, const svOpenArrayHandle buffer, long long len); char read_elf(const char *filename); } @@ -199,7 +200,12 @@ extern "C" char read_section(long long address, const svOpenArrayHandle buffer, { // get actual pointer char *buf = (char *) svGetArrayPtr(buffer); - + + 
return read_section_raw(address, buf, len); +} + +extern "C" char read_section_raw(long long address, char *buf, long long len) +{ // check that the address points to a section if (!mems.count(address)) { printf("[ELF] ERROR: No section found for address %p\n", address); diff --git a/target/sim/verilator/sim/Mem64Master.h b/target/sim/verilator/sim/Mem64Master.h new file mode 100644 index 00000000..1d265366 --- /dev/null +++ b/target/sim/verilator/sim/Mem64Master.h @@ -0,0 +1,100 @@ +// Copyright 2025 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Max Wipfli + +#include +#include +#include +#include +#include + +// 64-bit memory interface master (currently only supporting aligned 64-bit writes) +class Mem64Master { +private: + struct WriteTransaction { + uint64_t addr; + uint64_t data; + }; + +public: + Mem64Master( + uint8_t* mem_req_o, + uint64_t* mem_addr_o, + uint8_t* mem_we_o, + uint64_t* mem_wdata_o, + uint8_t* mem_be_o, + uint8_t* mem_gnt_i + ) : m_mem_req_o(mem_req_o) + , m_mem_addr_o(mem_addr_o) + , m_mem_we_o(mem_we_o) + , m_mem_wdata_o(mem_wdata_o) + , m_mem_be_o(mem_be_o) + , m_mem_gnt_i(mem_gnt_i) { + // zero all outputs + *m_mem_req_o = 0; + *m_mem_addr_o = 0; + *m_mem_we_o = 0; + *m_mem_wdata_o = 0; + *m_mem_be_o = 0; + } + + void write(uint64_t addr, uint64_t data) { + m_write_queue.push({ addr, data }); + } + + void write_chunk(uint64_t addr, void *data, size_t bytes) { + assert(addr % sizeof(uint64_t) == 0 && "unaligned writes not yet supported"); + assert(bytes % sizeof(uint64_t) == 0 && "unaligned write size not yet supported"); + + for (size_t i = 0; i < bytes; i += sizeof(uint64_t)) { + uint64_t word; + memcpy(&word, (char *)data + i, sizeof(uint64_t)); + write(addr + i, word); + } + } + + // handle before @(posedge clk): reads signals, does not modify signals + void handle_before() { + m_handshake = (*m_mem_req_o && 
*m_mem_gnt_i); + } + + // handle after @(posedge clk): modifies signals for next cycle + void handle_after() { + if (m_handshake) { + // current request was handled + m_handshake = false; + *m_mem_req_o = 0; + } + + if (!*m_mem_req_o && !m_write_queue.empty()) { + // apply new request + const auto& transaction = m_write_queue.front(); + *m_mem_req_o = 1; + *m_mem_addr_o = transaction.addr; + *m_mem_we_o = 1; + *m_mem_wdata_o = transaction.data; + *m_mem_be_o = 0xff; + m_write_queue.pop(); + if (m_write_queue.empty()) { + printf("Mem64Master: emptied write queue\n"); + } + } + } + +private: + // holds pairs of (addr, data) + std::queue m_write_queue; + + // handshake detected + bool m_handshake; + + // interface + uint8_t* m_mem_req_o; + uint64_t* m_mem_addr_o; + uint8_t* m_mem_we_o; + uint64_t* m_mem_wdata_o; + uint8_t* m_mem_be_o; + uint8_t* m_mem_gnt_i; +}; diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 51a126c8..ea296ca0 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -2,10 +2,14 @@ #include // std::unique_ptr #include // common Verilator routines +#if VM_TRACE #include // trace to VCD +#endif #include "Vcheshire_soc_wrapper.h" // Verilated model +#include "Mem64Master.h" + // clock periods and time step in pico seconds #define CLK_PERIOD_PS 5000 #define RTC_PERIOD_PS 30520000 @@ -17,6 +21,16 @@ // #define BENCHMARK +extern "C" { + char get_entry(long long *entry_ret); + char get_section(long long *address_ret, long long *len_ret); + char read_section_raw(long long address, char *buf, long long len); + char read_elf(const char *filename); +} + +std::unique_ptr contextp; +std::unique_ptr topp; +std::unique_ptr mem_master; bool do_exit = false; int exit_code = 0; @@ -24,7 +38,7 @@ extern int jtag_tick(int port, unsigned char *jtag_TCK, unsigned char *jtag_TMS, unsigned char *jtag_TDI, unsigned char *jtag_TRSTn, unsigned char jtag_TDO); -static void
jtag_tick_io(Vcheshire_soc_wrapper& top) { +static void jtag_tick_io() { static int count = 0; if (count < 10) { count++; @@ -33,46 +47,74 @@ static void jtag_tick_io(Vcheshire_soc_wrapper& top) { count = 0; unsigned char tck, tms, tdi, trst_n; - int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, top.jtag_tdo_o); + int ret = jtag_tick(3335, &tck, &tms, &tdi, &trst_n, topp->jtag_tdo_o); if (ret) { do_exit = true; exit_code = ret >> 1; return; } - top.jtag_tck_i = tck; - top.jtag_tms_i = tms; - top.jtag_tdi_i = tdi; - top.jtag_trst_ni = trst_n; + topp->jtag_tck_i = tck; + topp->jtag_tms_i = tms; + topp->jtag_tdi_i = tdi; + topp->jtag_trst_ni = trst_n; } static void handle_uart(char data) { static std::string uart_buffer; uart_buffer.push_back(data); - if (data == '\r' || data == '\n') { + if (data == '\n') { VL_PRINTF("[UART] %s", uart_buffer.c_str()); uart_buffer.clear(); } } -int main(int argc, char** argv) { - // This is a more complicated example, please also see the simpler examples/make_hello_c. 
+static bool elf_preload_open(const char *filename) { + char ret = read_elf(filename); + if (ret != 0) { + VL_PRINTF("[ELF] failed to read ELF: %d\n", ret); + return false; + } + return true; +} + +static void elf_preload_write_enqueue() { + long long section_address, section_len; + size_t num_writes = 0; + + while (get_section(&section_address, &section_len)) { + VL_PRINTF("[ELF] loading section at 0x%llx (%lld bytes)\n", section_address, section_len); + + char *buf = (char *)calloc(section_len + sizeof(uint64_t), 1); + read_section_raw(section_address, buf, section_len); + + for (size_t i = 0; i < section_len; i += sizeof(uint64_t)) { + mem_master->write(section_address + i, *(uint64_t *)(buf + i)); + num_writes++; + } + + free(buf); + } + + long long entry; + get_entry(&entry); + // write entrypoint + mem_master->write(0x03000000, entry); + num_writes++; + // set start bit (read by boot ROM) + mem_master->write(0x03000008, 2); + num_writes++; + VL_PRINTF("[ELF] enqueued %zu memory writes\n", num_writes); +} + +int main(int argc, char** argv) { // Create logs/ directory in case we have traces to put under it Verilated::mkdir("logs"); // Construct a VerilatedContext to hold simulation time, etc. - // Multiple modules (made later below with Vtop) may share the same - // context to share time, or modules may have different contexts if - // they should be independent from each other. - - // Using unique_ptr is similar to - // "VerilatedContext* contextp = new VerilatedContext" then deleting at end.
- const auto contextp = std::make_unique<VerilatedContext>(); - // const std::unique_ptr<VerilatedContext> contextp{new VerilatedContext}; - // Do not instead make Vtop as a file-scope static variable, as the - // "C++ static initialization order fiasco" may cause a crash + contextp = std::make_unique<VerilatedContext>(); // Set debug level, 0 is off, 9 is highest presently used // May be overridden by commandArgs argument parsing @@ -90,65 +132,99 @@ int main(int argc, char** argv) { contextp->commandArgs(argc, argv); // "TOP" will be the hierarchical name of the module - const auto top = std::make_unique<Vcheshire_soc_wrapper>(contextp.get(), "TOP"); + topp = std::make_unique<Vcheshire_soc_wrapper>(contextp.get(), "TOP"); -#if CHS_TRACE_VCD +#if VM_TRACE Verilated::traceEverOn(true); const auto trace = std::make_unique<VerilatedVcdC>(); - top->trace(trace.get(), 5); + topp->trace(trace.get(), 5); trace->open("dump.vcd"); #endif // Initial Inputs - top->clk_i = 1; - top->rtc_i = 1; - top->rst_ni = 0; + topp->clk_i = 1; + topp->rtc_i = 1; + topp->rst_ni = 1; auto start = std::chrono::high_resolution_clock::now(); auto last = start; uint64_t cycle = 0; uint64_t next_rtc_toggle_ps = 0; + mem_master = std::make_unique<Mem64Master>( + &topp->slink_mem_req_i, + &topp->slink_mem_addr_i, + &topp->slink_mem_we_i, + &topp->slink_mem_wdata_i, + &topp->slink_mem_be_i, + &topp->slink_mem_gnt_o + ); + + // ELF preloading + const char *filename = "../../../sw/tests/helloworld.spm.elf"; + if (!elf_preload_open(filename)) + return 1; + // Simulate until $finish while (!contextp->gotFinish() && !do_exit) { // Toggle Clock - top->clk_i = !top->clk_i; + topp->clk_i = !topp->clk_i; // Apply Inputs (negedge clk_i) - if (!top->clk_i) { - cycle++; + if (!topp->clk_i) { + // Apply Reset + if (cycle == 1) + topp->rst_ni = 0; // Release Reset - if (cycle == RST_CYCLES) - top->rst_ni = 1; + if (cycle == RST_CYCLES + 1) + topp->rst_ni = 1; // Toggle Real Time Clock if (contextp->time() >= next_rtc_toggle_ps) { - top->rtc_i = !top->rtc_i; + topp->rtc_i = !topp->rtc_i; next_rtc_toggle_ps += RTC_PERIOD_PS / 2; } - // 
JTAG I/O - if (top->rst_ni) { + // TODO: This is determined experimentally. + // We should rather poll until the SPM has been configured properly. + if (cycle == 2000) + elf_preload_write_enqueue(); + + // I/O + if (cycle > RST_CYCLES && topp->rst_ni) { #if 0 - jtag_tick_io(*top); + jtag_tick_io(); #endif } } + contextp->timeInc(TIME_STEP_PS); + // Monitor Synchronous Outputs: just before @(posedge clk_i) + if (topp->clk_i) { + mem_master->handle_before(); + } + // Evaluate model - top->eval(); + topp->eval(); -#if CHS_TRACE_VCD + // Apply Synchronous Inputs: just after @(posedge clk_i) + if (topp->clk_i) { + mem_master->handle_after(); + } + +#if VM_TRACE trace->dump(contextp->time()); #endif // Monitoring (posedge clk_i) - if (top->clk_i) { - if (top->uart_data_valid_o) { - handle_uart(top->uart_data_o); + if (topp->clk_i) { + cycle++; + + if (topp->uart_data_valid_o) { + handle_uart(topp->uart_data_o); } if (cycle % SIMULATION_RATE_CHUNK == 0) { @@ -169,7 +245,7 @@ int main(int argc, char** argv) { } // Final model cleanup - top->final(); + topp->final(); #ifdef TRACE trace->close(); diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 3fd06e64..7800d1cf 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -4,7 +4,14 @@ // // Max Wipfli -module cheshire_soc_wrapper ( +function automatic cheshire_pkg::cheshire_cfg_t gen_cheshire_cfg(); + cheshire_pkg::cheshire_cfg_t ret = cheshire_pkg::DefaultCfg; + return ret; +endfunction + +module cheshire_soc_wrapper # ( + parameter cheshire_pkg::cheshire_cfg_t DutCfg = gen_cheshire_cfg() +) ( input logic clk_i, input logic rtc_i, input logic rst_ni, @@ -19,21 +26,20 @@ module cheshire_soc_wrapper ( // UART output logic uart_data_valid_o, - output logic [7:0] uart_data_o -); + output logic [7:0] uart_data_o, - `include "cheshire/typedef.svh" + // Memory Interface (Serial Link) + input logic 
slink_mem_req_i, + input logic [DutCfg.AddrWidth-1:0] slink_mem_addr_i, + input logic slink_mem_we_i, + input logic [DutCfg.AxiDataWidth-1:0] slink_mem_wdata_i, + input logic [DutCfg.AxiDataWidth/8-1:0] slink_mem_be_i, + output logic slink_mem_gnt_o +); import cheshire_pkg::*; - function automatic cheshire_pkg::cheshire_cfg_t gen_cheshire_cfg(); - cheshire_pkg::cheshire_cfg_t ret = cheshire_pkg::DefaultCfg; - ret.SerialLink = 1'b0; - return ret; - endfunction - - localparam cheshire_cfg_t DutCfg = gen_cheshire_cfg(); - + `include "cheshire/typedef.svh" `CHESHIRE_TYPEDEF_ALL(, DutCfg) /////////// @@ -74,8 +80,6 @@ module cheshire_soc_wrapper ( logic [SlinkNumChan-1:0] slink_rcv_clk_o; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_i; logic [SlinkNumChan-1:0][SlinkNumLanes-1:0] slink_o; - assign slink_rcv_clk_i = '1; - assign slink_i = '1; cheshire_soc #( .Cfg ( DutCfg ), @@ -213,4 +217,78 @@ module cheshire_soc_wrapper ( // no UART input into DUT assign uart_rx = 1'b0; + /////////////////// + // Serial Link // + /////////////////// + + axi_mst_req_t slink_axi_mst_req; + axi_mst_rsp_t slink_axi_mst_rsp; + + // Mirror instance of serial link, reflecting another chip + serial_link #( + .axi_req_t ( axi_mst_req_t ), + .axi_rsp_t ( axi_mst_rsp_t ), + .cfg_req_t ( reg_req_t ), + .cfg_rsp_t ( reg_rsp_t ), + .aw_chan_t ( axi_mst_aw_chan_t ), + .ar_chan_t ( axi_mst_ar_chan_t ), + .r_chan_t ( axi_mst_r_chan_t ), + .w_chan_t ( axi_mst_w_chan_t ), + .b_chan_t ( axi_mst_b_chan_t ), + .hw2reg_t ( serial_link_single_channel_reg_pkg::serial_link_single_channel_hw2reg_t ), + .reg2hw_t ( serial_link_single_channel_reg_pkg::serial_link_single_channel_reg2hw_t ), + .NumChannels ( SlinkNumChan ), + .NumLanes ( SlinkNumLanes ), + .MaxClkDiv ( SlinkMaxClkDiv ) + ) i_serial_link ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .clk_sl_i ( clk_i ), + .rst_sl_ni ( rst_ni ), + .clk_reg_i ( clk_i ), + .rst_reg_ni ( rst_ni ), + .testmode_i ( test_mode ), + .axi_in_req_i ( slink_axi_mst_req ), + 
.axi_in_rsp_o ( slink_axi_mst_rsp ), + .axi_out_req_o ( ), + .axi_out_rsp_i ( '0 ), + .cfg_req_i ( '0 ), + .cfg_rsp_o ( ), + .ddr_rcv_clk_i ( slink_rcv_clk_o ), + .ddr_rcv_clk_o ( slink_rcv_clk_i ), + .ddr_i ( slink_o ), + .ddr_o ( slink_i ), + .isolated_i ( '0 ), + .isolate_o ( ), + .clk_ena_o ( ), + .reset_no ( ) + ); + + // Adapter to memory interface for easier C++ handling + axi_from_mem #( + .MemAddrWidth ( DutCfg.AddrWidth ), + .AxiAddrWidth ( DutCfg.AddrWidth ), + .DataWidth ( DutCfg.AxiDataWidth ), + .MaxRequests ( 8 ), + .AxiProt ( 3'b000 ), + .axi_req_t ( axi_mst_req_t ), + .axi_rsp_t ( axi_mst_rsp_t ) + ) i_serial_link_mem ( + .clk_i, + .rst_ni, + .mem_req_i ( slink_mem_req_i ), + .mem_addr_i ( slink_mem_addr_i ), + .mem_we_i ( slink_mem_we_i ), + .mem_wdata_i ( slink_mem_wdata_i ), + .mem_be_i ( slink_mem_be_i ), + .mem_gnt_o ( slink_mem_gnt_o ), + .mem_rsp_valid_o ( ), + .mem_rsp_rdata_o ( ), + .mem_rsp_error_o ( ), + .slv_aw_cache_i ( 4'b0000 ), + .slv_ar_cache_i ( 4'b0000 ), + .axi_req_o ( slink_axi_mst_req ), + .axi_rsp_i ( slink_axi_mst_rsp ) + ); + endmodule diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 3f3da45b..1d65cf9d 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -20,7 +20,6 @@ VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions VERILATOR_ARGS += -DASSERTS_OFF # C++ Compiler Optimization - # VERILATOR_ARGS += -CFLAGS "-march=native" # VERILATOR_ARGS += -CFLAGS "-mtune=native" # VERILATOR_ARGS += -CFLAGS "-O2" @@ -31,9 +30,10 @@ VERILATOR_ARGS += -DASSERTS_OFF # Tracing # enables VCD tracing of the topmost 5 layers -# VERILATOR_ARGS += --trace --trace-structs --no-trace-top --trace-depth 5 -CFLAGS "-DCHS_TRACE_VCD=1" +# VERILATOR_ARGS += --trace --trace-structs --no-trace-top --trace-depth 5 VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ + $(CHS_ROOT)/target/sim/src/elfloader.cpp \ 
$(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ $(RISCV_DBG_DIR)/tb/remote_bitbang/sim_jtag.c From 3297a5d99eaf4f654c098b09dec3be269aca1d36 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Mon, 23 Jun 2025 12:58:14 +0200 Subject: [PATCH 15/39] target/sim/verilator: Improve reset handling --- target/sim/verilator/sim/main.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index ea296ca0..eb2b4767 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -124,8 +124,10 @@ int main(int argc, char** argv) { // May be overridden by commandArgs argument parsing contextp->randReset(2); +#if VM_TRACE // Verilator must compute traced signals contextp->traceEverOn(true); +#endif // Pass arguments so Verilated code can see them, e.g. $value$plusargs // This needs to be called before you create any model @@ -150,6 +152,7 @@ int main(int argc, char** argv) { auto last = start; uint64_t cycle = 0; uint64_t next_rtc_toggle_ps = 0; + bool reset_done = false; mem_master = std::make_unique( &topp->slink_mem_req_i, @@ -172,13 +175,16 @@ int main(int argc, char** argv) { // Apply Inputs (negedge clk_i) if (!topp->clk_i) { - // Apply Reset - if (cycle == 1) + if (cycle == 1) { + // Apply Reset topp->rst_ni = 0; + } - // Release Reset - if (cycle == RST_CYCLES + 1) + if (cycle == RST_CYCLES + 1) { + // Release Reset topp->rst_ni = 1; + reset_done = true; + } // Toggle Real Time Clock if (contextp->time() >= next_rtc_toggle_ps) { @@ -192,7 +198,7 @@ int main(int argc, char** argv) { elf_preload_write_enqueue(); // I/O - if (cycle > RST_CYCLES && topp->rst_ni) { + if (reset_done) { #if 0 jtag_tick_io(); #endif @@ -203,7 +209,7 @@ int main(int argc, char** argv) { contextp->timeInc(TIME_STEP_PS); // Monitor Synchronous Outputs: just before @(posedge clk_i) - if (topp->clk_i) { + if (reset_done && topp->clk_i) { 
mem_master->handle_before(); } @@ -211,7 +217,7 @@ int main(int argc, char** argv) { topp->eval(); // Apply Synchronous Inputs: just after @(posedge clk_i) - if (topp->clk_i) { + if (reset_done && topp->clk_i) { mem_master->handle_after(); } @@ -234,8 +240,8 @@ int main(int argc, char** argv) { last = current; auto total_cycles_per_sec = 1000000.0 * cycle / total_elapsed_us; auto last_cycles_per_sec = 1000000.0 * SIMULATION_RATE_CHUNK / last_elapsed_us; - VL_PRINTF("elapsed: %lu us, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", - total_elapsed_us, total_cycles_per_sec, last_cycles_per_sec); + VL_PRINTF("elapsed: %lu us, %lu cycles, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", + total_elapsed_us, cycle, total_cycles_per_sec, last_cycles_per_sec); } #ifdef BENCHMARK if (cycle == 1000000) @@ -247,7 +253,7 @@ int main(int argc, char** argv) { // Final model cleanup topp->final(); -#ifdef TRACE +#if VM_TRACE trace->close(); #endif From ea56ba4089d04f751abd7af1180d050389453c9d Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Mon, 7 Jul 2025 12:23:47 +0200 Subject: [PATCH 16/39] target/sim/verilator: Modify status print --- target/sim/verilator/sim/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index eb2b4767..60048f7f 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -240,8 +240,8 @@ int main(int argc, char** argv) { last = current; auto total_cycles_per_sec = 1000000.0 * cycle / total_elapsed_us; auto last_cycles_per_sec = 1000000.0 * SIMULATION_RATE_CHUNK / last_elapsed_us; - VL_PRINTF("elapsed: %lu us, %lu cycles, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", - total_elapsed_us, cycle, total_cycles_per_sec, last_cycles_per_sec); + VL_PRINTF("elapsed: %.3f sec, %lu cycles, %.1f cycles/sec (total), %.1f cycles/sec (last)\n", + total_elapsed_us / 1e6, cycle, total_cycles_per_sec, last_cycles_per_sec); } #ifdef 
BENCHMARK if (cycle == 1000000) From 73804873bab7644817d72f559613d60c0c1ed3e3 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Mon, 7 Jul 2025 12:23:58 +0200 Subject: [PATCH 17/39] target/sim/verilator: Enable multi-threading --- target/sim/verilator/verilator.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 1d65cf9d..4a6cedc4 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -19,6 +19,8 @@ VERILATOR_ARGS ?= -j 0 -Wall $(VERILATOR_WNO) -timescale 1ns/1ps VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions VERILATOR_ARGS += -DASSERTS_OFF +# multithreading +VERILATOR_ARGS += --threads 12 # C++ Compiler Optimization # VERILATOR_ARGS += -CFLAGS "-march=native" # VERILATOR_ARGS += -CFLAGS "-mtune=native" From ae5b05a630b630e3e3868209efc45fac3430e6a6 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Mon, 7 Jul 2025 13:23:32 +0200 Subject: [PATCH 18/39] target/sim/verilator: Add support for SW exit codes via DDR link --- target/sim/verilator/sim/Mem64Master.h | 91 ++++++++++++++++--- target/sim/verilator/sim/main.cpp | 33 ++++++- .../sim/verilator/src/cheshire_soc_wrapper.sv | 30 +++--- 3 files changed, 126 insertions(+), 28 deletions(-) diff --git a/target/sim/verilator/sim/Mem64Master.h b/target/sim/verilator/sim/Mem64Master.h index 1d265366..c219e242 100644 --- a/target/sim/verilator/sim/Mem64Master.h +++ b/target/sim/verilator/sim/Mem64Master.h @@ -8,12 +8,28 @@ #include #include #include +#include #include // 64-bit memory interface master (currently only supporting aligned 64-bit writes) class Mem64Master { private: - struct WriteTransaction { + struct WriteRequest { + uint64_t addr; + uint64_t data; + }; + + struct ReadRequest { + uint64_t addr; + }; + + struct InflightRequest { + uint64_t addr; + bool is_write; + }; + +public: + struct ReadResponse { uint64_t addr; uint64_t data; }; @@ -25,13 +41,17 
@@ class Mem64Master { uint8_t* mem_we_o, uint64_t* mem_wdata_o, uint8_t* mem_be_o, - uint8_t* mem_gnt_i + uint8_t* mem_gnt_i, + uint8_t* mem_rsp_valid_i, + uint64_t* mem_rsp_rdata_i ) : m_mem_req_o(mem_req_o) , m_mem_addr_o(mem_addr_o) , m_mem_we_o(mem_we_o) , m_mem_wdata_o(mem_wdata_o) , m_mem_be_o(mem_be_o) - , m_mem_gnt_i(mem_gnt_i) { + , m_mem_gnt_i(mem_gnt_i) + , m_mem_rsp_valid_i(mem_rsp_valid_i) + , m_mem_rsp_rdata_i(mem_rsp_rdata_i) { // zero all outputs *m_mem_req_o = 0; *m_mem_addr_o = 0; @@ -41,7 +61,7 @@ class Mem64Master { } void write(uint64_t addr, uint64_t data) { - m_write_queue.push({ addr, data }); + m_write_request_queue.push({ addr, data }); } void write_chunk(uint64_t addr, void *data, size_t bytes) { @@ -55,9 +75,34 @@ class Mem64Master { } } + void read(uint64_t addr) { + m_read_request_queue.push({ addr }); + } + + std::optional<ReadResponse> get_read_response() { + if (m_read_response_queue.empty()) + return {}; + auto response = m_read_response_queue.front(); + m_read_response_queue.pop(); + return response; + } + // handle before @(posedge clk): reads signals, does not modify signals void handle_before() { + // request channel m_handshake = (*m_mem_req_o && *m_mem_gnt_i); + + // response channel + if (*m_mem_rsp_valid_i) { + const auto& request = m_inflight_request_queue.front(); + if (request.is_write) { + // no response needed + } else { + // push to read response queue (now including response data) + m_read_response_queue.push({ request.addr, *m_mem_rsp_rdata_i }); + } + m_inflight_request_queue.pop(); + } } // handle before @(posedge clk): modifies signals for next cycle @@ -68,24 +113,42 @@ class Mem64Master { *m_mem_req_o = 0; } - if (!*m_mem_req_o && !m_write_queue.empty()) { - // apply new request - const auto& transaction = m_write_queue.front(); + if (!*m_mem_req_o && !m_write_request_queue.empty()) { + // apply new write request + const auto& request = m_write_request_queue.front(); *m_mem_req_o = 1; - *m_mem_addr_o = transaction.addr; + 
*m_mem_addr_o = request.addr; *m_mem_we_o = 1; - *m_mem_wdata_o = transaction.data; + *m_mem_wdata_o = request.data; *m_mem_be_o = 0xff; - m_write_queue.pop(); - if (m_write_queue.empty()) { + + m_inflight_request_queue.push({ request.addr, true }); + m_write_request_queue.pop(); + + if (m_write_request_queue.empty()) { printf("Mem64Master: emptied write queue\n"); } } + + if (!*m_mem_req_o && !m_read_request_queue.empty()) { + // apply new read request + const auto& request = m_read_request_queue.front(); + *m_mem_req_o = 1; + *m_mem_addr_o = request.addr; + *m_mem_we_o = 0; + *m_mem_wdata_o = 0; + *m_mem_be_o = 0xff; + + m_inflight_request_queue.push({ request.addr, false }); + m_read_request_queue.pop(); + } } private: - // holds pairs of (addr, data) - std::queue<WriteTransaction> m_write_queue; + std::queue<WriteRequest> m_write_request_queue; + std::queue<ReadRequest> m_read_request_queue; + std::queue<InflightRequest> m_inflight_request_queue; + std::queue<ReadResponse> m_read_response_queue; // handshake detected bool m_handshake; @@ -97,4 +160,6 @@ class Mem64Master { uint64_t* m_mem_wdata_o; uint8_t* m_mem_be_o; uint8_t* m_mem_gnt_i; + uint8_t* m_mem_rsp_valid_i; + uint64_t* m_mem_rsp_rdata_i; }; diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index 60048f7f..c46c10eb 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -109,6 +109,34 @@ static void elf_preload_write_enqueue() { VL_PRINTF("[ELF] enqueued %zu memory writes\n", num_writes); } +static void poll_for_exit() { + static bool request_inflight = false; + static uint64_t idle_cycles = 0; + + if (request_inflight) { + auto maybe_response = mem_master->get_read_response(); + if (maybe_response) { + auto data = maybe_response->data; + + request_inflight = false; + idle_cycles = 0; + if (data & 0x1) { + do_exit = true; + exit_code = data >> 1; + VL_PRINTF("[EXIT] received exit code from software: %d\n", exit_code); + } + } + } else { + idle_cycles++; + + if (idle_cycles >= 1000) { + 
mem_master->read(0x03000008); + request_inflight = true; + } + } + +} + int main(int argc, char** argv) { // Create logs/ directory in case we have traces to put under it Verilated::mkdir("logs"); @@ -160,7 +188,9 @@ int main(int argc, char** argv) { &topp->slink_mem_we_i, &topp->slink_mem_wdata_i, &topp->slink_mem_be_i, - &topp->slink_mem_gnt_o + &topp->slink_mem_gnt_o, + &topp->slink_mem_rsp_valid_o, + &topp->slink_mem_rsp_rdata_o ); // ELF preloading @@ -199,6 +229,7 @@ int main(int argc, char** argv) { // I/O if (reset_done) { + poll_for_exit(); #if 0 jtag_tick_io(); #endif diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 7800d1cf..a07736d1 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -34,7 +34,9 @@ module cheshire_soc_wrapper # ( input logic slink_mem_we_i, input logic [DutCfg.AxiDataWidth-1:0] slink_mem_wdata_i, input logic [DutCfg.AxiDataWidth/8-1:0] slink_mem_be_i, - output logic slink_mem_gnt_o + output logic slink_mem_gnt_o, + output logic slink_mem_rsp_valid_o, + output logic [DutCfg.AxiDataWidth-1:0] slink_mem_rsp_rdata_o ); import cheshire_pkg::*; @@ -276,19 +278,19 @@ module cheshire_soc_wrapper # ( ) i_serial_link_mem ( .clk_i, .rst_ni, - .mem_req_i ( slink_mem_req_i ), - .mem_addr_i ( slink_mem_addr_i ), - .mem_we_i ( slink_mem_we_i ), - .mem_wdata_i ( slink_mem_wdata_i ), - .mem_be_i ( slink_mem_be_i ), - .mem_gnt_o ( slink_mem_gnt_o ), - .mem_rsp_valid_o ( ), - .mem_rsp_rdata_o ( ), - .mem_rsp_error_o ( ), - .slv_aw_cache_i ( 4'b0000 ), - .slv_ar_cache_i ( 4'b0000 ), - .axi_req_o ( slink_axi_mst_req ), - .axi_rsp_i ( slink_axi_mst_rsp ) + .mem_req_i ( slink_mem_req_i ), + .mem_addr_i ( slink_mem_addr_i ), + .mem_we_i ( slink_mem_we_i ), + .mem_wdata_i ( slink_mem_wdata_i ), + .mem_be_i ( slink_mem_be_i ), + .mem_gnt_o ( slink_mem_gnt_o ), + .mem_rsp_valid_o ( slink_mem_rsp_valid_o ), + .mem_rsp_rdata_o ( 
slink_mem_rsp_rdata_o ), + .mem_rsp_error_o ( ), + .slv_aw_cache_i ( 4'b0000 ), + .slv_ar_cache_i ( 4'b0000 ), + .axi_req_o ( slink_axi_mst_req ), + .axi_rsp_i ( slink_axi_mst_rsp ) ); endmodule From 8a7f04b6b7973ad5b415271a39bf27ce1d62e72b Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 10:15:10 +0200 Subject: [PATCH 19/39] target/sim/verilator: Use 8 threads, more C optimizations --- target/sim/verilator/verilator.mk | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 4a6cedc4..3d7adce5 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -20,11 +20,9 @@ VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions VERILATOR_ARGS += -DASSERTS_OFF # multithreading -VERILATOR_ARGS += --threads 12 +VERILATOR_ARGS += --threads 8 # C++ Compiler Optimization -# VERILATOR_ARGS += -CFLAGS "-march=native" -# VERILATOR_ARGS += -CFLAGS "-mtune=native" -# VERILATOR_ARGS += -CFLAGS "-O2" +VERILATOR_ARGS += -CFLAGS "-O3" -CFLAGS "-march=native" -CFLAGS "-mtune=native" # Profiling # generates `gmon.out` that can be processed by `gprof` and then `verilator_profcfunc` From eeb4324a4687b95fe0dbb7611e358b48599939ab Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 10:25:31 +0200 Subject: [PATCH 20/39] target/sim/verilator: Switch to Clang --- target/sim/verilator/verilator.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 3d7adce5..043bc636 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -23,6 +23,8 @@ VERILATOR_ARGS += -DASSERTS_OFF VERILATOR_ARGS += --threads 8 # C++ Compiler Optimization VERILATOR_ARGS += -CFLAGS "-O3" -CFLAGS "-march=native" -CFLAGS "-mtune=native" +# Use Clang (faster simulation than GCC) +VERILATOR_ARGS += --compiler clang -MAKEFLAGS 
"CC=clang" -MAKEFLAGS "CXX=clang++" # Profiling # generates `gmon.out` that can be processed by `gprof` and then `verilator_profcfunc` From 3d8cd28a654404a2647253864f41da081b906e66 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 10:26:07 +0200 Subject: [PATCH 21/39] target/sim/verilator: Allow passing CLI arguments to simulation --- target/sim/verilator/verilator.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 043bc636..f4ae94a5 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -53,6 +53,6 @@ $(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verila @echo "#!/bin/sh" > $@ @echo 'set -eu' >> $@ @echo 'cd $$(dirname "$$0")' >> $@ - @echo '$(VERILATOR_PREFIX) ./obj_dir/Vcheshire_soc_wrapper' >> $@ + @echo '$(VERILATOR_PREFIX) ./obj_dir/Vcheshire_soc_wrapper "$$@"' >> $@ @chmod +x $@ From 27516b91119619c5312f411bac139762c44f63bb Mon Sep 17 00:00:00 2001 From: Paul Scheffler Date: Mon, 7 Jul 2025 19:06:39 +0200 Subject: [PATCH 22/39] sw [WIP]: Add some tests --- sw/tests/dma_long.spm.c | 38 + sw/tests/matmul.c | 168 ++ sw/tests/matmul_data.h | 3082 ++++++++++++++++++++++++ sw/tests/uart_fast.c | 26 + target/sim/src/fixture_cheshire_soc.sv | 6 +- target/sim/src/tb_cheshire_soc.sv | 8 +- target/sim/vcs/start.cheshire_soc.sh | 1 + target/sim/vsim/start.cheshire_soc.tcl | 1 + 8 files changed, 3325 insertions(+), 5 deletions(-) create mode 100644 sw/tests/dma_long.spm.c create mode 100644 sw/tests/matmul.c create mode 100644 sw/tests/matmul_data.h create mode 100644 sw/tests/uart_fast.c diff --git a/sw/tests/dma_long.spm.c b/sw/tests/dma_long.spm.c new file mode 100644 index 00000000..4d598df4 --- /dev/null +++ b/sw/tests/dma_long.spm.c @@ -0,0 +1,38 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 +// +// Nicole Narr +// Christopher Reinwardt +// Paul Scheffler + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "params.h" +#include "util.h" +#include "dif/dma.h" + +int main(void) { + // Source and dst + volatile uint64_t dma_src = 0x80000000; + volatile uint64_t dma_dst = 0x80010000; + + volatile uint64_t mcycle_start = get_mcycle(); + volatile uint64_t mcycle_tot = get_mcycle() - mcycle_start; + + for (int i = 1; i <= 11; ++i) { + fence(); + uint64_t len = 1 << i; + uint64_t reps = 1024*1024 >> i; + + mcycle_start = get_mcycle(); + + *((volatile uint32_t*)(void*)(uintptr_t)(0x010000d4)) = 1; + + sys_dma_2d_blk_memcpy(dma_dst, (uintptr_t)(void*)dma_src, len, 0, + 0, reps, DMA_CONF_DECOUPLE_NONE); + mcycle_tot = get_mcycle() - mcycle_start; + } + + return 0; +} diff --git a/sw/tests/matmul.c b/sw/tests/matmul.c new file mode 100644 index 00000000..1574fd58 --- /dev/null +++ b/sw/tests/matmul.c @@ -0,0 +1,168 @@ +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "params.h" +#include "util.h" + +#include "matmul_data.h" +int sN = 32; + +const int CHKSUM = 10614161; +const int ITERATIONS = 16; + + +// Get cycle count since reset +static inline uint64_t get_minstret() { + uint64_t reg; + asm volatile("csrr %0, minstret" : "=r"(reg)::"memory"); + return reg; +} + +#define ROWOP(c0, c1, c2, c3) \ + bb0 = &b[(n+0)*b_colstride + k]; \ + bb1 = &b[(n+1)*b_colstride + k]; \ + bb2 = &b[(n+2)*b_colstride + k]; \ + bb3 = &b[(n+3)*b_colstride + k]; \ + asm volatile( \ + "fld f0, 0(%[bb0]) \n" \ + "fld f1, 0(%[bb1]) \n" \ + "fld f2, 0(%[bb2]) \n" \ + "fld f3, 0(%[bb3]) \n" \ + "fld f4, 8(%[bb0]) \n" \ + "fld f5, 8(%[bb1]) \n" \ + "fld f6, 8(%[bb2]) \n" \ + "fld f7, 8(%[bb3]) \n" \ + "fmadd.d %[cx0], %[ax0], f0, %[cx0] \n" \ + "fmadd.d %[cx1], %[ax0], f1, %[cx1] \n" \ + "fmadd.d %[cx2], %[ax0], f2, %[cx2] \n" \ + "fmadd.d %[cx3], %[ax0], f3, %[cx3] \n" \ + "fmadd.d %[cx0], %[ax1], f4, %[cx0] \n" \ + "fmadd.d 
%[cx1], %[ax1], f5, %[cx1] \n" \ + "fmadd.d %[cx2], %[ax1], f6, %[cx2] \n" \ + "fmadd.d %[cx3], %[ax1], f7, %[cx3] \n" \ + "fld f0, 16(%[bb0]) \n" \ + "fld f1, 16(%[bb1]) \n" \ + "fld f2, 16(%[bb2]) \n" \ + "fld f3, 16(%[bb3]) \n" \ + "fld f4, 24(%[bb0]) \n" \ + "fld f5, 24(%[bb1]) \n" \ + "fld f6, 24(%[bb2]) \n" \ + "fld f7, 24(%[bb3]) \n" \ + "fmadd.d %[cx0], %[ax2], f0, %[cx0] \n" \ + "fmadd.d %[cx1], %[ax2], f1, %[cx1] \n" \ + "fmadd.d %[cx2], %[ax2], f2, %[cx2] \n" \ + "fmadd.d %[cx3], %[ax2], f3, %[cx3] \n" \ + "fmadd.d %[cx0], %[ax3], f4, %[cx0] \n" \ + "fmadd.d %[cx1], %[ax3], f5, %[cx1] \n" \ + "fmadd.d %[cx2], %[ax3], f6, %[cx2] \n" \ + "fmadd.d %[cx3], %[ax3], f7, %[cx3] \n" \ + : \ + [bb0]"+&r"(bb0), [bb1]"+&r"(bb1), [bb2]"+&r"(bb2), [bb3]"+&r"(bb3), \ + [cx0]"+&f"(c0), [cx1]"+&f"(c1), [cx2]"+&f"(c2), [cx3]"+&f"(c3), \ + [ax0]"+&f"(ax[0]), [ax1]"+&f"(ax[1]), [ax2]"+&f"(ax[2]), [ax3]"+&f"(ax[3]) \ + :: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" \ + ); \ + +int __attribute__ ((visibility("hidden"))) __attribute__((noinline)) mmopt( + double* __restrict a, + double* __restrict b, + double* __restrict c, + int N, + int M, + int K, + int a_rowstride, + int b_colstride, + int c_rowstride +) { + for (int m = 0; m < M; m+=4) + for (int n = 0; n < N; n+=4) { + // One output block per n-m pair + + register double cb0 = c[c_rowstride*(m+0)+n+0]; + register double cb1 = c[c_rowstride*(m+0)+n+1]; + register double cb2 = c[c_rowstride*(m+0)+n+2]; + register double cb3 = c[c_rowstride*(m+0)+n+3]; + register double cb4 = c[c_rowstride*(m+1)+n+0]; + register double cb5 = c[c_rowstride*(m+1)+n+1]; + register double cb6 = c[c_rowstride*(m+1)+n+2]; + register double cb7 = c[c_rowstride*(m+1)+n+3]; + register double cb8 = c[c_rowstride*(m+2)+n+0]; + register double cb9 = c[c_rowstride*(m+2)+n+1]; + register double cb10 = c[c_rowstride*(m+2)+n+2]; + register double cb11 = c[c_rowstride*(m+2)+n+3]; + register double cb12 = c[c_rowstride*(m+3)+n+0]; + register double cb13 
= c[c_rowstride*(m+3)+n+1]; + register double cb14 = c[c_rowstride*(m+3)+n+2]; + register double cb15 = c[c_rowstride*(m+3)+n+3]; + + for (int k = 0; k < K; k+=4) { + register double *bb0, *bb1, *bb2, *bb3; + + register double* ax = &a[m*a_rowstride + k]; + ROWOP(cb0, cb1, cb2, cb3) + + ax += a_rowstride; + ROWOP(cb4, cb5, cb6, cb7) + + ax += a_rowstride; + ROWOP(cb8, cb9, cb10, cb11) + + ax += a_rowstride; + ROWOP(cb12, cb13, cb14, cb15) + } + // Write back output block + c[(m+0)*c_rowstride + (n+0)] = cb0; + c[(m+0)*c_rowstride + (n+1)] = cb1; + c[(m+0)*c_rowstride + (n+2)] = cb2; + c[(m+0)*c_rowstride + (n+3)] = cb3; + c[(m+1)*c_rowstride + (n+0)] = cb4; + c[(m+1)*c_rowstride + (n+1)] = cb5; + c[(m+1)*c_rowstride + (n+2)] = cb6; + c[(m+1)*c_rowstride + (n+3)] = cb7; + c[(m+2)*c_rowstride + (n+0)] = cb8; + c[(m+2)*c_rowstride + (n+1)] = cb9; + c[(m+2)*c_rowstride + (n+2)] = cb10; + c[(m+2)*c_rowstride + (n+3)] = cb11; + c[(m+3)*c_rowstride + (n+0)] = cb12; + c[(m+3)*c_rowstride + (n+1)] = cb13; + c[(m+3)*c_rowstride + (n+2)] = cb14; + c[(m+3)*c_rowstride + (n+3)] = cb15; + } + return 0; +} + +int main(void) { + + // Get start cycle count + uint32_t instret = get_minstret(); + uint32_t cycles = get_mcycle(); + + for (int i = 0; i < ITERATIONS; ++i) { + mmopt( + float_data_a, + float_data_b, + float_data_c, + sN, sN, sN, + sN, sN, sN + ); + } + + // Get end cycle count + cycles = get_mcycle() - cycles; + instret = get_minstret() - instret; + + // Compute checksum + double checksum = 0.0; + for (int y = 0; y < sN; y++) { + double sign = (y & 1) ? -1.0 : 1.0; + for (int x = 0; x < sN; x++) { + checksum += sign*1000.0*1000.0*float_data_c[y*sN+x]; + } + } + + // Scale checksum to int. + // Take ~2 digits off (divide by 128) to account for FP rounding. 
+ uint64_t chkint = (uint64_t)(checksum) >> 7; + + // Check return + return (chkint != CHKSUM); +} diff --git a/sw/tests/matmul_data.h b/sw/tests/matmul_data.h new file mode 100644 index 00000000..95b57d4b --- /dev/null +++ b/sw/tests/matmul_data.h @@ -0,0 +1,3082 @@ +#define NN 1024 + +__attribute__((aligned(8))) double float_data_a [NN] = { + 0.5352350007344391, + 0.4340999186868998, + 0.3573451265334625, + 0.659877532628472, + 0.40853921437460894, + 0.3354228481944037, + 0.48833816165085, + 0.03940478777764589, + 0.7585197684433818, + 0.7308074291972139, + 0.12187569370840279, + 0.5914408651199228, + 0.33901095078768173, + 0.934096273831717, + 0.5217518670462218, + 0.896709983408266, + 0.991984552154219, + 0.16769154033383038, + 0.8276114461606534, + 0.130490395423713, + 0.37102367657065705, + 0.03819386018411575, + 0.8787352942183696, + 0.0670103601745633, + 0.464775185852243, + 0.5523675879615805, + 0.32012811136551833, + 0.4148976360932447, + 0.008806127930397811, + 0.6206644817725114, + 0.3083133613960074, + 0.7777989615621607, + 0.3001731367360575, + 0.5451352039072916, + 0.7543933435572973, + 0.15044999045782415, + 0.5623470545100572, + 0.8746708163827082, + 0.46054924018583465, + 0.03588711504886111, + 0.3186146168786429, + 0.6123257191226615, + 0.8292670490557523, + 0.4667497831385504, + 0.6500578343072372, + 0.7033950328107438, + 0.4767846807955809, + 0.5643026947238519, + 0.4667231897508465, + 0.047747185571421547, + 0.4014910742993021, + 0.2799344730073534, + 0.248644318867275, + 0.4842955273167646, + 0.9652452690546509, + 0.021766164014830336, + 0.4989635433680225, + 0.8631644794983719, + 0.4713414470089018, + 0.9022864960426287, + 0.6408922635454807, + 0.9655958989669046, + 0.7494955940762992, + 0.19772487031386488, + 0.6131871993091498, + 0.1153300482192483, + 0.8626974808409035, + 0.4683605904126733, + 0.6852147966692926, + 0.11919154822350952, + 0.5353611627851229, + 0.6115256163505468, + 0.19756061509553957, + 0.048376192124068296, + 
0.7008521905121031, + 0.5365629235635951, + 0.33034541831671094, + 0.5836903326699904, + 0.4325493218146902, + 0.29162680904624483, + 0.7893267576047148, + 0.03849836071307289, + 0.4163227385756013, + 0.8691911422994578, + 0.319288277047894, + 0.10751688065497367, + 0.45009499972438816, + 0.3674200955364346, + 0.2656554447532048, + 0.1449277176439191, + 0.33447176953647406, + 0.5647780245474932, + 0.3687890828435174, + 0.0370426075140855, + 0.46879660919241406, + 0.7969891083841032, + 0.27467799471562593, + 0.2708796464280676, + 0.13213214175212473, + 0.9795132414957441, + 0.36836118250203465, + 0.09690316182801262, + 0.7978049338955066, + 0.02579276833414812, + 0.22292598082661286, + 0.8870831581099181, + 0.4275597207619688, + 0.49331675554249, + 0.9183030829372818, + 0.393981061073861, + 0.5326304083119843, + 0.8762503929800484, + 0.31414750936905134, + 0.3938021087720709, + 0.5396460808503604, + 0.8686474751422611, + 0.46231357204273693, + 0.24661517903696883, + 0.15779447788212142, + 0.855451540278821, + 0.40656338652193214, + 0.6806113646235357, + 0.33389682514977226, + 0.42918080759436383, + 0.9684440955646876, + 0.5151820254678332, + 0.8533188872214227, + 0.45326888983661884, + 0.5199019760978683, + 0.5912060607348202, + 0.849790177819391, + 0.5682404577587885, + 0.5885573266839357, + 0.5627415798429155, + 0.703669752848177, + 0.3025310295939335, + 0.24923211984440574, + 0.6983818714976057, + 0.9669019371802734, + 0.607857529196033, + 0.043584372786693915, + 0.5160218832096133, + 0.7424835058256062, + 0.22233351557663128, + 0.9208365517273671, + 0.402694352615635, + 0.3360413141646311, + 0.4989779230706036, + 0.05411104789275101, + 0.8028179169686925, + 0.3670400367475437, + 0.8100277289328653, + 0.6320451675735316, + 0.6198269605870753, + 0.9796309001883268, + 0.28970006610181354, + 0.8026306770821668, + 0.28620778692139526, + 0.207399217412649, + 0.3446883083400746, + 0.4513164976114663, + 0.365323553038427, + 0.7540456656243462, + 0.10758964755155542, + 
0.47069837103835777, + 0.4353892022037008, + 0.6576225863935043, + 0.03305290193326893, + 0.9832073983978341, + 0.06769761387961681, + 0.45554405645424023, + 0.8074710903684919, + 0.09695686572637008, + 0.3452084616133393, + 0.02027397462540048, + 0.5869067139303112, + 0.6798540822794223, + 0.21940105220469341, + 0.8826709277184172, + 0.1269612695707094, + 0.8083446231761735, + 0.48707963624693784, + 0.7202591418416773, + 0.29880798358118754, + 0.39797914623399033, + 0.16713571560652118, + 0.9681474819872848, + 0.3308929153451038, + 0.047395490536004226, + 0.042614255771095344, + 0.35466387787188725, + 0.6664217903674834, + 0.2783888671793331, + 0.3377368238269802, + 0.326053967593386, + 0.16764421322615497, + 0.4431463083688566, + 0.1789977858753986, + 0.9630258585194897, + 0.9098091950581768, + 0.12755910542111926, + 0.4055945116287927, + 0.23475049455298458, + 0.36415847463829454, + 0.05943047425254522, + 0.4202190793590731, + 0.4264719801421728, + 0.8753887817872771, + 0.6486616151437884, + 0.22373926870407257, + 0.5994499102338221, + 0.6504882153182292, + 0.8087306172269466, + 0.3810486202435903, + 0.7299907168467288, + 0.11533172745026288, + 0.24992148570335948, + 0.29581542974566444, + 0.8279619087977323, + 0.32143253361541557, + 0.08344241128505059, + 0.49795982089030455, + 0.7854925253009634, + 0.4032866056576252, + 0.9111765368768747, + 0.460968120405028, + 0.660835972007481, + 0.45466516265625256, + 0.43008419144873167, + 0.9861971161640486, + 0.05195040582825605, + 0.7240712521480829, + 0.5389519650166185, + 0.8653637700284867, + 0.7886516669603598, + 0.8568313935888643, + 0.41805839920091836, + 0.05549475226855072, + 0.24758950557164794, + 0.8397443516728754, + 0.5468601996975938, + 0.5451229620952618, + 0.16007408778649213, + 0.10333974354349584, + 0.32607012292712223, + 0.12478830239524696, + 0.6016649430330554, + 0.6127728380310865, + 0.17121331591882805, + 0.40906548712470847, + 0.03033802859570167, + 0.07930911968801613, + 0.5337862445813243, + 
0.2692698251057649, + 0.6158977164323403, + 0.4217925835152194, + 0.30736369152224297, + 0.30838396671763024, + 0.0375970575113872, + 0.49850134122732714, + 0.77441187216115, + 0.8136044330097995, + 0.7205635715993531, + 0.7369760604702487, + 0.811886839273851, + 0.6559515960269104, + 0.8875939069241975, + 0.6627488513474958, + 0.12285417508495089, + 0.29809460551917866, + 0.7763794395082718, + 0.6387271821341975, + 0.592617388546664, + 0.6007744910934978, + 0.907546839755112, + 0.42092823266629453, + 0.901234245319677, + 0.2043666081992238, + 0.6707314799254874, + 0.42634625202257814, + 0.6899915047915232, + 0.2740840128346993, + 0.7735563264560464, + 0.19347502965255592, + 0.3353059476431077, + 0.26781784258551655, + 0.7669604938889729, + 0.3833142111918322, + 0.8435447831109986, + 0.9516640664163106, + 0.8753304941066569, + 0.13417307883324636, + 0.6637137161936458, + 0.5295456177443467, + 0.32349253897269625, + 0.30962109161295814, + 0.08502192523406038, + 0.971912700430355, + 0.7205754135278039, + 0.831213503174904, + 0.3432798490820521, + 0.8893030073190072, + 0.909862743898476, + 0.5492062937618997, + 0.7713708530125526, + 0.1351469892778775, + 0.7526134509538078, + 0.42479624358095736, + 0.006532010144244094, + 0.6639608252817143, + 0.2711609300635782, + 0.9085086611481819, + 0.8737969709637499, + 0.5935201323020545, + 0.5349919693247276, + 0.3182595267904388, + 0.13371667772961848, + 0.11783953783006795, + 0.2373140700457963, + 0.7816219535670642, + 0.6113372154835065, + 0.8052539769933491, + 0.9533043297334013, + 0.06285005587496617, + 0.9324719393970784, + 0.7709784046907235, + 0.19580341881307017, + 0.319770616477852, + 0.025975182562884447, + 0.9202070480190947, + 0.16783754230959302, + 0.584956623970198, + 0.01516126010710972, + 0.16675842059003465, + 0.9034712394646772, + 0.8346535925827784, + 0.016329886599838028, + 0.7305938567443748, + 0.8964006524420405, + 0.3528305784171728, + 0.11199492956126633, + 0.3420520336444567, + 0.30072830892921276, + 
0.6009310178151951, + 0.48783988195886296, + 0.868582233431996, + 0.6650164995782366, + 0.8778770524160735, + 0.4169801981439045, + 0.17498032815015363, + 0.5366722484742011, + 0.90860626044362, + 0.41731494024697924, + 0.14180229598182081, + 0.18363480839907587, + 0.6811465450307493, + 0.3412088776177782, + 0.614383296614584, + 0.010202133899719579, + 0.24306907512594467, + 0.464984791853969, + 0.9919134177332849, + 0.3228089585864502, + 0.17782470581485865, + 0.004717553996881563, + 0.30249786673899515, + 0.3652202340267513, + 0.17040053032549307, + 0.48756689167089173, + 0.8238879177978354, + 0.40919704876983, + 0.9653718307831876, + 0.9037290511869428, + 0.7605572526741096, + 0.9154599291944634, + 0.6128424566088155, + 0.40735390087406753, + 0.8759691037990971, + 0.2088369655573321, + 0.8995665102723834, + 0.17852241134322322, + 0.26780253635884554, + 0.27996389486352846, + 0.8197335635902775, + 0.5960867408098524, + 0.6568577672114474, + 0.006025839640004804, + 0.32118745022291983, + 0.6556439979315002, + 0.7441650048026901, + 0.0942161326699763, + 0.12016640940212098, + 0.48256394187993057, + 0.8567289359552068, + 0.3970787808405134, + 0.41839451670757666, + 0.8942717337508794, + 0.7964342046090619, + 0.5345845251500435, + 0.7149404269903277, + 0.443075904381669, + 0.4964701229038636, + 0.8847206234219985, + 0.5814537383967802, + 0.969774717528564, + 0.8054496074262542, + 0.11056240003600615, + 0.6877530347275408, + 0.0211608984818642, + 0.49817938863657274, + 0.6016187000312135, + 0.2918132286954259, + 0.10492575403163351, + 0.29985150168541463, + 0.4008600777270874, + 0.2266788158961741, + 0.05085014511720043, + 0.8577955511464014, + 0.26826451517913497, + 0.10551309469712356, + 0.8851475779707373, + 0.06937957485257285, + 0.460611962674944, + 0.929989128999304, + 0.5240409170139873, + 0.0742197982026479, + 0.629402302876808, + 0.07111006070139803, + 0.5891867671979074, + 0.501967164238872, + 0.7219217439350702, + 0.20009340008407028, + 0.2115871373147412, 
+ 0.7454139925893613, + 0.1410460636000651, + 0.6598812718298255, + 0.26211540563911884, + 0.9280787808428819, + 0.33903053961855323, + 0.27474188627979357, + 0.629695200650873, + 0.5804711928365193, + 0.08520146568112441, + 0.878838582049462, + 0.48029140268089654, + 0.39762617780167975, + 0.10033183574492699, + 0.6304722820188788, + 0.22994772732806146, + 0.2623289078481067, + 0.9720444662913228, + 0.5063996977048107, + 0.12024023839155207, + 0.1552064687943676, + 0.2551011211481341, + 0.35384400761702284, + 0.3246136311650575, + 0.6756172832687609, + 0.8220928260625058, + 0.7795838318704947, + 0.08961058558131929, + 0.4269485758459397, + 0.9461222182423973, + 0.6380804512047393, + 0.3313011367761689, + 0.9662850740086115, + 0.6880555717857275, + 0.7275138271482404, + 0.4452030065054351, + 0.1148903700845425, + 0.7584387498671139, + 0.6268275327946388, + 0.29252625561187773, + 0.7834980917121167, + 0.19522660530634406, + 0.27812958348586014, + 0.2046016634865686, + 0.2747776812510435, + 0.018332096645854223, + 0.2810194135381231, + 0.25794923009394766, + 0.016481215343720268, + 0.24375508383606082, + 0.8059417925224844, + 0.49290047090759204, + 0.5905349021375007, + 0.07292159916138241, + 0.04603721576526476, + 0.3473112280403896, + 0.45047741092719606, + 0.37536391703134186, + 0.9999453893192546, + 0.14387174803631564, + 0.7126600681435172, + 0.35439650794020716, + 0.16471787976265206, + 0.6524821324597144, + 0.31507854785668454, + 0.5490172059885866, + 0.6532588997728773, + 0.5504260504943403, + 0.627513660652404, + 0.4134350905600429, + 0.12497662345549931, + 0.5024694352335891, + 0.5355152138498602, + 0.21815857932067007, + 0.3376940456301255, + 0.09275763839014317, + 0.7196997993910177, + 0.6159713278555305, + 0.10399729923991374, + 0.559600481726711, + 0.17531505998054675, + 0.2876273714512362, + 0.2529420086461298, + 0.8771934577279719, + 0.43604566644977816, + 0.27134363180944954, + 0.8761169800512976, + 0.8003779227028744, + 0.08751030781762081, + 
0.9303931603622732, + 0.08345834435040378, + 0.010218865725034942, + 0.06169158777964179, + 0.9181476747286347, + 0.48870324480278926, + 0.5608872534874613, + 0.5963772279183471, + 0.19305556603268237, + 0.9266357905748044, + 0.6747922696772554, + 0.9466972945635305, + 0.5232018957188574, + 0.6553480453079147, + 0.9961678131601246, + 0.08510824598404598, + 0.24143070685243806, + 0.9739841935352589, + 0.419572137690415, + 0.024552576479950128, + 0.2598643453084244, + 0.12083868265291275, + 0.6319530959771344, + 0.29412825351771077, + 0.8543880148588033, + 0.27737092852060075, + 0.7975056911786306, + 0.7491607714673122, + 0.8994961120061238, + 0.022439661484481066, + 0.5702682292616611, + 0.8060340830374665, + 0.43919869796269906, + 0.5335878674223619, + 0.8589035531167885, + 0.9181864320959211, + 0.037862375990109665, + 0.9933449565120291, + 0.18123845436023123, + 0.1084804267871684, + 0.8563950489450748, + 0.27779970238842944, + 0.06613548109264733, + 0.8627660240625389, + 0.38254384167753075, + 0.4962984645220603, + 0.5579674730049551, + 0.04954786990192794, + 0.07523357739422465, + 0.34928977456518084, + 0.5046149100959929, + 0.6180547245462692, + 0.26605744590700453, + 0.0009923445240620499, + 0.34642252866433854, + 0.8510537740977088, + 0.8767578502482354, + 0.7552798270897796, + 0.8345870747595268, + 0.9035547778303581, + 0.30985479888648837, + 0.7660574981132435, + 0.40051229314981085, + 0.94115473086002, + 0.038944235654628345, + 0.39111113810781184, + 0.6642155082794596, + 0.5700709602152384, + 0.9759562779241348, + 0.02218140969742277, + 0.3916634614180846, + 0.40029774399273177, + 0.7905073100921529, + 0.7356154613198107, + 0.1221507842376327, + 0.40908470285235443, + 0.244869062558973, + 0.5376530570891275, + 0.5663075129446639, + 0.44742567982480574, + 0.22397121599066883, + 0.8027317830759204, + 0.5027820840729919, + 0.4696684919837536, + 0.9285507290581719, + 0.219706028006449, + 0.8619335029759442, + 0.0730219334534089, + 0.28450777144784045, + 
0.0784177349855063, + 0.12753841848408187, + 0.24383658384723017, + 0.03491706054486332, + 0.47972882038467546, + 0.4212146735272765, + 0.40005835669995116, + 0.33121659651924573, + 0.8313544105451698, + 0.8687317854235388, + 0.4749626662806642, + 0.6235238013803054, + 0.9526707669204677, + 0.08938905053183632, + 0.08746109616457343, + 0.42742580251773876, + 0.18090692666590258, + 0.5985873797402209, + 0.5467249518432852, + 0.0425390503150177, + 0.6381057411179124, + 0.9318717030120727, + 0.21328758371703194, + 0.5208889812844661, + 0.8278684941988345, + 0.4793633516924226, + 0.6640871263412751, + 0.6631749276877863, + 0.6931795259720793, + 0.04956268775285866, + 0.2389422548134209, + 0.9129154286779392, + 0.8649317682710153, + 0.19589316766864162, + 0.5612749801529595, + 0.25136206961951724, + 0.8306803913706285, + 0.3529698010050628, + 0.4198009526043872, + 0.11966800981953973, + 0.1508940381602909, + 0.125190487340178, + 0.11891267838829711, + 0.2248224761861135, + 0.5779226614102072, + 0.24892337107396245, + 0.6464687643837829, + 0.4935552491538924, + 0.845078083277523, + 0.34904129071011636, + 0.49399028370088227, + 0.025649141239715623, + 0.41504005727415016, + 0.4148809399687241, + 0.08204005043888196, + 0.12944077214448957, + 0.16490724654943612, + 0.7439492358814592, + 0.8127876419953537, + 0.4583966385490562, + 0.5328756759806376, + 0.4009334984229087, + 0.732677404660176, + 0.7250906522077006, + 0.4225406035323799, + 0.34319382066644233, + 0.10460083289664868, + 0.6309420342374419, + 0.9778548275024674, + 0.19915788996865802, + 0.8314642659455326, + 0.41655156788665004, + 0.11890868263934329, + 0.400324612344479, + 0.7534914228929607, + 0.5240066884301969, + 0.1705066430213621, + 0.3959635189317926, + 0.7741735797224483, + 0.08957772078694659, + 0.024546396353141153, + 0.7354609476268321, + 0.9354924478203667, + 0.1490100345089067, + 0.6596091052298141, + 0.9805878670164927, + 0.8884935421522651, + 0.19617370316865157, + 0.5368049316925345, + 
0.6599908739686675, + 0.1466315177298576, + 0.5667063441829046, + 0.04287752036617076, + 0.16423935525587507, + 0.8012556614186359, + 0.37027689976042577, + 0.6853202569911476, + 0.8884454888663151, + 0.36276856236530497, + 0.7506657032341513, + 0.40210386533315146, + 0.4972779326902276, + 0.25264090137645523, + 0.814940878528082, + 0.8107719303753705, + 0.42696048647413665, + 0.6558260023152117, + 0.7580143432871663, + 0.1376194962017021, + 0.9733272907613758, + 0.6487813469172395, + 0.66796166430734, + 0.8539412945177982, + 0.9328824565869076, + 0.3520612675754704, + 0.23165170953523595, + 0.5217100370645237, + 0.14291237848804983, + 0.8978179376806297, + 0.5902575249157076, + 0.9167469238284401, + 0.39591319240004264, + 0.5819028886520389, + 0.9202742172804432, + 0.7832820643729524, + 0.6408941826793314, + 0.37719315139127874, + 0.07817664748664876, + 0.5341066839292521, + 0.39213799525315074, + 0.04575678008362283, + 0.3696235503525034, + 0.9602390713088576, + 0.9295758975386352, + 0.5032910636844603, + 0.3338419643375776, + 0.6401965839816297, + 0.07771301250226481, + 0.415261120183953, + 0.5283418145553265, + 0.660695828487648, + 0.8701537516951136, + 0.08928607612915174, + 0.2715869376291168, + 0.1538216147839665, + 0.4008734092927613, + 0.797452438574006, + 0.06694864065122819, + 0.7590984952721052, + 0.32041090410635775, + 0.799799653910041, + 0.3956707536544801, + 0.9417466318464034, + 0.7933351302198876, + 0.5350342326657135, + 0.4327404588394066, + 0.34423429103041037, + 0.16848690274755418, + 0.3637217167537051, + 0.8340555330925002, + 0.14704665999835276, + 0.6307877815945574, + 0.4769873499265511, + 0.9237987946192825, + 0.03839260344972795, + 0.3372474481094738, + 0.030182181089601356, + 0.6545794153650377, + 0.903088705888743, + 0.09710576067740728, + 0.994295955206089, + 0.27551000440043893, + 0.8519187146408596, + 0.4481394801216385, + 0.21690557084943274, + 0.5982626683843337, + 0.5835179365334722, + 0.9561022209939845, + 0.8076213217321764, + 
0.15739421270557463, + 0.6119300290870773, + 0.16102556143493874, + 0.019837364953282033, + 0.03928191472870479, + 0.3379678755176999, + 0.176841771089991, + 0.9901325176801309, + 0.25336732341754, + 0.8196413202782158, + 0.1574407137819097, + 0.9833505119678075, + 0.016016961935180984, + 0.7429065818940614, + 0.003899174431213015, + 0.4861895466784736, + 0.5243595845725323, + 0.31780090511144776, + 0.3885026276633726, + 0.5687508060325687, + 0.18463127940269308, + 0.5617757669040719, + 0.7701372757101737, + 0.820061280226123, + 0.2541333798287132, + 0.8178071084689084, + 0.5160830653730059, + 0.8253899968759476, + 0.620338335623332, + 0.8870878877108447, + 0.6474707244276248, + 0.36536634418410785, + 0.2054936354642476, + 0.8709146434444005, + 0.12414350140869368, + 0.6828789666482754, + 0.7787398842582789, + 0.12780177228543999, + 0.8245019430548344, + 0.3762240748616903, + 0.7673841567717823, + 0.3738640878885896, + 0.6826497270587462, + 0.5260009754731482, + 0.4219342655319409, + 0.2216978181887076, + 0.6809497395676809, + 0.47664532078387734, + 0.9702889676331551, + 0.7902619741364696, + 0.16122813355153232, + 0.20134354184992354, + 0.35326856680924745, + 0.6264846431485367, + 0.9804851890360573, + 0.8872285004265579, + 0.5966347309635864, + 0.8520484222717724, + 0.10747812804380341, + 0.34816894330008863, + 0.6277050389298238, + 0.5716005183877348, + 0.821262349048823, + 0.037783527660938954, + 0.4043201962150127, + 0.1625486512167702, + 0.18058430122283398, + 0.3314252598521311, + 0.01218903411500416, + 0.4100987254127014, + 0.9534585578920639, + 0.9265817330330138, + 0.26663632430106576, + 0.5733296941312612, + 0.5736864457140282, + 0.563674784902263, + 0.9805211252961732, + 0.5884674556820086, + 0.6331512617473352, + 0.6308923835460326, + 0.4839129229626943, + 0.424537526391658, + 0.9271830180121533, + 0.7655216180205419, + 0.34648886711586824, + 0.5945104715566297, + 0.3842794162074621, + 0.9667687756845897, + 0.6660370192449973, + 0.5699008798117159, + 
0.3199747139182142, + 0.7491173098516989, + 0.1432520026662727, + 0.9193597388231267, + 0.4127695881474316, + 0.6386520073469497, + 0.6453560127946052, + 0.9557737501611391, + 0.9407962717668452, + 0.4900353759155869, + 0.5900693616533552, + 0.6692431214148422, + 0.827198462083472, + 0.5260103141417634, + 0.052379066882808245, + 0.6358989268515134, + 0.6203587168794665, + 0.10494422811907778, + 0.06361027273650721, + 0.20099728527132132, + 0.7504532371452804, + 0.6483095090864837, + 0.8268803926814476, + 0.34449533643685337, + 0.7421989340139904, + 0.6469778346793278, + 0.5732091301599128, + 0.8668610733669159, + 0.3364072723262671, + 0.08785075933891606, + 0.044627032221555174, + 0.1883781646937427, + 0.28071024676790857, + 0.12328864664555916, + 0.3242883707280849, + 0.41390331583430673, + 0.9031736048131612, + 0.06894045077490107, + 0.9729592204561013, + 0.8299381984833198, + 0.8644140134179488, + 0.5982227204623232, + 0.5603479323620143, + 0.5430909294921439, + 0.9536721459301684, + 0.726527164700073, + 0.06014260408202654, + 0.36507313687184983, + 0.5997082432472877, + 0.31159721813661856, + 0.3420834753701887, + 0.6763709611304262, + 0.7781376464118755, + 0.427915098851127, + 0.8503186632529793, + 0.03342822211128871, + 0.23423462305667564, + 0.24687711246954047, + 0.03921229705400353, + 0.10283322599271238, + 0.3523021563960914, + 0.3016110482650093, + 0.4819850164648415, + 0.5720864356166743, + 0.956106699709428, + 0.927372181082301, + 0.13942911889941034, + 0.6543948523999701, + 0.7006435622908681, + 0.1822872805434972, + 0.3490985458143201, + 0.4473693414359111, + 0.9301205439754611, + 0.2042280053199117, + 0.9604987463381719, + 0.7360062127106249, + 0.11910467153469129, + 0.2947849916753468, + 0.5706340356325976, + 0.18067948197494021, + 0.8739548061652476, + 0.8246937216554774, + 0.33088740421326246, + 0.15476470382575402, + 0.8864734082139835, + 0.35832983423092035, + 0.6825411459901792, + 0.6164487713454895, + 0.29881281829966366, + 
0.4829792745763213, + 0.63743233240796, + 0.10175840761426569, + 0.6068088148253835, + 0.37923323385023455, + 0.3559917896291984, + 0.9530569299898638, + 0.6545284799230036, + 0.2975991737256304, + 0.8409596082461407, + 0.3383456092416004, + 0.7801598079654645, + 0.7548968294207249, + 0.361059399070346, + 0.5146572198065668, + 0.6381926812660675, + 0.5467912589730515, + 0.9468000460225957, + 0.10737454135132052, + 0.5840621717032644, + 0.3981660682207344, + 0.42234420034104925, + 0.7912187320893674, + 0.29530571492916236, + 0.5285176391479142, + 0.42161691136515533, + 0.10826989477963955, + 0.6954884850206664, + 0.20942130989303576, + 0.07932704047227279, + 0.03883177282761818, + 0.7596056011404145, + 0.42879009979796645, + 0.7155810488363635, + 0.8981601344054728, + 0.16424930111385017, + 0.860511695992005, + 0.10945853920728432, + 0.17959179151458737, + 0.9237606581318103, + 0.4320408787268739, + 0.05991235549009355, + 0.5569790118872673, + 0.8299495701254477, + 0.07207728301062877, + 0.6848672446562766, + 0.8831647240500421, + 0.20895934133178373, + 0.12616395111146306, + 0.4663792557722706, + 0.44726251680733153, + 0.4331515676210451, + 0.9709012220789723, + 0.2502649893274026, + 0.0274102485378388 +}; + +__attribute__((aligned(8))) double float_data_b [NN] = { + 0.9476792298273639, + 0.34591760712145403, + 0.8846416402762464, + 0.005984396960310723, + 0.20846316641328677, + 0.20528968550337945, + 0.22697370575001663, + 0.5400545048447254, + 0.06440718041257898, + 0.9158697332309161, + 0.9160008250722059, + 0.17330687311320625, + 0.4819846210985034, + 0.4140746298323328, + 0.4542959151820055, + 0.5805425025840325, + 0.5713067160633365, + 0.015063054429015899, + 0.02234332136701267, + 0.6772135699078831, + 0.9626649768667207, + 0.8881376454298905, + 0.1930829314893565, + 0.5484652254946012, + 0.7683264468476809, + 0.5919167390245973, + 0.6756791780946312, + 0.9346495262628444, + 0.4836969768276088, + 0.5850214089849909, + 0.2594176999171238, + 
0.15786904146437697, + 0.13866471812421755, + 0.5334249760229444, + 0.27011415751245993, + 0.6060917021205628, + 0.8960982598128668, + 0.8207646273977484, + 0.9385460090287607, + 0.019562584872019895, + 0.2984726827616786, + 0.19756456747377504, + 0.40714065753766804, + 0.7915956354926938, + 0.40676257089333967, + 0.1397025902428054, + 0.08285398505643249, + 0.04134955610917601, + 0.05196664064869039, + 0.7468092954873852, + 0.5121048083901085, + 0.07859048555564752, + 0.1643809507820333, + 0.12472053260543492, + 0.6372094789164836, + 0.7597863961448377, + 0.19539321708084456, + 0.3318237634077479, + 0.6222727381084795, + 0.0643456232295172, + 0.32812549081255216, + 0.3467991116957627, + 0.8857800618744679, + 0.19149635769156204, + 0.2938859489328228, + 0.6219342837701063, + 0.05156790914859055, + 0.5158855317276826, + 0.6579153879406313, + 0.3463115643588265, + 0.9803052038768778, + 0.8162410434351587, + 0.5265465972723193, + 0.05247838962107165, + 0.0987604792519835, + 0.006810774404815478, + 0.5317378439355603, + 0.7204862248056156, + 0.07155818690637605, + 0.33923412512038464, + 0.8382488459455147, + 0.4890128081506221, + 0.47957482936772733, + 0.9634297993478533, + 0.9028813265893069, + 0.8783849741901509, + 0.3652261818134408, + 0.5418495576674371, + 0.731574549542269, + 0.45265485972354846, + 0.661492785239494, + 0.8195025384467224, + 0.6114911926565978, + 0.06037548726793052, + 0.7631533843159923, + 0.7332714280986595, + 0.46919223994689285, + 0.12304855483057497, + 0.36455024444479467, + 0.6462682699904433, + 0.8461746633688108, + 0.9821708027122219, + 0.24683387820825842, + 0.9994246563495334, + 0.9812684646638498, + 0.978589911039438, + 0.3539330497428964, + 0.5954770640392185, + 0.37546240431227396, + 0.08727947717801432, + 0.035215974756616863, + 0.6231010306366633, + 0.8722111056137376, + 0.308168045085072, + 0.06069419729327874, + 0.7244488263862581, + 0.31730261124142056, + 0.7117580768244085, + 0.19219153794976052, + 0.17737251569902956, + 
0.6983476306163455, + 0.8134459635909675, + 0.21548250175531902, + 0.7774787265990692, + 0.09436064542689548, + 0.23177108205191221, + 0.3625944972918641, + 0.3833050159594862, + 0.48222918410030813, + 0.720051373644395, + 0.27730465065647547, + 0.012341633517563344, + 0.3481363165872279, + 0.8527909027026704, + 0.26254670233354005, + 0.09280437048896295, + 0.44414801952178473, + 0.45176856235261165, + 0.9345276473756758, + 0.5242301937588567, + 0.790763651147261, + 0.7700690909636478, + 0.5002229653483784, + 0.5810002234816862, + 0.5079285405575725, + 0.725713213917804, + 0.7203613953675314, + 0.05207947394345036, + 0.9504866606065121, + 0.031088575458810053, + 0.678721780301101, + 0.26285350528976614, + 0.7570409416750676, + 0.43512183929413417, + 0.5839849565327669, + 0.6465863318205425, + 0.728187030329154, + 0.21422392389905853, + 0.7400212025343039, + 0.9828264321403621, + 0.4115852217765593, + 0.4312655610719973, + 0.7624910000647103, + 0.780066786594858, + 0.6250389052419615, + 0.8199257856310606, + 0.181651129566211, + 0.22695269903028248, + 0.38788387681810443, + 0.293017191592857, + 0.3989611033262477, + 0.5597918279022716, + 0.5484510159277478, + 0.38233100567340184, + 0.2301319584732171, + 0.6575451855266453, + 0.8302872046617846, + 0.25035750266664336, + 0.04073237661962259, + 0.6129221326608605, + 0.5770971410459443, + 0.7656388641679467, + 0.7275080965269298, + 0.06032580416457389, + 0.12807976231589768, + 0.08707062548448996, + 0.4411632583578543, + 0.120110460093388, + 0.04388701186423638, + 0.5643395393468595, + 0.8197954293950154, + 0.9961983306039026, + 0.9858207575429803, + 0.8210561636972763, + 0.5100682676248621, + 0.2301543378964207, + 0.3098594294068969, + 0.5358635136308043, + 0.7370246057727959, + 0.03911780215610017, + 0.7378127257276237, + 0.18400793159004625, + 0.7091798962471825, + 0.06370669418549846, + 0.4063168712557551, + 0.7107726578731052, + 0.4618893177355189, + 0.7095125226495139, + 0.8670714994967497, + 0.22809095240124777, 
+ 0.05880537646869133, + 0.5298929116210432, + 0.8528097803865023, + 0.9005892446274537, + 0.5949525167060528, + 0.5938112615996213, + 0.49575370515318773, + 0.8666242808893687, + 0.7528146210914884, + 0.3839301152213451, + 0.31510156935696954, + 0.5685319150168796, + 0.03991562252345815, + 0.6816709404421922, + 0.38915361524653846, + 0.9866631700759481, + 0.5245367433724144, + 0.4857732468476693, + 0.8983709221933376, + 0.34025025337185943, + 0.4052597169904064, + 0.8803093157280004, + 0.6752201503377462, + 0.08001938106692708, + 0.6484164937184945, + 0.6152608670786123, + 0.357495391773093, + 0.7177437337126061, + 0.12832644471029053, + 0.8001621437854477, + 0.6707700673294981, + 0.2611074999259766, + 0.7983032965121164, + 0.34977038528047943, + 0.6193966613722293, + 0.07440340217285935, + 0.6057729826012523, + 0.0665490866055164, + 0.39836444518886793, + 0.7734862832192322, + 0.24672378847177845, + 0.87863845021853, + 0.875401709287835, + 0.823692453622643, + 0.33369029213475465, + 0.7696864745962337, + 0.30126055912894845, + 0.28150122926822385, + 0.955051417913947, + 0.5813354609183057, + 0.2523296534214674, + 0.21689893544524075, + 0.6541672883755825, + 0.15150484779457207, + 0.025913885688727767, + 0.781103812173443, + 0.734618934963856, + 0.38743052410911516, + 0.5159373693388118, + 0.7588917060893839, + 0.16648553837765123, + 0.26389163559059237, + 0.6004483094218385, + 0.8644785760271342, + 0.8361991657906871, + 0.7063286560859675, + 0.16890458218454985, + 0.6293960611507368, + 0.8112889143582506, + 0.5099896811864162, + 0.8863669282305683, + 0.6473600478947446, + 0.35939227256686235, + 0.4378730121588985, + 0.3275691575480527, + 0.03433193692593528, + 0.16026224951402612, + 0.7308148738318084, + 0.552090475662397, + 0.4504374980626351, + 0.020363962431700244, + 0.8628512064289966, + 0.7191615214103718, + 0.6584077510914033, + 0.14330376032033298, + 0.2970165476400587, + 0.5601120934399556, + 0.20574000017207572, + 0.5152895442072469, + 
0.38028960075973983, + 0.050257160794099365, + 0.30151295981109294, + 0.0021057497496854705, + 0.8261878283212919, + 0.17146554001048764, + 0.6738097530024986, + 0.09327759147312198, + 0.3536849738413955, + 0.11228781332639781, + 0.8210882710782528, + 0.4445959264172564, + 0.8014336209245658, + 0.8509964092036668, + 0.5477500527323662, + 0.2691906085957081, + 0.7835909913686487, + 0.849051825178111, + 0.5792762778563657, + 0.7533041790132994, + 0.13256725908267397, + 0.9179362683758442, + 0.8863549946705263, + 0.44208254228163224, + 0.7754019808591301, + 0.8406361231711952, + 0.0764603655374666, + 0.5137484895276897, + 0.8726022399293223, + 0.14023833978123312, + 0.5483690916833946, + 0.5656983210462431, + 0.482383375409575, + 0.07619846364492644, + 0.18316387409544688, + 0.879303530368276, + 0.10661218397092831, + 0.8062372237730675, + 0.3098725439415182, + 0.22071648567237323, + 0.5863235770743692, + 0.7098421004985631, + 0.30976768709180724, + 0.6883968659454052, + 0.05918779772449945, + 0.6194153843722903, + 0.26273996663519317, + 0.3987789287407194, + 0.6724273512701597, + 0.7930898465717866, + 0.7788779335484322, + 0.25756836191601196, + 0.999473250782789, + 0.03530809718383776, + 0.661341971400672, + 0.5130393847811372, + 0.5802105922737022, + 0.6811671101420056, + 0.863365320587163, + 0.4693116550647559, + 0.4379813756738612, + 0.025214500702691378, + 0.059401164027273023, + 0.7236155393273521, + 0.07109498240081846, + 0.9316705456321489, + 0.3243046119278893, + 0.36896539317171617, + 0.4460567550228205, + 0.5136483676638586, + 0.6241376172290012, + 0.4983811974398026, + 0.3588760385077524, + 0.9523113329990051, + 0.8570991086701192, + 0.8562349917934668, + 0.6905617202669856, + 0.04646700716314145, + 0.41757506828098856, + 0.4025391299982778, + 0.06828076453139653, + 0.8722736231201303, + 0.6173544703128612, + 0.04353431214247039, + 0.7262986047017908, + 0.20152147787998154, + 0.8631546880403342, + 0.7821130345092602, + 0.838987021979746, + 
0.3967459330172668, + 0.028021402977388754, + 0.7566259049757784, + 0.5056172871262352, + 0.3166497239755698, + 0.44973755378401914, + 0.990184642794674, + 0.07880676678140464, + 0.9135486984307961, + 0.9533447067811259, + 0.7476838976687108, + 0.19467699832682228, + 0.9738604421816968, + 0.3150039323613226, + 0.15193882242955847, + 0.7021605683382937, + 0.6157729814866728, + 0.9825727863358843, + 0.26639321412541483, + 0.35088370332876584, + 0.4865564762001132, + 0.6047630434485118, + 0.04329713777067834, + 0.1324385099236236, + 0.8931157111246396, + 0.7867645266689147, + 0.7482398978505693, + 0.378820263418541, + 0.2845995420246261, + 0.09413583852495055, + 0.019154387411292162, + 0.6992684431567863, + 0.815960138949849, + 0.9942823446999899, + 0.44938122845682404, + 0.688539063305475, + 0.824707575421611, + 0.42921049822972157, + 0.7346581020842596, + 0.0667700383351102, + 0.10871042223526806, + 0.9077205215122396, + 0.18378504322635592, + 0.025055744953504377, + 0.49577391111107316, + 0.7121485179592659, + 0.26079011553677434, + 0.47775539278048973, + 0.5164281218434722, + 0.1156930427438273, + 0.00040914849335071946, + 0.8492977525559297, + 0.031758005854524485, + 0.23112051769552866, + 0.6492385141002286, + 0.029510183871686912, + 0.9218944070024221, + 0.5403198501603163, + 0.44877236825611644, + 0.6382936783672084, + 0.5387078030344674, + 0.759207065953285, + 0.4313192072106907, + 0.9675862520792461, + 0.49982806452542183, + 0.015189836409553292, + 0.8395679422557788, + 0.5224158163699993, + 0.43478604943285437, + 0.9633540841892024, + 0.5839625732945432, + 0.774590930706371, + 0.13466499606914695, + 0.2435796231048577, + 0.522566825040792, + 0.9305232428961128, + 0.017946225105039515, + 0.861057619598813, + 0.3033712169428139, + 0.43076936388639864, + 0.7847222624068075, + 0.6041677832904309, + 0.21534142383749855, + 0.11596755977825313, + 0.3775345112017028, + 0.41142318612366724, + 0.9546926255200631, + 0.6465654220591646, + 0.9264905273637142, + 
0.5365613848803092, + 0.45861866088269054, + 0.37122943055420754, + 0.0659134188079431, + 0.575384307795183, + 0.21296495229018642, + 0.3593614749980121, + 0.3382507494844905, + 0.21021019870683055, + 0.37725965670949113, + 0.2568236881409004, + 0.6289804843153892, + 0.4961011162236566, + 0.8007980445861616, + 0.7535786191383734, + 0.95948206314619, + 0.534858938606668, + 0.44338836644383783, + 0.5803352195679893, + 0.11935602249436517, + 0.8214709985526613, + 0.30055088167791133, + 0.05736682646957014, + 0.5169783006735313, + 0.1642784549943468, + 0.7487129435345096, + 0.9468341101823562, + 0.08725040661699146, + 0.9915524323503714, + 0.5445926996363618, + 0.022915411666030505, + 0.10245462337834743, + 0.3616457075035595, + 0.14721812581251026, + 0.010014989598470136, + 0.9630860143606094, + 0.8052399706979069, + 0.40137599656630707, + 0.1404279187196622, + 0.20137886785098214, + 0.3579063484423751, + 0.42676199310708274, + 0.9215717742594494, + 0.5996794880732021, + 0.06080851911638141, + 0.5390326263539817, + 0.8728683302348244, + 0.4021562889647108, + 0.1881595273690141, + 0.9261251572187412, + 0.3583745264580933, + 0.018357870224868678, + 0.3206369705495875, + 0.09139504042159452, + 0.26030859409923823, + 0.5720900501584947, + 0.8261470103824744, + 0.29585322832215, + 0.16828149873182763, + 0.7702134243047803, + 0.6383816210042853, + 0.6971510722366991, + 0.594018508964686, + 0.2914949614277702, + 0.4993185703652937, + 0.3830984692417435, + 0.47918129148394517, + 0.448751442207432, + 0.22957777843199545, + 0.1951929506813953, + 0.03415943459763093, + 0.24646231625561643, + 0.5422820749763068, + 0.23084932542890113, + 0.504466276195762, + 0.5788988602042343, + 0.47017440229794716, + 0.8718294784738838, + 0.6100039279702728, + 0.19673630099299222, + 0.13048148839399076, + 0.09215206718565128, + 0.6752940473182572, + 0.15713838848203576, + 0.4672185904351155, + 0.12022092076211388, + 0.4262995883910501, + 0.7828965826062527, + 0.06518008271523712, + 
0.18704270411322188, + 0.5603338102420664, + 0.5458737156741609, + 0.8613074728573371, + 0.9696962327750315, + 0.6323932419925866, + 0.9372110451834712, + 0.660207896199546, + 0.13217955850215168, + 0.07490083182580931, + 0.7838960596194303, + 0.35825458536495036, + 0.5108597096703393, + 0.35726398658269365, + 0.20058308140657255, + 0.019327626718853574, + 0.7387671705507343, + 0.3937174861923438, + 0.008510787832375133, + 0.5510730577525527, + 0.8347068849187149, + 0.6225159224769454, + 0.6933457161935609, + 0.9558925677117541, + 0.6444743330512449, + 0.9393488911812377, + 0.4526759319581386, + 0.4158933063730582, + 0.12016199864755062, + 0.5996069815765964, + 0.37164847636198706, + 0.18468901247797698, + 0.10510058759858554, + 0.12435151906618414, + 0.29749616999796935, + 0.46715813618632074, + 0.4804676726366024, + 0.8902245778435506, + 0.5057004812902581, + 0.07615569063171634, + 0.45429796745433515, + 0.3651080307112111, + 0.42511316204195015, + 0.9626098434932129, + 0.1530836461596059, + 0.54267045165432, + 0.25292680893561725, + 0.8182479801385715, + 0.7698853585118213, + 0.5916533348108549, + 0.8792810536495096, + 0.2089269162682068, + 0.9417196873308148, + 0.3625045500279622, + 0.9358090816919976, + 0.024052448596603604, + 0.14073342898665964, + 0.7495888775620542, + 0.036473329045799696, + 0.5772977505605897, + 0.2753451585139085, + 0.2211513854935684, + 0.8449529341187868, + 0.770418639001008, + 0.25366055241951, + 0.40638114052795293, + 0.02410256499247654, + 0.25262964296752777, + 0.6646807551117294, + 0.6169935281231131, + 0.6235791692219148, + 0.19999129218380074, + 0.5650534343149088, + 0.22794126249415603, + 0.25584948869370094, + 0.16413526081599883, + 0.1313963139194788, + 0.38569745080888074, + 0.7389415625275032, + 0.5441323802579277, + 0.5452059434217853, + 0.06947574600276563, + 0.6857105985924492, + 0.5632978399533798, + 0.20145003184888133, + 0.2626428454878911, + 0.13469831550663058, + 0.8676005030876166, + 0.09791736235781578, + 
0.11290805888333277, + 0.5662789200531975, + 0.5532975152682471, + 0.1968909180252747, + 0.31597037379351867, + 0.5881064355067047, + 0.07251773586765198, + 0.5534495833779681, + 0.9279025151416528, + 0.5557836884305745, + 0.938545067080547, + 0.13066752895636757, + 0.1019356700266365, + 0.015775122665618535, + 0.568657489449308, + 0.9154431697609071, + 0.5227771050646528, + 0.11969991252955747, + 0.8114359444622794, + 0.09412961818641818, + 0.6712408193535799, + 0.6282017416458258, + 0.12744819746249358, + 0.47854001246251787, + 0.9839298276674067, + 0.6692011017518616, + 0.8912808265387039, + 0.5051343083044033, + 0.8325083920105506, + 0.9398988912330468, + 0.5379375205187265, + 0.6356036828595635, + 0.7553454815515278, + 0.12654731760892735, + 0.4690087096562523, + 0.4587125424654144, + 0.9183576757981867, + 0.6584090970256743, + 0.14209709890579214, + 0.7265702481178347, + 0.10849086041866862, + 0.8073526273336654, + 0.35398527427639515, + 0.8252440611360407, + 0.47149964590056326, + 0.8426569354165907, + 0.57592102066551, + 0.7885325934130915, + 0.9200469793978965, + 0.07934613208943198, + 0.9287719037399452, + 0.9649176284541376, + 0.019168212342419055, + 0.7437571021210193, + 0.33999813051887307, + 0.847134711166838, + 0.38983287632546026, + 0.1548551866964054, + 0.9979792007981645, + 0.8391828464918628, + 0.441421796033869, + 0.20244232054490086, + 0.443324028307855, + 0.41561130560895543, + 0.8822537368005906, + 0.8831408287031052, + 0.2778574239219269, + 0.21542874768277553, + 0.6061717378741044, + 0.10326764107877018, + 0.7344953591491642, + 0.47060159633211096, + 0.5123354361617505, + 0.6539332334641186, + 0.3992133202323006, + 0.18254161549534687, + 0.058057134761110074, + 0.6723192036260179, + 0.15067549236548006, + 0.17636860974647117, + 0.8989885003665586, + 0.8400102601558949, + 0.6008281021376233, + 0.652056430181526, + 0.18058193363664965, + 0.9823163772865428, + 0.14842412895958368, + 0.12715317180256747, + 0.45438993085727575, + 
0.43866553903102734, + 0.5511022465288102, + 0.3550824123946065, + 0.7838174733575789, + 0.756242190578806, + 0.2490126658182431, + 0.2529212885235895, + 0.17129797196932794, + 0.26642214642073836, + 0.8809106971011522, + 0.23618828594467878, + 0.9174914349211557, + 0.7198844173999696, + 0.25448097541528747, + 0.19439728153373115, + 0.7400895685795656, + 0.9988568054081151, + 0.4534192091123038, + 0.7066180958065236, + 0.22744002200445856, + 0.5316376374012981, + 0.6846970108520325, + 0.7337343607924894, + 0.35723502447477895, + 0.38799069435306865, + 0.346092615843013, + 0.5615960049498089, + 0.8430238289159098, + 0.0018169909764319536, + 0.38188645388447706, + 0.9000749801565469, + 0.8843081656921045, + 0.41818786510962536, + 0.1484893167838225, + 0.960339877225284, + 0.81130482270381, + 0.5962403553222159, + 0.406910233839033, + 0.6495033963061365, + 0.9290519402357441, + 0.27182288548855216, + 0.14043698203445898, + 0.619370759414419, + 0.4021505892513657, + 0.7063519653499207, + 0.9678696220349524, + 0.27502222100843166, + 0.04769612207710061, + 0.5304840353283953, + 0.8596418514219429, + 0.9138433236065812, + 0.5051265965928025, + 0.1216026126351939, + 0.2929852022034354, + 0.6967339210651523, + 0.8033475537524875, + 0.5972633299769876, + 0.8909755619856713, + 0.32431609129991623, + 0.6572970109178305, + 0.13976246089415156, + 0.0936832672562814, + 0.5613962300645747, + 0.0659752657250573, + 0.9693004345157341, + 0.8163117328131457, + 0.5526873331663072, + 0.6978948884635421, + 0.7174245908183247, + 0.9091388830379548, + 0.8771979443277648, + 0.36375201657166956, + 0.910229370490943, + 0.9062880995651658, + 0.2475472215075687, + 0.9571856532460695, + 0.7968218241169471, + 0.20907638557065278, + 0.37751534864520053, + 0.037949882135774105, + 0.8257318331263286, + 0.27802981765593937, + 0.23023314537609163, + 0.08024836180724249, + 0.9216515112010334, + 0.0562610751491196, + 0.1597219027175244, + 0.3741284901572979, + 0.8021974293096986, + 0.5356332244133167, + 
0.3020389784731201, + 0.30743894512692804, + 0.4717275978981591, + 0.04207245223788758, + 0.3353166557376429, + 0.6397302272328966, + 0.8978188831816213, + 0.9928808641739839, + 0.20705626436445768, + 0.5586436392983166, + 0.08866459568414498, + 0.9697752733284257, + 0.3500729289856048, + 0.8464180677251836, + 0.8910336803117367, + 0.059892161578305236, + 0.25944707558949975, + 0.6041304063308071, + 0.6915577734334685, + 0.9502471077244005, + 0.8496434872785175, + 0.739510601829694, + 0.7310969444239895, + 0.8111068174668262, + 0.3759104347982105, + 0.08614615527760616, + 0.002351361594640111, + 0.5305000972433925, + 0.4430564643445579, + 0.395698457717678, + 0.6593802042717537, + 0.25260995504654227, + 0.276888757980041, + 0.14824109250035444, + 0.120156717109008, + 0.2356758545567924, + 0.9748301399395413, + 0.3335376029634729, + 0.3997684071693288, + 0.8448004886090456, + 0.999081879860235, + 0.03417277183833334, + 0.9849649950124927, + 0.9365576354227314, + 0.7413654412396776, + 0.2872624199003777, + 0.22389456774063776, + 0.3439154683253449, + 0.38128333425424066, + 0.41315039732171477, + 0.2215788817506824, + 0.37582523682636737, + 0.24517399871061263, + 0.2874577559231949, + 0.5405637788016537, + 0.8307710818732983, + 0.5583899052772798, + 0.9338010172858654, + 0.33070273109013315, + 0.4902197736552901, + 0.995018981854085, + 0.6747786237477208, + 0.9889152104917611, + 0.49392441523838104, + 0.4105641545370827, + 0.21262950226199928, + 0.1424558268243563, + 0.37898271842180364, + 0.8040408493002821, + 0.8429439380401175, + 0.7694938192845128, + 0.24474072111054834, + 0.6422517529467944, + 0.8119916744878953, + 0.9399769624926795, + 0.0014711940100414767, + 0.46162815669882684, + 0.3266206034242365, + 0.565321938142971, + 0.4253483285484094, + 0.9928091707766786, + 0.3468335142394521, + 0.283908715578962, + 0.28339391086866017, + 0.1406419037056168, + 0.07460976447571721, + 0.3015118403856327, + 0.8364154726592714, + 0.5442707433576796, + 0.775238123224583, + 
0.6988074304502946, + 0.29666655999004965, + 0.5744485948659922, + 0.5888755029922519, + 0.4535295464291106, + 0.6605227926279009, + 0.8066858568892091, + 0.571278690686094, + 0.6617586254570986, + 0.7021116248860395, + 0.26768205385154675, + 0.545038690381253, + 0.4382250753201906, + 0.10144883548954509, + 0.8239354123367632, + 0.6545181151469046, + 0.0768187557466612, + 0.021407516615897015, + 0.5509736237670857, + 0.7516909401191861, + 0.16237100352121037, + 0.287106371912188, + 0.2285634248211067, + 0.6230602644460173, + 0.04719549146822144, + 0.8810004043241481, + 0.32270901647736383, + 0.25444507595862653, + 0.14339028033794787, + 0.7514732757874557, + 0.14477632594962486, + 0.2937049708158004, + 0.12706973907945962, + 0.10451657119866442, + 0.2540268576197062, + 0.1252098208554302, + 0.7330953794183517, + 0.027451485289788247, + 0.7851232328957864, + 0.6168961001228608, + 0.9830114574881325, + 0.7925852827606045, + 0.8544402034525523, + 0.44436522959295, + 0.039594924066016945, + 0.09929421237413627, + 0.5255931668080672, + 0.13793195442779416, + 0.425644297047336, + 0.9549422929536167, + 0.10132592572894616, + 0.16949514548953826, + 0.8619626960761596, + 0.2732361579539657, + 0.9926258038959537, + 0.9139894126187191, + 0.048185258169916256, + 0.905621787376743, + 0.12619973758742742, + 0.8566339455602747, + 0.3369789538642103, + 0.7862933916191656, + 0.7072084403876988, + 0.380779410279493, + 0.04597028724107077, + 0.01964407566073334, + 0.9568791026737442, + 0.7111457201095862, + 0.41945106668581733, + 0.4262980211831856, + 0.3036388335711423, + 0.4484500981533761, + 0.4119910540809639, + 0.9717990391943528, + 0.4871129614066185, + 0.7597339232609441, + 0.3798718685775986, + 0.062204309404876756, + 0.1737341920404033, + 0.876930194128651, + 0.13684873927826113, + 0.20164062502829172, + 0.49677910781133616, + 0.19109649769894033, + 0.6588944742305465, + 0.7820888988747644, + 0.9620462496740666, + 0.7511414143857221, + 0.9443104558395916, + 
0.2228072345083082, + 0.5544423577847644, + 0.2748291069197609, + 0.6546039352001982, + 0.3101574103985568, + 0.3022066718189498, + 0.48015140884500385, + 0.833403610831973, + 0.6627384455107815, + 0.8042894455759977, + 0.06790447224224527 +}; + +__attribute__((aligned(8))) double float_data_c [NN] = { + 0.20606228523522918952, + 0.97441859732474817300, + 0.15527265153496972341, + 0.33642464280908588075, + 0.14348659867598468686, + 0.83452889759477735040, + 0.87621407367522555984, + 0.62201494937775962039, + 0.08623698611166075290, + 0.46249135313080714284, + 0.72818222769443025329, + 0.60027007625344355150, + 0.53848450984750957099, + 0.62930852026238170136, + 0.86494547261887370360, + 0.67753169242676560865, + 0.97252145547032950575, + 0.80677816173651488806, + 0.80457619647578523730, + 0.50383105768873365140, + 0.74643736039310261017, + 0.69376681083121195046, + 0.01096729685093766997, + 0.77583761710275681690, + 0.03424502139316965802, + 0.84183360600564291231, + 0.15092703928418183016, + 0.14176099354124761195, + 0.87978314310320547038, + 0.00822499895519512911, + 0.78809838308390556373, + 0.25417775645052110993, + 0.28211315779746937668, + 0.69940790904762775529, + 0.14647381773416634883, + 0.30598731632799736409, + 0.88853062828857197252, + 0.25203153607179707354, + 0.62676560817852825985, + 0.97262835720885372304, + 0.27030820961528323072, + 0.90702612386614683318, + 0.02369828451891809747, + 0.09211837037294357832, + 0.65415408469582284970, + 0.66035290126816061481, + 0.11135796014156474106, + 0.77469220593244063129, + 0.79541788342138231227, + 0.51560549861852423936, + 0.41958189217525324297, + 0.00109512249770316047, + 0.36258151796124716404, + 0.26127297585097076000, + 0.16818526588690064483, + 0.09292524097400179603, + 0.76313062850376036686, + 0.59667128247755237458, + 0.64647704540690228565, + 0.34761618562118946657, + 0.26991421083984218358, + 0.40139441966525194937, + 0.64110442284450562867, + 0.47337490386366290476, + 0.07380130629767123846, + 
0.48069197714982657429, + 0.69202474546324387073, + 0.32499599277861047442, + 0.12470290139260654656, + 0.29227621296727895237, + 0.94984584328588770639, + 0.53779328153625523171, + 0.80898793325757489575, + 0.73805566206786265096, + 0.15196479386359918511, + 0.30133893285393620798, + 0.70271761048950338220, + 0.68892864415483874545, + 0.73723893564294040800, + 0.77739150948960210487, + 0.89083912529368778964, + 0.10203920506589836897, + 0.62966526791281042825, + 0.00523407394355834872, + 0.20025657099046001472, + 0.15277558307611855048, + 0.88236758394893312833, + 0.58274306795077350945, + 0.33573701524182020006, + 0.44350744026519084430, + 0.80719933350021094303, + 0.67491144001117346373, + 0.46127579517739736977, + 0.72462827369867516193, + 0.30710474891182515038, + 0.07161919689013785948, + 0.49736757620661929518, + 0.08311912091967322110, + 0.25407458696851353588, + 0.27328512335238645921, + 0.08895576923165627401, + 0.24184281727715584379, + 0.79701193191453032527, + 0.28816319189872239178, + 0.65423334220226766364, + 0.94643158417093645612, + 0.68697433237981296619, + 0.64734870764110424895, + 0.38974310113773075254, + 0.22185422356672284345, + 0.22639853051699665621, + 0.90694534443014981112, + 0.24773385255851865568, + 0.38596059064643356339, + 0.08733789819643711902, + 0.16881924566239181366, + 0.71240533987093344738, + 0.63826261037279137674, + 0.84650129948262654153, + 0.99300843292610824418, + 0.96819996190444973321, + 0.30922998541177585613, + 0.36678945773137547677, + 0.38980107397781645153, + 0.77518582314554586313, + 0.85172181291852968175, + 0.42604822347214524391, + 0.47014960968150795012, + 0.25352438702689406682, + 0.22944148463896020477, + 0.24364151249482258302, + 0.45957430678865255259, + 0.40369652615834831689, + 0.67993009679198938229, + 0.20582041521207795524, + 0.48640688527279163876, + 0.95768104731129863685, + 0.81651185316198050999, + 0.40788373839698244093, + 0.51388650305134280777, + 0.00922083256856953779, + 0.77565593490553886045, 
+ 0.93802168513589969900, + 0.83227108637206787648, + 0.59569660984783710945, + 0.33264814828409661611, + 0.71815220020528069155, + 0.39884621394609226506, + 0.79482480621739952774, + 0.20250684593784940128, + 0.26843210194123518197, + 0.22167021842084494401, + 0.66354864141465060517, + 0.07060446836637324876, + 0.59216436850698111388, + 0.29866086669618510745, + 0.72080940861474129358, + 0.78270521400663519189, + 0.19977849873680198081, + 0.39864322930974278369, + 0.65684848564370220532, + 0.84529433626799068906, + 0.95834268406140248233, + 0.61932893727133799675, + 0.66713185739791569756, + 0.70707530556143863243, + 0.17698783767118276618, + 0.84679019603446877774, + 0.96557475641711775790, + 0.74320533787303921858, + 0.07169618130058009313, + 0.32906081747021443379, + 0.12942394287111725109, + 0.31377175779381646985, + 0.66665099053271076555, + 0.78611694286934670127, + 0.35626235648455173083, + 0.98909242423647259011, + 0.48963697384078786794, + 0.92236855644579620247, + 0.86078061974502432118, + 0.02511191205335539122, + 0.11185938351970492120, + 0.49180270763991960159, + 0.40329737895514284904, + 0.63638506494614113304, + 0.54103788395283561056, + 0.34130846776261686558, + 0.38205205716454316423, + 0.98640612725324640919, + 0.43172472499027437826, + 0.95550339908024883680, + 0.10813293832643411755, + 0.34522675556452864713, + 0.93276247904242516711, + 0.91069116375162434809, + 0.38185088477829637399, + 0.36401458018886845076, + 0.34419315188208344956, + 0.16647741947079386667, + 0.06960500316071860849, + 0.03961818619445312518, + 0.92300644435057244503, + 0.27740736352252345416, + 0.56181379154805323483, + 0.83639842824564190283, + 0.75682186556234051298, + 0.20460131708900342636, + 0.67795417368955355216, + 0.03607407616794933141, + 0.32358451866194673261, + 0.19626911281439507447, + 0.59571241685807418599, + 0.07619865005404636762, + 0.05041780869027346555, + 0.27637758784061653531, + 0.87259662764820636889, + 0.87661092571066364110, + 
0.80177031158662028982, + 0.18514705503101336836, + 0.69495321100669795393, + 0.90996988655237745835, + 0.84018319487011371226, + 0.81925147031227642883, + 0.03013028704431464794, + 0.99417201755970920346, + 0.63833455637965466897, + 0.68118907685109340082, + 0.15494897315696077762, + 0.92680464074301472820, + 0.18419150633838632975, + 0.12023661142058639320, + 0.13880973759049886183, + 0.43508287431810255713, + 0.09332661433508616790, + 0.31715783473396513685, + 0.81172510002420242514, + 0.25985295872990466044, + 0.13677573740104320788, + 0.05208706696569387506, + 0.47061913592509125493, + 0.53182879700983863109, + 0.10245134961063651579, + 0.45600311180748914134, + 0.40100208620579053269, + 0.98405675238767956862, + 0.35920029776371924839, + 0.40043487653140656543, + 0.42580803775029252465, + 0.61016472144245751673, + 0.72986910589929349450, + 0.93993666701062338169, + 0.12257121604516263767, + 0.72159090882295924123, + 0.97944264164164766643, + 0.06581284852484892905, + 0.02650893248350006306, + 0.35897685485179214349, + 0.01781929292870465529, + 0.21353375281006926665, + 0.40179565113558109239, + 0.91334124407103648426, + 0.85264890804491873223, + 0.92118128092831417491, + 0.16278486118794566919, + 0.32994416320701283564, + 0.28169000402465131134, + 0.85731799952233616282, + 0.38988881183326555548, + 0.51627909199902918806, + 0.69216747703652554886, + 0.19457626242368187231, + 0.04481143461375125594, + 0.34914966419955656836, + 0.04609206222029849976, + 0.67337605249028728273, + 0.88471280293328161492, + 0.63133614548627851214, + 0.04867494560764818616, + 0.52218350854569684209, + 0.49047084573304534417, + 0.34391335844200238454, + 0.83089695285966254824, + 0.76335293933393535768, + 0.27517343384434472299, + 0.88740917814219592304, + 0.07780742439210886942, + 0.75654501960258010810, + 0.05517328915094954566, + 0.19901405948260775293, + 0.03396884683477952543, + 0.78446973066403210023, + 0.27372142976571387885, + 0.28776007829977990538, + 0.26799607827299421666, 
+ 0.81506810708494644941, + 0.83262477872978936986, + 0.76836988406808654084, + 0.89282606310427083394, + 0.36373590190192839032, + 0.46202004897205635311, + 0.98659101131127210874, + 0.36460479665151935890, + 0.55458217531272474657, + 0.49079694168919826994, + 0.60617236615861011911, + 0.20548225908208018682, + 0.09791440037527366835, + 0.31697346013869365866, + 0.72872214174666653260, + 0.42758980210933684197, + 0.90755776717908937015, + 0.91371414015882583325, + 0.66713797832170951863, + 0.52690611853960633185, + 0.41959200467992175489, + 0.40270235346995853763, + 0.68354801267712131870, + 0.55902343066331528041, + 0.72423031724084394472, + 0.48542182610295768337, + 0.75397956173814518715, + 0.16166653646703977240, + 0.53643075740799382104, + 0.02437362716983626517, + 0.96959374913357742215, + 0.37862284510608246801, + 0.30600220517188326472, + 0.75065735538931178834, + 0.08624324523996307705, + 0.91112605780749497432, + 0.16952389041073068899, + 0.07037040242982118045, + 0.74911329353377118187, + 0.67139470742275813975, + 0.85929301315027914693, + 0.35861173337437372697, + 0.14933544856264747095, + 0.60261282045834130821, + 0.64602454519150655978, + 0.77964781467793054074, + 0.48898052699866557624, + 0.17026198118409175579, + 0.41688564126052257964, + 0.71676806776355155291, + 0.46083785605368854210, + 0.12494492790444967214, + 0.13489720590306124473, + 0.52443313377613903955, + 0.67214710062396172488, + 0.05176517129044544482, + 0.57621619862269314591, + 0.18387920652254786733, + 0.00658738090520960557, + 0.79204649354073084477, + 0.95115087643469199609, + 0.19988281490284687966, + 0.87087895884446429908, + 0.91016088994825454165, + 0.42173735897988840944, + 0.52776787636995965022, + 0.30816537366685814294, + 0.36395746575037580042, + 0.12864683608594918050, + 0.79762440074831055225, + 0.28135763792856869749, + 0.75078331687265948656, + 0.18518282310738743633, + 0.58044310920943535674, + 0.14714445693711486328, + 0.20039556632226396454, + 
0.35534025835696799628, + 0.43866012376405262617, + 0.15638869903102971790, + 0.52822818908051809985, + 0.11510519324279134569, + 0.86984943550958253473, + 0.72857252642654025984, + 0.20661692489342742157, + 0.11393359517331314934, + 0.52781033611875969614, + 0.42376474496678995123, + 0.36590900050290565653, + 0.11196153187783076801, + 0.34559381384561216879, + 0.50330819595351958820, + 0.17199515839359436863, + 0.72338007516282855806, + 0.73384118672032227793, + 0.37275678300383464648, + 0.73491116478436775146, + 0.14415675765499647599, + 0.98132317103265174137, + 0.02019410966851187373, + 0.25449992268804826435, + 0.02208065674413584833, + 0.27100913343512198690, + 0.96851343446872114954, + 0.99344328697426525077, + 0.06797359508372547250, + 0.33796865495720117076, + 0.94076124274988184632, + 0.30579598080493859960, + 0.09372021627545635178, + 0.41810065708671884056, + 0.12420128661102403850, + 0.62738433368787183497, + 0.76943009389039439417, + 0.61172425534534486426, + 0.06940896877754614318, + 0.97001144366375721699, + 0.95229562497324679902, + 0.23777286449284762961, + 0.40306008758184625779, + 0.00486777576363317501, + 0.17179122903419175993, + 0.53987954383194316561, + 0.19585147477634853318, + 0.08851837099639215260, + 0.82850770433545744715, + 0.21973496208301435319, + 0.62912289873656518698, + 0.59739480418484160161, + 0.61918495799488977309, + 0.25984276492640745181, + 0.57857940098950960982, + 0.80151731937079229573, + 0.56714096802740233861, + 0.05151093369899151295, + 0.28399596178719531417, + 0.84983477706894926973, + 0.65281953224340345314, + 0.80189242127223833159, + 0.96935538790543687089, + 0.61311765420192179090, + 0.68991597974180622769, + 0.31429913104180635053, + 0.99436249426071634525, + 0.25907218556950346297, + 0.34759633950972911064, + 0.32853807931213359350, + 0.69251974058155657043, + 0.64174206267073498758, + 0.74482595756210363573, + 0.59599980081892176529, + 0.00760596871742058619, + 0.57415451990980840390, + 0.65882895037462428276, 
+ 0.22487891545795025012, + 0.72863396873810593739, + 0.91799030393814747797, + 0.55385838196968720510, + 0.71777411694769060500, + 0.62847961493180781978, + 0.93037193763720626277, + 0.17932887811887765265, + 0.92604106102568220558, + 0.71040718270171924835, + 0.40326459332451037369, + 0.61934683061396050683, + 0.70558641032121935806, + 0.55568875041835658314, + 0.80695887243720742102, + 0.75064598523133861739, + 0.39666151863005968532, + 0.40691145347858487496, + 0.67694047964297798625, + 0.89970510469704146393, + 0.25646605871911043504, + 0.01423055084870961172, + 0.20962038195150249332, + 0.08089906320925999817, + 0.14279997767776535649, + 0.38504275030055718305, + 0.48252776624808183950, + 0.69258745852233987698, + 0.34305812354716965078, + 0.27198018454186785247, + 0.40691083427823579407, + 0.92102982802530510338, + 0.79888778799110285890, + 0.30723064314639180326, + 0.29323362333168587172, + 0.35872605947413377976, + 0.18207945345355588612, + 0.83456007408639883509, + 0.82854636704706211789, + 0.11053743773163394755, + 0.91135387569883513323, + 0.83636352756363431749, + 0.05113557484725895795, + 0.10285482748157804793, + 0.37042774944683731433, + 0.70415585593908354302, + 0.16085028872916485675, + 0.53664357779514249968, + 0.85438435691051939397, + 0.75023911052716259243, + 0.47179400968905868915, + 0.44306227935773723830, + 0.10459266176774884951, + 0.62514468103868942440, + 0.45552971347807055167, + 0.87801955962381486532, + 0.18493629020999858406, + 0.36725262621127086033, + 0.21876399428453663330, + 0.50818036244302680934, + 0.41268016154573917132, + 0.57751012714769587219, + 0.05789257547930837286, + 0.35870252040877492448, + 0.51845515485934980408, + 0.59694156905406289899, + 0.46961911965698385588, + 0.89210331059735380660, + 0.08068347376541662951, + 0.83946992731480657486, + 0.76273630699829017281, + 0.06373186410206744707, + 0.52508068036993836713, + 0.06743941312479296434, + 0.69123487896507659419, + 0.68025493840429528418, + 
0.13948798689698727352, + 0.17383937840154884601, + 0.33169167039210997921, + 0.98727793137017464375, + 0.01655541387784831362, + 0.49144164906618686788, + 0.54294880087433336965, + 0.55788990674016959956, + 0.69746767405251027798, + 0.48237806747781729606, + 0.90315778115267266407, + 0.13545450625246476447, + 0.31257013818425171917, + 0.99458611329608076361, + 0.60424323155884728226, + 0.51172312693911398345, + 0.67462857361894861610, + 0.26067673118176973732, + 0.94028301159161635656, + 0.17373900078871447434, + 0.70043799888988191284, + 0.53382986792806316086, + 0.57864161747606150019, + 0.71260796249621844336, + 0.98515390396947917640, + 0.69517966332796479002, + 0.63891075395606122174, + 0.47263286592707389758, + 0.32740376014756833938, + 0.68642595294992741887, + 0.64680344298844060189, + 0.91528800309094860498, + 0.40160966440623694979, + 0.06309230121374065319, + 0.96179074866696256139, + 0.70560628588897057575, + 0.47404579625638343958, + 0.18635323386686012885, + 0.31937177181355788634, + 0.19835104468985731514, + 0.21533051464529462220, + 0.29443142359037512462, + 0.65599972032799080203, + 0.86195540060755958620, + 0.71520627999680969876, + 0.14752232553069755388, + 0.21640473071871236715, + 0.04669741190084464862, + 0.02712741164406645593, + 0.44280074138162643254, + 0.04158941561181728917, + 0.14194279661680498665, + 0.01526327943086769336, + 0.40225952741774057078, + 0.79278842072155740003, + 0.90781271190148273081, + 0.43142090898245644626, + 0.05523361893971679873, + 0.63981725540432547862, + 0.99672696927673840275, + 0.07266336031126129314, + 0.29610789732412450752, + 0.12763548408434455099, + 0.96497999944957077824, + 0.75583708874369842417, + 0.53709976089337959231, + 0.04304228381073360662, + 0.24058023426988007185, + 0.16988616769152176783, + 0.06949888998155396145, + 0.37965345759764735968, + 0.62108483316020010882, + 0.67797881640659684865, + 0.57557434569026273124, + 0.61173697683800271084, + 0.68920471626430222477, + 0.86576421526680230118, 
+ 0.53854251103082422299, + 0.09243884625432808586, + 0.24248373282118274784, + 0.50805838850044568603, + 0.96038821641616287100, + 0.02489152120307236475, + 0.00678852439326638283, + 0.03978585649960320054, + 0.03987035260609850406, + 0.23824325959734964271, + 0.49078530449898840353, + 0.13758269895799502856, + 0.12278113887579689570, + 0.47809053499379931626, + 0.02906658322346955556, + 0.21710303663834935744, + 0.71168209139593415979, + 0.49661356235241758045, + 0.79048191935022181989, + 0.47581378153657516629, + 0.33101987000855704786, + 0.08715234400339111176, + 0.99422579793872225083, + 0.76666970445517670767, + 0.42918331001766612755, + 0.29883102600029077286, + 0.72301074303130788933, + 0.92859112652926475441, + 0.55704416056529768150, + 0.77288005894008959813, + 0.35714196294492984867, + 0.58736467463433238115, + 0.32470816095199979323, + 0.52046439194487843300, + 0.56711150647490754638, + 0.76575396775643607080, + 0.95014812748506491460, + 0.39224670966993425519, + 0.25633610191540373095, + 0.10346437086270599677, + 0.69376689017412340910, + 0.90235697697543845873, + 0.13969794140193914935, + 0.03591341950109956119, + 0.16702025834056355808, + 0.04915563110867205241, + 0.37751319983958875699, + 0.57901070644844387230, + 0.86358220093812897982, + 0.49775518795087825983, + 0.63365625015114017064, + 0.19447161445571237998, + 0.65925052602327568127, + 0.92444973091094648301, + 0.24792047615807755297, + 0.75289879564314670354, + 0.05472349872196380447, + 0.46543886291911333323, + 0.75335294811244753235, + 0.13088869511380522798, + 0.06396610196305663601, + 0.22339846205928256926, + 0.60015811040125141022, + 0.62394274563157088640, + 0.34735001969485113576, + 0.38432929323520263950, + 0.73923022971863483568, + 0.23455214325685168686, + 0.65439201671293234995, + 0.08478961443688504713, + 0.35697291504044026941, + 0.34416504643227422573, + 0.06472304288744923711, + 0.84758124223064214577, + 0.77130265241027886525, + 0.24651204024374784467, + 
0.59494500187515512434, + 0.24204971648742998330, + 0.93069346795407220997, + 0.88038347903626589920, + 0.26119898052380718516, + 0.23221926955157183499, + 0.29126845444658988369, + 0.98078830521288614309, + 0.82671823136897824650, + 0.26212874063890357566, + 0.56238893771241682323, + 0.76156290252466887397, + 0.88069994490399644006, + 0.12636403991865197989, + 0.45846693020209036467, + 0.54405875145768343346, + 0.54355027626755374766, + 0.87254811639507891762, + 0.46805485050279405185, + 0.41940148190755877969, + 0.65398934285679726934, + 0.02204529254954390436, + 0.02211001671776169414, + 0.34610755200299537302, + 0.74391585331917131926, + 0.91495772500523749306, + 0.20315215581420532940, + 0.80690870022227434792, + 0.95126325962480260629, + 0.64044078893914636671, + 0.96092273698896874255, + 0.18396788952054244419, + 0.63487490473080540182, + 0.08669743173777332812, + 0.95481502589728323897, + 0.50967657807855499138, + 0.44017201467583833739, + 0.85005955830419141566, + 0.70328767282555922643, + 0.14527585585647596676, + 0.68277793545052836730, + 0.49556696547474834867, + 0.99571447586230264594, + 0.13527712669816327882, + 0.95791724826535166531, + 0.48743825369290694731, + 0.18702631073363973358, + 0.57953185785894662918, + 0.16736141954533723689, + 0.67692371172967569345, + 0.61320509141759963401, + 0.49001514412789393551, + 0.14873906636650471594, + 0.73885374346627099857, + 0.79031268072121476017, + 0.50285095328510146763, + 0.63973982355388384788, + 0.22654956700830997221, + 0.49297594816041299760, + 0.43674630624588495090, + 0.35831689491633745654, + 0.26728872300752670853, + 0.14981742934817688723, + 0.39028067478189203095, + 0.99661152757654988003, + 0.23964199719279167587, + 0.00685025286004370448, + 0.36033091957837915676, + 0.11355744664328515116, + 0.83574086882219590189, + 0.70976176745130485154, + 0.56943795810308284420, + 0.94192444079750096553, + 0.82274309855666074383, + 0.89664787131253182254, + 0.83277011227751830376, + 0.24273060885502643074, 
+ 0.98195116811369436064, + 0.18203653550196945016, + 0.18410503177915759714, + 0.85099957751968341452, + 0.17105785474719815952, + 0.63889746433284790550, + 0.03892814910010955595, + 0.89265534858072392674, + 0.55524124683201211029, + 0.19244075498380397745, + 0.51932318699238049959, + 0.86009959266390886385, + 0.48858822155045745378, + 0.08508344731155615637, + 0.86464679548957593991, + 0.57756935055968848486, + 0.14364710978701345293, + 0.81588842279098462933, + 0.35536608718347660257, + 0.49008572866130984587, + 0.91169847130929358190, + 0.56624733391914240029, + 0.60477207770140446783, + 0.76806147302044532481, + 0.37515188402611139046, + 0.62115037114510725529, + 0.96746979368389639442, + 0.86181283677106242234, + 0.30987858013808084832, + 0.62146127473789413714, + 0.79359746125291131164, + 0.34712017941730823761, + 0.17029063042665815134, + 0.82146530531138424002, + 0.26265723100710625109, + 0.13328926766326354448, + 0.04791785230305534955, + 0.98268477480586371497, + 0.30210788612599393959, + 0.79737321447262443142, + 0.18033989602246336488, + 0.62417300509091417666, + 0.99738831948843154670, + 0.11622936242082693673, + 0.12653636229158383740, + 0.19959343307413281721, + 0.92055926430376830947, + 0.99444405935688636425, + 0.97794411781820926469, + 0.41587934908678525074, + 0.56447481794373921724, + 0.58913244893583643904, + 0.12097142829539387665, + 0.90915397004772637208, + 0.69981904731763292380, + 0.44308780742637377965, + 0.48581679595169946319, + 0.77654167280418876395, + 0.18116156823304241651, + 0.87166044530617495569, + 0.78206216866560920403, + 0.35865511402872459460, + 0.79626372918464648907, + 0.00268895296704752930, + 0.47832394152744364346, + 0.04783190000297543676, + 0.71025420701103815732, + 0.00995655149103769355, + 0.68256409291726566723, + 0.61227735912163459076, + 0.54200892037775037920, + 0.62355302718665721630, + 0.81663085055743762151, + 0.39261127287778937658, + 0.31165972519280094800, + 0.74425376154811956452, + 
0.08513971790979413946, + 0.63843218232095240563, + 0.23317487918421542498, + 0.60433075624294991138, + 0.69961332243322228551, + 0.05753990882316173454, + 0.28386034944644256740, + 0.15045579819188892622, + 0.81508325586668435235, + 0.78880215664379283316, + 0.86465902574324760000, + 0.51767758698282769404, + 0.07717263637940786071, + 0.00355448648983502928, + 0.07636407975966253796, + 0.03939996924316324651, + 0.56267546263051788166, + 0.17995462245671412192, + 0.85745325152180755309, + 0.34933945520576698337, + 0.72515062703716027796, + 0.15296748032731016115, + 0.80035235390406987468, + 0.78750130893272374000, + 0.64548350417784872505, + 0.51237032119117327109, + 0.98903609333476564798, + 0.53943589833378553733, + 0.54867706387140105242, + 0.31498371893146493267, + 0.51406824127783413321, + 0.26274515229622664435, + 0.33980851354380460264, + 0.30090762181277386500, + 0.11357165437568773425, + 0.21484492650165448631, + 0.37727530068942083635, + 0.86792581879796236577, + 0.25866347800992434027, + 0.67733542642648333917, + 0.57852371426927439469, + 0.96782167638432046450, + 0.58139148148144384999, + 0.69570731724601490903, + 0.24200795655218954423, + 0.31611433477351714573, + 0.89961618146400679103, + 0.51132623288947144572, + 0.77087836136656531304, + 0.71879752805374419632, + 0.13470702531874912834, + 0.12724787483721922215, + 0.60432986046341928484, + 0.57802711562421953871, + 0.69694072057806111593, + 0.95450565227254848373, + 0.13413943620341828186, + 0.89716844501043616838, + 0.37234044968176452685, + 0.23707474465209732322, + 0.09743720672098088890, + 0.32621045430851126373, + 0.66822957047977297818, + 0.27732848540889554824, + 0.46757737660142507531, + 0.67064135158472056199, + 0.79462435756476820619, + 0.04039598732310234540, + 0.33226664561188976737, + 0.04606238933568074487, + 0.09288139804264246087, + 0.13965611811196033035, + 0.48161866067004315134, + 0.92730360515173079715, + 0.60570594914605413446, + 0.96617311906180536002, + 0.06881566112424053256, 
+ 0.76976573432712797166, + 0.94419080293736026408, + 0.17461969490690028352, + 0.75693254504766128070, + 0.18019965854630893725, + 0.78092031643535219132, + 0.51911171227398842296, + 0.47808409569572596267, + 0.86316958395267841726, + 0.02382509044708714519, + 0.09326625409503786450, + 0.30825151680219354010, + 0.57463248908846981598, + 0.27935442763343965339, + 0.96349234084933141714, + 0.68075681596552636512, + 0.01032868881818205255, + 0.33200976024478608827, + 0.22323167586790715384, + 0.88284610803739458652, + 0.84288830023362883694, + 0.98775806646556654168, + 0.64227626102467324821, + 0.12958567581265899753, + 0.41062692384070065925, + 0.25498105059099422827, + 0.38118132892897057842, + 0.15634177221959644733, + 0.42100988140696516769, + 0.52445706766668158938, + 0.12416905068828898188, + 0.57784257689311555765, + 0.28967362007913590666, + 0.45551389096322492055, + 0.72761084329919716670, + 0.09017240262561719791, + 0.82602321175554395778, + 0.68147434389541028807, + 0.80447886879781088047, + 0.88235741997574567829, + 0.39952516602713284879, + 0.47445591230907114794, + 0.97331724386505189187, + 0.25488957721781325027, + 0.01545207364151424167, + 0.04814615878035458333, + 0.28835284032376750274, + 0.79876166904213449135, + 0.63799319433368267435, + 0.29434168203488530406, + 0.72739418151352558451, + 0.91506135842698816092, + 0.85498030992728540647, + 0.54792920722375537727, + 0.25680010437835308796, + 0.23868820660447054284, + 0.69061664045618632518, + 0.80182466021600855994, + 0.05043564694920832416, + 0.18815255235027506279, + 0.09662645093814415875, + 0.37698505747932272050, + 0.73724596805449539780, + 0.69313195911805180757, + 0.18123883170652932827, + 0.16734423052205483999, + 0.68337370785922072031, + 0.72346412281148926516, + 0.98856642605699222044, + 0.06419975210251705182, + 0.43245328810081025008, + 0.95089541206884764428, + 0.80646774282220293848, + 0.37449673743936345613, + 0.73600724805295183513, + 0.56298760866691894097, + 
0.18154843972018286136, + 0.64205837317930876468, + 0.97703178915818642906, + 0.98483114130997542274, + 0.85075665866453556997, + 0.20826227327326149278, + 0.02451473228287948273, + 0.71852639456524491102, + 0.02651913137538242521, + 0.08716590038640948247, + 0.58122030018534173925, + 0.57279112926688983033, + 0.98420187737238812361, + 0.97989381697835392083, + 0.08238849511088764068, + 0.70248863069818018227, + 0.87351228039932275164, + 0.50600127613507258334, + 0.61337265517418554044, + 0.59759228966577248179, + 0.62929343631990340090, + 0.42271064883701395385, + 0.04770197468523873593, + 0.54583150676702040354, + 0.01109998648237475773, + 0.80173996270821599242, + 0.81669481294747162192, + 0.26332521334033725489, + 0.20930201576468677809, + 0.15515407678251514634, + 0.76879175078530755452, + 0.17171983435520655700, + 0.40173136421181055384, + 0.05315059256127863110, + 0.53930146834189540388, + 0.97602282078746786090, + 0.97160032610225386863, + 0.11837388277568273375, + 0.99687069353187421429, + 0.90584619533513149425, + 0.14203438036394977821 +}; diff --git a/sw/tests/uart_fast.c b/sw/tests/uart_fast.c new file mode 100644 index 00000000..6f31f0f6 --- /dev/null +++ b/sw/tests/uart_fast.c @@ -0,0 +1,26 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 +// +// Nicole Narr +// Christopher Reinwardt +// Paul Scheffler + +#include "regs/cheshire.h" +#include "dif/clint.h" +#include "dif/uart.h" +#include "params.h" +#include "util.h" + +int main(void) { + char str[] = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor\r\n" + "incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim veniam, quis\r\n" + "nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.\r\n"; + uint32_t rtc_freq = *reg32(&__base_regs, CHESHIRE_RTC_FREQ_REG_OFFSET); + uint64_t reset_freq = clint_get_core_freq(rtc_freq, 2500); + uart_init(&__base_uart, reset_freq, __BOOT_BAUDRATE * 8); + uart_write_str(&__base_uart, str, sizeof(str) - 1); + uart_write_flush(&__base_uart); + return 0; +} diff --git a/target/sim/src/fixture_cheshire_soc.sv b/target/sim/src/fixture_cheshire_soc.sv index 056f0b33..34eb0a9c 100644 --- a/target/sim/src/fixture_cheshire_soc.sv +++ b/target/sim/src/fixture_cheshire_soc.sv @@ -8,8 +8,9 @@ module fixture_cheshire_soc #( /// The selected simulation configuration from the `tb_cheshire_pkg`. - parameter int unsigned SelectedCfg = 32'd0, - parameter bit UseDramSys = 1'b0 + parameter int unsigned SelectedCfg = 32'd0, + parameter bit UseDramSys = 1'b0, + parameter int unsigned UartBaudRate = 115200 ); `include "cheshire/typedef.svh" @@ -170,6 +171,7 @@ module fixture_cheshire_soc #( vip_cheshire_soc #( .DutCfg ( DutCfg ), .UseDramSys ( UseDramSys ), + .UartBaudRate ( UartBaudRate ), .axi_ext_llc_req_t ( axi_llc_req_t ), .axi_ext_llc_rsp_t ( axi_llc_rsp_t ), .axi_ext_mst_req_t ( axi_mst_req_t ), diff --git a/target/sim/src/tb_cheshire_soc.sv b/target/sim/src/tb_cheshire_soc.sv index 57a15e8b..fd571c3c 100644 --- a/target/sim/src/tb_cheshire_soc.sv +++ b/target/sim/src/tb_cheshire_soc.sv @@ -7,13 +7,15 @@ module tb_cheshire_soc #( /// The selected simulation configuration from the `tb_cheshire_pkg`. 
- parameter int unsigned SelectedCfg = 32'd0, - parameter bit UseDramSys = 1'b0 + parameter int unsigned SelectedCfg = 32'd0, + parameter bit UseDramSys = 1'b0, + parameter int unsigned UartBaudRate = 115200 ); fixture_cheshire_soc #( .SelectedCfg (SelectedCfg), - .UseDramSys (UseDramSys) + .UseDramSys (UseDramSys), + .UartBaudRate (UartBaudRate) ) fix(); string preload_elf; diff --git a/target/sim/vcs/start.cheshire_soc.sh b/target/sim/vcs/start.cheshire_soc.sh index 6352c520..db3e63fc 100755 --- a/target/sim/vcs/start.cheshire_soc.sh +++ b/target/sim/vcs/start.cheshire_soc.sh @@ -29,6 +29,7 @@ fi flags+="-cpp ${CXX_PATH} " [[ -n "${SELCFG}" ]] && flags+="-pvalue+SelectedCfg=${SELCFG} " +[[ -n "${UARTBAUD}" ]] && flags+="-pvalue+UartBaudRate=${UARTBAUD} " pargs="" [[ -n "${BOOTMODE}" ]] && pargs+="+BOOTMODE=${BOOTMODE} " diff --git a/target/sim/vsim/start.cheshire_soc.tcl b/target/sim/vsim/start.cheshire_soc.tcl index e6a005d4..02d4f334 100644 --- a/target/sim/vsim/start.cheshire_soc.tcl +++ b/target/sim/vsim/start.cheshire_soc.tcl @@ -29,6 +29,7 @@ if { ![info exists VOPTARGS] } { set flags "-suppress 3009 -suppress 8386 -error 7 -cpppath ${CXX_PATH} " if { [info exists SELCFG] } { append flags "-GSelectedCfg=${SELCFG} " } +if { [info exists UARTBAUD] } { append flags "-GUartBaudRate=${UARTBAUD} " } set pargs "" if { [info exists BOOTMODE] } { append pargs "+BOOTMODE=${BOOTMODE} " } From 8ddc0c02937740f2fb21bb7471b746416b017663 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 14:48:52 +0200 Subject: [PATCH 23/39] target/sim/verilator: Properly measure simulation rate without preloading --- target/sim/verilator/sim/Mem64Master.h | 4 +++ target/sim/verilator/sim/main.cpp | 44 ++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/target/sim/verilator/sim/Mem64Master.h b/target/sim/verilator/sim/Mem64Master.h index c219e242..35e6dd7d 100644 --- a/target/sim/verilator/sim/Mem64Master.h +++ 
b/target/sim/verilator/sim/Mem64Master.h @@ -60,6 +60,10 @@ class Mem64Master { *m_mem_be_o = 0; } + bool has_write() { + return !m_write_request_queue.empty(); + } + void write(uint64_t addr, uint64_t data) { m_write_request_queue.push({ addr, data }); } diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index c46c10eb..c9e0a9f1 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -17,7 +17,7 @@ #define RST_CYCLES 5 -#define SIMULATION_RATE_CHUNK 10000 +#define SIMULATION_RATE_CHUNK 50000 // #define BENCHMARK @@ -38,6 +38,11 @@ extern int jtag_tick(int port, unsigned char *jtag_TCK, unsigned char *jtag_TMS, unsigned char *jtag_TDI, unsigned char *jtag_TRSTn, unsigned char jtag_TDO); +double get_seconds() { + auto duration = std::chrono::system_clock::now().time_since_epoch(); + return std::chrono::duration_cast<std::chrono::microseconds>(duration).count() / 1e6; /* microsecond ticks -> seconds */ +} + static void jtag_tick_io() { static int count = 0; if (count < 10) { @@ -161,6 +166,17 @@ int main(int argc, char** argv) { // This needs to be called before you create any model contextp->commandArgs(argc, argv); + const char *filename_plusarg = contextp->commandArgsPlusMatch("BINARY="); + if (strlen(filename_plusarg) == 0) { + VL_PRINTF("Error: no binary specified (+BINARY=...)\n"); + return 1; + } + const char *filename = filename_plusarg + strlen("+BINARY="); + if (!filename || strlen(filename) == 0 || filename[0] != '/') { + VL_PRINTF("Error: +BINARY requires absolute path\n"); + return 1; + } + // "TOP" will be the hierarchical name of the module topp = std::make_unique(contextp.get(), "TOP"); @@ -182,6 +198,12 @@ int main(int argc, char** argv) { uint64_t next_rtc_toggle_ps = 0; bool reset_done = false; + uint64_t start_cycle = cycle; + double start_time = get_seconds(); /* double, not uint64_t: keep sub-second precision for the rate statistics */ + + uint64_t preload_done_cycle = 0; + double preload_done_time = -1; + mem_master = std::make_unique( &topp->slink_mem_req_i, &topp->slink_mem_addr_i, @@ -194,7 +216,6 @@ int
main(int argc, char** argv) { ); // ELF preloading - const char *filename = "../../../sw/tests/helloworld.spm.elf"; if (!elf_preload_open(filename)) return 1; @@ -227,6 +248,12 @@ int main(int argc, char** argv) { if (cycle == 2000) elf_preload_write_enqueue(); + if (cycle > 2000 && !preload_done_cycle && !mem_master->has_write()) { + preload_done_cycle = cycle; + preload_done_time = get_seconds(); + VL_PRINTF("[ELF] preload complete\n"); + } + // I/O if (reset_done) { poll_for_exit(); @@ -281,6 +308,18 @@ int main(int argc, char** argv) { } } + uint64_t exit_cycle = cycle; + double exit_time = get_seconds(); + + auto total_cycles = exit_cycle - start_cycle; + auto total_time = exit_time - start_time; + VL_PRINTF("[STAT] total: %.3f seconds, %lu cycles, %.1f kHz\n", + total_time, total_cycles, total_cycles / total_time / 1e3); + auto run_cycles = exit_cycle - preload_done_cycle; + auto run_time = exit_time - preload_done_time; + VL_PRINTF("[STAT] after preloading: %.3f seconds, %lu cycles, %.1f kHz\n", + run_time, run_cycles, run_cycles / run_time / 1e3); + // Final model cleanup topp->final(); @@ -295,6 +334,7 @@ int main(int argc, char** argv) { #endif // Final simulation summary + VL_PRINTF("\n"); contextp->statsPrintSummary(); // Return good completion status From 19d2d3146d6d960280830f5e0f52f7d3e6af7715 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 14:50:12 +0200 Subject: [PATCH 24/39] target/sim/verilator: Speed-up preloading by using bigger axi_from_mem queue --- target/sim/verilator/src/cheshire_soc_wrapper.sv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index a07736d1..ebba1cbb 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -271,7 +271,7 @@ module cheshire_soc_wrapper # ( .MemAddrWidth ( DutCfg.AddrWidth ), .AxiAddrWidth ( DutCfg.AddrWidth ), 
.DataWidth ( DutCfg.AxiDataWidth ), - .MaxRequests ( 8 ), + .MaxRequests ( 64 ), .AxiProt ( 3'b000 ), .axi_req_t ( axi_mst_req_t ), .axi_rsp_t ( axi_mst_rsp_t ) From 33a0d2e9391188b509b477aca1e85ff1b5ecb8ff Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 14:54:53 +0200 Subject: [PATCH 25/39] target/sim/verilator: Use taskset to avoid slow parallel execution --- target/sim/verilator/verilator.mk | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index f4ae94a5..bdaea719 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -11,6 +11,8 @@ RISCV_DBG_DIR = $(shell bender path riscv-dbg) VERILATOR_PREFIX ?= oseda VERILATOR ?= verilator +CHS_VERILATOR_THREADS ?= 4 + # Silly Verilator warnings: these are perfectly valid and should not be warnings VERILATOR_WNO = -Wno-fatal -Wno-style \ -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT -Wno-ASCRANGE @@ -20,7 +22,7 @@ VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions VERILATOR_ARGS += -DASSERTS_OFF # multithreading -VERILATOR_ARGS += --threads 8 +VERILATOR_ARGS += --threads $(CHS_VERILATOR_THREADS) # C++ Compiler Optimization VERILATOR_ARGS += -CFLAGS "-O3" -CFLAGS "-march=native" -CFLAGS "-mtune=native" # Use Clang (faster simulation than GCC) @@ -53,6 +55,6 @@ $(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verila @echo "#!/bin/sh" > $@ @echo 'set -eu' >> $@ @echo 'cd $$(dirname "$$0")' >> $@ - @echo '$(VERILATOR_PREFIX) ./obj_dir/Vcheshire_soc_wrapper "$$@"' >> $@ + @echo 'taskset -c 0-$(shell expr $(CHS_VERILATOR_THREADS) - 1) $(VERILATOR_PREFIX) ./obj_dir/Vcheshire_soc_wrapper "$$@"' >> $@ @chmod +x $@ From 9a71b76cf802f4b59b4437ad647cf87cb2165f22 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 8 Jul 2025 17:09:47 +0200 Subject: [PATCH 26/39] target/sim/verilator: Implement fake DRAM 
using tc_sram --- target/sim/verilator/sim/main.cpp | 3 +- .../sim/verilator/src/cheshire_soc_wrapper.sv | 93 ++++++++++++------- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index c9e0a9f1..f3f53c8d 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -256,7 +256,8 @@ int main(int argc, char** argv) { // I/O if (reset_done) { - poll_for_exit(); + if (preload_done_cycle > 0) + poll_for_exit(); #if 0 jtag_tick_io(); #endif diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index ebba1cbb..2c4eef4e 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -169,38 +169,67 @@ module cheshire_soc_wrapper # ( // DRAM // //////////// - // axi_sim_mem #( - // .AddrWidth ( DutCfg.AddrWidth ), - // .DataWidth ( DutCfg.AxiDataWidth ), - // .IdWidth ( $bits(axi_llc_id_t) ), - // .UserWidth ( DutCfg.AxiUserWidth ), - // .axi_req_t ( axi_llc_req_t ), - // .axi_rsp_t ( axi_llc_rsp_t ), - // .WarnUninitialized ( 0 ), - // .ClearErrOnAccess ( 1 ), - // .ApplDelay ( 0ps ), - // .AcqDelay ( 0ps ) - // ) i_dram_sim_mem ( - // .clk_i ( clk_i ), - // .rst_ni ( rst_ni ), - // .axi_req_i ( axi_llc_mst_req ), - // .axi_rsp_o ( axi_llc_mst_rsp ), - // .mon_w_valid_o ( ), - // .mon_w_addr_o ( ), - // .mon_w_data_o ( ), - // .mon_w_id_o ( ), - // .mon_w_user_o ( ), - // .mon_w_beat_count_o ( ), - // .mon_w_last_o ( ), - // .mon_r_valid_o ( ), - // .mon_r_addr_o ( ), - // .mon_r_data_o ( ), - // .mon_r_id_o ( ), - // .mon_r_user_o ( ), - // .mon_r_beat_count_o ( ), - // .mon_r_last_o ( ) - // ); - assign axi_llc_mst_rsp = '0; + // Emulate DRAM using an AXI-to-MEM adapter and a huge tc_sram spanning the full address space. 
+ + logic dram_mem_req; + logic [DutCfg.AddrWidth-1:0] dram_mem_addr; + logic [DutCfg.AxiDataWidth-1:0] dram_mem_wdata; + logic [DutCfg.AxiDataWidth/8-1:0] dram_mem_strb; + logic dram_mem_we; + logic dram_mem_rvalid; + logic [DutCfg.AxiDataWidth-1:0] dram_mem_rdata; + + axi_to_mem #( + .axi_req_t ( axi_llc_req_t ), + .axi_resp_t ( axi_llc_rsp_t ), + .AddrWidth ( DutCfg.AddrWidth ), + .DataWidth ( DutCfg.AxiDataWidth ), + .IdWidth ( $bits(axi_llc_id_t) ), + .NumBanks ( 1 ), + .BufDepth ( 1 ), + .HideStrb ( 1'b1 ), + .OutFifoDepth ( 1 ) + ) i_dram_axi ( + .clk_i, + .rst_ni, + .busy_o ( ), + .axi_req_i ( axi_llc_mst_req ), + .axi_resp_o ( axi_llc_mst_rsp ), + .mem_req_o ( dram_mem_req ), + .mem_gnt_i ( 1'b1 ), + .mem_addr_o ( dram_mem_addr ), + .mem_wdata_o ( dram_mem_wdata ), + .mem_strb_o ( dram_mem_strb ), + .mem_atop_o ( ), + .mem_we_o ( dram_mem_we ), + .mem_rvalid_i ( dram_mem_rvalid ), + .mem_rdata_i ( dram_mem_rdata ) + ); + + // NOTE: This strategy ceases to work once we overflow 32-bit integers for NumDramWords, as + // tc_sram was never design for things like this. 
+ localparam int unsigned DramDataWidth = $clog2(DutCfg.LlcOutRegionEnd - DutCfg.LlcOutRegionStart); + localparam int unsigned NumDramWords = (1 << DramDataWidth) / DutCfg.AxiDataWidth; + tc_sram #( + .NumWords ( NumDramWords ), + .DataWidth ( DutCfg.AxiDataWidth ), + .ByteWidth ( 8 ), + .NumPorts ( 1 ), + .Latency ( 1 ) + ) i_dram ( + .clk_i, + .rst_ni, + .req_i ( dram_mem_req ), + .we_i ( dram_mem_we ), + .addr_i ( dram_mem_addr[DramDataWidth-1:0] ), + .wdata_i ( dram_mem_wdata ), + .be_i ( dram_mem_strb ), + .rdata_o ( dram_mem_rdata ) + ); + + logic dram_mem_req_q; + `FF(dram_mem_req_q, dram_mem_req, 1'b0); + assign dram_mem_rvalid = dram_mem_req_q; //////////// // UART // From c5d69fbc8d5e33c437390c0617dba986f43dd77c Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Wed, 9 Jul 2025 10:44:42 +0200 Subject: [PATCH 27/39] target/sim/verilator: Split R/W channels for DRAM to avoid DMA lock-up --- .../sim/verilator/src/cheshire_soc_wrapper.sv | 75 +++++++++++-------- 1 file changed, 44 insertions(+), 31 deletions(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 2c4eef4e..cd27204a 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -169,34 +169,41 @@ module cheshire_soc_wrapper # ( // DRAM // //////////// - // Emulate DRAM using an AXI-to-MEM adapter and a huge tc_sram spanning the full address space. + // Emulate DRAM using an AXI-to-MEM adapter and a tc_sram - logic dram_mem_req; - logic [DutCfg.AddrWidth-1:0] dram_mem_addr; - logic [DutCfg.AxiDataWidth-1:0] dram_mem_wdata; - logic [DutCfg.AxiDataWidth/8-1:0] dram_mem_strb; - logic dram_mem_we; - logic dram_mem_rvalid; - logic [DutCfg.AxiDataWidth-1:0] dram_mem_rdata; + // NOTE: This strategy ceases to work once we overflow 32-bit integers for NumDramWords, as + // tc_sram was never designed for things like this. 
+ // This also assumes that the DRAM region is naturally aligned to a power of two. + localparam int unsigned DramAddrWidth = $clog2(DutCfg.LlcOutRegionEnd - DutCfg.LlcOutRegionStart); + localparam int unsigned DramDataWidth = DutCfg.AxiDataWidth; + + logic [1:0] dram_mem_req; + logic [1:0][DramAddrWidth-1:0] dram_mem_addr; + logic [1:0][DramDataWidth-1:0] dram_mem_wdata; + logic [1:0][DramDataWidth/8-1:0] dram_mem_strb; + logic [1:0] dram_mem_we; + logic [1:0] dram_mem_rvalid; + logic [1:0][DramDataWidth-1:0] dram_mem_rdata; - axi_to_mem #( + axi_to_mem_split #( .axi_req_t ( axi_llc_req_t ), .axi_resp_t ( axi_llc_rsp_t ), - .AddrWidth ( DutCfg.AddrWidth ), - .DataWidth ( DutCfg.AxiDataWidth ), + .AddrWidth ( DramAddrWidth ), + .AxiDataWidth ( DramDataWidth ), .IdWidth ( $bits(axi_llc_id_t) ), - .NumBanks ( 1 ), + .MemDataWidth ( DramDataWidth ), .BufDepth ( 1 ), .HideStrb ( 1'b1 ), .OutFifoDepth ( 1 ) ) i_dram_axi ( .clk_i, .rst_ni, + .test_i ( 1'b0 ), .busy_o ( ), .axi_req_i ( axi_llc_mst_req ), .axi_resp_o ( axi_llc_mst_rsp ), .mem_req_o ( dram_mem_req ), - .mem_gnt_i ( 1'b1 ), + .mem_gnt_i ( 2'b11 ), .mem_addr_o ( dram_mem_addr ), .mem_wdata_o ( dram_mem_wdata ), .mem_strb_o ( dram_mem_strb ), @@ -206,29 +213,35 @@ module cheshire_soc_wrapper # ( .mem_rdata_i ( dram_mem_rdata ) ); - // NOTE: This strategy ceases to work once we overflow 32-bit integers for NumDramWords, as - // tc_sram was never design for things like this. 
- localparam int unsigned DramDataWidth = $clog2(DutCfg.LlcOutRegionEnd - DutCfg.LlcOutRegionStart); - localparam int unsigned NumDramWords = (1 << DramDataWidth) / DutCfg.AxiDataWidth; + localparam int unsigned DramWordAddrWidth = DramAddrWidth - $clog2(DramDataWidth / 8); + localparam int unsigned NumDramWords = 1 << DramWordAddrWidth; + + // Translate byte addresses (from axi_to_mem_split) to word addresses (for tc_sram) + logic [1:0][DramWordAddrWidth-1:0] dram_mem_word_addr; + assign dram_mem_word_addr = { + dram_mem_addr[1][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth], + dram_mem_addr[0][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth] + }; + tc_sram #( - .NumWords ( NumDramWords ), - .DataWidth ( DutCfg.AxiDataWidth ), - .ByteWidth ( 8 ), - .NumPorts ( 1 ), - .Latency ( 1 ) - ) i_dram ( + .NumWords ( NumDramWords ), + .DataWidth ( DramDataWidth ), + .ByteWidth ( 8 ), + .NumPorts ( 2 ), + .Latency ( 1 ) + ) i_dram( .clk_i, .rst_ni, - .req_i ( dram_mem_req ), - .we_i ( dram_mem_we ), - .addr_i ( dram_mem_addr[DramDataWidth-1:0] ), - .wdata_i ( dram_mem_wdata ), - .be_i ( dram_mem_strb ), - .rdata_o ( dram_mem_rdata ) + .req_i ( dram_mem_req ), + .we_i ( dram_mem_we ), + .addr_i ( dram_mem_word_addr ), + .wdata_i ( dram_mem_wdata ), + .be_i ( dram_mem_strb ), + .rdata_o ( dram_mem_rdata ) ); - logic dram_mem_req_q; - `FF(dram_mem_req_q, dram_mem_req, 1'b0); + logic [1:0] dram_mem_req_q; + `FF(dram_mem_req_q, dram_mem_req, 2'b0); assign dram_mem_rvalid = dram_mem_req_q; //////////// From 98aee59e6a8ecaea6bd975dbe404ba64329f2181 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Wed, 9 Jul 2025 12:02:44 +0200 Subject: [PATCH 28/39] sw/tests/dma_long: Reduce # repetitions --- sw/tests/dma_long.spm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sw/tests/dma_long.spm.c b/sw/tests/dma_long.spm.c index 4d598df4..9afcbddf 100644 --- a/sw/tests/dma_long.spm.c +++ b/sw/tests/dma_long.spm.c @@ -23,7 +23,7 @@ int main(void) { for (int i = 1; i <= 11; 
++i) { fence(); uint64_t len = 1 << i; - uint64_t reps = 1024*1024 >> i; + uint64_t reps = 64*1024 >> i; mcycle_start = get_mcycle(); From c8788e34c2576aac41df4748d752a4124759c873 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 10:21:27 +0200 Subject: [PATCH 29/39] target/sim/verilator: Use define to disable instruction tracer --- target/sim/verilator/verilator.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index bdaea719..e4e0d9fb 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -21,6 +21,8 @@ VERILATOR_ARGS ?= -j 0 -Wall $(VERILATOR_WNO) -timescale 1ns/1ps VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions VERILATOR_ARGS += -DASSERTS_OFF +# Disable CVA6 instruction tracer +VERILATOR_ARGS += -DCVA6_NO_TRACE # multithreading VERILATOR_ARGS += --threads $(CHS_VERILATOR_THREADS) # C++ Compiler Optimization From 0ac7b4324744b54364ee2d67928fd82adf7b90cd Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 10:24:55 +0200 Subject: [PATCH 30/39] target/sim/verilator: Work around assignment issues using split_var directives --- .gitignore | 1 + hw/cheshire_soc.sv | 4 ++-- target/sim/verilator/config.vlt | 7 +++++++ target/sim/verilator/verilator.mk | 8 ++++---- 4 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 target/sim/verilator/config.vlt diff --git a/.gitignore b/.gitignore index 80ef51f9..41a54a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,7 @@ target/sim/verilator/obj_dir/ target/sim/verilator/*.flist target/sim/verilator/*.fst target/sim/verilator/*.vlt +!target/sim/verilator/config.vlt target/sim/verilator/*.vcd target/sim/verilator/logs/ target/sim/verilator/trace_hart_*.dasm diff --git a/hw/cheshire_soc.sv b/hw/cheshire_soc.sv index b3b62544..b612f448 100644 --- a/hw/cheshire_soc.sv +++ b/hw/cheshire_soc.sv @@ -588,8 +588,8 @@ module 
cheshire_soc import cheshire_pkg::*; #( assign intr.intn.bus_err.cores = core_bus_err_intr_comb; for (genvar i = 0; i < NumIntHarts; i++) begin : gen_cva6_cores - axi_cva6_req_t core_out_req, core_ur_req; - axi_cva6_rsp_t core_out_rsp, core_ur_rsp; + axi_cva6_req_t core_out_req, core_ur_req /*verilator split_var*/; + axi_cva6_rsp_t core_out_rsp, core_ur_rsp /*verilator split_var*/; // CLIC interface logic clic_irq_valid, clic_irq_ready; diff --git a/target/sim/verilator/config.vlt b/target/sim/verilator/config.vlt new file mode 100644 index 00000000..21d5813b --- /dev/null +++ b/target/sim/verilator/config.vlt @@ -0,0 +1,7 @@ +`verilator_config + +// CVA6: avoid wrong runtime results (probably a Verilator bug) from "combinational loops" +// This can show up in some cases as a core deadlock. +// Probably this issue: https://github.com/verilator/verilator/issues/5350 +split_var -module "load_unit" -var "req_port_i" +split_var -module "load_unit" -var "req_port_o" diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index e4e0d9fb..721db9fa 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -43,14 +43,14 @@ VERILATOR_CXX_SRCS = $(CHS_VERILATOR_DIR)/sim/main.cpp \ $(RISCV_DBG_DIR)/tb/remote_bitbang/remote_bitbang.c \ $(RISCV_DBG_DIR)/tb/remote_bitbang/sim_jtag.c +VERILATOR_CONFIG = $(CHS_VERILATOR_DIR)/config.vlt + $(CHS_VERILATOR_DIR)/cheshire_soc.flist: $(CHS_ROOT)/Bender.yml $(BENDER) script verilator $(CHS_BENDER_RTL_FLAGS) > $@ - # TODO: Add verilator target for these upstream to avoid patch-in - echo '$(shell $(BENDER) path axi)/src/axi_sim_mem.sv' >> $@ -$(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) +$(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper: $(CHS_ROOT)/target/sim/verilator/cheshire_soc.flist $(VERILATOR_CXX_SRCS) $(VERILATOR_CONFIG) +cd $(CHS_VERILATOR_DIR) && $(VERILATOR_PREFIX) 
$(VERILATOR) $(VERILATOR_ARGS) \ - -DASSERTS_OFF -f $< $(VERILATOR_CXX_SRCS) \ + -f $< $(VERILATOR_CXX_SRCS) $(VERILATOR_CONFIG) \ --cc --exe --build --top-module cheshire_soc_wrapper $(CHS_ROOT)/target/sim/verilator/cheshire_soc.vlt: $(CHS_ROOT)/target/sim/verilator/obj_dir/Vcheshire_soc_wrapper From 239b06e3a9795a128363e781187dbb1fb99e6042 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 10:25:29 +0200 Subject: [PATCH 31/39] target/sim/verilator: Link using clang++ --- target/sim/verilator/verilator.mk | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index 721db9fa..dbffd416 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -28,7 +28,18 @@ VERILATOR_ARGS += --threads $(CHS_VERILATOR_THREADS) # C++ Compiler Optimization VERILATOR_ARGS += -CFLAGS "-O3" -CFLAGS "-march=native" -CFLAGS "-mtune=native" # Use Clang (faster simulation than GCC) -VERILATOR_ARGS += --compiler clang -MAKEFLAGS "CC=clang" -MAKEFLAGS "CXX=clang++" +VERILATOR_ARGS += --compiler clang -MAKEFLAGS "CC=clang" -MAKEFLAGS "CXX=clang++" -MAKEFLAGS "LINK=clang++" + +# Link Time Optimization (LTO) +# VERILATOR_ARGS += -CFLAGS "-flto" -LDFLAGS "-flto" + +# Thread Profile-Guided Optimization (PGO) +# VERILATOR_ARGS += --prof-pgo +# VERILATOR_ARGS += profile.vlt + +# Compiler Profile-Guided Optimization (PGO) +# VERILATOR_ARGS += -CFLAGS "-fprofile-generate" -MAKEFLAGS "LINK=clang++" -LDFLAGS "-fprofile-generate" +# VERILATOR_ARGS += -CFLAGS "-fprofile-use=../default.profdata" # Profiling # generates `gmon.out` that can be processed by `gprof` and then `verilator_profcfunc` From 4b4b8b567ea9a3608df95438a0b2cabf843f7dd6 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 10:25:47 +0200 Subject: [PATCH 32/39] sw/tests/dma_2d: Insert fence() after DMA copy --- sw/tests/dma_2d.spm.c | 1 + 1 file changed, 1 insertion(+) diff --git 
a/sw/tests/dma_2d.spm.c b/sw/tests/dma_2d.spm.c index 1ce1f102..e085a465 100644 --- a/sw/tests/dma_2d.spm.c +++ b/sw/tests/dma_2d.spm.c @@ -34,6 +34,7 @@ int main(void) { // Issue blocking 2D memcpy (exclude null terminator from source) sys_dma_2d_blk_memcpy((uintptr_t)(void *)dst, (uintptr_t)(void *)src, sizeof(src_cached) - 4, 7, 1, 4, DMA_CONF_DECOUPLE_NONE); + fence(); // Check destination string int errors = sizeof(gold); From 5312cfcc216da5c150f7b87f3a681d67e05d2487 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 10:42:12 +0200 Subject: [PATCH 33/39] target/sim/verilator: Make UART baud rate configurable --- target/sim/verilator/src/cheshire_soc_wrapper.sv | 3 ++- target/sim/verilator/verilator.mk | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index cd27204a..1a6615f2 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -10,7 +10,8 @@ function automatic cheshire_pkg::cheshire_cfg_t gen_cheshire_cfg(); endfunction module cheshire_soc_wrapper # ( - parameter cheshire_pkg::cheshire_cfg_t DutCfg = gen_cheshire_cfg() + parameter cheshire_pkg::cheshire_cfg_t DutCfg = gen_cheshire_cfg(), + parameter int unsigned UartBaudRate = 115200 ) ( input logic clk_i, input logic rtc_i, diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index dbffd416..b6ea400a 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -11,12 +11,15 @@ RISCV_DBG_DIR = $(shell bender path riscv-dbg) VERILATOR_PREFIX ?= oseda VERILATOR ?= verilator -CHS_VERILATOR_THREADS ?= 4 +CHS_VERILATOR_THREADS ?= 4 +CHS_VERILATOR_UART_BAUD ?= 115200 # Silly Verilator warnings: these are perfectly valid and should not be warnings VERILATOR_WNO = -Wno-fatal -Wno-style \ -Wno-BLKANDNBLK -Wno-WIDTHEXPAND -Wno-WIDTHTRUNC -Wno-WIDTHCONCAT 
-Wno-ASCRANGE VERILATOR_ARGS ?= -j 0 -Wall $(VERILATOR_WNO) -timescale 1ns/1ps +# UART baud rate +VERILATOR_ARGS += -GUartBaudRate=$(CHS_VERILATOR_UART_BAUD) # Verilation optimizations VERILATOR_ARGS += -O3 --x-assign fast --x-initial fast --noassert # Disable common_cells assertions From 22092886c79fb6475b12969a279db25969b1f0be Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 10 Jul 2025 17:40:31 +0200 Subject: [PATCH 34/39] target/sim/verilator: Prepare fw_payload and DTB preloading --- Bender.yml | 1 + target/sim/verilator/sim/Mem64Master.h | 2 + target/sim/verilator/sim/main.cpp | 79 +++++++- .../sim/verilator/src/cheshire_soc_wrapper.sv | 4 +- target/sim/verilator/src/verilator_ram.sv | 188 ++++++++++++++++++ 5 files changed, 267 insertions(+), 7 deletions(-) create mode 100644 target/sim/verilator/src/verilator_ram.sv diff --git a/Bender.yml b/Bender.yml index fd5bf3f0..7c0d3689 100644 --- a/Bender.yml +++ b/Bender.yml @@ -56,6 +56,7 @@ sources: - target: any(verilator) files: + - target/sim/verilator/src/verilator_ram.sv - target/sim/verilator/src/verilator_uart_rx.sv - target/sim/verilator/src/cheshire_soc_wrapper.sv diff --git a/target/sim/verilator/sim/Mem64Master.h b/target/sim/verilator/sim/Mem64Master.h index 35e6dd7d..564f7357 100644 --- a/target/sim/verilator/sim/Mem64Master.h +++ b/target/sim/verilator/sim/Mem64Master.h @@ -131,6 +131,8 @@ class Mem64Master { if (m_write_request_queue.empty()) { printf("Mem64Master: emptied write queue\n"); + } else if (m_write_request_queue.size() % 1000 == 0) { + printf("Mem64Master: %zu writes remaining\n", m_write_request_queue.size()); } } diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index f3f53c8d..a155f6b3 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -84,7 +84,7 @@ static bool elf_preload_open(const char *filename) { return true; } -static void elf_preload_write_enqueue() { +static void elf_preload_write_enqueue(bool 
is_zsl) { long long section_address, section_len; size_t num_writes = 0; @@ -94,6 +94,14 @@ static void elf_preload_write_enqueue() { char *buf = (char *)calloc(section_len + sizeof(uint64_t), 1); read_section_raw(section_address, buf, section_len); + if (is_zsl) { + assert(section_address == 0); + section_address = 0x10000000; // SPM start + VL_PRINTF("[ELF] ZSL will be loaded at 0x%llx instead of 0x0\n", section_address); + } else { + assert(section_address != 0); + } + for (size_t i = 0; i < section_len; i += sizeof(uint64_t)) { mem_master->write(section_address + i, *(uint64_t *)(buf + i)); num_writes++; @@ -104,6 +112,7 @@ static void elf_preload_write_enqueue() { long long entry; get_entry(&entry); + VL_PRINTF("[ELF] entry point: %p\n", (void*)entry); // write entrypoint mem_master->write(0x03000000, entry); num_writes++; @@ -114,6 +123,39 @@ static void elf_preload_write_enqueue() { VL_PRINTF("[ELF] enqueued %zu memory writes\n", num_writes); } +static void bin_preload_write_enqueue(const char* bin_path, uint64_t load_addr) { + FILE *fp = fopen(bin_path, "rb"); + if (!fp) { + perror("fopen"); + exit(1); + } + fseek(fp, 0, SEEK_END); + uint64_t size = ftell(fp); + if (size >= 0x200000) size = 0x200000; // HACK: only load first page of fw_payload + fseek(fp, 0, SEEK_SET); + uint64_t num_words = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); + uint64_t* buf = (uint64_t*)calloc(num_words, sizeof(uint64_t)); + size_t nread = fread(buf, 1, size, fp); + if (nread != size) { + VL_PRINTF("[BIN] Error: could not read entire file %s\n", bin_path); + exit(1); + } + fclose(fp); + + size_t num_writes = 0; + for (size_t i = 0; i < num_words; i++) { + if (buf[i] == 0x0) + // skip writing zeros (for speed) + continue; + + mem_master->write(load_addr + i * sizeof(uint64_t), buf[i]); + num_writes++; + } + + free(buf); + VL_PRINTF("[BIN] enqueued %zu memory writes (skipped %zu zero words)\n", num_writes, num_words - num_writes); +} + static void poll_for_exit() { static bool 
request_inflight = false; static uint64_t idle_cycles = 0; @@ -166,16 +208,37 @@ int main(int argc, char** argv) { // This needs to be called before you create any model contextp->commandArgs(argc, argv); + // for (int i = 0; i < argc; i++) { + // VL_PRINTF("%s\n", argv[i]); + // } + const char *filename_plusarg = contextp->commandArgsPlusMatch("BINARY="); if (strlen(filename_plusarg) == 0) { VL_PRINTF("Error: no binary specified (+BINARY=...)\n"); return 1; } - const char *filename = filename_plusarg + strlen("+BINARY="); + const char *filename = strdup(filename_plusarg + strlen("+BINARY=")); if (!filename || strlen(filename) == 0 || filename[0] != '/') { VL_PRINTF("Error: +BINARY requires absolute path\n"); return 1; } + VL_PRINTF("[ELF] BINARY: %s\n", filename); + + const char *fw_payload_plusarg = contextp->commandArgsPlusMatch("FW_PAYLOAD="); + const char *fw_payload_bin = NULL; + if (strlen(fw_payload_plusarg) != 0) { + fw_payload_bin = strdup(fw_payload_plusarg + strlen("+FW_PAYLOAD=")); + VL_PRINTF("[FW] FW_PAYLOAD: %s\n", fw_payload_bin); + } + + const char *fw_dtb_plusarg = contextp->commandArgsPlusMatch("FW_DTB="); + const char *fw_dtb_bin = NULL; + if (strlen(fw_dtb_plusarg) != 0) { + fw_dtb_bin = strdup(fw_dtb_plusarg + strlen("+FW_DTB=")); + VL_PRINTF("[FW] DTB: %s\n", fw_dtb_bin); + } + + bool is_firmware = fw_payload_bin != NULL; // "TOP" will be the hierarchical name of the module topp = std::make_unique(contextp.get(), "TOP"); @@ -245,8 +308,14 @@ int main(int argc, char** argv) { // TODO: This is determined experimentally. // We should rather poll until the SPM has been configured properly. 
- if (cycle == 2000) - elf_preload_write_enqueue(); + if (cycle == 2000) { + if (is_firmware) { + // preload payloads before the actual ZSL ELF, to avoid premature execution + bin_preload_write_enqueue(fw_payload_bin, 0x80000000); + bin_preload_write_enqueue(fw_dtb_bin, 0x80800000); + } + elf_preload_write_enqueue(is_firmware); + } if (cycle > 2000 && !preload_done_cycle && !mem_master->has_write()) { preload_done_cycle = cycle; @@ -256,7 +325,7 @@ int main(int argc, char** argv) { // I/O if (reset_done) { - if (preload_done_cycle > 0) + if (!is_firmware && preload_done_cycle > 0) poll_for_exit(); #if 0 jtag_tick_io(); diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index 1a6615f2..da1e1781 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -224,13 +224,13 @@ module cheshire_soc_wrapper # ( dram_mem_addr[0][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth] }; - tc_sram #( + verilator_ram #( .NumWords ( NumDramWords ), .DataWidth ( DramDataWidth ), .ByteWidth ( 8 ), .NumPorts ( 2 ), .Latency ( 1 ) - ) i_dram( + ) i_dram ( .clk_i, .rst_ni, .req_i ( dram_mem_req ), diff --git a/target/sim/verilator/src/verilator_ram.sv b/target/sim/verilator/src/verilator_ram.sv new file mode 100644 index 00000000..c732df9c --- /dev/null +++ b/target/sim/verilator/src/verilator_ram.sv @@ -0,0 +1,188 @@ +// Copyright (c) 2020 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +// Author: Wolfgang Roenninger + +// Description: Functional module of a generic SRAM +// +// Parameters: +// - NumWords: Number of words in the macro. Address width can be calculated with: +// `AddrWidth = (NumWords > 32'd1) ? $clog2(NumWords) : 32'd1` +// The module issues a warning if there is a request on an address which is +// not in range. +// - DataWidth: Width of the ports `wdata_i` and `rdata_o`. +// - ByteWidth: Width of a byte, the byte enable signal `be_i` can be calculated with the +// ceiling division `ceil(DataWidth, ByteWidth)`. +// - NumPorts: Number of read and write ports. Each is a full port. Ports with a higher +// index read and write after the ones with lower indices. +// - Latency: Read latency, the read data is available this many cycles after a request. +// - PrintSimCfg: Prints at the beginning of the simulation a `Hello` message with +// the instantiated parameters and signal widths. +// - ImplKey: Key by which an instance can refer to a specific implementation (e.g. macro). +// May be used to look up additional parameters for implementation (e.g. generator, +// line width, muxing) in an external reference, such as a configuration file. +// +// Ports: +// - `clk_i`: Clock +// - `rst_ni`: Asynchronous reset, active low +// - `req_i`: Request, active high +// - `we_i`: Write request, active high +// - `addr_i`: Request address +// - `wdata_i`: Write data, has to be valid on request +// - `be_i`: Byte enable, active high +// - `rdata_o`: Read data, valid `Latency` cycles after a request with `we_i` low. 
+// +// Behaviour: +// - Address collision: When Ports are making a write access onto the same address, +// the write operation will start at the port with the lowest address +// index, each port will overwrite the changes made by the previous ports +// according how the respective `be_i` signal is set. +// - Read data on write: This implementation will not produce a read data output on the signal +// `rdata_o` when `req_i` and `we_i` are asserted. The output data is stable +// on write requests. + +module verilator_ram #( + parameter int unsigned NumWords = 32'd1024, // Number of Words in data array + parameter int unsigned DataWidth = 32'd128, // Data signal width + parameter int unsigned ByteWidth = 32'd8, // Width of a data byte + parameter int unsigned NumPorts = 32'd2, // Number of read and write ports + parameter int unsigned Latency = 32'd1, // Latency when the read data is available + parameter bit PrintSimCfg = 1'b0, // Print configuration + parameter ImplKey = "none", // Reference to specific implementation + // DEPENDENT PARAMETERS, DO NOT OVERWRITE! + parameter int unsigned AddrWidth = (NumWords > 32'd1) ? 
$clog2(NumWords) : 32'd1, + parameter int unsigned BeWidth = (DataWidth + ByteWidth - 32'd1) / ByteWidth, // ceil_div + parameter type addr_t = logic [AddrWidth-1:0], + parameter type data_t = logic [DataWidth-1:0], + parameter type be_t = logic [BeWidth-1:0] +) ( + input logic clk_i, // Clock + input logic rst_ni, // Asynchronous reset active low + // input ports + input logic [NumPorts-1:0] req_i, // request + input logic [NumPorts-1:0] we_i, // write enable + input addr_t [NumPorts-1:0] addr_i, // request address + input data_t [NumPorts-1:0] wdata_i, // write data + input be_t [NumPorts-1:0] be_i, // write byte enable + // output ports + output data_t [NumPorts-1:0] rdata_o // read data +); + + // memory array + data_t sram [NumWords-1:0]; + // hold the read address when no read access is made + addr_t [NumPorts-1:0] r_addr_q; + + // set the read output if requested + // The read data at the highest array index is set combinational. + // It gets then delayed for a number of cycles until it gets available at the output at + // array index 0. + + // read data output assignment + data_t [NumPorts-1:0][Latency-1:0] rdata_q, rdata_d; + if (Latency == 32'd0) begin : gen_no_read_lat + for (genvar i = 0; i < NumPorts; i++) begin : gen_port + assign rdata_o[i] = (req_i[i] && !we_i[i]) ? sram[addr_i[i]] : sram[r_addr_q[i]]; + end + end else begin : gen_read_lat + + always_comb begin + for (int unsigned i = 0; i < NumPorts; i++) begin + rdata_o[i] = rdata_q[i][0]; + for (int unsigned j = 0; j < (Latency-1); j++) begin + rdata_d[i][j] = rdata_q[i][j+1]; + end + rdata_d[i][Latency-1] = (req_i[i] && !we_i[i]) ? 
sram[addr_i[i]] : sram[r_addr_q[i]]; + end + end + end + + // write memory array + always_ff @(posedge clk_i or negedge rst_ni) begin + if (!rst_ni) begin + for (int unsigned i = 0; i < NumWords; i++) begin + sram[i] <= '0; + end + for (int i = 0; i < NumPorts; i++) begin + r_addr_q[i] <= {AddrWidth{1'b0}}; + // initialize the read output register for each port + for (int unsigned j = 0; j < Latency; j++) begin + rdata_q[i][j] <= '0; + end + end + end else begin + // read value latch happens before new data is written to the sram + for (int unsigned i = 0; i < NumPorts; i++) begin + if (Latency != 0) begin + for (int unsigned j = 0; j < Latency; j++) begin + rdata_q[i][j] <= rdata_d[i][j]; + end + end + end + // there is a request for the SRAM, latch the required register + for (int unsigned i = 0; i < NumPorts; i++) begin + if (req_i[i]) begin + if (we_i[i]) begin + // update value when write is set at clock + for (int unsigned j = 0; j < BeWidth; j++) begin + if (be_i[i][j]) begin + sram[addr_i[i]][j*ByteWidth+:ByteWidth] <= wdata_i[i][j*ByteWidth+:ByteWidth]; + end + end + end else begin + // otherwise update read address for subsequent non request cycles + r_addr_q[i] <= addr_i[i]; + end + end // if req_i + end // for ports + end // if !rst_ni + end + +// Validate parameters. 
+// pragma translate_off +`ifndef VERILATOR +`ifndef TARGET_SYNTHESIS + initial begin: p_assertions + assert ($bits(addr_i) == NumPorts * AddrWidth) else $fatal(1, "AddrWidth problem on `addr_i`"); + assert ($bits(wdata_i) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `wdata_i`"); + assert ($bits(be_i) == NumPorts * BeWidth) else $fatal(1, "BeWidth problem on `be_i`" ); + assert ($bits(rdata_o) == NumPorts * DataWidth) else $fatal(1, "DataWidth problem on `rdata_o`"); + assert (NumWords >= 32'd1) else $fatal(1, "NumWords has to be > 0"); + assert (DataWidth >= 32'd1) else $fatal(1, "DataWidth has to be > 0"); + assert (ByteWidth >= 32'd1) else $fatal(1, "ByteWidth has to be > 0"); + assert (NumPorts >= 32'd1) else $fatal(1, "The number of ports must be at least 1!"); + end + initial begin: p_sim_hello + if (PrintSimCfg) begin + $display("#################################################################################"); + $display("verilator_ram functional instantiated with the configuration:" ); + $display("Instance: %m" ); + $display("Number of ports (dec): %0d", NumPorts ); + $display("Number of words (dec): %0d", NumWords ); + $display("Address width (dec): %0d", AddrWidth ); + $display("Data width (dec): %0d", DataWidth ); + $display("Byte width (dec): %0d", ByteWidth ); + $display("Byte enable width (dec): %0d", BeWidth ); + $display("Latency Cycles (dec): %0d", Latency ); + $display("Simulation init (str): %0s", SimInit ); + $display("#################################################################################"); + end + end + for (genvar i = 0; i < NumPorts; i++) begin : gen_assertions + assert property ( @(posedge clk_i) disable iff (!rst_ni) + (req_i[i] |-> (addr_i[i] < NumWords))) else + $warning("Request address %0h not mapped, port %0d, expect random write or read behavior!", + addr_i[i], i); + end + +`endif +`endif +// pragma translate_on +endmodule From a52c677e4ccac4913c105b7b22f015ad42855a79 Mon Sep 17 00:00:00 2001 From: Max 
Wipfli Date: Fri, 11 Jul 2025 12:22:00 +0200 Subject: [PATCH 35/39] target/sim/verilator: Optimize preloading into DRAM directly --- target/sim/verilator/sim/main.cpp | 116 +++++++++++------- .../sim/verilator/src/cheshire_soc_wrapper.sv | 85 +++++++++---- 2 files changed, 134 insertions(+), 67 deletions(-) diff --git a/target/sim/verilator/sim/main.cpp b/target/sim/verilator/sim/main.cpp index a155f6b3..1f2f1170 100644 --- a/target/sim/verilator/sim/main.cpp +++ b/target/sim/verilator/sim/main.cpp @@ -1,4 +1,5 @@ #include // timers +#include #include // std::unique_ptr #include // common Verilator routines @@ -30,7 +31,8 @@ extern "C" { std::unique_ptr contextp; std::unique_ptr topp; -std::unique_ptr mem_master; +std::unique_ptr slink_master; +std::unique_ptr dram_master; bool do_exit = false; int exit_code = 0; @@ -75,6 +77,30 @@ static void handle_uart(char data) { } } +static size_t write_chunk(uint64_t dst_addr, uint64_t* data, size_t num_words) { + assert(dst_addr % sizeof(uint64_t) == 0 && "aligned address"); + size_t num_writes = 0; + + for (size_t i = 0; i < num_words; i++) { + uint64_t word_addr = dst_addr + i * sizeof(uint64_t); + if (word_addr >= 0x80000000ULL && word_addr < 0x100000000ULL) { + // DRAM + if (data[i] == 0) + // DRAM is zero-initialized, so skip writing zero words + continue; + dram_master->write(word_addr, data[i]); + num_writes++; + } else { + slink_master->write(word_addr, data[i]); + num_writes++; + } + } + + VL_PRINTF("[MEM] writing %zu words to %p (skipping %zu zero words)\n", + num_writes, (void *)dst_addr, num_words - num_writes); + return num_writes; +} + static bool elf_preload_open(const char *filename) { char ret = read_elf(filename); if (ret != 0) { @@ -84,15 +110,16 @@ static bool elf_preload_open(const char *filename) { return true; } -static void elf_preload_write_enqueue(bool is_zsl) { +static uint64_t elf_preload_write_enqueue(bool is_zsl) { long long section_address, section_len; size_t num_writes = 0; while 
(get_section(&section_address, &section_len)) { VL_PRINTF("[ELF] loading section at 0x%llx (%lld bytes)\n", section_address, section_len); + size_t num_words = (section_len + sizeof(uint64_t) - 1) / sizeof(uint64_t); // div ceil - char *buf = (char *)calloc(section_len + sizeof(uint64_t), 1); - read_section_raw(section_address, buf, section_len); + uint64_t* buf = (uint64_t*)calloc(num_words, sizeof(uint64_t)); + read_section_raw(section_address, (char *)buf, section_len); if (is_zsl) { assert(section_address == 0); @@ -102,25 +129,14 @@ static void elf_preload_write_enqueue(bool is_zsl) { assert(section_address != 0); } - for (size_t i = 0; i < section_len; i += sizeof(uint64_t)) { - mem_master->write(section_address + i, *(uint64_t *)(buf + i)); - num_writes++; - } - + num_writes += write_chunk(section_address, buf, num_words); free(buf); } + VL_PRINTF("[ELF] enqueued %zu memory writes\n", num_writes); long long entry; get_entry(&entry); - VL_PRINTF("[ELF] entry point: %p\n", (void*)entry); - // write entrypoint - mem_master->write(0x03000000, entry); - num_writes++; - // set start bit (read by boot ROM) - mem_master->write(0x03000008, 2); - num_writes++; - - VL_PRINTF("[ELF] enqueued %zu memory writes\n", num_writes); + return entry; } static void bin_preload_write_enqueue(const char* bin_path, uint64_t load_addr) { @@ -131,7 +147,6 @@ static void bin_preload_write_enqueue(const char* bin_path, uint64_t load_addr) } fseek(fp, 0, SEEK_END); uint64_t size = ftell(fp); - if (size >= 0x200000) size = 0x200000; // HACK: only load first page of fw_payload fseek(fp, 0, SEEK_SET); uint64_t num_words = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t); uint64_t* buf = (uint64_t*)calloc(num_words, sizeof(uint64_t)); @@ -142,18 +157,16 @@ static void bin_preload_write_enqueue(const char* bin_path, uint64_t load_addr) } fclose(fp); - size_t num_writes = 0; - for (size_t i = 0; i < num_words; i++) { - if (buf[i] == 0x0) - // skip writing zeros (for speed) - continue; - -
mem_master->write(load_addr + i * sizeof(uint64_t), buf[i]); - num_writes++; - } - + write_chunk(load_addr, buf, num_words); free(buf); - VL_PRINTF("[BIN] enqueued %zu memory writes (skipped %zu zero words)\n", num_writes, num_words - num_writes); +} + +static void elf_preload_start_enqueue(uint64_t entrypoint_addr) { + VL_PRINTF("[ELF] starting execution, entry point %p\n", (void*)entrypoint_addr); + // write entrypoint + slink_master->write(0x03000000, entrypoint_addr); + // set start bit (read by boot ROM) + slink_master->write(0x03000008, 2); } static void poll_for_exit() { @@ -161,7 +174,7 @@ static void poll_for_exit() { static uint64_t idle_cycles = 0; if (request_inflight) { - auto maybe_response = mem_master->get_read_response(); + auto maybe_response = slink_master->get_read_response(); if (maybe_response) { auto data = maybe_response->data; @@ -177,7 +190,7 @@ static void poll_for_exit() { idle_cycles++; if (idle_cycles >= 1000) { - mem_master->read(0x03000008); + slink_master->read(0x03000008); request_inflight = true; } } @@ -267,7 +280,7 @@ int main(int argc, char** argv) { uint64_t preload_done_cycle = 0; double preload_done_time = -1; - mem_master = std::make_unique( + slink_master = std::make_unique( &topp->slink_mem_req_i, &topp->slink_mem_addr_i, &topp->slink_mem_we_i, @@ -278,6 +291,19 @@ int main(int argc, char** argv) { &topp->slink_mem_rsp_rdata_o ); + dram_master = std::make_unique( + &topp->dram_mem_req_i, + &topp->dram_mem_addr_i, + &topp->dram_mem_we_i, + &topp->dram_mem_wdata_i, + &topp->dram_mem_be_i, + &topp->dram_mem_gnt_o, + &topp->dram_mem_rsp_valid_o, + &topp->dram_mem_rsp_rdata_o + ); + + uint64_t entrypoint_addr = -1; + // ELF preloading if (!elf_preload_open(filename)) return 1; @@ -291,11 +317,13 @@ int main(int argc, char** argv) { if (!topp->clk_i) { if (cycle == 1) { // Apply Reset + VL_PRINTF("[RST] applying reset\n"); topp->rst_ni = 0; } if (cycle == RST_CYCLES + 1) { // Release Reset + VL_PRINTF("[RST] releasing 
reset\n"); topp->rst_ni = 1; reset_done = true; } @@ -312,15 +340,19 @@ int main(int argc, char** argv) { if (is_firmware) { // preload payloads before the actual ZSL ELF, to avoid premature execution bin_preload_write_enqueue(fw_payload_bin, 0x80000000); - bin_preload_write_enqueue(fw_dtb_bin, 0x80800000); + bin_preload_write_enqueue(fw_dtb_bin, 0x90000000); } - elf_preload_write_enqueue(is_firmware); + entrypoint_addr = elf_preload_write_enqueue(is_firmware); } - if (cycle > 2000 && !preload_done_cycle && !mem_master->has_write()) { - preload_done_cycle = cycle; - preload_done_time = get_seconds(); - VL_PRINTF("[ELF] preload complete\n"); + if (cycle > 2000 && !preload_done_cycle) { + if (!slink_master->has_write() && !dram_master->has_write()) { + preload_done_cycle = cycle; + preload_done_time = get_seconds(); + VL_PRINTF("[ELF] preload complete\n"); + + elf_preload_start_enqueue(entrypoint_addr); + } } // I/O @@ -338,7 +370,8 @@ int main(int argc, char** argv) { // Monitor Synchronous Outputs: just before @(posedge clk_i) if (reset_done && topp->clk_i) { - mem_master->handle_before(); + slink_master->handle_before(); + dram_master->handle_before(); } // Evaluate model @@ -346,7 +379,8 @@ int main(int argc, char** argv) { // Apply Synchronous Inputs: just after @(posedge clk_i) if (reset_done && topp->clk_i) { - mem_master->handle_after(); + slink_master->handle_after(); + dram_master->handle_after(); } #if VM_TRACE diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index da1e1781..c6982fa8 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -37,7 +37,17 @@ module cheshire_soc_wrapper # ( input logic [DutCfg.AxiDataWidth/8-1:0] slink_mem_be_i, output logic slink_mem_gnt_o, output logic slink_mem_rsp_valid_o, - output logic [DutCfg.AxiDataWidth-1:0] slink_mem_rsp_rdata_o + output logic [DutCfg.AxiDataWidth-1:0] slink_mem_rsp_rdata_o, 
+ + // Memory Interface (DRAM) + input logic dram_mem_req_i, + input logic [DutCfg.AddrWidth-1:0] dram_mem_addr_i, + input logic dram_mem_we_i, + input logic [DutCfg.AxiDataWidth-1:0] dram_mem_wdata_i, + input logic [DutCfg.AxiDataWidth/8-1:0] dram_mem_be_i, + output logic dram_mem_gnt_o, + output logic dram_mem_rsp_valid_o, + output logic [DutCfg.AxiDataWidth-1:0] dram_mem_rsp_rdata_o ); import cheshire_pkg::*; @@ -178,13 +188,13 @@ module cheshire_soc_wrapper # ( localparam int unsigned DramAddrWidth = $clog2(DutCfg.LlcOutRegionEnd - DutCfg.LlcOutRegionStart); localparam int unsigned DramDataWidth = DutCfg.AxiDataWidth; - logic [1:0] dram_mem_req; - logic [1:0][DramAddrWidth-1:0] dram_mem_addr; - logic [1:0][DramDataWidth-1:0] dram_mem_wdata; - logic [1:0][DramDataWidth/8-1:0] dram_mem_strb; - logic [1:0] dram_mem_we; - logic [1:0] dram_mem_rvalid; - logic [1:0][DramDataWidth-1:0] dram_mem_rdata; + logic [2:0] dram_mem_req; + logic [2:0][DramAddrWidth-1:0] dram_mem_addr; + logic [2:0][DramDataWidth-1:0] dram_mem_wdata; + logic [2:0][DramDataWidth/8-1:0] dram_mem_strb; + logic [2:0] dram_mem_we; + logic [2:0] dram_mem_rvalid; + logic [2:0][DramDataWidth-1:0] dram_mem_rdata; axi_to_mem_split #( .axi_req_t ( axi_llc_req_t ), @@ -194,32 +204,55 @@ module cheshire_soc_wrapper # ( .IdWidth ( $bits(axi_llc_id_t) ), .MemDataWidth ( DramDataWidth ), .BufDepth ( 1 ), - .HideStrb ( 1'b1 ), + .HideStrb ( 1'b0 ), .OutFifoDepth ( 1 ) ) i_dram_axi ( .clk_i, .rst_ni, - .test_i ( 1'b0 ), - .busy_o ( ), - .axi_req_i ( axi_llc_mst_req ), - .axi_resp_o ( axi_llc_mst_rsp ), - .mem_req_o ( dram_mem_req ), - .mem_gnt_i ( 2'b11 ), - .mem_addr_o ( dram_mem_addr ), - .mem_wdata_o ( dram_mem_wdata ), - .mem_strb_o ( dram_mem_strb ), - .mem_atop_o ( ), - .mem_we_o ( dram_mem_we ), - .mem_rvalid_i ( dram_mem_rvalid ), - .mem_rdata_i ( dram_mem_rdata ) + .test_i ( 1'b0 ), + .busy_o ( ), + .axi_req_i ( axi_llc_mst_req ), + .axi_resp_o ( axi_llc_mst_rsp ), + .mem_req_o ( dram_mem_req[1:0] ), + 
.mem_gnt_i ( 2'b11 ), + .mem_addr_o ( dram_mem_addr[1:0] ), + .mem_wdata_o ( dram_mem_wdata[1:0] ), + .mem_strb_o ( dram_mem_strb[1:0] ), + .mem_atop_o ( ), + .mem_we_o ( dram_mem_we[1:0] ), + .mem_rvalid_i ( dram_mem_rvalid[1:0] ), + .mem_rdata_i ( dram_mem_rdata[1:0] ) ); + // direct access port from C++ + assign dram_mem_req[2] = dram_mem_req_i; + assign dram_mem_addr[2] = dram_mem_addr_i; + assign dram_mem_wdata[2] = dram_mem_wdata_i; + assign dram_mem_strb[2] = dram_mem_be_i; + assign dram_mem_we[2] = dram_mem_we_i; + assign dram_mem_gnt_o = 1'b1; + assign dram_mem_rsp_valid_o = dram_mem_rvalid[2]; + assign dram_mem_rsp_rdata_o = dram_mem_rdata[2]; + localparam int unsigned DramWordAddrWidth = DramAddrWidth - $clog2(DramDataWidth / 8); localparam int unsigned NumDramWords = 1 << DramWordAddrWidth; + logic [DramAddrWidth-1:0] dram_mem_raddr_q; + `FF(dram_mem_raddr_q, dram_mem_addr[0], '0); + + always_ff @(posedge clk_i) begin + // if (rst_ni && dram_mem_req[1] && dram_mem_we[1]) begin + // $display("[DRAM] wrote 0x%x to address 0x%x", dram_mem_wdata[1], dram_mem_addr[1]); + // end + if (rst_ni && dram_mem_rvalid[0]) begin + $display("[DRAM] address 0x%x -> data 0x%x", dram_mem_raddr_q, dram_mem_rdata[0]); + end + end + // Translate byte addresses (from axi_to_mem_split) to word addresses (for tc_sram) - logic [1:0][DramWordAddrWidth-1:0] dram_mem_word_addr; + logic [2:0][DramWordAddrWidth-1:0] dram_mem_word_addr; assign dram_mem_word_addr = { + dram_mem_addr[2][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth], dram_mem_addr[1][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth], dram_mem_addr[0][DramAddrWidth-1:DramAddrWidth-DramWordAddrWidth] }; @@ -228,7 +261,7 @@ module cheshire_soc_wrapper # ( .NumWords ( NumDramWords ), .DataWidth ( DramDataWidth ), .ByteWidth ( 8 ), - .NumPorts ( 2 ), + .NumPorts ( 3 ), .Latency ( 1 ) ) i_dram ( .clk_i, @@ -241,8 +274,8 @@ module cheshire_soc_wrapper # ( .rdata_o ( dram_mem_rdata ) ); - logic [1:0] dram_mem_req_q; - 
`FF(dram_mem_req_q, dram_mem_req, 2'b0); + logic [2:0] dram_mem_req_q; + `FF(dram_mem_req_q, dram_mem_req, 3'b000); assign dram_mem_rvalid = dram_mem_req_q; //////////// From 002d39a74a0a8ca77eadd12458700a0148fca14a Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Wed, 13 Aug 2025 14:50:08 +0200 Subject: [PATCH 36/39] target/sim/verilator: Re-enable LTO --- target/sim/verilator/verilator.mk | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/sim/verilator/verilator.mk b/target/sim/verilator/verilator.mk index b6ea400a..8a3d719b 100644 --- a/target/sim/verilator/verilator.mk +++ b/target/sim/verilator/verilator.mk @@ -34,14 +34,14 @@ VERILATOR_ARGS += -CFLAGS "-O3" -CFLAGS "-march=native" -CFLAGS "-mtune=native" VERILATOR_ARGS += --compiler clang -MAKEFLAGS "CC=clang" -MAKEFLAGS "CXX=clang++" -MAKEFLAGS "LINK=clang++" # Link Time Optimization (LTO) -# VERILATOR_ARGS += -CFLAGS "-flto" -LDFLAGS "-flto" +VERILATOR_ARGS += -CFLAGS "-flto" -LDFLAGS "-flto" # Thread Profile-Guided Optimization (PGO) # VERILATOR_ARGS += --prof-pgo # VERILATOR_ARGS += profile.vlt # Compiler Profile-Guided Optimization (PGO) -# VERILATOR_ARGS += -CFLAGS "-fprofile-generate" -MAKEFLAGS "LINK=clang++" -LDFLAGS "-fprofile-generate" +# VERILATOR_ARGS += -CFLAGS "-fprofile-generate" -CFLAGS "-fprofile-update=atomic" -LDFLAGS "-fprofile-generate" -LDFLAGS "-fprofile-update=atomic" # VERILATOR_ARGS += -CFLAGS "-fprofile-use=../default.profdata" # Profiling From ed71f20b02393dd998b37c06869457b9f303dd80 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Wed, 13 Aug 2025 14:50:47 +0200 Subject: [PATCH 37/39] target/sim/verilator: Remove DRAM logging from wrapper --- target/sim/verilator/src/cheshire_soc_wrapper.sv | 9 --------- 1 file changed, 9 deletions(-) diff --git a/target/sim/verilator/src/cheshire_soc_wrapper.sv b/target/sim/verilator/src/cheshire_soc_wrapper.sv index c6982fa8..2f28027b 100644 --- a/target/sim/verilator/src/cheshire_soc_wrapper.sv +++ 
b/target/sim/verilator/src/cheshire_soc_wrapper.sv @@ -240,15 +240,6 @@ module cheshire_soc_wrapper # ( logic [DramAddrWidth-1:0] dram_mem_raddr_q; `FF(dram_mem_raddr_q, dram_mem_addr[0], '0); - always_ff @(posedge clk_i) begin - // if (rst_ni && dram_mem_req[1] && dram_mem_we[1]) begin - // $display("[DRAM] wrote 0x%x to address 0x%x", dram_mem_wdata[1], dram_mem_addr[1]); - // end - if (rst_ni && dram_mem_rvalid[0]) begin - $display("[DRAM] address 0x%x -> data 0x%x", dram_mem_raddr_q, dram_mem_rdata[0]); - end - end - // Translate byte addresses (from axi_to_mem_split) to word addresses (for tc_sram) logic [2:0][DramWordAddrWidth-1:0] dram_mem_word_addr; assign dram_mem_word_addr = { From e7a52f226bc27a4dfdee0900a6004129a1565b4d Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 19 Aug 2025 10:09:18 +0200 Subject: [PATCH 38/39] TMP: Overwrite common_cells and cva6 --- Bender.local | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Bender.local diff --git a/Bender.local b/Bender.local new file mode 100644 index 00000000..5583d2a9 --- /dev/null +++ b/Bender.local @@ -0,0 +1,3 @@ +overrides: + common_cells: { git: "https://github.com/pulp-platform/common_cells.git", rev: master } + cva6: { git: "https://github.com/MaxWipfli/pulp_cva6.git", rev: verilator_speedup } From 14dfc64d6f70c512d2f772fd2a7ad452e6359b4b Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Tue, 19 Aug 2025 10:15:15 +0200 Subject: [PATCH 39/39] TMP: Update Bender.lock --- Bender.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Bender.lock b/Bender.lock index 6bf3ab46..e86fbb46 100644 --- a/Bender.lock +++ b/Bender.lock @@ -85,8 +85,8 @@ packages: - common_cells - register_interface common_cells: - revision: 9afda9abb565971649c2aa0985639c096f351171 - version: 1.38.0 + revision: b20ad7aba2080b3bb705680e0f70f1fc180ce290 + version: null source: Git: https://github.com/pulp-platform/common_cells.git dependencies: @@ -99,10 +99,10 @@ packages: Git: 
https://github.com/pulp-platform/common_verification.git dependencies: [] cva6: - revision: 9338c2ca7cf1a47aef54322f89ce867825c3c8d5 + revision: 2565f6dad38bb5630beac3d0043d9742c55d1ce2 version: null source: - Git: https://github.com/pulp-platform/cva6.git + Git: https://github.com/MaxWipfli/pulp_cva6.git dependencies: - axi - common_cells