Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
bb8dc7f
deps: Update common_cells to v1.38.0
MaxWipfli Jun 19, 2025
850970f
target/sim/verilator: Initial verilator support
MaxWipfli Jun 19, 2025
9dd2c19
target/sim/verilator: Add JTAG support using OpenOCD remote_bitbang
MaxWipfli Jun 19, 2025
3aeb6e0
target/sim/verilator: Tie off unused signals
MaxWipfli Jun 19, 2025
a3f9611
target/sim/verilator: Disable serial link
MaxWipfli Jun 19, 2025
2c8c87b
target/sim/verilator: Add simulated DRAM memory
MaxWipfli Jun 19, 2025
0bf8448
target/sim/verilator: Exit when JTAG client disconnects
MaxWipfli Jun 20, 2025
c59d9bf
target/sim/verilator: Disable tracing
MaxWipfli Jun 20, 2025
a19e768
target/sim/verilator: Track simulation rate regularly
MaxWipfli Jun 20, 2025
64d8a38
target/sim/verilator: Drive clock/reset from C++
MaxWipfli Jun 20, 2025
27aba0a
target/sim/verilator: Clean up Makefile and sim/main.cpp
MaxWipfli Jun 21, 2025
c31c1e2
target/sim/verilator: Add UART receiver
MaxWipfli Jun 21, 2025
649e64c
target/sim/verilator: Minor C++ cleanup
MaxWipfli Jun 21, 2025
f021065
target/sim/verilator: Add working serial link preloading
MaxWipfli Jun 23, 2025
3297a5d
target/sim/verilator: Improve reset handling
MaxWipfli Jun 23, 2025
ea56ba4
target/sim/verilator: Modify status print
MaxWipfli Jul 7, 2025
7380487
target/sim/verilator: Enable multi-threading
MaxWipfli Jul 7, 2025
ae5b05a
target/sim/verilator: Add support for SW exit codes via DDR link
MaxWipfli Jul 7, 2025
8a7f04b
target/sim/verilator: Use 8 threads, more C optimizations
MaxWipfli Jul 8, 2025
eeb4324
target/sim/verilator: Switch to Clang
MaxWipfli Jul 8, 2025
3d8cd28
target/sim/verilator: Allow passing CLI arguments to simulation
MaxWipfli Jul 8, 2025
27516b9
sw [WIP]: Add some tests
paulsc96 Jul 7, 2025
8ddc0c0
target/sim/verilator: Properly measure simulation rate without preloa…
MaxWipfli Jul 8, 2025
19d2d31
target/sim/verilator: Speed-up preloading by using bigger axi_from_me…
MaxWipfli Jul 8, 2025
33a0d2e
target/sim/verilator: Use taskset to avoid slow parallel execution
MaxWipfli Jul 8, 2025
9a71b76
target/sim/verilator: Implement fake DRAM using tc_sram
MaxWipfli Jul 8, 2025
c5d69fb
target/sim/verilator: Split R/W channels for DRAM to avoid DMA lock-up
MaxWipfli Jul 9, 2025
98aee59
sw/tests/dma_long: Reduce # repetitions
MaxWipfli Jul 9, 2025
c8788e3
target/sim/verilator: Use define to disable instruction tracer
MaxWipfli Jul 10, 2025
0ac7b43
target/sim/verilator: Work around assignment issues using split_var d…
MaxWipfli Jul 10, 2025
239b06e
target/sim/verilator: Link using clang++
MaxWipfli Jul 10, 2025
4b4b8b5
sw/tests/dma_2d: Insert fence() after DMA copy
MaxWipfli Jul 10, 2025
5312cfc
target/sim/verilator: Make UART baud rate configurable
MaxWipfli Jul 10, 2025
2209288
target/sim/verilator: Prepare fw_payload and DTB preloading
MaxWipfli Jul 10, 2025
a52c677
target/sim/verilator: Optimize preloading into DRAM directly
MaxWipfli Jul 11, 2025
002d39a
target/sim/verilator: Re-enable LTO
MaxWipfli Aug 13, 2025
ed71f20
target/sim/verilator: Remove DRAM logging from wrapper
MaxWipfli Aug 13, 2025
e7a52f2
TMP: Overwrite common_cells and cva6
MaxWipfli Aug 19, 2025
14dfc64
TMP: Update Bender.lock
MaxWipfli Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ target/sim/vcs/ucli.key
target/sim/vcs/work*
target/sim/vcs/vc_hdrs.h

# Verilator generated files
target/sim/verilator/obj_dir/
target/sim/verilator/*.flist
target/sim/verilator/*.fst
target/sim/verilator/*.vlt
!target/sim/verilator/config.vlt
target/sim/verilator/*.vcd
target/sim/verilator/logs/
target/sim/verilator/trace_hart_*.dasm

# Xilinx generated files
target/xilinx/build
target/xilinx/out
Expand Down
3 changes: 3 additions & 0 deletions Bender.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
overrides:
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", rev: master }
cva6: { git: "https://github.com/MaxWipfli/pulp_cva6.git", rev: verilator_speedup }
40 changes: 20 additions & 20 deletions Bender.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,17 @@ packages:
- apb
- register_interface
axi:
revision: 39f5f2d51c5e524f6fc5cf8b6e901f7dcc5622d7
version: 0.39.6
revision: f07498d53ecd5518b277c7d213ec3b71ca4df93c
version: 0.39.7
source:
Git: https://github.com/pulp-platform/axi.git
dependencies:
- common_cells
- common_verification
- tech_cells_generic
axi_llc:
revision: 559bcbd09a5a884dbe31e2d72fd95d024e357f39
version: 0.2.1
revision: 59bb8a681347e1133f11a82190fbf4bc11900d9e
version: 0.2.2
source:
Git: https://github.com/pulp-platform/axi_llc.git
dependencies:
Expand Down Expand Up @@ -60,8 +60,8 @@ packages:
dependencies:
- common_cells
axi_vga:
revision: 3718b9930f94a9eaad8ee50b4bccc71df0403084
version: 0.1.3
revision: 4d3e70d4f47bb74edc1ab68d99ffc02382e0fb9e
version: 0.1.4
source:
Git: https://github.com/pulp-platform/axi_vga.git
dependencies:
Expand All @@ -85,24 +85,24 @@ packages:
- common_cells
- register_interface
common_cells:
revision: 13f28aa0021fc22c0d01a12d618fda58d2c93239
version: 1.33.0
revision: b20ad7aba2080b3bb705680e0f70f1fc180ce290
version: null
source:
Git: https://github.com/pulp-platform/common_cells.git
dependencies:
- common_verification
- tech_cells_generic
common_verification:
revision: 9c07fa860593b2caabd9b5681740c25fac04b878
version: 0.2.3
revision: fb1885f48ea46164a10568aeff51884389f67ae3
version: 0.2.5
source:
Git: https://github.com/pulp-platform/common_verification.git
dependencies: []
cva6:
revision: 9338c2ca7cf1a47aef54322f89ce867825c3c8d5
revision: 2565f6dad38bb5630beac3d0043d9742c55d1ce2
version: null
source:
Git: https://github.com/pulp-platform/cva6.git
Git: https://github.com/MaxWipfli/pulp_cva6.git
dependencies:
- axi
- common_cells
Expand Down Expand Up @@ -131,8 +131,8 @@ packages:
dependencies:
- common_cells
idma:
revision: c12caf59bb482fe44b27361f6924ad346b2d22fe
version: 0.6.3
revision: ff5d56fffb3767814db88d6bf8f381974ea33aa5
version: 0.6.4
source:
Git: https://github.com/pulp-platform/iDMA.git
dependencies:
Expand All @@ -152,8 +152,8 @@ packages:
- common_cells
- register_interface
obi:
revision: 5321106817e177d6c16ecc4daa922b96b1bc946b
version: 0.1.5
revision: 0155fc34e900c7c884e081c0a1114a247937ff69
version: 0.1.7
source:
Git: https://github.com/pulp-platform/obi.git
dependencies:
Expand All @@ -169,8 +169,8 @@ packages:
- register_interface
- tech_cells_generic
register_interface:
revision: ae616e5a1ec2b41e72d200e5ab09c65e94aebd3d
version: 0.4.4
revision: 5daa85d164cf6b54ad061ea1e4c6f3624556e467
version: 0.4.5
source:
Git: https://github.com/pulp-platform/register_interface.git
dependencies:
Expand All @@ -187,8 +187,8 @@ packages:
- common_cells
- tech_cells_generic
serial_link:
revision: 5a25f5a71074f1ebb6de7b5280f2b16924bcc666
version: 1.1.1
revision: c55df03a1da06b00e567cf968b1b1a5f40c9f802
version: 1.1.2
source:
Git: https://github.com/pulp-platform/serial_link.git
dependencies:
Expand Down
8 changes: 7 additions & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ dependencies:
axi_vga: { git: "https://github.com/pulp-platform/axi_vga.git", version: 0.1.3 }
clic: { git: "https://github.com/pulp-platform/clic.git", version: 2.0.0 }
clint: { git: "https://github.com/pulp-platform/clint.git", version: 0.2.0 }
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.33.0 }
common_cells: { git: "https://github.com/pulp-platform/common_cells.git", version: 1.38.0 }
common_verification: { git: "https://github.com/pulp-platform/common_verification.git", version: 0.2.0 }
cva6: { git: "https://github.com/pulp-platform/cva6.git", rev: pulp-v1.0.0 }
iDMA: { git: "https://github.com/pulp-platform/iDMA.git", version: 0.6.3 }
Expand Down Expand Up @@ -54,6 +54,12 @@ sources:
- target/sim/src/fixture_cheshire_soc.sv
- target/sim/src/tb_cheshire_soc.sv

- target: any(verilator)
files:
- target/sim/verilator/src/verilator_ram.sv
- target/sim/verilator/src/verilator_uart_rx.sv
- target/sim/verilator/src/cheshire_soc_wrapper.sv

- target: all(fpga, xilinx)
files:
- target/xilinx/src/phy_definitions.svh
Expand Down
2 changes: 2 additions & 0 deletions cheshire.mk
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ CHS_SIM_ALL += $(CHS_ROOT)/target/sim/models/24FC1025.v
CHS_SIM_ALL += $(CHS_ROOT)/target/sim/vsim/compile.cheshire_soc.tcl
CHS_SIM_ALL += $(CHS_ROOT)/target/sim/vcs/compile.cheshire_soc.sh

include $(CHS_ROOT)/target/sim/verilator/verilator.mk

###########
# DRAMSys #
###########
Expand Down
4 changes: 2 additions & 2 deletions hw/cheshire_soc.sv
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,8 @@ module cheshire_soc import cheshire_pkg::*; #(
assign intr.intn.bus_err.cores = core_bus_err_intr_comb;

for (genvar i = 0; i < NumIntHarts; i++) begin : gen_cva6_cores
axi_cva6_req_t core_out_req, core_ur_req;
axi_cva6_rsp_t core_out_rsp, core_ur_rsp;
axi_cva6_req_t core_out_req, core_ur_req /*verilator split_var*/;
axi_cva6_rsp_t core_out_rsp, core_ur_rsp /*verilator split_var*/;

// CLIC interface
logic clic_irq_valid, clic_irq_ready;
Expand Down
1 change: 1 addition & 0 deletions sw/tests/dma_2d.spm.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ int main(void) {
// Issue blocking 2D memcpy (exclude null terminator from source)
sys_dma_2d_blk_memcpy((uintptr_t)(void *)dst, (uintptr_t)(void *)src, sizeof(src_cached) - 4, 7,
1, 4, DMA_CONF_DECOUPLE_NONE);
fence();

// Check destination string
int errors = sizeof(gold);
Expand Down
38 changes: 38 additions & 0 deletions sw/tests/dma_long.spm.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Copyright 2023 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Nicole Narr <narrn@student.ethz.ch>
// Christopher Reinwardt <creinwar@student.ethz.ch>
// Paul Scheffler <paulsc@iis.ee.ethz.ch>

#include "regs/cheshire.h"
#include "dif/clint.h"
#include "params.h"
#include "util.h"
#include "dif/dma.h"

int main(void) {
    // DMA source and destination addresses. Both are volatile locals, so each
    // use below is a fresh read of the variable.
    volatile uint64_t dma_src = 0x80000000;
    volatile uint64_t dma_dst = 0x80010000;

    // Cycle bookkeeping: timestamp taken before a transfer, and the elapsed
    // count of the most recent measurement. The values are only stored, never
    // printed or checked by this test.
    volatile uint64_t mcycle_start = get_mcycle();
    volatile uint64_t mcycle_tot = get_mcycle() - mcycle_start;

    // Sweep the transfer length over 2^1 .. 2^11. The repetition count is
    // halved each time the length doubles, so length * repetitions stays at
    // 64 * 1024 for every step.
    int shift = 1;
    while (shift <= 11) {
        fence();
        const uint64_t xfer_len = 1 << shift;
        const uint64_t xfer_reps = 64*1024 >> shift;

        mcycle_start = get_mcycle();

        // NOTE(review): raw 32-bit store of 1 to address 0x010000d4 right
        // before the copy; the register's meaning is not visible from this
        // file -- confirm and replace with a named register definition.
        volatile uint32_t *const raw_reg = (volatile uint32_t*)(void*)(uintptr_t)(0x010000d4);
        *raw_reg = 1;

        // Blocking 2D copy with both stride arguments set to 0.
        sys_dma_2d_blk_memcpy(dma_dst, (uintptr_t)(void*)dma_src, xfer_len, 0,
                              0, xfer_reps, DMA_CONF_DECOUPLE_NONE);
        mcycle_tot = get_mcycle() - mcycle_start;

        ++shift;
    }

    return 0;
}
168 changes: 168 additions & 0 deletions sw/tests/matmul.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#include "regs/cheshire.h"

Check failure on line 1 in sw/tests/matmul.c

View workflow job for this annotation

GitHub Actions / lint-license

FAILED: File does not start with comment
#include "dif/clint.h"
#include "params.h"
#include "util.h"

#include "matmul_data.h"
// Matrix dimension used by main(): passed to mmopt() as N, M and K and as all
// three strides, and used as the row length when computing the checksum.
int sN = 32;

// Expected value of the scaled integer checksum; main() returns 0 only on a match.
// NOTE(review): presumably precomputed for the data in matmul_data.h with the
// sN and ITERATIONS values in this file -- confirm before changing any of them.
const int CHKSUM = 10614161;
// Number of times main() calls mmopt() on the same operands.
const int ITERATIONS = 16;


// Read the `minstret` CSR and return its value.
// Note: minstret is the RISC-V machine-mode retired-instruction counter, not a
// cycle counter.
static inline uint64_t get_minstret() {
uint64_t reg;
// `volatile` plus the "memory" clobber keep the compiler from dropping this
// read or moving memory accesses across it.
asm volatile("csrr %0, minstret" : "=r"(reg)::"memory");
return reg;
}

// ROWOP(c0, c1, c2, c3): one 4-wide multiply-accumulate step into four
// double accumulators.
//
// The macro is not self-contained; it expands against names that must exist
// in the enclosing scope:
//   b, b_colstride, n, k : used to form bbj = &b[(n+j)*b_colstride + k], j = 0..3
//   bb0..bb3             : double pointers assigned by the macro
//   ax                   : double pointer; ax[0..3] are the common multiplicands
//
// For each j = 0..3 the asm updates accumulator cj as
//   cj += ax[0]*bbj[0];  cj += ax[1]*bbj[1];  cj += ax[2]*bbj[2];  cj += ax[3]*bbj[3];
// with each step being one fmadd.d. The bbj values are loaded with fld at byte
// offsets 0/8/16/24 into f0-f7, which are declared as clobbers.
//
// Every operand uses the "+&" (read-write, early-clobber) constraint even
// though the instructions only write the cx operands.
// NOTE(review): bb* and ax* look like pure inputs; confirm whether listing
// them as outputs is intentional.
//
// The expansion is several statements and already ends in ';' (it is not
// wrapped in do { } while (0)), so use it only as a standalone statement.
#define ROWOP(c0, c1, c2, c3) \
bb0 = &b[(n+0)*b_colstride + k]; \
bb1 = &b[(n+1)*b_colstride + k]; \
bb2 = &b[(n+2)*b_colstride + k]; \
bb3 = &b[(n+3)*b_colstride + k]; \
asm volatile( \
"fld f0, 0(%[bb0]) \n" \
"fld f1, 0(%[bb1]) \n" \
"fld f2, 0(%[bb2]) \n" \
"fld f3, 0(%[bb3]) \n" \
"fld f4, 8(%[bb0]) \n" \
"fld f5, 8(%[bb1]) \n" \
"fld f6, 8(%[bb2]) \n" \
"fld f7, 8(%[bb3]) \n" \
"fmadd.d %[cx0], %[ax0], f0, %[cx0] \n" \
"fmadd.d %[cx1], %[ax0], f1, %[cx1] \n" \
"fmadd.d %[cx2], %[ax0], f2, %[cx2] \n" \
"fmadd.d %[cx3], %[ax0], f3, %[cx3] \n" \
"fmadd.d %[cx0], %[ax1], f4, %[cx0] \n" \
"fmadd.d %[cx1], %[ax1], f5, %[cx1] \n" \
"fmadd.d %[cx2], %[ax1], f6, %[cx2] \n" \
"fmadd.d %[cx3], %[ax1], f7, %[cx3] \n" \
"fld f0, 16(%[bb0]) \n" \
"fld f1, 16(%[bb1]) \n" \
"fld f2, 16(%[bb2]) \n" \
"fld f3, 16(%[bb3]) \n" \
"fld f4, 24(%[bb0]) \n" \
"fld f5, 24(%[bb1]) \n" \
"fld f6, 24(%[bb2]) \n" \
"fld f7, 24(%[bb3]) \n" \
"fmadd.d %[cx0], %[ax2], f0, %[cx0] \n" \
"fmadd.d %[cx1], %[ax2], f1, %[cx1] \n" \
"fmadd.d %[cx2], %[ax2], f2, %[cx2] \n" \
"fmadd.d %[cx3], %[ax2], f3, %[cx3] \n" \
"fmadd.d %[cx0], %[ax3], f4, %[cx0] \n" \
"fmadd.d %[cx1], %[ax3], f5, %[cx1] \n" \
"fmadd.d %[cx2], %[ax3], f6, %[cx2] \n" \
"fmadd.d %[cx3], %[ax3], f7, %[cx3] \n" \
: \
[bb0]"+&r"(bb0), [bb1]"+&r"(bb1), [bb2]"+&r"(bb2), [bb3]"+&r"(bb3), \
[cx0]"+&f"(c0), [cx1]"+&f"(c1), [cx2]"+&f"(c2), [cx3]"+&f"(c3), \
[ax0]"+&f"(ax[0]), [ax1]"+&f"(ax[1]), [ax2]"+&f"(ax[2]), [ax3]"+&f"(ax[3]) \
:: "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7" \
); \

// Blocked double-precision matrix multiply-accumulate using the ROWOP macro.
//
// For every visited (m, n) the function performs
//   c[m*c_rowstride + n] += sum over k of a[m*a_rowstride + k] * b[n*b_colstride + k]
// i.e. `a` is indexed row by k and `b` is indexed column by k.
//
// Parameters:
//   a, b, c      input matrices a and b, in/out accumulator matrix c; declared
//                __restrict, so the caller must not pass overlapping buffers
//   N, M, K      loop bounds for the n, m and k indices respectively
//   a_rowstride  element distance between consecutive m-rows of a
//   b_colstride  element distance between consecutive n-columns of b
//   c_rowstride  element distance between consecutive m-rows of c
//
// All three loops advance in steps of 4 with no tail handling: whenever
// m < M (n < N, k < K) the four indices m..m+3 (n..n+3, k..k+3) are accessed,
// so callers should pass dimensions that are multiples of 4.
//
// Returns: always 0.
int __attribute__ ((visibility("hidden"))) __attribute__((noinline)) mmopt(
double* __restrict a,
double* __restrict b,
double* __restrict c,
int N,
int M,
int K,
int a_rowstride,
int b_colstride,
int c_rowstride
) {
for (int m = 0; m < M; m+=4)
for (int n = 0; n < N; n+=4) {
// One output block per n-m pair

// Load the 4x4 block c[m..m+3][n..n+3] into 16 scalar accumulators
// (cb0..cb3 = row m, cb4..cb7 = row m+1, and so on).
register double cb0 = c[c_rowstride*(m+0)+n+0];
register double cb1 = c[c_rowstride*(m+0)+n+1];
register double cb2 = c[c_rowstride*(m+0)+n+2];
register double cb3 = c[c_rowstride*(m+0)+n+3];
register double cb4 = c[c_rowstride*(m+1)+n+0];
register double cb5 = c[c_rowstride*(m+1)+n+1];
register double cb6 = c[c_rowstride*(m+1)+n+2];
register double cb7 = c[c_rowstride*(m+1)+n+3];
register double cb8 = c[c_rowstride*(m+2)+n+0];
register double cb9 = c[c_rowstride*(m+2)+n+1];
register double cb10 = c[c_rowstride*(m+2)+n+2];
register double cb11 = c[c_rowstride*(m+2)+n+3];
register double cb12 = c[c_rowstride*(m+3)+n+0];
register double cb13 = c[c_rowstride*(m+3)+n+1];
register double cb14 = c[c_rowstride*(m+3)+n+2];
register double cb15 = c[c_rowstride*(m+3)+n+3];

// Accumulate over k, four elements per iteration.
for (int k = 0; k < K; k+=4) {
// Assigned inside each ROWOP expansion.
register double *bb0, *bb1, *bb2, *bb3;

// ax starts at a[m][k]; it is advanced by one row of a before each
// following ROWOP, so the four ROWOPs handle rows m, m+1, m+2, m+3.
register double* ax = &a[m*a_rowstride + k];
ROWOP(cb0, cb1, cb2, cb3)

ax += a_rowstride;
ROWOP(cb4, cb5, cb6, cb7)

ax += a_rowstride;
ROWOP(cb8, cb9, cb10, cb11)

ax += a_rowstride;
ROWOP(cb12, cb13, cb14, cb15)
}
// Write back output block
c[(m+0)*c_rowstride + (n+0)] = cb0;
c[(m+0)*c_rowstride + (n+1)] = cb1;
c[(m+0)*c_rowstride + (n+2)] = cb2;
c[(m+0)*c_rowstride + (n+3)] = cb3;
c[(m+1)*c_rowstride + (n+0)] = cb4;
c[(m+1)*c_rowstride + (n+1)] = cb5;
c[(m+1)*c_rowstride + (n+2)] = cb6;
c[(m+1)*c_rowstride + (n+3)] = cb7;
c[(m+2)*c_rowstride + (n+0)] = cb8;
c[(m+2)*c_rowstride + (n+1)] = cb9;
c[(m+2)*c_rowstride + (n+2)] = cb10;
c[(m+2)*c_rowstride + (n+3)] = cb11;
c[(m+3)*c_rowstride + (n+0)] = cb12;
c[(m+3)*c_rowstride + (n+1)] = cb13;
c[(m+3)*c_rowstride + (n+2)] = cb14;
c[(m+3)*c_rowstride + (n+3)] = cb15;
}
return 0;
}

// Matmul self-test: runs mmopt() ITERATIONS times on the float_data_a/b/c
// operands, folds the result matrix into an integer checksum and compares it
// against CHKSUM.
//
// Returns 0 when the checksum matches, 1 otherwise.
int main(void) {

    // Snapshot the retired-instruction and cycle counters before the kernel.
    // Held as 64-bit values: get_minstret() returns uint64_t, so a 32-bit
    // local would silently drop the upper half (get_mcycle() is assumed to
    // be 64-bit as well).
    uint64_t instret = get_minstret();
    uint64_t cycles = get_mcycle();

    for (int i = 0; i < ITERATIONS; ++i) {
        mmopt(
            float_data_a,
            float_data_b,
            float_data_c,
            sN, sN, sN,
            sN, sN, sN
        );
    }

    // Elapsed cycles and retired instructions for the whole run. They are
    // not reported by this test; the casts only mark them as intentionally
    // unused.
    cycles = get_mcycle() - cycles;
    instret = get_minstret() - instret;
    (void)cycles;
    (void)instret;

    // Compute checksum: alternate the sign per row so that row swaps are
    // visible, and scale by 1e6 before truncating to an integer.
    double checksum = 0.0;
    for (int y = 0; y < sN; y++) {
        double sign = (y & 1) ? -1.0 : 1.0;
        for (int x = 0; x < sN; x++) {
            checksum += sign*1000.0*1000.0*float_data_c[y*sN+x];
        }
    }

    // Converting a negative, NaN or out-of-range double to uint64_t is
    // undefined behaviour. Such a checksum can never equal CHKSUM, so report
    // the mismatch directly instead of converting it.
    if (!(checksum >= 0.0 && checksum < 18446744073709551616.0)) {
        return 1;
    }

    // Scale checksum to int.
    // Take ~2 digits off (divide by 128) to account for FP rounding.
    uint64_t chkint = (uint64_t)(checksum) >> 7;

    // Check return
    return (chkint != CHKSUM);
}
Loading
Loading