Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 70 additions & 5 deletions backends/generators/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,55 @@ def check_requirement(req, exts):
return False


def build_match_from_format(format_field):
    """
    Build a 32-character match string from the format field in the new schema.

    Every entry of ``format_field["opcodes"]`` that is a dict carrying both a
    ``location`` (a ``"high-low"`` bit range or a single bit index) and an
    integer ``value`` fixes the corresponding bits. The string is written MSB
    first (index 0 is bit 31); bits not fixed by any field stay ``"-"``.
    Entries named ``$child_of``, non-dict entries, and entries whose value is
    not an int are ignored.

    Returns ``None`` when no trustworthy pattern can be built:
    ``format_field`` is empty or lacks ``opcodes``, a value does not fit in
    its field, or no field fixes any bit (an all-wildcard pattern would match
    every instruction and could shadow real matches).

    Raises ``ValueError`` if a location is not of the form ``N`` or ``N-M``.
    """
    if not format_field or "opcodes" not in format_field:
        return None

    # Start with every bit variable; fixed opcode fields overwrite their slice.
    match_bits = ["-"] * 32
    any_fixed = False

    for field_name, field_data in format_field["opcodes"].items():
        if field_name == "$child_of":
            continue
        if not isinstance(field_data, dict):
            continue
        if "location" not in field_data or "value" not in field_data:
            continue

        # A single-bit location may arrive as an int (a bare `7` in YAML)
        # rather than the string "7", so normalise to text before parsing.
        location = str(field_data["location"])
        if "-" in location:
            high, low = map(int, location.split("-"))
        else:
            high = low = int(location)
        if high < low:
            # Tolerate a range written low-high.
            high, low = low, high

        value = field_data["value"]
        if not isinstance(value, int):
            continue

        num_bits = high - low + 1
        if value < 0 or value >> num_bits:
            # Writing a value wider than its field would overwrite the
            # neighbouring lower bits and yield a wrong pattern.
            logging.warning(
                f"Opcode field '{field_name}' value {value} does not fit in "
                f"location {location} ({num_bits} bits)"
            )
            return None

        # Place bits MSB first; bit N of the instruction lives at index 31 - N.
        for offset, bit in enumerate(format(value, f"0{num_bits}b")):
            bit_position = high - offset
            if 0 <= bit_position < 32:
                match_bits[31 - bit_position] = bit
                any_fixed = True

    if not any_fixed:
        return None

    return "".join(match_bits)


def parse_extension_requirements(extensions_spec):
"""
Parse the extension requirements from the definedBy field.
Expand Down Expand Up @@ -177,11 +226,27 @@ def load_instructions(

encoding = data.get("encoding", {})
if not encoding:
logging.error(
f"Missing 'encoding' field in instruction {name} in {path}"
)
encoding_filtered += 1
continue
# Check if this instruction uses the new schema with a 'format' field
format_field = data.get("format")
if format_field:
# Try to build a match string from the format field
match_string = build_match_from_format(format_field)
if match_string:
# Create a synthetic encoding compatible with existing logic
encoding = {"match": match_string, "variables": []}
logging.debug(f"Built encoding from format field for {name}")
else:
logging.error(
f"Could not build encoding from format field in instruction {name} in {path}"
)
encoding_filtered += 1
continue
else:
logging.error(
f"Missing 'encoding' field in instruction {name} in {path}"
)
encoding_filtered += 1
continue

# Check if the instruction specifies a base architecture constraint
base = data.get("base")
Expand Down
179 changes: 179 additions & 0 deletions backends/generators/sverilog/sverilog_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
#!/usr/bin/env python3

import argparse
import os
import sys
import logging
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from generator import load_instructions, load_csrs


def format_instruction_name(name):
    """Return the SystemVerilog identifier for an instruction mnemonic.

    Dots become underscores and the result is upper-cased, e.g. ``fcvt.d.s``
    becomes ``FCVT_D_S``. Compressed mnemonics need no special handling: the
    generic replacement already turns the ``c.`` prefix into ``C_``
    (``c.addi`` becomes ``C_ADDI``).
    """
    return name.replace(".", "_").upper()
Comment on lines +18 to +21
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like there is no reason to special-case compressed instructions here? The c. --> C_ conversion should be handled by the standard name.replace(".", "_").upper().



def format_csr_name(name):
    """Build the SystemVerilog identifier for a CSR.

    The name is upper-cased, dots are turned into underscores, and the result
    is prefixed with ``CSR_``.
    """
    identifier = name.upper().replace(".", "_")
    return f"CSR_{identifier}"


def match_to_sverilog_bits(match_str, is_compressed=False):
"""Convert a match string to SystemVerilog bit pattern."""
if not match_str:
return "32'b" + "?" * 32
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this throw an error/warning instead of silently generating a match string of all wildcards that will match any instruction? That would be very problematic in use because it might shadow an actual match later.


# For compressed instructions (16-bit), we need to handle them differently
# The riscv-opcodes format puts the 16-bit pattern in the lower 16 bits
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably better for comments to stand alone instead of justifying choices based on riscv-opcodes.

# with the upper 16 bits as wildcards
if is_compressed or len(match_str) == 16:
# Pad with wildcards on the left for 16-bit instructions
match_str = "?" * 16 + match_str
elif len(match_str) < 32:
# For other cases, pad on the right
match_str = match_str + "-" * (32 - len(match_str))
Comment on lines +40 to +42
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What cases do you envision this matching? If none, we're probably better off throwing an error here.


# Convert to SystemVerilog format (0, 1, or ?)
result = []
for bit in match_str:
if bit == "0":
result.append("0")
elif bit == "1":
result.append("1")
else: # '-' or any other character
result.append("?")
Comment on lines +46 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this whole block be simplified to just match_str.replace("-", "?")?


return "32'b" + "".join(result)


def generate_sverilog(instructions, csrs, output_file):
"""Generate SystemVerilog package file."""
with open(output_file, "w") as f:
# Write header
f.write("\n/* Automatically generated by parse_opcodes */\n")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is more clear?

Suggested change
f.write("\n/* Automatically generated by parse_opcodes */\n")
f.write("\n/* Automatically generated by UDB */\n")

f.write("package riscv_instr;\n")

# Find the maximum name length for alignment
max_instr_len = max((len(format_instruction_name(name)) for name in instructions.keys()), default=0)
max_csr_len = max((len(format_csr_name(csrs[addr])) for addr in csrs.keys()), default=0)
max_len = max(max_instr_len, max_csr_len)

# Write instruction parameters
for name in sorted(instructions.keys()):
encoding = instructions[name]
sv_name = format_instruction_name(name)
# Pad the name for alignment
padded_name = sv_name.ljust(max_len)

# Get the match pattern
if isinstance(encoding, dict) and "match" in encoding:
match = encoding["match"]
else:
# If no match field, use all wildcards
match = "-" * 32
Comment on lines +79 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above, please don't generate all wildcards if we don't find a match. A warning/error seems like a better choice here.


# Check if this is a compressed instruction
is_compressed = name.startswith("c.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the is_compressed logic necessary since you also check if the instruction encoding is 16 bits in the match_to_sverilog_bits function? Seems like it should be fine to just use that to check instead of having both the string-based and encoding-based approaches.

sv_bits = match_to_sverilog_bits(match, is_compressed)
f.write(f" localparam [31:0] {padded_name} = {sv_bits};\n")

# Write CSR parameters
# CSRs are returned as {address: name} by load_csrs
for addr in sorted(csrs.keys()):
csr_name = csrs[addr]
sv_name = format_csr_name(csr_name)
# Pad the name for alignment
padded_name = sv_name.ljust(max_len)

# Format CSR address as 12-bit hex
f.write(f" localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n")

# Write footer
f.write("\nendpackage\n")


def parse_args():
parser = argparse.ArgumentParser(
description="Generate SystemVerilog package from RISC-V instruction definitions"
)
parser.add_argument(
"--inst-dir",
default="../../../gen/resolved_spec/_/inst/",
help="Directory containing instruction YAML files",
)
parser.add_argument(
"--csr-dir",
default="../../../gen/resolved_spec/_/csr/",
help="Directory containing CSR YAML files",
)
parser.add_argument(
"--output",
default="inst.sverilog",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about something like this instead?

Suggested change
default="inst.sverilog",
default="riscv_decode_package.svh",

help="Output SystemVerilog file name"
)
parser.add_argument(
"--extensions",
default="A,D,F,I,M,Q,Zba,Zbb,Zbs,S,System,V,Zicsr,Smpmp,Sm,H,U,Zicntr,Zihpm,Smhpm",
help="Comma-separated list of enabled extensions. Default includes standard extensions.",
)
Comment on lines +122 to +126
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not default to a subset of (seemingly arbitrary) extensions. Not sure what the comment means when it says it defaults to the standard extensions. Similar to the generated C header, the default should be to include all extensions.

parser.add_argument(
"--arch",
default="RV64",
choices=["RV32", "RV64", "BOTH"],
help="Target architecture (RV32, RV64, or BOTH). Default is RV64.",
)
Comment on lines +127 to +132
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The generated C header does not have this option. I think it makes sense to keep the same CLI interface for both of them. The default output should also include everything, not just RV64.

parser.add_argument(
"--verbose", "-v", action="store_true", help="Enable verbose logging"
)
parser.add_argument(
"--include-all",
action="store_true",
help="Include all instructions and CSRs regardless of extensions",
)
return parser.parse_args()


def main():
    """Command-line entry point: load instructions and CSRs, then write the package.

    Parses the CLI options, configures logging (DEBUG when ``--verbose`` is
    given, INFO otherwise), resolves the enabled-extension list (empty when
    ``--include-all`` is set), loads instructions and CSRs with those
    settings, and writes the SystemVerilog package to the ``--output`` path.
    """
    cli = parse_args()

    logging.basicConfig(
        level=logging.DEBUG if cli.verbose else logging.INFO,
        format="%(levelname)s:: %(message)s",
    )

    # Work out which extensions are enabled.
    if not cli.include_all:
        enabled_extensions = [token.strip() for token in cli.extensions.split(",")]
        logging.info(f"Enabled extensions: {', '.join(enabled_extensions)}")
    else:
        enabled_extensions = []
        logging.info("Including all instructions and CSRs (ignoring extension filter)")

    logging.info(f"Target architecture: {cli.arch}")

    # Both loaders take the same filter arguments after the directory.
    filter_args = (enabled_extensions, cli.include_all, cli.arch)

    instructions = load_instructions(cli.inst_dir, *filter_args)
    logging.info(f"Loaded {len(instructions)} instructions")

    csrs = load_csrs(cli.csr_dir, *filter_args)
    logging.info(f"Loaded {len(csrs)} CSRs")

    # Emit the SystemVerilog file and report what was written.
    generate_sverilog(instructions, csrs, cli.output)
    logging.info(
        f"Generated {cli.output} with {len(instructions)} instructions and {len(csrs)} CSRs"
    )

if __name__ == "__main__":
main()
25 changes: 25 additions & 0 deletions backends/generators/tasks.rake
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require 'tempfile'

directory "#{$root}/gen/go"
directory "#{$root}/gen/c_header"
directory "#{$root}/gen/sverilog"

namespace :gen do
desc <<~DESC
Expand Down Expand Up @@ -87,4 +88,28 @@ namespace :gen do
resolved_codes_file.unlink
end
end

desc <<~DESC
Generate SystemVerilog package from RISC-V instruction and CSR definitions

Options:
* CONFIG - Configuration name (defaults to "_")
* OUTPUT_DIR - Output directory for generated SystemVerilog code (defaults to "#{$root}/gen/sverilog")
DESC
task sverilog: "#{$root}/gen/sverilog" do
  # CONFIG selects the configuration to read from; OUTPUT_DIR overrides where
  # the generated file is written.
  config_name = ENV["CONFIG"] || "_"
  output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/sverilog/"

  # Ensure the output directory exists (OUTPUT_DIR may differ from the
  # prerequisite directory created above)
  FileUtils.mkdir_p output_dir

  # Get the arch paths based on the config
  resolver = Udb::Resolver.new
  cfg_arch = resolver.cfg_arch_for(config_name)
  inst_dir = cfg_arch.path / "inst"
  csr_dir = cfg_arch.path / "csr"

  # File.join adds the separator only when needed, so OUTPUT_DIR works with
  # or without a trailing slash.
  output_file = File.join(output_dir, "inst.sverilog")

  # Run the SystemVerilog generator script using the same Python environment.
  # Arguments are passed separately so paths containing spaces or shell
  # metacharacters are not re-parsed by a shell.
  sh "#{$root}/.home/.venv/bin/python3",
     "#{$root}/backends/generators/sverilog/sverilog_generator.py",
     "--inst-dir=#{inst_dir}",
     "--csr-dir=#{csr_dir}",
     "--output=#{output_file}"
end
end
Loading