def _parse_bit_positions(location):
    """Expand a location spec into a list of bit positions, MSB first.

    Accepts a single bit given as an int (``7``) or string (``"7"``), an
    inclusive range (``"31-25"``), or several ``|``-separated segments
    (``"31|7-0"``) whose bits are concatenated in the order written.

    Returns:
        The list of bit positions, or ``None`` when the spec is malformed
        (unsupported type, non-numeric text, negative bit, reversed range).
    """
    # bool is a subclass of int but is never a meaningful bit position.
    if isinstance(location, bool):
        return None
    # YAML parses a bare ``7`` as an integer, not as the string "7".
    if isinstance(location, int):
        return [location] if location >= 0 else None
    if not isinstance(location, str):
        return None

    positions = []
    for segment in location.split("|"):
        high_text, separator, low_text = segment.partition("-")
        try:
            high = int(high_text)
            low = int(low_text) if separator else high
        except ValueError:
            return None
        if low < 0 or high < low:
            return None
        positions.extend(range(high, low - 1, -1))
    return positions


def _coerce_opcode_value(value):
    """Return *value* as an int, or ``None`` if it is not an integer literal."""
    if isinstance(value, int):
        return value
    if isinstance(value, str):
        try:
            # Base 0 accepts the "0b...", "0o..." and "0x..." prefixes.
            return int(value.strip(), 0)
        except ValueError:
            return None
    return None


def build_match_from_format(format_field, width=32):
    """Build a match string from the ``format`` field of the new schema.

    Every entry of ``format_field["opcodes"]`` that carries both a
    ``location`` and an integer ``value`` contributes fixed bits to the
    pattern; all remaining bits stay variable (``"-"``).

    Args:
        format_field: The instruction's ``format`` mapping (may be ``None``).
        width: Encoding width in bits. Defaults to 32, the width that was
            previously hard-coded.

    Returns:
        A ``width``-character string of ``"0"``, ``"1"`` and ``"-"`` with the
        most significant bit first, or ``None`` when there is no usable
        ``opcodes`` mapping or an opcode field is malformed (unparseable
        location, or a value that does not fit in its location).
    """
    if not isinstance(format_field, dict) or "opcodes" not in format_field:
        return None

    opcodes = format_field["opcodes"]
    if not isinstance(opcodes, dict):
        return None

    # Start with every bit variable and pin down the opcode bits below.
    match_bits = ["-"] * width

    for field_name, field_data in opcodes.items():
        # "$child_of" is schema bookkeeping, not an opcode field.
        if field_name == "$child_of":
            continue
        if not (
            isinstance(field_data, dict)
            and "location" in field_data
            and "value" in field_data
        ):
            continue

        value = _coerce_opcode_value(field_data["value"])
        if value is None:
            # Non-integer values carry no fixed bits; leave them variable.
            continue

        positions = _parse_bit_positions(field_data["location"])
        if not positions:
            logging.warning(
                "Opcode field %r has an unparseable location %r",
                field_name,
                field_data["location"],
            )
            return None

        num_bits = len(positions)
        # A negative or over-wide value would otherwise leak a sign character
        # or spill into the neighbouring field's bits.
        if value < 0 or value >> num_bits:
            logging.warning(
                "Opcode field %r value %r does not fit in %d bit(s)",
                field_name,
                value,
                num_bits,
            )
            return None

        # positions and the binary digits are both MSB first.
        for bit_position, bit in zip(positions, format(value, f"0{num_bits}b")):
            if 0 <= bit_position < width:
                match_bits[width - 1 - bit_position] = bit

    return "".join(match_bits)
@@ -177,11 +226,27 @@ def load_instructions( encoding = data.get("encoding", {}) if not encoding: - logging.error( - f"Missing 'encoding' field in instruction {name} in {path}" - ) - encoding_filtered += 1 - continue + # Check if this instruction uses the new schema with a 'format' field + format_field = data.get("format") + if format_field: + # Try to build a match string from the format field + match_string = build_match_from_format(format_field) + if match_string: + # Create a synthetic encoding compatible with existing logic + encoding = {"match": match_string, "variables": []} + logging.debug(f"Built encoding from format field for {name}") + else: + logging.error( + f"Could not build encoding from format field in instruction {name} in {path}" + ) + encoding_filtered += 1 + continue + else: + logging.error( + f"Missing 'encoding' field in instruction {name} in {path}" + ) + encoding_filtered += 1 + continue # Check if the instruction specifies a base architecture constraint base = data.get("base") From cfbf86cd5346543e36ef342bde74098d7bc842bd Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Sat, 30 Aug 2025 22:26:20 +0100 Subject: [PATCH 2/2] feat: add SystemVerilog backend generator Implements a new backend generator for SystemVerilog output, matching the exact format used by riscv-opcodes/inst.sverilog. This provides direct compatibility with hardware designs using the riscv-opcodes SystemVerilog package format. 
#!/usr/bin/env python3
"""Generate a riscv-opcodes style SystemVerilog package.

The package lists one ``localparam`` bit pattern per instruction and one
``localparam`` address per CSR, using the instruction and CSR dictionaries
produced by the shared ``generator`` module.
"""

import argparse
import os
import sys
import logging
from pathlib import Path

# Every instruction parameter is emitted as a 32-bit pattern; compressed
# instructions only define the low 16 bits of it.
_INSTR_WIDTH = 32
_COMPRESSED_WIDTH = 16

# Leading indent of each localparam line inside the package body.
_INDENT = "  "


def _load_generator_api():
    """Import and return ``(load_instructions, load_csrs)`` from ``generator``.

    ``generator.py`` lives in the parent directory of this script, so that
    directory is put on ``sys.path`` first. Doing this lazily keeps importing
    this module free of side effects.
    """
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    if parent_dir not in sys.path:
        sys.path.insert(0, parent_dir)

    from generator import load_instructions, load_csrs

    return load_instructions, load_csrs


def format_instruction_name(name):
    """Format an instruction name for SystemVerilog (uppercase, dots -> underscores)."""
    # Compressed names need no special case: "c.addi" already maps to "C_ADDI".
    return name.replace(".", "_").upper()


def format_csr_name(name):
    """Format a CSR name for SystemVerilog (``CSR_`` prefix, uppercase, dots -> underscores)."""
    return "CSR_" + name.replace(".", "_").upper()


def match_to_sverilog_bits(match_str, is_compressed=False):
    """Convert a match string to a 32-bit SystemVerilog bit pattern.

    Args:
        match_str: Pattern of ``"0"``, ``"1"`` and wildcard characters with
            the most significant bit first. Empty/``None`` means "all
            wildcards".
        is_compressed: When true only the low 16 bits of ``match_str`` are
            meaningful; the upper half of the result is wildcards.

    Returns:
        ``"32'b"`` followed by exactly 32 characters drawn from ``0``, ``1``
        and ``?``.
    """
    if not match_str:
        return "32'b" + "?" * _INSTR_WIDTH

    field_width = _COMPRESSED_WIDTH if is_compressed else _INSTR_WIDTH
    # The last character is bit 0, so the low bits are the tail of the string.
    dropped, kept = match_str[:-field_width], match_str[-field_width:]
    if any(bit in "01" for bit in dropped):
        logging.warning(
            "Match string %r is wider than %d bits; dropping fixed upper bits",
            match_str,
            field_width,
        )

    # Anything that is not a fixed 0/1 (e.g. "-") becomes a wildcard.
    translated = "".join(bit if bit in "01" else "?" for bit in kept)
    # Left-pad so the known bits stay anchored at bit 0 and the literal is
    # always exactly 32 digits wide.
    return "32'b" + translated.rjust(_INSTR_WIDTH, "?")


def generate_sverilog(instructions, csrs, output_file):
    """Write the SystemVerilog package file.

    Args:
        instructions: Mapping of instruction name to its encoding; an
            encoding that is a dict with a ``"match"`` key supplies the bit
            pattern, anything else is emitted as all wildcards.
        csrs: Mapping of CSR address (int) to CSR name.
        output_file: Path of the file to create or overwrite.
    """
    sv_instr_names = {name: format_instruction_name(name) for name in instructions}
    sv_csr_names = {addr: format_csr_name(csr) for addr, csr in csrs.items()}

    # One shared column width keeps instruction and CSR names aligned.
    max_len = max(
        (len(sv) for sv in (*sv_instr_names.values(), *sv_csr_names.values())),
        default=0,
    )

    lines = [
        "\n/* Automatically generated by parse_opcodes */\n",
        "package riscv_instr;\n",
    ]

    for name in sorted(instructions):
        encoding = instructions[name]
        if isinstance(encoding, dict) and "match" in encoding:
            match = encoding["match"]
        else:
            # No match field: emit an all-wildcard pattern.
            match = "-" * _INSTR_WIDTH

        sv_bits = match_to_sverilog_bits(match, name.startswith("c."))
        padded_name = sv_instr_names[name].ljust(max_len)
        lines.append(f"{_INDENT}localparam [31:0] {padded_name} = {sv_bits};\n")

    for addr in sorted(csrs):
        padded_name = sv_csr_names[addr].ljust(max_len)
        # CSR addresses are 12 bits wide: three hex digits.
        lines.append(
            f"{_INDENT}localparam logic [11:0] {padded_name} = 12'h{addr:03x};\n"
        )

    lines.append("\nendpackage\n")

    # Format everything before opening the file so a formatting error cannot
    # leave a truncated package behind.
    with Path(output_file).open("w", encoding="utf-8") as f:
        f.writelines(lines)


def parse_args(argv=None):
    """Parse command-line options.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Generate SystemVerilog package from RISC-V instruction definitions"
    )
    parser.add_argument(
        "--inst-dir",
        default="../../../gen/resolved_spec/_/inst/",
        help="Directory containing instruction YAML files",
    )
    parser.add_argument(
        "--csr-dir",
        default="../../../gen/resolved_spec/_/csr/",
        help="Directory containing CSR YAML files",
    )
    parser.add_argument(
        "--output",
        default="inst.sverilog",
        help="Output SystemVerilog file name",
    )
    parser.add_argument(
        "--extensions",
        default="A,D,F,I,M,Q,Zba,Zbb,Zbs,S,System,V,Zicsr,Smpmp,Sm,H,U,Zicntr,Zihpm,Smhpm",
        help="Comma-separated list of enabled extensions. Default includes standard extensions.",
    )
    parser.add_argument(
        "--arch",
        default="RV64",
        choices=["RV32", "RV64", "BOTH"],
        help="Target architecture (RV32, RV64, or BOTH). Default is RV64.",
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Enable verbose logging"
    )
    parser.add_argument(
        "--include-all",
        action="store_true",
        help="Include all instructions and CSRs regardless of extensions",
    )
    return parser.parse_args(argv)


def main(argv=None):
    """Load instructions and CSRs, then write the SystemVerilog package.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(level=log_level, format="%(levelname)s:: %(message)s")

    if args.include_all:
        enabled_extensions = []
        logging.info("Including all instructions and CSRs (ignoring extension filter)")
    else:
        # Ignore blank entries such as the one left by a trailing comma.
        enabled_extensions = [
            ext.strip() for ext in args.extensions.split(",") if ext.strip()
        ]
        logging.info(f"Enabled extensions: {', '.join(enabled_extensions)}")

    logging.info(f"Target architecture: {args.arch}")

    load_instructions, load_csrs = _load_generator_api()

    instructions = load_instructions(
        args.inst_dir, enabled_extensions, args.include_all, args.arch
    )
    logging.info(f"Loaded {len(instructions)} instructions")

    csrs = load_csrs(args.csr_dir, enabled_extensions, args.include_all, args.arch)
    logging.info(f"Loaded {len(csrs)} CSRs")

    generate_sverilog(instructions, csrs, args.output)
    logging.info(
        f"Generated {args.output} with {len(instructions)} instructions and {len(csrs)} CSRs"
    )


if __name__ == "__main__":
    main()
Configuration name (defaults to "_") + * OUTPUT_DIR - Output directory for generated SystemVerilog code (defaults to "#{$root}/gen/sverilog") + DESC + task sverilog: "#{$root}/gen/sverilog" do + config_name = ENV["CONFIG"] || "_" + output_dir = ENV["OUTPUT_DIR"] || "#{$root}/gen/sverilog/" + + # Ensure the output directory exists + FileUtils.mkdir_p output_dir + + # Get the arch paths based on the config + resolver = Udb::Resolver.new + cfg_arch = resolver.cfg_arch_for(config_name) + inst_dir = cfg_arch.path / "inst" + csr_dir = cfg_arch.path / "csr" + + # Run the SystemVerilog generator script using the same Python environment + sh "#{$root}/.home/.venv/bin/python3 #{$root}/backends/generators/sverilog/sverilog_generator.py --inst-dir=#{inst_dir} --csr-dir=#{csr_dir} --output=#{output_dir}inst.sverilog" + end end