Compare commits

...

10 Commits

Author SHA1 Message Date
Colin 5ec810907e Refine soc_cxxrtl and pass demo. 2025-03-27 16:02:09 +08:00
Colin 616da81d63 Add soc_cxxrtl simulation. 2025-03-26 16:28:09 +08:00
Luke Wren 787da131a1
Merge pull request #28 from Wren6991/fix-21
Coding style change for Verilator compatibility (fixes #21)
2024-12-09 05:55:00 +00:00
Luke Wren c57e9f4c9b Coding style change for Verilator compatibility (fixes #21)
The boundary_conditions process in hazard3_frontend needs to be
scheduled at least twice to resolve to the correct values. There are
multiple possible interleavings, which should all result in the same
result. However Verilator schedules the process only once.

Work around this by moving the tie-off of the problematic variable into
the synchronous update process.
2024-12-09 05:35:34 +00:00
Luke Wren 8272910121
Merge pull request #23 from Wren6991/fix-dm-abstractauto-regno
Fix abstract command using wrong register when initiated by abstractauto
2024-10-12 20:03:45 +01:00
Luke Wren cb700f30b1 Fix abstract access GPR command using wrong register number when initiated by abstractauto.
Fixes #20. Bug introduced in  78a5cb9.
2024-10-12 19:35:13 +01:00
Luke Wren a4412c0b00
Merge pull request #14 from Wren6991/develop
Promote readme and adoc changes, no functional change
2024-08-09 07:19:01 -07:00
Leon Schuermann 1d0fc21430 Readme.md / doc: clarify "naturally aligned regions", no TOR support
While NA4 and NAPOT are the only "naturally aligned" addressing modes
in the RISC-V PMP (Privileged) Spec, calling their support out by
name, and clearly stating that the TOR addressing mode is not
supported, can clarify this fact for software / OS developers.

This is a common point of confusion and frustration when porting to
new RISC-V chips and so increased visibility of this limitation in the
documentation and README might help.
2024-08-08 20:44:43 -07:00
Luke Wren b291b46bf1 Update Readme.md 2024-08-08 08:13:27 -07:00
Luke Wren 0003e016a0 Update Readme.md 2024-08-08 08:11:54 -07:00
24 changed files with 1838 additions and 11 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
.DS_Store
*.todo
.vscode

View File

@ -15,7 +15,7 @@ Hazard3 is a 3-stage RISC-V processor, implementing the `RV32I` instruction set
* `Zcmp`: push/pop instructions
* Debug, Machine and User privilege/execution modes
* Privileged instructions `ecall`, `ebreak`, `mret` and `wfi`
* Physical memory protection (PMP) with up to 16 naturally aligned regions
* Physical memory protection (PMP) with up to 16 naturally aligned regions (NA4 / NAPOT address matching, TOR not supported)
You can [read the documentation here](doc/hazard3.pdf). (PDF link)
@ -334,3 +334,32 @@ make -f ULX3S.mk flash
# Should be able to attach to the processor
riscv-openocd -f ../ulx3s-openocd.cfg
```
# Performance
The RP2350 configuration of Hazard3 achieves 3.81 CoreMark/MHz.
```
2K performance run parameters for coremark.
CoreMark Size : 666
Total ticks : 15758494
Total time (secs): 15.758494
Iterations/Sec : 3.807470
Iterations : 60
Compiler version : GCC14.2.1 20240807
Compiler flags : -O3 -g -march=rv32ima_zicsr_zifencei_zba_zbb_zbkb_zbs -mbranch-cost=1 -funroll-all-loops --param max-inline-insns-auto=200 -finline-limit=10000 -fno-code-hoisting -fno-if-conversion2 -DPERFORMANCE_RUN=1
Memory location : STACK
seedcrc : 0xe9f5
[0]crclist : 0xe714
[0]crcmatrix : 0x1fd7
[0]crcstate : 0x8e3a
[0]crcfinal : 0xa14c
Correct operation validated. See README.md for run and reporting rules.
CoreMark 1.0 : 3.807470 / GCC14.2.1 20240807 -O3 -g -march=rv32ima_zicsr_zifencei_zba_zbb_zbkb_zbs -mbranch-cost=1 -funroll-all-loops --param max-inline-insns-auto=200 -finline-limit=10000 -fno-code-hoisting -fno-if-conversion2 -DPERFORMANCE_RUN=1 / STACK
```
To reproduce this in the RTL simulator, use the top-level Makefile in [test/sim/coremark](test/sim/coremark) after you have followed all the steps to get set up for running a "Hello, world!" binary above.
The default flags are appropriate for the non-multilib toolchain build, and achieve 3.74 CoreMark/MHz. To achieve the full 3.81 CoreMark/MHz, change the ISA variant in `core_portme.mak` to `rv32ima_zicsr_zifencei_zba_zbb_zbkb_zbs`. See the comments in that file for an explanation of why this makes a difference.
See the RP2350 datasheet for details of the Hazard3 configuration used by that chip. The default `tb_cxxrtl` build uses the same configuration as RP2350, except that it also enables the Zbc extension (which is not emitted by GCC 14 as it is not useful for general-purpose code).

View File

@ -273,7 +273,7 @@ Each `pmpcfg` register divides into four identical 8-bit chunks, each correspond
|Bits | Name | Description
| 7 | `L` | Lock region, and additionally enforce its permissions on M-mode as well as U-mode.
| 6:5 | - | RES0
| 4:3 | `A` | Address-matching mode. Values supported are 0 (OFF), 2 (NA4, naturally aligned 4-byte) and 3 (NAPOT, naturally aligned power-of-two). Attempting to write an unsupported value will set the region to OFF.
| 4:3 | `A` | Address-matching mode. Values supported are 0 (OFF), 2 (NA4, naturally aligned 4-byte) and 3 (NAPOT, naturally aligned power-of-two). 1 (TOR, top of range) is not supported. Attempting to write an unsupported value will set the region to OFF.
| 2 | `X` | Execute permission
| 1 | `W` | Write permission
| 0 | `R` | Read permission

View File

@ -529,7 +529,6 @@ always @ (posedge clk or negedge rst_n) begin
end
end
// We only support abstractauto on data0 update (use case is bulk memory read/write)
reg abstractauto_autoexecdata;
reg [1:0] abstractauto_autoexecprogbuf;
@ -623,16 +622,19 @@ always @ (posedge clk or negedge rst_n) begin
acmd_prev_postexec <= 1'b0;
acmd_prev_transfer <= 1'b0;
acmd_prev_write <= 1'b0;
acmd_prev_regno <= 5'h0;
acmd_prev_unsupported <= 1'b1;
end else if (!dmactive) begin
acmd_prev_postexec <= 1'b0;
acmd_prev_transfer <= 1'b0;
acmd_prev_write <= 1'b0;
acmd_prev_regno <= 5'h0;
acmd_prev_unsupported <= 1'b1;
end else if (start_abstract_cmd && acmd_new) begin
acmd_prev_postexec <= acmd_new_postexec;
acmd_prev_transfer <= acmd_new_transfer;
acmd_prev_write <= acmd_new_write;
acmd_prev_regno <= acmd_new_regno;
acmd_prev_unsupported <= acmd_new_unsupported;
end
end
@ -640,6 +642,7 @@ end
wire acmd_postexec = acmd_new ? acmd_new_postexec : acmd_prev_postexec ;
wire acmd_transfer = acmd_new ? acmd_new_transfer : acmd_prev_transfer ;
wire acmd_write = acmd_new ? acmd_new_write : acmd_prev_write ;
wire [4:0] acmd_regno = acmd_new ? acmd_new_regno : acmd_prev_regno ;
wire acmd_unsupported = acmd_new ? acmd_new_unsupported : acmd_prev_unsupported;
always @ (*) begin
@ -745,11 +748,11 @@ wire [N_HARTS-1:0] hart_instr_data_vld_nxt = {{N_HARTS-1{1'b0}},
} << hartsel;
wire [31:0] hart_instr_data_nxt =
acmd_state_nxt == S_ISSUE_REGWRITE ? 32'hbff02073 | {20'd0, acmd_new_regno, 7'd0} : // csrr xx, dmdata0
acmd_state_nxt == S_ISSUE_REGREAD ? 32'hbff01073 | {12'd0, acmd_new_regno, 15'd0} : // csrw dmdata0, xx
acmd_state_nxt == S_ISSUE_PROGBUF0 ? progbuf0 :
acmd_state_nxt == S_ISSUE_PROGBUF1 ? progbuf1 :
32'h00100073; // ebreak
acmd_state_nxt == S_ISSUE_REGWRITE ? 32'hbff02073 | {20'd0, acmd_regno, 7'd0} : // csrr xx, dmdata0
acmd_state_nxt == S_ISSUE_REGREAD ? 32'hbff01073 | {12'd0, acmd_regno, 15'd0} : // csrw dmdata0, xx
acmd_state_nxt == S_ISSUE_PROGBUF0 ? progbuf0 :
acmd_state_nxt == S_ISSUE_PROGBUF1 ? progbuf1 :
32'h00100073; // ebreak
reg [31:0] hart_instr_data_reg;
assign hart_instr_data = {N_HARTS{hart_instr_data_reg}};

View File

@ -135,7 +135,6 @@ always @ (*) begin: boundary_conditions
fifo_mem[FIFO_DEPTH] = mem_data;
fifo_predbranch[FIFO_DEPTH] = 2'b00;
fifo_err[FIFO_DEPTH] = 1'b0;
fifo_valid_hw[FIFO_DEPTH] = 2'b00;
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
fifo_valid[i] = |EXTENSION_C ? |fifo_valid_hw[i] : fifo_valid_hw[i][0];
// valid-to-right condition: i == 0 || fifo_valid[i - 1], but without
@ -158,6 +157,10 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
fifo_err[i] <= 1'b0;
fifo_predbranch[i] <= 2'b00;
end
// This exists only for loop boundary conditions, but is tied off in
// this synchronous process to work around a Verilator scheduling
// issue (see issue #21)
fifo_valid_hw[FIFO_DEPTH] <= 2'b00;
end else begin
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
if (fifo_pop || (fifo_push && !fifo_valid[i])) begin
@ -183,6 +186,7 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
fifo_predbranch[0] <= 2'b00;
fifo_valid_hw[0] <= jump_now ? 2'b00 : 2'b11;
end
fifo_valid_hw[FIFO_DEPTH] <= 2'b00;
`ifdef HAZARD3_ASSERTIONS
// FIFO validity must be compact, so we can always consume from the end
if (!fifo_valid[0]) begin

View File

@ -9,7 +9,7 @@ endif
DOTF ?= tb.f
CCFLAGS ?=
LDSCRIPT ?= ../common/memmap.ld
CROSS_PREFIX ?= riscv32-unknown-elf-
CROSS_PREFIX ?= /opt/riscv/bin/riscv32-unknown-elf-
TBEXEC ?= ../tb_cxxrtl/tb
TBDIR := $(dir $(abspath $(TBEXEC)))
INCDIR ?= ../common

View File

@ -9,7 +9,7 @@
// ----------------------------------------------------------------------------
// Testbench IO hardware layout
#define IO_BASE 0x80000000
#define IO_BASE 0x40000000
typedef struct {
volatile uint32_t print_char;

5
test/sim/soc_cxxrtl/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
tb
dut.cpp
build.*
tb_multicore

View File

@ -0,0 +1,46 @@
# To build single-core dual-port tb: make
# To build dual-core single-port tb: make DOTF=tb_multicore.f

include ../project_paths.mk

TOP              := example_soc
DOTF             := tb.f
CONFIG           := default

# The testbench executable and the build directory are both named after the
# .f file list, so different DOTF values do not clobber each other.
TBEXEC           := $(patsubst %.f,%,$(DOTF))
FILE_LIST        := $(shell HDL=$(HDL) $(SCRIPTS)/listfiles $(DOTF))
BUILD_DIR        := build-$(patsubst %.f,%,$(DOTF))

# Note: clang++-18 has a >20x compile time regression, even at low
# optimisation levels. I have tried clang++-16 and clang++-17, both fine.
CLANGXX          := clang++-16

# Toolchain prefix used to locate gdb. Override on the command line or in the
# environment if the toolchain is installed elsewhere.
CROSS_PREFIX     ?= /opt/riscv/bin/riscv32-unknown-elf-

# Every target that does not produce a file of the same name must be listed
# here, otherwise a stray file called e.g. "sim" or "gdb" would silently
# disable the rule.
.PHONY: clean all lint sim openocd gdb

all: $(TBEXEC)

# Yosys script: read the design, elaborate from $(TOP), emit the CXXRTL model
SYNTH_CMD += read_verilog -I ../../../hdl -DCONFIG_HEADER="config_$(CONFIG).vh" $(FILE_LIST);
SYNTH_CMD += hierarchy -top $(TOP);
SYNTH_CMD += write_cxxrtl $(BUILD_DIR)/dut.cpp

$(BUILD_DIR)/dut.cpp: $(FILE_LIST) $(wildcard *.vh)
	mkdir -p $(BUILD_DIR)
	yosys -p '$(SYNTH_CMD)' 2>&1 > $(BUILD_DIR)/cxxrtl.log

clean::
	rm -rf $(BUILD_DIR) $(TBEXEC)

# Run the simulator, listening for an OpenOCD remote-bitbang connection
sim: $(TBEXEC)
	./$(TBEXEC) --port 9824

openocd:
	openocd -f openocd.cfg

gdb:
	$(CROSS_PREFIX)gdb -x gdb_init

# tb.cpp #includes the generated dut.cpp, which is found via -I $(BUILD_DIR)
$(TBEXEC): $(BUILD_DIR)/dut.cpp tb.cpp
	$(CLANGXX) -O3 -std=c++14 $(addprefix -D,$(CDEFINES) $(CDEFINES_$(DOTF))) -I $(shell yosys-config --datdir)/include/backends/cxxrtl/runtime -I $(BUILD_DIR) tb.cpp -o $(TBEXEC)

lint:
	verilator --lint-only --top-module $(TOP) -I$(HDL) $(FILE_LIST)

View File

@ -0,0 +1,18 @@
adapter driver remote_bitbang
remote_bitbang_host localhost
remote_bitbang_port 9824
transport select jtag
set _CHIPNAME hazard3
jtag newtap $_CHIPNAME cpu -irlen 5
set _TARGETNAME $_CHIPNAME.cpu
target create $_TARGETNAME riscv -chain-position $_TARGETNAME
$_TARGETNAME configure -rtos hwthread
gdb_report_data_abort enable
init
halt
riscv test_compliance

View File

@ -0,0 +1,46 @@
// Default Hazard3 config for testbench: all ISA features
localparam RESET_VECTOR = 32'h40;
localparam MTVEC_INIT = 32'h0;
localparam EXTENSION_A = 1;
localparam EXTENSION_C = 1;
localparam EXTENSION_M = 1;
localparam EXTENSION_ZBA = 1;
localparam EXTENSION_ZBB = 1;
localparam EXTENSION_ZBC = 1;
localparam EXTENSION_ZBS = 1;
localparam EXTENSION_ZBKB = 1;
localparam EXTENSION_ZCB = 1;
localparam EXTENSION_ZCMP = 1;
localparam EXTENSION_ZIFENCEI = 1;
localparam EXTENSION_XH3BEXTM = 1;
localparam EXTENSION_XH3IRQ = 1;
localparam EXTENSION_XH3PMPM = 1;
localparam EXTENSION_XH3POWER = 1;
localparam CSR_M_MANDATORY = 1;
localparam CSR_M_TRAP = 1;
localparam CSR_COUNTER = 1;
localparam U_MODE = 1;
localparam PMP_REGIONS = 4;
localparam PMP_GRAIN = 0;
localparam PMP_HARDWIRED = {PMP_REGIONS{1'b0}};
localparam PMP_HARDWIRED_ADDR = {PMP_REGIONS{32'h0}};
localparam PMP_HARDWIRED_CFG = {PMP_REGIONS{8'h00}};
localparam DEBUG_SUPPORT = 1;
localparam BREAKPOINT_TRIGGERS = 4;
localparam NUM_IRQS = 32;
localparam IRQ_PRIORITY_BITS = 4;
localparam IRQ_INPUT_BYPASS = {NUM_IRQS{1'b0}};
localparam MVENDORID_VAL = 32'hdeadbeef;
localparam MIMPID_VAL = 32'h12345678;
localparam MHARTID_VAL = 32'h0;
localparam MCONFIGPTR_VAL = 32'h9abcdef0;
localparam REDUCED_BYPASS = 0;
localparam MULDIV_UNROLL = 2;
localparam MUL_FAST = 1;
localparam MUL_FASTER = 1;
localparam MULH_FAST = 1;
localparam FAST_BRANCHCMP = 1;
localparam RESET_REGFILE = 1;
localparam BRANCH_PREDICTOR = 1;
localparam MTVEC_WMASK = 32'hfffffffd;

View File

@ -0,0 +1,46 @@
// Minimal Hazard3 config for testbench
localparam RESET_VECTOR = 32'h40;
localparam MTVEC_INIT = 32'h0;
localparam EXTENSION_A = 0;
localparam EXTENSION_C = 0;
localparam EXTENSION_M = 0;
localparam EXTENSION_ZBA = 0;
localparam EXTENSION_ZBB = 0;
localparam EXTENSION_ZBC = 0;
localparam EXTENSION_ZBS = 0;
localparam EXTENSION_ZBKB = 0;
localparam EXTENSION_ZCB = 0;
localparam EXTENSION_ZCMP = 0;
localparam EXTENSION_ZIFENCEI = 0;
localparam EXTENSION_XH3BEXTM = 0;
localparam EXTENSION_XH3IRQ = 0;
localparam EXTENSION_XH3PMPM = 0;
localparam EXTENSION_XH3POWER = 0;
localparam CSR_M_MANDATORY = 1;
localparam CSR_M_TRAP = 1;
localparam CSR_COUNTER = 0;
localparam U_MODE = 0;
localparam PMP_REGIONS = 0;
localparam PMP_GRAIN = 0;
localparam PMP_HARDWIRED = {PMP_REGIONS{1'b0}};
localparam PMP_HARDWIRED_ADDR = {PMP_REGIONS{32'h0}};
localparam PMP_HARDWIRED_CFG = {PMP_REGIONS{8'h00}};
localparam DEBUG_SUPPORT = 0;
localparam BREAKPOINT_TRIGGERS = 4;
localparam NUM_IRQS = 32;
localparam IRQ_PRIORITY_BITS = 0;
localparam IRQ_INPUT_BYPASS = {NUM_IRQS{1'b0}};
localparam MVENDORID_VAL = 32'hdeadbeef;
localparam MIMPID_VAL = 32'h12345678;
localparam MHARTID_VAL = 32'h0;
localparam MCONFIGPTR_VAL = 32'h9abcdef0;
localparam REDUCED_BYPASS = 1;
localparam MULDIV_UNROLL = 1;
localparam MUL_FAST = 0;
localparam MUL_FASTER = 0;
localparam MULH_FAST = 0;
localparam FAST_BRANCHCMP = 0;
localparam RESET_REGFILE = 1;
localparam BRANCH_PREDICTOR = 0;
localparam MTVEC_WMASK = 32'hfffffffd;

View File

@ -0,0 +1,18 @@
# riscv32-unknown-elf-gdb
# Remaining commands are typed into the gdb prompt. This one tells gdb to shut up:
set confirm off
# Connect to openocd on its default port:
target extended-remote localhost:3333
# Load hello world, and check that it loaded correctly
file ../hellow/tmp/hellow.elf
load
compare-sections
# The processor will quit the simulation after returning from main(), by
# writing to a magic MMIO register. openocd will be quite unhappy that the
# other end of its socket disappeared, so to avoid the resulting error
# messages, add a breakpoint before _exit.
break _exit
run
# Should break at _exit. Check the terminal with the simulator, you should see
# the hello world message. The exit code is in register a0, it should be 123:
info reg a0

View File

@ -0,0 +1,5 @@
set arch riscv:rv32
set confirm off
set disassemble-next-line on
targ rem localhost:3333
monitor reset halt

View File

@ -0,0 +1,14 @@
adapter driver remote_bitbang
remote_bitbang_host localhost
remote_bitbang_port 9824
transport select jtag
set _CHIPNAME hazard3
jtag newtap $_CHIPNAME cpu -irlen 5
target create $_CHIPNAME.cpu0 riscv -chain-position $_CHIPNAME.cpu -rtos hwthread
target create $_CHIPNAME.cpu1 riscv -chain-position $_CHIPNAME.cpu -coreid 1
target smp $_CHIPNAME.cpu0 $_CHIPNAME.cpu1
gdb_report_data_abort enable
init
halt

View File

@ -0,0 +1,92 @@
[*]
[*] GTKWave Analyzer v3.3.104 (w)1999-2020 BSI
[*] Sat Jul 2 12:28:35 2022
[*]
[dumpfile] "/home/luke/proj/hazard3/test/sim/tb_cxxrtl/waves.vcd"
[dumpfile_mtime] "Sun Jun 26 18:58:42 2022"
[dumpfile_size] 34958839
[savefile] "/home/luke/proj/hazard3/test/sim/tb_cxxrtl/multicore.gtkw"
[timestart] 0
[size] 2509 1368
[pos] -1 -1
*-16.000000 136300 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[sst_width] 233
[signals_width] 238
[sst_expanded] 1
[sst_vpaned_height] 410
@200
-JTAG
@28
tck
tdi
tdo
tms
@200
-
@22
dm.dmi_paddr[8:0]
@28
dm.dmi_penable
dm.dmi_pwrite
@22
dm.dmi_pwdata[31:0]
dm.dmi_prdata[31:0]
@200
-
@28
dm.hartsel
dm.hasel
dm.hart_array_mask[1:0]
dm.hart_array_mask_next[1:0]
@200
-
>-491
-Core 0 debug
@28
>0
cpu0.dbg_req_halt
cpu0.dbg_req_resume
cpu0.dbg_halted
@200
-
@28
cpu0.dbg_instr_data_vld
cpu0.dbg_instr_data_rdy
@22
cpu0.dbg_instr_data[31:0]
@28
cpu0.dbg_instr_caught_ebreak
cpu0.dbg_instr_caught_exception
@200
-
@22
cpu0.dbg_data0_rdata[31:0]
cpu0.dbg_data0_wdata[31:0]
@28
cpu0.dbg_data0_wen
@200
-
-Core 1 debug
@28
cpu1.dbg_req_halt
cpu1.dbg_req_resume
cpu1.dbg_halted
@200
-
@28
cpu1.dbg_instr_data_vld
cpu1.dbg_instr_data_rdy
@22
cpu1.dbg_instr_data[31:0]
@28
cpu1.dbg_instr_caught_ebreak
cpu1.dbg_instr_caught_exception
@200
-
@22
cpu1.dbg_data0_rdata[31:0]
cpu1.dbg_data0_wdata[31:0]
@28
cpu1.dbg_data0_wen
[pattern_trace] 1
[pattern_trace] 0

View File

@ -0,0 +1,13 @@
adapter driver remote_bitbang
remote_bitbang host localhost
remote_bitbang port 9824
transport select jtag
set _CHIPNAME hazard3
jtag newtap $_CHIPNAME cpu -irlen 5
set _TARGETNAME $_CHIPNAME.cpu
target create $_TARGETNAME riscv -chain-position $_TARGETNAME
gdb_report_data_abort enable
init
halt

692
test/sim/soc_cxxrtl/tb.cpp Normal file
View File

@ -0,0 +1,692 @@
#include <iostream>
#include <fstream>
#include <cstdint>
#include <string>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
// Device-under-test model generated by CXXRTL:
#include "dut.cpp"
#include <cxxrtl/cxxrtl_vcd.h>
// There must be a better way
#ifdef __x86_64__
#define I64_FMT "%ld"
#else
#define I64_FMT "%lld"
#endif
// -----------------------------------------------------------------------------
// Size in bytes of the simulated RAM buffer (16 MiB)
static constexpr int MEM_SIZE = 16 * 1024 * 1024;

// Number of reservation slots tracked by the global exclusive monitor
static constexpr int N_RESERVATIONS = 2;

// Reservations are compared with the low three address bits ignored
static constexpr uint32_t RESERVATION_ADDR_MASK = 0xfffffff8u;

// Base address of the testbench IO registers as decoded by mem_access().
// NOTE(review): the software-side testbench IO header in this change set
// moves its IO_BASE to 0x40000000 -- confirm whether this value should follow.
static constexpr unsigned int IO_BASE = 0x80000000;

// Byte offsets of the testbench IO registers, relative to IO_BASE
enum {
	// Console output and simulation exit
	IO_PRINT_CHAR  = 0x000,
	IO_PRINT_U32   = 0x004,
	IO_EXIT        = 0x008,

	// Soft IRQ set/clear, global monitor enable, external IRQ set/clear
	IO_SET_SOFTIRQ = 0x010,
	IO_CLR_SOFTIRQ = 0x014,
	IO_GLOBMON_EN  = 0x018,
	IO_SET_IRQ     = 0x020,
	IO_CLR_IRQ     = 0x030,

	// 64-bit timer and two 64-bit comparators, as low/high 32-bit halves
	IO_MTIME       = 0x100,
	IO_MTIMEH      = 0x104,
	IO_MTIMECMP0   = 0x108,
	IO_MTIMECMP0H  = 0x10c,
	IO_MTIMECMP1   = 0x110,
	IO_MTIMECMP1H  = 0x114
};
struct mem_io_state {
uint64_t mtime;
uint64_t mtimecmp[2];
bool exit_req;
uint32_t exit_code;
uint8_t *mem;
bool monitor_enabled;
bool reservation_valid[2];
uint32_t reservation_addr[2];
mem_io_state() {
mtime = 0;
mtimecmp[0] = 0;
mtimecmp[1] = 0;
exit_req = false;
exit_code = 0;
monitor_enabled = false;
for (int i = 0; i < N_RESERVATIONS; ++i) {
reservation_valid[i] = false;
reservation_addr[i] = 0;
}
mem = new uint8_t[MEM_SIZE];
for (size_t i = 0; i < MEM_SIZE; ++i)
mem[i] = 0;
}
// Where we're going we don't need a destructor B-)
void step(cxxrtl_design::p_example__soc &tb) {
// Default update logic for mtime, mtimecmp
++mtime;
// tb.p_timer__irq.set<uint8_t>((mtime >= mtimecmp[0]) | (mtime >= mtimecmp[1]) << 1);
}
};
// Width of a bus transfer. The value is log2 of the number of bytes
// transferred (mem_access() computes the byte count as 1u << size).
enum bus_size_t {
	SIZE_BYTE  = 0,
	SIZE_HWORD = 1,
	SIZE_WORD  = 2
};
// One bus request as passed to mem_access(). A default-constructed request
// is a non-exclusive byte read of address 0 using reservation slot 0.
struct bus_request {
	uint32_t addr = 0;
	bus_size_t size = SIZE_BYTE;
	bool write = false;
	bool excl = false;
	uint32_t wdata = 0;
	// Index into the global monitor's reservation table
	int reservation_id = 0;

	bus_request() {}
};
// Result of one bus access. A default-constructed response is a successful,
// zero-wait-state access returning zero data with exokay asserted.
struct bus_response {
	uint32_t rdata = 0;
	int stall_cycles = 0;
	bool err = false;
	bool exokay = true;

	bus_response() {}
};
bus_response mem_access(cxxrtl_design::p_example__soc &tb, mem_io_state &memio, bus_request req) {
bus_response resp;
// Global monitor. When monitor is not enabled, HEXOKAY is tied high
if (memio.monitor_enabled) {
if (req.excl) {
// Always set reservation on read. Always clear reservation on
// write. On successful write, clear others' matching reservations.
if (req.write) {
resp.exokay = memio.reservation_valid[req.reservation_id] &&
memio.reservation_addr[req.reservation_id] == (req.addr & RESERVATION_ADDR_MASK);
memio.reservation_valid[req.reservation_id] = false;
if (resp.exokay) {
for (int i = 0; i < N_RESERVATIONS; ++i) {
if (i == req.reservation_id)
continue;
if (memio.reservation_addr[i] == (req.addr & RESERVATION_ADDR_MASK))
memio.reservation_valid[i] = false;
}
}
}
else {
resp.exokay = true;
memio.reservation_valid[req.reservation_id] = true;
memio.reservation_addr[req.reservation_id] = req.addr & RESERVATION_ADDR_MASK;
}
}
else {
resp.exokay = false;
// Non-exclusive write still clears others' reservations
if (req.write) {
for (int i = 0; i < N_RESERVATIONS; ++i) {
if (i == req.reservation_id)
continue;
if (memio.reservation_addr[i] == (req.addr & RESERVATION_ADDR_MASK))
memio.reservation_valid[i] = false;
}
}
}
}
if (req.write) {
if (memio.monitor_enabled && req.excl && !resp.exokay) {
// Failed exclusive write; do nothing
}
else if (req.addr <= MEM_SIZE - 4u) {
unsigned int n_bytes = 1u << (int)req.size;
// Note we are relying on hazard3's byte lane replication
for (unsigned int i = 0; i < n_bytes; ++i) {
memio.mem[req.addr + i] = req.wdata >> (8 * i) & 0xffu;
}
}
else if (req.addr == IO_BASE + IO_PRINT_CHAR) {
putchar(req.wdata);
}
else if (req.addr == IO_BASE + IO_PRINT_U32) {
printf("%08x\n", req.wdata);
}
else if (req.addr == IO_BASE + IO_EXIT) {
if (!memio.exit_req) {
memio.exit_req = true;
memio.exit_code = req.wdata;
}
}
else if (req.addr == IO_BASE + IO_SET_SOFTIRQ) {
// tb.p_soft__irq.set<uint8_t>(tb.p_soft__irq.get<uint8_t>() | req.wdata);
}
else if (req.addr == IO_BASE + IO_CLR_SOFTIRQ) {
// tb.p_soft__irq.set<uint8_t>(tb.p_soft__irq.get<uint8_t>() & ~req.wdata);
}
else if (req.addr == IO_BASE + IO_GLOBMON_EN) {
memio.monitor_enabled = req.wdata;
}
else if (req.addr == IO_BASE + IO_SET_IRQ) {
// tb.p_irq.set<uint32_t>(tb.p_irq.get<uint32_t>() | req.wdata);
}
else if (req.addr == IO_BASE + IO_CLR_IRQ) {
// tb.p_irq.set<uint32_t>(tb.p_irq.get<uint32_t>() & ~req.wdata);
}
else if (req.addr == IO_BASE + IO_MTIME) {
memio.mtime = (memio.mtime & 0xffffffff00000000u) | req.wdata;
}
else if (req.addr == IO_BASE + IO_MTIMEH) {
memio.mtime = (memio.mtime & 0x00000000ffffffffu) | ((uint64_t)req.wdata << 32);
}
else if (req.addr == IO_BASE + IO_MTIMECMP0) {
memio.mtimecmp[0] = (memio.mtimecmp[0] & 0xffffffff00000000u) | req.wdata;
}
else if (req.addr == IO_BASE + IO_MTIMECMP0H) {
memio.mtimecmp[0] = (memio.mtimecmp[0] & 0x00000000ffffffffu) | ((uint64_t)req.wdata << 32);
}
else if (req.addr == IO_BASE + IO_MTIMECMP1) {
memio.mtimecmp[1] = (memio.mtimecmp[1] & 0xffffffff00000000u) | req.wdata;
}
else if (req.addr == IO_BASE + IO_MTIMECMP1H) {
memio.mtimecmp[1] = (memio.mtimecmp[1] & 0x00000000ffffffffu) | ((uint64_t)req.wdata << 32);
}
else {
resp.err = true;
}
}
else {
if (req.addr <= MEM_SIZE - (1u << (int)req.size)) {
req.addr &= ~0x3u;
resp.rdata =
(uint32_t)memio.mem[req.addr] |
memio.mem[req.addr + 1] << 8 |
memio.mem[req.addr + 2] << 16 |
memio.mem[req.addr + 3] << 24;
}
else if (req.addr == IO_BASE + IO_SET_SOFTIRQ || req.addr == IO_BASE + IO_CLR_SOFTIRQ) {
// resp.rdata = tb.p_soft__irq.get<uint8_t>();
}
else if (req.addr == IO_BASE + IO_SET_IRQ || req.addr == IO_BASE + IO_CLR_IRQ) {
// resp.rdata = tb.p_irq.get<uint32_t>();
}
else if (req.addr == IO_BASE + IO_MTIME) {
resp.rdata = memio.mtime;
}
else if (req.addr == IO_BASE + IO_MTIMEH) {
resp.rdata = memio.mtime >> 32;
}
else if (req.addr == IO_BASE + IO_MTIMECMP0) {
resp.rdata = memio.mtimecmp[0];
}
else if (req.addr == IO_BASE + IO_MTIMECMP0H) {
resp.rdata = memio.mtimecmp[0] >> 32;
}
else if (req.addr == IO_BASE + IO_MTIMECMP1) {
resp.rdata = memio.mtimecmp[1];
}
else if (req.addr == IO_BASE + IO_MTIMECMP1H) {
resp.rdata = memio.mtimecmp[1] >> 32;
}
else {
resp.err = true;
}
}
if (resp.err) {
resp.exokay = false;
}
return resp;
}
// -----------------------------------------------------------------------------
// Usage text for the testbench executable. exit_help() writes it to stderr
// after any error message. Keep it in sync with the option parser in main().
const char *help_str =
"Usage: tb [--bin x.bin] [--port n] [--vcd x.vcd] [--dump start end] \\\n"
" [--cycles n] [--cpuret] [--jtagdump x] [--jtagreplay x]\n"
"\n"
" --bin x.bin : Flat binary file loaded to address 0x0 in RAM\n"
" --vcd x.vcd : Path to dump waveforms to\n"
" --dump start end : Print out memory contents from start to end (exclusive)\n"
" after execution finishes. Can be passed multiple times.\n"
" --cycles n : Maximum number of cycles to run before exiting.\n"
" Default is 0 (no maximum).\n"
" --port n : Port number to listen for openocd remote bitbang. Sim\n"
" runs in lockstep with JTAG bitbang, not free-running.\n"
" --cpuret : Testbench's return code is the return code written to\n"
" IO_EXIT by the CPU, or -1 if timed out.\n"
" --jtagdump : Dump OpenOCD JTAG bitbang commands to a file so they\n"
" can be replayed. (Lower perf impact than VCD dumping)\n"
" --jtagreplay : Play back some dumped OpenOCD JTAG bitbang commands\n"
;
// Write an optional error message followed by the usage text to stderr, then
// terminate the process with exit status -1. Does not return.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
// Put the already-bound socket `server_fd` into the listening state and block
// until a client connects. Returns the file descriptor of the accepted
// connection. `port` is used only for the status message. Terminates the
// process with exit status -1 if listen() or accept() fails.
int wait_for_connection(int server_fd, uint16_t port, struct sockaddr *sock_addr, socklen_t *sock_addr_len) {
	printf("Waiting for connection on port %u\n", port);

	const int listen_rc = listen(server_fd, 3);
	if (listen_rc < 0) {
		fprintf(stderr, "listen failed\n");
		exit(-1);
	}

	const int conn_fd = accept(server_fd, sock_addr, sock_addr_len);
	if (conn_fd < 0) {
		fprintf(stderr, "accept failed\n");
		exit(-1);
	}

	printf("Connected\n");
	return conn_fd;
}
static const int TCP_BUF_SIZE = 256;
int main(int argc, char **argv) {
bool load_bin = false;
std::string bin_path;
bool dump_waves = false;
std::string waves_path;
std::vector<std::pair<uint32_t, uint32_t>> dump_ranges;
int64_t max_cycles = 0;
bool propagate_return_code = false;
uint16_t port = 0;
bool dump_jtag = false;
std::string jtag_dump_path;
bool replay_jtag = false;
std::string jtag_replay_path;
for (int i = 1; i < argc; ++i) {
std::string s(argv[i]);
if (s.rfind("--", 0) != 0) {
std::cerr << "Unexpected positional argument " << s << "\n";
exit_help("");
}
else if (s == "--bin") {
if (argc - i < 2)
exit_help("Option --bin requires an argument\n");
load_bin = true;
bin_path = argv[i + 1];
i += 1;
}
else if (s == "--vcd") {
if (argc - i < 2)
exit_help("Option --vcd requires an argument\n");
dump_waves = true;
waves_path = argv[i + 1];
i += 1;
}
else if (s == "--jtagdump") {
if (argc - i < 2)
exit_help("Option --jtagdump requires an argument\n");
dump_jtag = true;
jtag_dump_path = argv[i + 1];
i += 1;
}
else if (s == "--jtagreplay") {
if (argc - i < 2)
exit_help("Option --jtagreplay requires an argument\n");
replay_jtag = true;
jtag_replay_path = argv[i + 1];
i += 1;
}
else if (s == "--dump") {
if (argc - i < 3)
exit_help("Option --dump requires 2 arguments\n");
dump_ranges.push_back(std::pair<uint32_t, uint32_t>(
std::stoul(argv[i + 1], 0, 0),
std::stoul(argv[i + 2], 0, 0)
));;
i += 2;
}
else if (s == "--cycles") {
if (argc - i < 2)
exit_help("Option --cycles requires an argument\n");
max_cycles = std::stol(argv[i + 1], 0, 0);
i += 1;
}
else if (s == "--port") {
if (argc - i < 2)
exit_help("Option --port requires an argument\n");
port = std::stol(argv[i + 1], 0, 0);
i += 1;
}
else if (s == "--cpuret") {
propagate_return_code = true;
}
else {
std::cerr << "Unrecognised argument " << s << "\n";
exit_help("");
}
}
if (!(load_bin || port != 0 || replay_jtag))
exit_help("At least one of --bin, --port or --jtagreplay must be specified.\n");
if (dump_jtag && port == 0)
exit_help("--jtagdump specified, but there is no JTAG socket to dump from.\n");
if (replay_jtag && port != 0)
exit_help("Can't specify both --port and --jtagreplay\n");
int server_fd, sock_fd;
struct sockaddr_in sock_addr;
int sock_opt = 1;
socklen_t sock_addr_len = sizeof(sock_addr);
char txbuf[TCP_BUF_SIZE], rxbuf[TCP_BUF_SIZE];
int rx_ptr = 0, rx_remaining = 0, tx_ptr = 0;
if (port != 0) {
server_fd = socket(AF_INET, SOCK_STREAM, 0);
if (server_fd == 0) {
fprintf(stderr, "socket creation failed\n");
exit(-1);
}
int setsockopt_rc = setsockopt(
server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT,
&sock_opt, sizeof(sock_opt)
);
if (setsockopt_rc) {
fprintf(stderr, "setsockopt failed\n");
exit(-1);
}
sock_addr.sin_family = AF_INET;
sock_addr.sin_addr.s_addr = INADDR_ANY;
sock_addr.sin_port = htons(port);
if (bind(server_fd, (struct sockaddr *)&sock_addr, sizeof(sock_addr)) < 0) {
fprintf(stderr, "bind failed\n");
exit(-1);
}
sock_fd = wait_for_connection(server_fd, port, (struct sockaddr *)&sock_addr, &sock_addr_len);
}
mem_io_state memio;
if (load_bin) {
std::ifstream fd(bin_path, std::ios::binary | std::ios::ate);
if (!fd){
std::cerr << "Failed to open \"" << bin_path << "\"\n";
return -1;
}
std::streamsize bin_size = fd.tellg();
if (bin_size > MEM_SIZE) {
std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n";
return -1;
}
fd.seekg(0, std::ios::beg);
fd.read((char*)memio.mem, bin_size);
}
std::ofstream jtag_dump_fd;
if (dump_jtag) {
jtag_dump_fd.open(jtag_dump_path);
if (!jtag_dump_fd.is_open()) {
std::cerr << "Failed to open \"" << jtag_dump_path << "\"\n";
return -1;
}
}
std::ifstream jtag_replay_fd;
if (replay_jtag) {
jtag_replay_fd.open(jtag_replay_path);
if (!jtag_replay_fd.is_open()) {
std::cerr << "Failed to open \"" << jtag_replay_path << "\"\n";
return -1;
}
}
cxxrtl_design::p_example__soc top;
std::ofstream waves_fd;
cxxrtl::vcd_writer vcd;
if (dump_waves) {
waves_fd.open(waves_path);
cxxrtl::debug_items all_debug_items;
top.debug_info(&all_debug_items, /*scopes=*/nullptr, "");
vcd.timescale(1, "us");
vcd.add(all_debug_items);
}
// Loop-carried address-phase requests
bus_request req_i;
bus_request req_d;
bool req_i_vld = false;
bool req_d_vld = false;
req_i.reservation_id = 0;
req_d.reservation_id = 1;
// Set bus interfaces to generate good IDLE responses at first
// top.p_i__hready.set<bool>(true);
// top.p_d__hready.set<bool>(true);
// Reset + initial clock pulse
top.step();
top.p_clk.set<bool>(true);
top.p_tck.set<bool>(true);
top.step();
top.p_clk.set<bool>(false);
top.p_tck.set<bool>(false);
top.p_trst__n.set<bool>(true);
top.p_rst__n.set<bool>(true);
top.step();
top.step(); // workaround for github.com/YosysHQ/yosys/issues/2780
bool timed_out = false;
for (int64_t cycle = 0; cycle < max_cycles || max_cycles == 0; ++cycle) {
top.p_clk.set<bool>(false);
top.step();
if (dump_waves)
vcd.sample(cycle * 2);
top.p_clk.set<bool>(true);
top.step();
top.step(); // workaround for github.com/YosysHQ/yosys/issues/2780
// If --port is specified, we run the simulator in lockstep with the
// remote bitbang commands, to get more consistent simulation traces.
// This slows down simulation quite a bit compared with normal
// free-running.
//
// Most bitbang commands complete in one cycle (e.g. TCK/TMS/TDI
// writes) but reads take 0 cycles, step=false.
bool got_exit_cmd = false;
bool step = false;
if (port != 0 or replay_jtag) {
while (!step) {
if (rx_remaining > 0) {
char c = rxbuf[rx_ptr++];
--rx_remaining;
if (c == 'r' || c == 's') {
top.p_trst__n.set<bool>(true);
step = true;
}
else if (c == 't' || c == 'u') {
top.p_trst__n.set<bool>(false);
}
else if (c >= '0' && c <= '7') {
int mask = c - '0';
top.p_tck.set<bool>(mask & 0x4);
top.p_tms.set<bool>(mask & 0x2);
top.p_tdi.set<bool>(mask & 0x1);
step = true;
}
else if (c == 'R') {
txbuf[tx_ptr++] = top.p_tdo.get<bool>() ? '1' : '0';
if (tx_ptr >= TCP_BUF_SIZE || rx_remaining == 0) {
send(sock_fd, txbuf, tx_ptr, 0);
tx_ptr = 0;
}
}
else if (c == 'Q') {
printf("OpenOCD sent quit command\n");
got_exit_cmd = true;
step = true;
}
}
else {
// Potentially the last command was not a read command, but
// OpenOCD is still waiting for a last response from its
// last command packet before it sends us any more, so now is
// the time to flush TX.
if (tx_ptr > 0) {
send(sock_fd, txbuf, tx_ptr, 0);
tx_ptr = 0;
}
rx_ptr = 0;
if (replay_jtag) {
rx_remaining = jtag_replay_fd.readsome(rxbuf, TCP_BUF_SIZE);
}
else {
rx_remaining = read(sock_fd, &rxbuf, TCP_BUF_SIZE);
}
if (dump_jtag && rx_remaining > 0) {
jtag_dump_fd.write(rxbuf, rx_remaining);
}
if (rx_remaining == 0) {
if (port == 0) {
// Presumably EOF, so quit.
got_exit_cmd = true;
}
else {
// The socket is closed. Wait for another connection.
sock_fd = wait_for_connection(server_fd, port, (struct sockaddr *)&sock_addr, &sock_addr_len);
}
}
}
}
}
memio.step(top);
// The two bus ports are handled identically. This enables swapping out of
// various `tb.v` hardware integration files containing:
//
// - A single, dual-ported processor (instruction fetch, load/store ports)
// - A single, single-ported processor (instruction fetch + load/store muxed internally)
// - A pair of single-ported processors, for dual-core debug tests
// if (top.p_d__hready.get<bool>()) {
// // Clear bus error by default
// top.p_d__hresp.set<bool>(false);
// // Handle current data phase
// req_d.wdata = top.p_d__hwdata.get<uint32_t>();
// bus_response resp;
// if (req_d_vld)
// resp = mem_access(top, memio, req_d);
// else
// resp.exokay = !memio.monitor_enabled;
// if (resp.err) {
// // Phase 1 of error response
// top.p_d__hready.set<bool>(false);
// top.p_d__hresp.set<bool>(true);
// }
// top.p_d__hrdata.set<uint32_t>(resp.rdata);
// top.p_d__hexokay.set<bool>(resp.exokay);
// // Progress current address phase to data phase
// req_d_vld = top.p_d__htrans.get<uint8_t>() >> 1;
// req_d.write = top.p_d__hwrite.get<bool>();
// req_d.size = (bus_size_t)top.p_d__hsize.get<uint8_t>();
// req_d.addr = top.p_d__haddr.get<uint32_t>();
// req_d.excl = top.p_d__hexcl.get<bool>();
// }
// else {
// // hready=0. Currently this only happens when we're in the first
// // phase of an error response, so go to phase 2.
// top.p_d__hready.set<bool>(true);
// }
// if (top.p_i__hready.get<bool>()) {
// top.p_i__hresp.set<bool>(false);
// req_i.wdata = top.p_i__hwdata.get<uint32_t>();
// bus_response resp;
// if (req_i_vld)
// resp = mem_access(top, memio, req_i);
// else
// resp.exokay = !memio.monitor_enabled;
// if (resp.err) {
// // Phase 1 of error response
// top.p_i__hready.set<bool>(false);
// top.p_i__hresp.set<bool>(true);
// }
// top.p_i__hrdata.set<uint32_t>(resp.rdata);
// top.p_i__hexokay.set<bool>(resp.exokay);
// // Progress current address phase to data phase
// req_i_vld = top.p_i__htrans.get<uint8_t>() >> 1;
// req_i.write = top.p_i__hwrite.get<bool>();
// req_i.size = (bus_size_t)top.p_i__hsize.get<uint8_t>();
// req_i.addr = top.p_i__haddr.get<uint32_t>();
// req_i.excl = top.p_i__hexcl.get<bool>();
// }
// else {
// // hready=0. Currently this only happens when we're in the first
// // phase of an error response, so go to phase 2.
// top.p_i__hready.set<bool>(true);
// }
if (dump_waves) {
// The extra step() is just here to get the bus responses to line up nicely
// in the VCD (hopefully is a quick update)
top.step();
vcd.sample(cycle * 2 + 1);
waves_fd << vcd.buffer;
vcd.buffer.clear();
}
if (memio.exit_req) {
printf("CPU requested halt. Exit code %d\n", memio.exit_code);
printf("Ran for " I64_FMT " cycles\n", cycle + 1);
break;
}
if (cycle + 1 == max_cycles) {
printf("Max cycles reached\n");
timed_out = true;
}
if (got_exit_cmd)
break;
}
close(sock_fd);
if (dump_jtag) {
jtag_dump_fd.close();
}
if (replay_jtag) {
jtag_replay_fd.close();
}
for (auto r : dump_ranges) {
printf("Dumping memory from %08x to %08x:\n", r.first, r.second);
for (int i = 0; i < r.second - r.first; ++i)
printf("%02x%c", memio.mem[r.first + i], i % 16 == 15 ? '\n' : ' ');
printf("\n");
}
if (propagate_return_code && timed_out) {
return -1;
}
else if (propagate_return_code && memio.exit_req) {
return memio.exit_code;
}
else {
return 0;
}
}

3
test/sim/soc_cxxrtl/tb.f Normal file
View File

@ -0,0 +1,3 @@
list $HDL/../example_soc/soc/soc.f
file $HDL/debug/cdc/hazard3_reset_sync.v
list tb_common.f

320
test/sim/soc_cxxrtl/tb.v Normal file
View File

@ -0,0 +1,320 @@
// An integration of JTAG-DTM + DM + CPU for openocd to poke at over a remote
// bitbang socket
`default_nettype none
module tb #(
    parameter W_DATA = 32, // do not modify
    parameter W_ADDR = 32  // do not modify
) (
    // Global signals
    input  wire                clk,
    input  wire                rst_n,

    // JTAG port
    input  wire                tck,
    input  wire                trst_n,
    input  wire                tms,
    input  wire                tdi,
    output wire                tdo,

    // Instruction fetch port
    output wire [W_ADDR-1:0]   i_haddr,
    output wire                i_hwrite,
    output wire [1:0]          i_htrans,
    output wire                i_hexcl,
    output wire [2:0]          i_hsize,
    output wire [2:0]          i_hburst,
    output wire [3:0]          i_hprot,
    output wire                i_hmastlock,
    output wire [7:0]          i_hmaster,
    input  wire                i_hready,
    input  wire                i_hresp,
    input  wire                i_hexokay,
    output wire [W_DATA-1:0]   i_hwdata,
    input  wire [W_DATA-1:0]   i_hrdata,

    // Load/store port
    output wire [W_ADDR-1:0]   d_haddr,
    output wire                d_hwrite,
    output wire [1:0]          d_htrans,
    output wire                d_hexcl,
    output wire [2:0]          d_hsize,
    output wire [2:0]          d_hburst,
    output wire [3:0]          d_hprot,
    output wire                d_hmastlock,
    output wire [7:0]          d_hmaster,
    input  wire                d_hready,
    input  wire                d_hresp,
    input  wire                d_hexokay,
    output wire [W_DATA-1:0]   d_hwdata,
    input  wire [W_DATA-1:0]   d_hrdata,

    // Level-sensitive interrupt sources
    // NOTE(review): NUM_IRQS is not declared in this module header; it is
    // presumably supplied by the CONFIG_HEADER included further down. Confirm
    // that every tool used accepts this use-before-declaration.
    input  wire [NUM_IRQS-1:0] irq,       // -> mip.meip
    input  wire [1:0]          soft_irq,  // -> mip.msip
    input  wire [1:0]          timer_irq  // -> mip.mtip
);

// Single-core testbench top level: one JTAG-DTM, one Debug Module
// (N_HARTS = 1) and one hazard3_cpu_2port. The CPU's instruction-fetch and
// load/store bus ports, the JTAG pins and the interrupt inputs are exposed as
// module ports so the simulation harness can drive them. Only bit 0 of
// soft_irq and timer_irq is used here; i_hexokay is accepted but unused.

// JTAG-DTM IDCODE, selected after TAP reset, would normally be a
// JEP106-compliant ID
localparam IDCODE = 32'hdeadbeef;

// ----------------------------------------------------------------------------
// DMI bus between the DTM and the DM

wire        dmi_psel;
wire        dmi_penable;
wire        dmi_pwrite;
wire [8:0]  dmi_paddr;
wire [31:0] dmi_pwdata;
// dmi_prdata is never assigned procedurally in this module; it is only
// connected to instance ports. A net is required for that in Verilog-2005
// (a variable cannot be the sink of a port connection), hence `wire`.
wire [31:0] dmi_prdata;
wire        dmi_pready;
wire        dmi_pslverr;

// The DMI-side reset is asserted by the global reset or by a hard reset
// request from the DTM, then resynchronised to clk.
wire dmihardreset_req;
wire assert_dmi_reset = !rst_n || dmihardreset_req;
wire rst_n_dmi;

hazard3_reset_sync dmi_reset_sync_u (
    .clk       (clk),
    .rst_n_in  (!assert_dmi_reset),
    .rst_n_out (rst_n_dmi)
);

// Note the idle hint of 8 cycles was empirically found to be the correct
// value for a 1:2 TCK:clk_dmi ratio. OpenOCD doesn't particularly care
// because it will just increase idle cycles until it stops seeing BUSY.
hazard3_jtag_dtm #(
    .IDCODE          (IDCODE),
    .DTMCS_IDLE_HINT (8)
) inst_hazard3_jtag_dtm (
    .tck              (tck),
    .trst_n           (trst_n),
    .tms              (tms),
    .tdi              (tdi),
    .tdo              (tdo),

    .dmihardreset_req (dmihardreset_req),

    .clk_dmi          (clk),
    .rst_n_dmi        (rst_n_dmi),

    .dmi_psel         (dmi_psel),
    .dmi_penable      (dmi_penable),
    .dmi_pwrite       (dmi_pwrite),
    .dmi_paddr        (dmi_paddr),
    .dmi_pwdata       (dmi_pwdata),
    .dmi_prdata       (dmi_prdata),
    .dmi_pready       (dmi_pready),
    .dmi_pslverr      (dmi_pslverr)
);

// ----------------------------------------------------------------------------
// Debug Module and its hart / system-bus-access interfaces

localparam N_HARTS = 1;
localparam XLEN = 32;

wire                    sys_reset_req;
wire                    sys_reset_done;
wire [N_HARTS-1:0]      hart_reset_req;
wire [N_HARTS-1:0]      hart_reset_done;

wire [N_HARTS-1:0]      hart_req_halt;
wire [N_HARTS-1:0]      hart_req_halt_on_reset;
wire [N_HARTS-1:0]      hart_req_resume;
wire [N_HARTS-1:0]      hart_halted;
wire [N_HARTS-1:0]      hart_running;

wire [N_HARTS*XLEN-1:0] hart_data0_rdata;
wire [N_HARTS*XLEN-1:0] hart_data0_wdata;
wire [N_HARTS-1:0]      hart_data0_wen;

wire [N_HARTS*XLEN-1:0] hart_instr_data;
wire [N_HARTS-1:0]      hart_instr_data_vld;
wire [N_HARTS-1:0]      hart_instr_data_rdy;
wire [N_HARTS-1:0]      hart_instr_caught_exception;
wire [N_HARTS-1:0]      hart_instr_caught_ebreak;

wire [31:0]             sbus_addr;
wire                    sbus_write;
wire [1:0]              sbus_size;
wire                    sbus_vld;
wire                    sbus_rdy;
wire                    sbus_err;
wire [31:0]             sbus_wdata;
wire [31:0]             sbus_rdata;

// The DM is reset directly by the global rst_n (not by rst_n_dmi).
hazard3_dm #(
    .N_HARTS      (N_HARTS),
    .HAVE_SBA     (1),
    .NEXT_DM_ADDR (0)
) dm (
    .clk                         (clk),
    .rst_n                       (rst_n),

    .dmi_psel                    (dmi_psel),
    .dmi_penable                 (dmi_penable),
    .dmi_pwrite                  (dmi_pwrite),
    .dmi_paddr                   (dmi_paddr),
    .dmi_pwdata                  (dmi_pwdata),
    .dmi_prdata                  (dmi_prdata),
    .dmi_pready                  (dmi_pready),
    .dmi_pslverr                 (dmi_pslverr),

    .sys_reset_req               (sys_reset_req),
    .sys_reset_done              (sys_reset_done),
    .hart_reset_req              (hart_reset_req),
    .hart_reset_done             (hart_reset_done),

    .hart_req_halt               (hart_req_halt),
    .hart_req_halt_on_reset      (hart_req_halt_on_reset),
    .hart_req_resume             (hart_req_resume),
    .hart_halted                 (hart_halted),
    .hart_running                (hart_running),

    .hart_data0_rdata            (hart_data0_rdata),
    .hart_data0_wdata            (hart_data0_wdata),
    .hart_data0_wen              (hart_data0_wen),

    .hart_instr_data             (hart_instr_data),
    .hart_instr_data_vld         (hart_instr_data_vld),
    .hart_instr_data_rdy         (hart_instr_data_rdy),
    .hart_instr_caught_exception (hart_instr_caught_exception),
    .hart_instr_caught_ebreak    (hart_instr_caught_ebreak),

    .sbus_addr                   (sbus_addr),
    .sbus_write                  (sbus_write),
    .sbus_size                   (sbus_size),
    .sbus_vld                    (sbus_vld),
    .sbus_rdy                    (sbus_rdy),
    .sbus_err                    (sbus_err),
    .sbus_wdata                  (sbus_wdata),
    .sbus_rdata                  (sbus_rdata)
);

// ----------------------------------------------------------------------------
// CPU reset and power-up handshake

// Generate resynchronised reset for CPU based on upstream reset and
// on reset requests from DM.
wire assert_cpu_reset = !rst_n || sys_reset_req || hart_reset_req[0];
wire rst_n_cpu;

hazard3_reset_sync cpu_reset_sync (
    .clk       (clk),
    .rst_n_in  (!assert_cpu_reset),
    .rst_n_out (rst_n_cpu)
);

// Still some work to be done on the reset handshake -- this ought to be
// resynchronised to DM's reset domain here, and the DM should wait for a
// rising edge after it has asserted the reset pulse, to make sure the tail
// of the previous "done" is not passed on.
assign sys_reset_done = rst_n_cpu;
assign hart_reset_done = rst_n_cpu;

wire pwrup_req;
reg  pwrup_ack;
// clk_en is driven by the CPU but has no consumer while the clock gate below
// is disabled.
wire clk_en;
// The CPU's unblock output is looped straight back into its unblock input.
wire unblock_out;
wire unblock_in = unblock_out;

// Power-up acknowledge is the request delayed by one clk cycle; it resets to
// acknowledged (1).
always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        pwrup_ack <= 1'b1;
    end else begin
        pwrup_ack <= pwrup_req;
    end
end

// Clock gate is disabled, as CXXRTL currently can't simulate gated clocks
// due to a limitation of the scheduler design

// // Latching clock gate. Does not insert an NBA delay on the gated clock, so
// // safe to exchange data between NBAs on the gated and non-gated clock. Does
// // not glitch as long as clk_en is driven from an NBA on the posedge of clk
// // (e.g. a normal RTL register). The clock stops *high*.
// reg clk_gated;
// always @ (*) begin
//     if (clk_en)
//         clk_gated = clk;
// end

// ----------------------------------------------------------------------------
// Processor

// The configuration header can be overridden from the command line; it is
// included here, inside the module body.
`ifndef CONFIG_HEADER
`define CONFIG_HEADER "config_default.vh"
`endif
`include `CONFIG_HEADER

// Both clk and clk_always_on are fed from the ungated clk (see the disabled
// clock gate above).
hazard3_cpu_2port #(
`include "hazard3_config_inst.vh"
) cpu (
    .clk                        (clk),
    .clk_always_on              (clk),
    .rst_n                      (rst_n_cpu),

    .pwrup_req                  (pwrup_req),
    .pwrup_ack                  (pwrup_ack),
    .clk_en                     (clk_en),
    .unblock_out                (unblock_out),
    .unblock_in                 (unblock_in),

    .i_haddr                    (i_haddr),
    .i_hwrite                   (i_hwrite),
    .i_htrans                   (i_htrans),
    .i_hsize                    (i_hsize),
    .i_hburst                   (i_hburst),
    .i_hprot                    (i_hprot),
    .i_hmastlock                (i_hmastlock),
    .i_hmaster                  (i_hmaster),
    .i_hready                   (i_hready),
    .i_hresp                    (i_hresp),
    .i_hwdata                   (i_hwdata),
    .i_hrdata                   (i_hrdata),

    .d_haddr                    (d_haddr),
    .d_hexcl                    (d_hexcl),
    .d_hwrite                   (d_hwrite),
    .d_htrans                   (d_htrans),
    .d_hsize                    (d_hsize),
    .d_hburst                   (d_hburst),
    .d_hprot                    (d_hprot),
    .d_hmastlock                (d_hmastlock),
    .d_hmaster                  (d_hmaster),
    .d_hready                   (d_hready),
    .d_hresp                    (d_hresp),
    .d_hexokay                  (d_hexokay),
    .d_hwdata                   (d_hwdata),
    .d_hrdata                   (d_hrdata),

    .dbg_req_halt               (hart_req_halt),
    .dbg_req_halt_on_reset      (hart_req_halt_on_reset),
    .dbg_req_resume             (hart_req_resume),
    .dbg_halted                 (hart_halted),
    .dbg_running                (hart_running),

    .dbg_data0_rdata            (hart_data0_rdata),
    .dbg_data0_wdata            (hart_data0_wdata),
    .dbg_data0_wen              (hart_data0_wen),

    .dbg_instr_data             (hart_instr_data),
    .dbg_instr_data_vld         (hart_instr_data_vld),
    .dbg_instr_data_rdy         (hart_instr_data_rdy),
    .dbg_instr_caught_exception (hart_instr_caught_exception),
    .dbg_instr_caught_ebreak    (hart_instr_caught_ebreak),

    .dbg_sbus_addr              (sbus_addr),
    .dbg_sbus_write             (sbus_write),
    .dbg_sbus_size              (sbus_size),
    .dbg_sbus_vld               (sbus_vld),
    .dbg_sbus_rdy               (sbus_rdy),
    .dbg_sbus_err               (sbus_err),
    .dbg_sbus_wdata             (sbus_wdata),
    .dbg_sbus_rdata             (sbus_rdata),

    .irq                        (irq),
    .soft_irq                   (soft_irq[0]),
    .timer_irq                  (timer_irq[0])
);

// The cpu instance above has no i_hexcl connection, so the module's i_hexcl
// output is tied low here.
assign i_hexcl = 1'b0;

endmodule

View File

@ -0,0 +1,5 @@
file $HDL/debug/cdc/hazard3_reset_sync.v
list $HDL/hazard3.f
list $HDL/debug/dm/hazard3_dm.f
list $HDL/debug/dtm/hazard3_jtag_dtm.f

View File

@ -0,0 +1,2 @@
file tb_multicore.v
list tb_common.f

View File

@ -0,0 +1,348 @@
// An integration of JTAG-DTM + DM + 2 single-ported CPUs for openocd to poke
// at over a remote bitbang socket
`default_nettype none
module tb #(
    parameter W_ADDR = 32, // do not modify
    parameter W_DATA = 32  // do not modify
) (
    // Global signals
    input  wire                clk,
    input  wire                rst_n,

    // JTAG port
    input  wire                tck,
    input  wire                trst_n,
    input  wire                tms,
    input  wire                tdi,
    output wire                tdo,

    // Core 0 bus (named I for consistency with 1-core 2-port tb)
    output wire [W_ADDR-1:0]   i_haddr,
    output wire                i_hwrite,
    output wire [1:0]          i_htrans,
    output wire                i_hexcl,
    output wire [2:0]          i_hsize,
    output wire [2:0]          i_hburst,
    output wire [3:0]          i_hprot,
    output wire                i_hmastlock,
    output wire [7:0]          i_hmaster,
    input  wire                i_hready,
    input  wire                i_hresp,
    input  wire                i_hexokay,
    output wire [W_DATA-1:0]   i_hwdata,
    input  wire [W_DATA-1:0]   i_hrdata,

    // Core 1 bus (named D for consistency with 1-core 2-port tb)
    output wire [W_ADDR-1:0]   d_haddr,
    output wire                d_hwrite,
    output wire [1:0]          d_htrans,
    output wire                d_hexcl,
    output wire [2:0]          d_hsize,
    output wire [2:0]          d_hburst,
    output wire [3:0]          d_hprot,
    output wire                d_hmastlock,
    output wire [7:0]          d_hmaster,
    input  wire                d_hready,
    input  wire                d_hresp,
    input  wire                d_hexokay,
    output wire [W_DATA-1:0]   d_hwdata,
    input  wire [W_DATA-1:0]   d_hrdata,

    // Level-sensitive interrupt sources
    // NOTE(review): NUM_IRQS is not declared in this module header; it is
    // presumably supplied by the CONFIG_HEADER included further down. Confirm
    // that every tool used accepts this use-before-declaration.
    input  wire [NUM_IRQS-1:0] irq,       // -> mip.meip
    input  wire [1:0]          soft_irq,  // -> mip.msip
    input  wire [1:0]          timer_irq  // -> mip.mtip
);

// Dual-core testbench top level: one JTAG-DTM, one Debug Module (N_HARTS = 2)
// and two hazard3_cpu_1port instances. Core 0 uses the i_* bus port and bit 0
// of soft_irq/timer_irq; core 1 uses the d_* bus port and bit 1. Both cores
// receive the same irq vector. System bus access from the DM goes through
// core 1 only.

// JTAG-DTM IDCODE, selected after TAP reset, would normally be a
// JEP106-compliant ID
localparam IDCODE = 32'hdeadbeef;

// ----------------------------------------------------------------------------
// DMI bus between the DTM and the DM

wire        dmi_psel;
wire        dmi_penable;
wire        dmi_pwrite;
wire [8:0]  dmi_paddr;
wire [31:0] dmi_pwdata;
// dmi_prdata is never assigned procedurally in this module; it is only
// connected to instance ports. A net is required for that in Verilog-2005
// (a variable cannot be the sink of a port connection), hence `wire`.
wire [31:0] dmi_prdata;
wire        dmi_pready;
wire        dmi_pslverr;

// The DMI-side reset is asserted by the global reset or by a hard reset
// request from the DTM, then resynchronised to clk.
wire dmihardreset_req;
wire assert_dmi_reset = !rst_n || dmihardreset_req;
wire rst_n_dmi;

hazard3_reset_sync dmi_reset_sync_u (
    .clk       (clk),
    .rst_n_in  (!assert_dmi_reset),
    .rst_n_out (rst_n_dmi)
);

hazard3_jtag_dtm #(
    .IDCODE          (IDCODE),
    .DTMCS_IDLE_HINT (8)
) inst_hazard3_jtag_dtm (
    .tck              (tck),
    .trst_n           (trst_n),
    .tms              (tms),
    .tdi              (tdi),
    .tdo              (tdo),

    .dmihardreset_req (dmihardreset_req),

    .clk_dmi          (clk),
    .rst_n_dmi        (rst_n_dmi),

    .dmi_psel         (dmi_psel),
    .dmi_penable      (dmi_penable),
    .dmi_pwrite       (dmi_pwrite),
    .dmi_paddr        (dmi_paddr),
    .dmi_pwdata       (dmi_pwdata),
    .dmi_prdata       (dmi_prdata),
    .dmi_pready       (dmi_pready),
    .dmi_pslverr      (dmi_pslverr)
);

// ----------------------------------------------------------------------------
// Debug Module and its hart / system-bus-access interfaces. Per-hart signals
// are packed vectors: bit (or XLEN-wide slice) n belongs to hart n.

localparam N_HARTS = 2;
localparam XLEN = 32;

wire                    sys_reset_req;
wire                    sys_reset_done;
wire [N_HARTS-1:0]      hart_reset_req;
wire [N_HARTS-1:0]      hart_reset_done;

wire [N_HARTS-1:0]      hart_req_halt;
wire [N_HARTS-1:0]      hart_req_halt_on_reset;
wire [N_HARTS-1:0]      hart_req_resume;
wire [N_HARTS-1:0]      hart_halted;
wire [N_HARTS-1:0]      hart_running;

wire [N_HARTS*XLEN-1:0] hart_data0_rdata;
wire [N_HARTS*XLEN-1:0] hart_data0_wdata;
wire [N_HARTS-1:0]      hart_data0_wen;

wire [N_HARTS*XLEN-1:0] hart_instr_data;
wire [N_HARTS-1:0]      hart_instr_data_vld;
wire [N_HARTS-1:0]      hart_instr_data_rdy;
wire [N_HARTS-1:0]      hart_instr_caught_exception;
wire [N_HARTS-1:0]      hart_instr_caught_ebreak;

wire [31:0]             sbus_addr;
wire                    sbus_write;
wire [1:0]              sbus_size;
wire                    sbus_vld;
wire                    sbus_rdy;
wire                    sbus_err;
wire [31:0]             sbus_wdata;
wire [31:0]             sbus_rdata;

// The DM is reset directly by the global rst_n (not by rst_n_dmi).
hazard3_dm #(
    .N_HARTS      (N_HARTS),
    .HAVE_SBA     (1),
    .NEXT_DM_ADDR (0)
) dm (
    .clk                         (clk),
    .rst_n                       (rst_n),

    .dmi_psel                    (dmi_psel),
    .dmi_penable                 (dmi_penable),
    .dmi_pwrite                  (dmi_pwrite),
    .dmi_paddr                   (dmi_paddr),
    .dmi_pwdata                  (dmi_pwdata),
    .dmi_prdata                  (dmi_prdata),
    .dmi_pready                  (dmi_pready),
    .dmi_pslverr                 (dmi_pslverr),

    .sys_reset_req               (sys_reset_req),
    .sys_reset_done              (sys_reset_done),
    .hart_reset_req              (hart_reset_req),
    .hart_reset_done             (hart_reset_done),

    .hart_req_halt               (hart_req_halt),
    .hart_req_halt_on_reset      (hart_req_halt_on_reset),
    .hart_req_resume             (hart_req_resume),
    .hart_halted                 (hart_halted),
    .hart_running                (hart_running),

    .hart_data0_rdata            (hart_data0_rdata),
    .hart_data0_wdata            (hart_data0_wdata),
    .hart_data0_wen              (hart_data0_wen),

    .hart_instr_data             (hart_instr_data),
    .hart_instr_data_vld         (hart_instr_data_vld),
    .hart_instr_data_rdy         (hart_instr_data_rdy),
    .hart_instr_caught_exception (hart_instr_caught_exception),
    .hart_instr_caught_ebreak    (hart_instr_caught_ebreak),

    .sbus_addr                   (sbus_addr),
    .sbus_write                  (sbus_write),
    .sbus_size                   (sbus_size),
    .sbus_vld                    (sbus_vld),
    .sbus_rdy                    (sbus_rdy),
    .sbus_err                    (sbus_err),
    .sbus_wdata                  (sbus_wdata),
    .sbus_rdata                  (sbus_rdata)
);

// ----------------------------------------------------------------------------
// Per-core resets

// Generate resynchronised reset for CPU based on upstream reset and
// on reset requests from DM.
wire assert_cpu_reset0 = !rst_n || sys_reset_req || hart_reset_req[0];
wire assert_cpu_reset1 = !rst_n || sys_reset_req || hart_reset_req[1];
wire rst_n_cpu0;
wire rst_n_cpu1;

hazard3_reset_sync cpu0_reset_sync (
    .clk       (clk),
    .rst_n_in  (!assert_cpu_reset0),
    .rst_n_out (rst_n_cpu0)
);

hazard3_reset_sync cpu1_reset_sync (
    .clk       (clk),
    .rst_n_in  (!assert_cpu_reset1),
    .rst_n_out (rst_n_cpu1)
);

// Still some work to be done on the reset handshake -- this ought to be
// resynchronised to DM's reset domain here, and the DM should wait for a
// rising edge after it has asserted the reset pulse, to make sure the tail
// of the previous "done" is not passed on.
assign sys_reset_done = rst_n_cpu0 && rst_n_cpu1;
assign hart_reset_done = {rst_n_cpu1, rst_n_cpu0};

// ----------------------------------------------------------------------------
// Processors

// The configuration header can be overridden from the command line; it is
// included here, inside the module body.
`ifndef CONFIG_HEADER
`define CONFIG_HEADER "config_default.vh"
`endif
`include `CONFIG_HEADER

// Each core's power-up request is looped straight back as its acknowledge.
wire pwrup_req_cpu0;
wire pwrup_req_cpu1;
// Each core's unblock output drives the other core's unblock input.
wire unblock_out_cpu0;
wire unblock_out_cpu1;

// MHARTID_VAL is set explicitly per core. HAZARD3_CONFIG_INST_NO_MHARTID is
// defined before including hazard3_config_inst.vh, presumably so the header
// omits its own MHARTID_VAL connection -- confirm against that header.
hazard3_cpu_1port #(
    .MHARTID_VAL (32'h0000_0000),
`define HAZARD3_CONFIG_INST_NO_MHARTID
`include "hazard3_config_inst.vh"
) cpu0 (
    .clk                        (clk),
    .clk_always_on              (clk),
    .rst_n                      (rst_n_cpu0),

    .pwrup_req                  (pwrup_req_cpu0),
    .pwrup_ack                  (pwrup_req_cpu0),
    .clk_en                     (),
    .unblock_out                (unblock_out_cpu0),
    .unblock_in                 (unblock_out_cpu1),

    .haddr                      (i_haddr),
    .hexcl                      (i_hexcl),
    .hwrite                     (i_hwrite),
    .htrans                     (i_htrans),
    .hsize                      (i_hsize),
    .hburst                     (i_hburst),
    .hprot                      (i_hprot),
    .hmastlock                  (i_hmastlock),
    .hmaster                    (i_hmaster),
    .hready                     (i_hready),
    .hresp                      (i_hresp),
    .hexokay                    (i_hexokay),
    .hwdata                     (i_hwdata),
    .hrdata                     (i_hrdata),

    .dbg_req_halt               (hart_req_halt              [0]),
    .dbg_req_halt_on_reset      (hart_req_halt_on_reset     [0]),
    .dbg_req_resume             (hart_req_resume            [0]),
    .dbg_halted                 (hart_halted                [0]),
    .dbg_running                (hart_running               [0]),

    .dbg_data0_rdata            (hart_data0_rdata           [0 * XLEN +: XLEN]),
    .dbg_data0_wdata            (hart_data0_wdata           [0 * XLEN +: XLEN]),
    .dbg_data0_wen              (hart_data0_wen             [0]),

    .dbg_instr_data             (hart_instr_data            [0 * XLEN +: XLEN]),
    .dbg_instr_data_vld         (hart_instr_data_vld        [0]),
    .dbg_instr_data_rdy         (hart_instr_data_rdy        [0]),
    .dbg_instr_caught_exception (hart_instr_caught_exception[0]),
    .dbg_instr_caught_ebreak    (hart_instr_caught_ebreak   [0]),

    // SBA is routed through core 1, so tie off on core 0
    .dbg_sbus_addr              (32'h0),
    .dbg_sbus_write             (1'b0),
    .dbg_sbus_size              (2'h0),
    .dbg_sbus_vld               (1'b0),
    .dbg_sbus_rdy               (),
    .dbg_sbus_err               (),
    .dbg_sbus_wdata             (32'h0),
    .dbg_sbus_rdata             (),

    .irq                        (irq),
    .soft_irq                   (soft_irq[0]),
    .timer_irq                  (timer_irq[0])
);

hazard3_cpu_1port #(
    .MHARTID_VAL (32'h0000_0001),
`define HAZARD3_CONFIG_INST_NO_MHARTID
`include "hazard3_config_inst.vh"
) cpu1 (
    .clk                        (clk),
    .clk_always_on              (clk),
    .rst_n                      (rst_n_cpu1),

    .pwrup_req                  (pwrup_req_cpu1),
    .pwrup_ack                  (pwrup_req_cpu1),
    .clk_en                     (),
    .unblock_out                (unblock_out_cpu1),
    .unblock_in                 (unblock_out_cpu0),

    .haddr                      (d_haddr),
    .hexcl                      (d_hexcl),
    .hwrite                     (d_hwrite),
    .htrans                     (d_htrans),
    .hsize                      (d_hsize),
    .hburst                     (d_hburst),
    .hprot                      (d_hprot),
    .hmastlock                  (d_hmastlock),
    .hmaster                    (d_hmaster),
    .hready                     (d_hready),
    .hresp                      (d_hresp),
    .hexokay                    (d_hexokay),
    .hwdata                     (d_hwdata),
    .hrdata                     (d_hrdata),

    .dbg_req_halt               (hart_req_halt              [1]),
    .dbg_req_halt_on_reset      (hart_req_halt_on_reset     [1]),
    .dbg_req_resume             (hart_req_resume            [1]),
    .dbg_halted                 (hart_halted                [1]),
    .dbg_running                (hart_running               [1]),

    .dbg_data0_rdata            (hart_data0_rdata           [1 * XLEN +: XLEN]),
    .dbg_data0_wdata            (hart_data0_wdata           [1 * XLEN +: XLEN]),
    .dbg_data0_wen              (hart_data0_wen             [1]),

    .dbg_instr_data             (hart_instr_data            [1 * XLEN +: XLEN]),
    .dbg_instr_data_vld         (hart_instr_data_vld        [1]),
    .dbg_instr_data_rdy         (hart_instr_data_rdy        [1]),
    .dbg_instr_caught_exception (hart_instr_caught_exception[1]),
    .dbg_instr_caught_ebreak    (hart_instr_caught_ebreak   [1]),

    // The DM's system bus access interface is served by this core.
    .dbg_sbus_addr              (sbus_addr),
    .dbg_sbus_write             (sbus_write),
    .dbg_sbus_size              (sbus_size),
    .dbg_sbus_vld               (sbus_vld),
    .dbg_sbus_rdy               (sbus_rdy),
    .dbg_sbus_err               (sbus_err),
    .dbg_sbus_wdata             (sbus_wdata),
    .dbg_sbus_rdata             (sbus_rdata),

    .irq                        (irq),
    .soft_irq                   (soft_irq[1]),
    .timer_irq                  (timer_irq[1])
);

endmodule

View File

@ -0,0 +1,117 @@
[*]
[*] GTKWave Analyzer v3.3.103 (w)1999-2019 BSI
[*] Sat Sep 4 00:39:00 2021
[*]
[dumpfile] "/home/luke/proj/hazard3/test/sim/openocd/waves.vcd"
[dumpfile_mtime] "Sat Sep 4 00:36:35 2021"
[dumpfile_size] 31718403
[savefile] "/home/luke/proj/hazard3/test/sim/openocd/waves.gtkw"
[timestart] 877885
[size] 1920 1043
[pos] 174 41
*-3.000000 877889 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] cpu.
[treeopen] cpu.core.
[treeopen] inst_hazard3_jtag_dtm.
[sst_width] 233
[signals_width] 238
[sst_expanded] 1
[sst_vpaned_height] 298
@28
clk
trst_n
rst_n_dmi
@200
-
-DTM
@28
tck
tms
tdi
tdo
@200
-
@22
inst_hazard3_jtag_dtm.tap_state[3:0]
inst_hazard3_jtag_dtm.ir[4:0]
@200
-
-DMI
@22
dm.dmi_paddr[7:0]
@28
dm.dmi_penable
dm.dmi_pwrite
@22
dm.dmi_pwdata[31:0]
dm.dmi_prdata[31:0]
@200
-
-DM Internals
@28
dm.dmactive
@200
-
-Processor Debug Controls
@28
cpu.dbg_req_halt
cpu.dbg_req_halt_on_reset
cpu.dbg_req_resume
cpu.dbg_halted
cpu.dbg_running
cpu.dbg_instr_caught_ebreak
cpu.dbg_instr_caught_exception
@22
cpu.dbg_instr_data[31:0]
@28
cpu.dbg_instr_data_rdy
cpu.dbg_instr_data_vld
@200
-
-Trap stuff
@22
cpu.core.inst_hazard3_csr.trap_addr[31:0]
@28
cpu.core.inst_hazard3_csr.trap_enter_rdy
cpu.core.inst_hazard3_csr.trap_enter_vld
cpu.core.inst_hazard3_csr.trap_is_irq
@22
cpu.core.inst_hazard3_csr.except[3:0]
@28
cpu.core.m_stall
cpu.core.bus_dph_err_d
@200
-
-CSRs
@22
cpu.core.inst_hazard3_csr.addr[11:0]
@28
cpu.core.inst_hazard3_csr.wen
cpu.core.inst_hazard3_csr.ren_soon
@200
-
-I Bus
@22
cpu.i_haddr[31:0]
@28
cpu.i_htrans[1:0]
cpu.i_hready
cpu.i_hresp
@23
cpu.i_hrdata[31:0]
@200
-
-D Bus
@22
d_haddr[31:0]
@28
d_htrans[1:0]
d_hwrite
d_hsize[2:0]
d_hready
d_hresp
@22
d_hwdata[31:0]
d_hrdata[31:0]
[pattern_trace] 1
[pattern_trace] 0