Move DM data0 CSR into the M-custom space, and document this

This commit is contained in:
Luke Wren 2021-11-28 15:52:25 +00:00
parent 9bf4d5105f
commit e7466ae4be
10 changed files with 28 additions and 556 deletions

View File

@ -90,7 +90,7 @@ Read-only, constant. Value depends on which ISA extensions Hazard3 is configured
|===
| Bits | Name | Description
| 31:30 | `mxl` | Always `0x1`. Indicates this is a 32-bit processor.
| 23 | `x` | 1 if the core is configured to support trap-handling, otherwise 0. Hazard3 has nonstandard CSRs to enable/disable external interrupts on a per-interrupt basis, see <<reg-meie0>> and <<reg-meip0>>. The `misa.x` bit must be set to indicate their presence.
| 23 | `x` | 1 if the core is configured to support trap-handling, otherwise 0. Hazard3 has nonstandard CSRs to enable/disable external interrupts on a per-interrupt basis, see <<reg-meie0>> and <<reg-meip0>>. The `misa.x` bit must be set to indicate their presence. Hazard3 does not implement any custom instructions.
| 12 | `m` | 1 if the M extension is present, otherwise 0.
| 2 | `c` | 1 if the C extension is present, otherwise 0.
|===
@ -362,23 +362,34 @@ Debug program counter. When entering Debug Mode, `dpc` samples the current progr
Address: `0x7b2`
Not implemented. However, the Debug Module's internal `data0` register is mapped to this CSR address under the following conditions:
Not implemented. Access will cause an illegal instruction exception.
- The core is in Debug Mode
- The Debug Module is _currently executing an abstract command on this core_
The Debug Module uses this mapping to exchange data with the core by injecting `csrr`/`csrw` instructions into the prefetch buffer. This in turn is used to implement the Abstract Access Register command. See <<debug-chapter>>.
The Debug Module lists the number of scratch registers as 0 in `hartinfo.nscratch`.
To provide data exchange between the Debug Module and the core, the Debug Module's `data0` register is mapped into the core's CSR space at a read/write M-custom address -- see <<reg-dmdata0>>.
==== dscratch1
Address: `0x7b3`
Not implemented. Access will cause an illegal instruction exception.
=== Custom CSRs
These are all allocated in the space `0xbc0` through `0xbff` which is available for custom read/write M-mode CSRs, and `0xfc0` through `0xfff` which is available for custom read-only M-mode CSRs.
Hazard3 also allocates a custom _Debug Mode_ register <<reg-dmdata0>> in this space.
[[reg-dmdata0]]
==== dmdata0
Address: `0xbff`
The Debug Module's internal `data0` register is mapped to this CSR address when the core is in debug mode. At any other time, access to this CSR address will cause an illegal instruction exception.
NOTE: The 0.13.2 debug specification allows for the Debug Module's abstract data registers to be mapped into the core's CSR address space, but there is no Debug-custom space, so the read/write M-custom space is used instead to avoid conflict with future versions of the debug specification.
The Debug Module uses this mapping to exchange data with the core by injecting `csrr`/`csrw` instructions into the prefetch buffer. This in turn is used to implement the Abstract Access Register command. See <<debug-chapter>>.
This CSR address is given by the `dataaddr` field of the Debug Module's `hartinfo` register, and `hartinfo.dataaccess` is set to 0 to indicate this is a CSR mapping, not a memory mapping.
[[reg-midcr]]
==== midcr

View File

@ -484,8 +484,8 @@ assign hart_instr_data_vld = {{N_HARTS{1'b0}},
} << hartsel;
assign hart_instr_data = {N_HARTS{
acmd_state == S_ISSUE_REGWRITE ? 32'h7b202073 | {20'd0, acmd_prev_regno, 7'd0} : // csrr xx, data0
acmd_state == S_ISSUE_REGREAD ? 32'h7b201073 | {12'd0, acmd_prev_regno, 15'd0} : // csrw data0, xx
acmd_state == S_ISSUE_REGWRITE ? 32'hbff02073 | {20'd0, acmd_prev_regno, 7'd0} : // csrr xx, dmdata0
acmd_state == S_ISSUE_REGREAD ? 32'hbff01073 | {12'd0, acmd_prev_regno, 15'd0} : // csrw dmdata0, xx
acmd_state == S_ISSUE_PROGBUF0 ? progbuf0 :
acmd_state == S_ISSUE_PROGBUF1 ? progbuf1 :
32'h00100073 // ebreak
@ -531,9 +531,10 @@ always @ (*) begin
8'h0, // reserved
4'h0, // nscratch = 0
3'h0, // reserved
1'b0, // dataccess = 0, data0 is backed by a per-hart CSR
1'b0, // dataccess = 0, data0 is mapped to each hart's CSR space
4'h1, // datasize = 1, a single data CSR (data0) is available
12'h7b2 // dataaddr, same location where dscratch0 would be if implemented
12'hbff // dataaddr, placed at the top of the M-custom space since
// the spec doesn't reserve a location for it.
};
ADDR_HALTSUM0: dmi_prdata = {
{XLEN - N_HARTS{1'b0}},

View File

@ -258,7 +258,7 @@ localparam TSELECT = 12'h7a0;
localparam DCSR = 12'h7b0;
localparam DPC = 12'h7b1;
localparam DATA0 = 12'h7b2; // DSCRATCH0 would be here if implemented
localparam DMDATA0 = 12'hbff; // Custom read/write
// ----------------------------------------------------------------------------
// CSR state + update logic
@ -564,7 +564,7 @@ always @ (posedge clk or negedge rst_n) begin
end
assign dbg_data0_wdata = wdata;
assign dbg_data0_wen = wen && addr == DATA0;
assign dbg_data0_wen = debug_mode && wen && addr == DMDATA0;
// ----------------------------------------------------------------------------
// Read port + detect addressing of unmapped CSRs
@ -857,7 +857,7 @@ always @ (*) begin
rdata = dpc;
end
DATA0: if (DEBUG_SUPPORT && debug_mode) begin
DMDATA0: if (DEBUG_SUPPORT && debug_mode) begin
decode_match = 1'b1;
rdata = dbg_data0_rdata;
end

View File

@ -1,7 +1,7 @@
APP := coremark
MAX_CYCLES := 100000000
CROSS_PREFIX ?= riscv32-unknown-elf-
CROSS_PREFIX ?= /opt/riscv/unstable/bin/riscv32-unknown-elf-
TBDIR ?= ../tb_cxxrtl

View File

@ -1,2 +0,0 @@
tb
dut.cpp

View File

@ -1,16 +0,0 @@
# Build flow for the CXXRTL debug testbench:
#   1. yosys reads the HDL listed by tb.f, flattens it under $(TOP) and emits
#      a C++ model (dut.cpp).
#   2. clang++ compiles tb.cpp, which #includes dut.cpp, into the tb binary.

TOP := tb

# Neither of these targets names a real file.
.PHONY: all clean

all: tb

SYNTH_CMD += read_verilog -I ../../../hdl $(shell listfiles tb.f);
SYNTH_CMD += prep -flatten -top $(TOP); async2sync;
SYNTH_CMD += write_cxxrtl dut.cpp

# Regenerate the model when the local wrapper or the file list changes.
# NOTE(review): the HDL files pulled in through tb.f are not tracked here;
# run `make clean` after editing core/DM sources.
dut.cpp: tb.f tb.v
	yosys -p "$(SYNTH_CMD)" 2>&1 > cxxrtl.log

clean::
	rm -f dut.cpp cxxrtl.log tb

# tb.cpp includes dut.cpp directly, so both are prerequisites of the binary.
tb: dut.cpp tb.cpp
	clang++ -O3 -std=c++14 $(addprefix -D,$(CDEFINES)) -I $(shell yosys-config --datdir)/include tb.cpp -o tb

View File

@ -1,26 +0,0 @@
# data0 is at 0x04
# command is at 0x17
# command for reg write is 0x00231000 + reg
# command for reg read is 0x00221000 + reg
# Turn on dm
w 0x10 1
# Request halt
w 0x10 0x80000001
# Read back halt status
i 30
r 0x11
# Write to registers s0, s1 (x8, x9: abstract regno 0x1008, 0x1009)
w 0x04 0x1234
w 0x17 0x00231008
w 0x04 0x5678
w 0x17 0x00231009
# Read them back
w 0x17 0x00221008
r 0x04
w 0x17 0x00221009
r 0x04
x

View File

@ -1,286 +0,0 @@
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include <stdio.h>
// Device-under-test model generated by CXXRTL:
#include "dut.cpp"
#include <backends/cxxrtl/cxxrtl_vcd.h>
// Size in bytes of the simulated memory (16 MiB), mapped from address 0.
static const unsigned int MEM_SIZE = 16 * 1024 * 1024;
// Backing store for the simulated memory; main() loads the binary at offset 0.
uint8_t mem[MEM_SIZE];
// Base address of the testbench IO registers. main() decodes data-port
// writes to IO_BASE plus one of the offsets below.
static const unsigned int IO_BASE = 0x80000000;
// Byte offsets of the IO registers relative to IO_BASE.
enum {
// Write: the written value is passed to putchar()
IO_PRINT_CHAR = 0,
// Write: the written value is printed as 8 hex digits plus newline
IO_PRINT_U32 = 4,
// Write: reports the written value as the exit code and ends the simulation
IO_EXIT = 8
};
// Command-line usage text, written to stderr by exit_help().
const char *help_str =
"Usage: tb binfile cmdlist [vcdfile] [--dump start end] [--cycles n]\n"
" binfile : Binary to load into start of memory\n"
" cmdlist : Debug module command list file\n"
" vcdfile : Path to dump waveforms to\n"
" --dump start end : Print out memory contents between start and end (exclusive)\n"
" after execution finishes. Can be passed multiple times.\n"
" --cycles n : Maximum number of cycles to run before exiting.\n"
;
// Report a command-line error and terminate the process.
//
// Writes `errtext` (which may be empty) followed by the usage text to stderr,
// then exits with status -1. Never returns.
void exit_help(std::string errtext = "") {
	std::cerr << errtext;
	std::cerr << help_str;
	exit(-1);
}
// States of the state machine in main() that drives the debug module's DMI
// port: one command is fetched in S_IDLE, then a two-cycle read or write
// transfer is performed (psel asserted first, penable asserted one cycle
// later) before returning to S_IDLE.
enum cmdstate {
// Waiting out an idle count, or ready to fetch the next command
S_IDLE = 0,
// Write transfer: psel and pwrite asserted, penable asserted on this cycle
S_WRITE_SETUP,
// Write transfer: final cycle, bus signals are deasserted
S_WRITE_ACCESS,
// Read transfer: psel asserted, penable asserted on this cycle
S_READ_SETUP,
// Read transfer: final cycle, prdata is printed and bus signals deasserted
S_READ_ACCESS
};
int main(int argc, char **argv) {
if (argc < 3)
exit_help();
bool dump_waves = false;
std::string waves_path;
std::vector<std::pair<uint32_t, uint32_t>> dump_ranges;
int64_t max_cycles = 100000;
for (int i = 3; i < argc; ++i) {
std::string s(argv[i]);
if (i == 3 && s.rfind("--", 0) != 0) {
// Optional positional argument: vcdfile
dump_waves = true;
waves_path = s;
}
else if (s == "--dump") {
if (argc - i < 3)
exit_help("Option --dump requires 2 arguments\n");
dump_ranges.push_back(std::pair<uint32_t, uint32_t>(
std::stoul(argv[i + 1], 0, 0),
std::stoul(argv[i + 2], 0, 0)
));;
i += 2;
}
else if (s == "--cycles") {
if (argc - i < 2)
exit_help("Option --cycles requires an argument\n");
max_cycles = std::stol(argv[i + 1], 0, 0);
i += 1;
}
else {
std::cerr << "Unrecognised argument " << s << "\n";
exit_help("");
}
}
cxxrtl_design::p_tb top;
std::fill(std::begin(mem), std::end(mem), 0);
std::ifstream fd(argv[1], std::ios::binary | std::ios::ate);
std::streamsize bin_size = fd.tellg();
if (bin_size > MEM_SIZE) {
std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n";
return -1;
}
fd.seekg(0, std::ios::beg);
fd.read((char*)mem, bin_size);
std::ifstream cmdfile(argv[2]);
std::ofstream waves_fd;
cxxrtl::vcd_writer vcd;
if (dump_waves) {
waves_fd.open(waves_path);
cxxrtl::debug_items all_debug_items;
top.debug_info(all_debug_items);
vcd.timescale(1, "us");
vcd.add(all_debug_items);
}
bool bus_trans = false;
bool bus_write = false;
bool bus_trans_i = false;
uint32_t bus_addr_i = 0;
uint32_t bus_addr = 0;
uint8_t bus_size = 0;
// Never generate bus stalls
top.p_i__hready.set<bool>(true);
top.p_d__hready.set<bool>(true);
// Reset + initial clock pulse
top.step();
top.p_clk.set<bool>(true);
top.step();
top.p_clk.set<bool>(false);
top.p_rst__n.set<bool>(true);
top.step();
cmdstate state = S_IDLE;
int idle_counter = 0;
for (int64_t cycle = 0; cycle < max_cycles; ++cycle) {
top.p_clk.set<bool>(false);
top.step();
if (dump_waves)
vcd.sample(cycle * 2);
top.p_clk.set<bool>(true);
top.step();
bool got_exit_cmd = false;
switch (state) {
case S_IDLE:
if (idle_counter > 0) {
--idle_counter;
}
else {
std::string line;
do {
if (!std::getline(cmdfile, line))
line = "i 1000";
} while (line.length() == 0 || line[0] == '#');
std::istringstream iss(line);
iss >> std::setbase(0);
std::string verb;
iss >> verb;
if (verb == "i") {
iss >> idle_counter;
printf("i %d\n", idle_counter);
}
else if (verb == "w") {
uint32_t addr, data;
iss >> addr;
iss >> data;
top.p_dmi__paddr.set<uint32_t>(addr);
top.p_dmi__pwdata.set<uint32_t>(data);
top.p_dmi__psel.set<bool>(true);
top.p_dmi__pwrite.set<bool>(true);
state = S_WRITE_SETUP;
printf("w %02x: %08x\n", addr, data);
}
else if (verb == "r") {
uint32_t addr;
iss >> addr;
top.p_dmi__paddr.set<uint32_t>(addr);
top.p_dmi__psel.set<bool>(true);
top.p_dmi__pwrite.set<bool>(false);
state = S_READ_SETUP;
}
else if (verb == "x") {
got_exit_cmd = true;
}
else {
std::cerr << "Unrecognised verb " << verb << "\n";
got_exit_cmd = true;
}
}
break;
case S_READ_SETUP:
top.p_dmi__penable.set<bool>(true);
state = S_READ_ACCESS;
break;
case S_READ_ACCESS:
top.p_dmi__penable.set<bool>(false);
top.p_dmi__psel.set<bool>(false);
printf("r %02x: %08x\n", top.p_dmi__paddr.get<uint32_t>(), top.p_dmi__prdata.get<uint32_t>());
state = S_IDLE;
idle_counter = 10;
break;
case S_WRITE_SETUP:
top.p_dmi__penable.set<bool>(true);
state = S_WRITE_ACCESS;
break;
case S_WRITE_ACCESS:
top.p_dmi__penable.set<bool>(false);
top.p_dmi__psel.set<bool>(false);
top.p_dmi__pwrite.set<bool>(false);
state = S_IDLE;
idle_counter = 10;
break;
default:
state = S_IDLE;
break;
}
// Handle current data phase, then move current address phase to data phase
uint32_t rdata = 0;
if (bus_trans && bus_write) {
uint32_t wdata = top.p_d__hwdata.get<uint32_t>();
if (bus_addr <= MEM_SIZE) {
unsigned int n_bytes = 1u << bus_size;
// Note we are relying on hazard3's byte lane replication
for (unsigned int i = 0; i < n_bytes; ++i) {
mem[bus_addr + i] = wdata >> (8 * i) & 0xffu;
}
}
else if (bus_addr == IO_BASE + IO_PRINT_CHAR) {
putchar(wdata);
}
else if (bus_addr == IO_BASE + IO_PRINT_U32) {
printf("%08x\n", wdata);
}
else if (bus_addr == IO_BASE + IO_EXIT) {
printf("CPU requested halt. Exit code %d\n", wdata);
printf("Ran for %ld cycles\n", cycle + 1);
break;
}
}
else if (bus_trans && !bus_write) {
if (bus_addr <= MEM_SIZE) {
bus_addr &= ~0x3u;
rdata =
(uint32_t)mem[bus_addr] |
mem[bus_addr + 1] << 8 |
mem[bus_addr + 2] << 16 |
mem[bus_addr + 3] << 24;
}
}
top.p_d__hrdata.set<uint32_t>(rdata);
if (bus_trans_i) {
bus_addr_i &= ~0x3u;
top.p_i__hrdata.set<uint32_t>(
(uint32_t)mem[bus_addr_i] |
mem[bus_addr_i + 1] << 8 |
mem[bus_addr_i + 2] << 16 |
mem[bus_addr_i + 3] << 24
);
}
bus_trans = top.p_d__htrans.get<uint8_t>() >> 1;
bus_write = top.p_d__hwrite.get<bool>();
bus_size = top.p_d__hsize.get<uint8_t>();
bus_addr = top.p_d__haddr.get<uint32_t>();
bus_trans_i = top.p_i__htrans.get<uint8_t>() >> 1;
bus_addr_i = top.p_i__haddr.get<uint32_t>();
if (dump_waves) {
// The extra step() is just here to get the bus responses to line up nicely
// in the VCD (hopefully is a quick update)
top.step();
vcd.sample(cycle * 2 + 1);
waves_fd << vcd.buffer;
vcd.buffer.clear();
}
if (got_exit_cmd)
break;
}
for (auto r : dump_ranges) {
printf("Dumping memory from %08x to %08x:\n", r.first, r.second);
for (int i = 0; i < r.second - r.first; ++i)
printf("%02x%c", mem[r.first + i], i % 16 == 15 ? '\n' : ' ');
printf("\n");
}
return 0;
}

View File

@ -1,4 +0,0 @@
file tb.v
list $HDL/hazard3.f
list $HDL/debug/dm/hazard3_dm.f

View File

@ -1,206 +0,0 @@
// This is not really a "testbench", just an integration of CPU + DM for a
// CXXRTL test to poke at.
//
// Instantiates one hazard3_dm and one hazard3_cpu_2port, wires the DM's
// per-hart debug signals to the CPU, and exposes the CPU's two bus ports,
// the DM's DMI port and the interrupt inputs at the top level. The CPU reset
// is derived from rst_n combined with the DM's reset requests.

module tb #(
	parameter W_DATA  = 32,
	parameter W_ADDR  = 32,
	parameter NUM_IRQ = 16
) (
	// Global signals
	input  wire               clk,
	input  wire               rst_n,

	// Instruction fetch port
	output wire [W_ADDR-1:0]  i_haddr,
	output wire               i_hwrite,
	output wire [1:0]         i_htrans,
	output wire [2:0]         i_hsize,
	output wire [2:0]         i_hburst,
	output wire [3:0]         i_hprot,
	output wire               i_hmastlock,
	input  wire               i_hready,
	input  wire               i_hresp,
	output wire [W_DATA-1:0]  i_hwdata,
	input  wire [W_DATA-1:0]  i_hrdata,

	// Load/store port
	output wire [W_ADDR-1:0]  d_haddr,
	output wire               d_hwrite,
	output wire [1:0]         d_htrans,
	output wire [2:0]         d_hsize,
	output wire [2:0]         d_hburst,
	output wire [3:0]         d_hprot,
	output wire               d_hmastlock,
	input  wire               d_hready,
	input  wire               d_hresp,
	output wire [W_DATA-1:0]  d_hwdata,
	input  wire [W_DATA-1:0]  d_hrdata,

	// Debug module interface
	input  wire               dmi_psel,
	input  wire               dmi_penable,
	input  wire               dmi_pwrite,
	input  wire [7:0]         dmi_paddr,
	input  wire [31:0]        dmi_pwdata,
	// Driven by the dm instance's output port, so this must be a net: a
	// variable (reg) may not be connected to an instance output in Verilog.
	output wire [31:0]        dmi_prdata,
	output wire               dmi_pready,
	output wire               dmi_pslverr,

	// Level-sensitive interrupt sources
	input  wire [NUM_IRQ-1:0] irq,       // -> mip.meip
	input  wire               soft_irq,  // -> mip.msip
	input  wire               timer_irq  // -> mip.mtip
);

// Only a single CPU is instantiated below, so the per-hart vectors are 1 bit
// (or one XLEN-bit word) wide and connect directly to the CPU's scalar ports.
localparam N_HARTS = 1;
localparam XLEN = 32;

wire                    sys_reset_req;
wire                    sys_reset_done;
wire [N_HARTS-1:0]      hart_reset_req;
wire [N_HARTS-1:0]      hart_reset_done;

wire [N_HARTS-1:0]      hart_req_halt;
wire [N_HARTS-1:0]      hart_req_halt_on_reset;
wire [N_HARTS-1:0]      hart_req_resume;
wire [N_HARTS-1:0]      hart_halted;
wire [N_HARTS-1:0]      hart_running;

wire [N_HARTS*XLEN-1:0] hart_data0_rdata;
wire [N_HARTS*XLEN-1:0] hart_data0_wdata;
wire [N_HARTS-1:0]      hart_data0_wen;

wire [N_HARTS*XLEN-1:0] hart_instr_data;
wire [N_HARTS-1:0]      hart_instr_data_vld;
wire [N_HARTS-1:0]      hart_instr_data_rdy;
wire [N_HARTS-1:0]      hart_instr_caught_exception;
wire [N_HARTS-1:0]      hart_instr_caught_ebreak;

hazard3_dm #(
	.N_HARTS      (N_HARTS),
	.NEXT_DM_ADDR (0)
) dm (
	.clk                         (clk),
	.rst_n                       (rst_n),

	.dmi_psel                    (dmi_psel),
	.dmi_penable                 (dmi_penable),
	.dmi_pwrite                  (dmi_pwrite),
	.dmi_paddr                   (dmi_paddr),
	.dmi_pwdata                  (dmi_pwdata),
	.dmi_prdata                  (dmi_prdata),
	.dmi_pready                  (dmi_pready),
	.dmi_pslverr                 (dmi_pslverr),

	.sys_reset_req               (sys_reset_req),
	.sys_reset_done              (sys_reset_done),
	.hart_reset_req              (hart_reset_req),
	.hart_reset_done             (hart_reset_done),

	.hart_req_halt               (hart_req_halt),
	.hart_req_halt_on_reset      (hart_req_halt_on_reset),
	.hart_req_resume             (hart_req_resume),
	.hart_halted                 (hart_halted),
	.hart_running                (hart_running),

	.hart_data0_rdata            (hart_data0_rdata),
	.hart_data0_wdata            (hart_data0_wdata),
	.hart_data0_wen              (hart_data0_wen),

	.hart_instr_data             (hart_instr_data),
	.hart_instr_data_vld         (hart_instr_data_vld),
	.hart_instr_data_rdy         (hart_instr_data_rdy),
	.hart_instr_caught_exception (hart_instr_caught_exception),
	.hart_instr_caught_ebreak    (hart_instr_caught_ebreak)
);

// Generate resynchronised reset for CPU based on upstream reset and
// on reset requests from DM.

wire assert_cpu_reset = !rst_n || sys_reset_req || hart_reset_req[0];

// Asynchronous assert, synchronous release: both flops clear immediately when
// a reset is requested, then shift in ones so rst_n_cpu rises two clocks
// after the request is removed.
reg [1:0] cpu_reset_sync;
wire rst_n_cpu = cpu_reset_sync[1];

always @ (posedge clk or posedge assert_cpu_reset)
	if (assert_cpu_reset)
		cpu_reset_sync <= 2'b00;
	else
		cpu_reset_sync <= (cpu_reset_sync << 1) | 2'b01;

// Still some work to be done on the reset handshake -- this ought to be
// resynchronised to DM's reset domain here, and the DM should wait for a
// rising edge after it has asserted the reset pulse, to make sure the tail
// of the previous "done" is not passed on.
assign sys_reset_done = rst_n_cpu;
// Replicated across the hart vector so every bit is driven explicitly.
assign hart_reset_done = {N_HARTS{rst_n_cpu}};

hazard3_cpu_2port #(
	.RESET_VECTOR    (32'hc0),
	.MTVEC_INIT      (32'h00),
	.EXTENSION_C     (1),
	.EXTENSION_M     (1),
	.CSR_M_MANDATORY (1),
	.CSR_M_TRAP      (1),
	.CSR_COUNTER     (1),
	.DEBUG_SUPPORT   (1),
	.NUM_IRQ         (NUM_IRQ),
	.MVENDORID_VAL   (32'hdeadbeef),
	.MARCHID_VAL     (32'hfeedf00d),
	.MIMPID_VAL      (32'h12345678),
	.MHARTID_VAL     (32'h0),
	.REDUCED_BYPASS  (0),
	.MULDIV_UNROLL   (2),
	.MUL_FAST        (1)
) cpu (
	.clk                        (clk),
	.rst_n                      (rst_n_cpu),

	.i_haddr                    (i_haddr),
	.i_hwrite                   (i_hwrite),
	.i_htrans                   (i_htrans),
	.i_hsize                    (i_hsize),
	.i_hburst                   (i_hburst),
	.i_hprot                    (i_hprot),
	.i_hmastlock                (i_hmastlock),
	.i_hready                   (i_hready),
	.i_hresp                    (i_hresp),
	.i_hwdata                   (i_hwdata),
	.i_hrdata                   (i_hrdata),

	.d_haddr                    (d_haddr),
	.d_hwrite                   (d_hwrite),
	.d_htrans                   (d_htrans),
	.d_hsize                    (d_hsize),
	.d_hburst                   (d_hburst),
	.d_hprot                    (d_hprot),
	.d_hmastlock                (d_hmastlock),
	.d_hready                   (d_hready),
	.d_hresp                    (d_hresp),
	.d_hwdata                   (d_hwdata),
	.d_hrdata                   (d_hrdata),

	.dbg_req_halt               (hart_req_halt),
	.dbg_req_halt_on_reset      (hart_req_halt_on_reset),
	.dbg_req_resume             (hart_req_resume),
	.dbg_halted                 (hart_halted),
	.dbg_running                (hart_running),

	.dbg_data0_rdata            (hart_data0_rdata),
	.dbg_data0_wdata            (hart_data0_wdata),
	.dbg_data0_wen              (hart_data0_wen),

	.dbg_instr_data             (hart_instr_data),
	.dbg_instr_data_vld         (hart_instr_data_vld),
	.dbg_instr_data_rdy         (hart_instr_data_rdy),
	.dbg_instr_caught_exception (hart_instr_caught_exception),
	.dbg_instr_caught_ebreak    (hart_instr_caught_ebreak),

	.irq                        (irq),
	.soft_irq                   (soft_irq),
	.timer_irq                  (timer_irq)
);

endmodule