diff --git a/hdl/hazard3_csr.v b/hdl/hazard3_csr.v index 6618cc7..909182b 100644 --- a/hdl/hazard3_csr.v +++ b/hdl/hazard3_csr.v @@ -921,6 +921,14 @@ assign enter_debug_mode = (want_halt_irq || want_halt_except) && trap_enter_rdy; assign exit_debug_mode = pending_dbg_resume && trap_enter_rdy; +// Report back to DM instruction injector to tell it its instruction sequence +// has finished (ebreak) or crashed out +assign dbg_instr_caught_ebreak = debug_mode && except == EXCEPT_EBREAK && trap_enter_rdy; + +// Note we exclude ebreak from here regardless of dcsr.ebreakm, since we are +// already in debug mode at this point +assign dbg_instr_caught_exception = debug_mode && except != EXCEPT_NONE && except != EXCEPT_EBREAK && trap_enter_rdy; + // ---------------------------------------------------------------------------- // Trap request generation @@ -999,12 +1007,6 @@ assign trap_enter_vld = assign mcause_irq_next = !exception_req_any; assign mcause_code_next = exception_req_any ? {2'h0, except} : vector_sel; -// Report back to DM instruction injector to tell it its instruction sequence -// has finished or crashed out -assign dbg_instr_caught_ebreak = debug_mode && except == EXCEPT_EBREAK; -// Note we exclude ebreak from here regardless of dcsr.ebreakm! -assign dbg_instr_caught_exception = debug_mode && except != EXCEPT_NONE && except != EXCEPT_EBREAK; - // ---------------------------------------------------------------------------- `ifdef RISCV_FORMAL diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 2b14d3d..7757f22 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -271,7 +271,7 @@ reg [1:0] buf_level; reg [W_BUNDLE-1:0] hwbuf; wire [W_DATA-1:0] fetch_data = fifo_empty ? mem_data : fifo_rdata; -wire fetch_data_vld = !fifo_empty || (mem_data_vld && ~|ctr_flush_pending); +wire fetch_data_vld = !fifo_empty || (mem_data_vld && ~|ctr_flush_pending && !debug_mode); // Shift any recycled instruction data down to backfill D's consumption // We don't care about anything which is invalid or will be overlaid with fresh data, diff --git a/test/sim/core_debug/.gitignore b/test/sim/core_debug/.gitignore new file mode 100644 index 0000000..719db5b --- /dev/null +++ b/test/sim/core_debug/.gitignore @@ -0,0 +1,2 @@ +tb +dut.cpp diff --git a/test/sim/core_debug/Makefile b/test/sim/core_debug/Makefile new file mode 100644 index 0000000..8379c25 --- /dev/null +++ b/test/sim/core_debug/Makefile @@ -0,0 +1,38 @@ +TOP := hazard3_cpu_2port +CDEFINES := DUAL_PORT + +CPU_RESET_VECTOR := 32'hc0 +EXTENSION_C := 1 +EXTENSION_M := 1 +DEBUG_SUPPORT := 1 +MULDIV_UNROLL := 2 +MUL_FAST := 1 +REDUCED_BYPASS := 0 + +.PHONY: clean tb all run + +all: run + +run: tb + ./tb zero.bin waves.vcd + +SYNTH_CMD += read_verilog -I ../../../hdl $(shell listfiles ../../../hdl/hazard3.f); +SYNTH_CMD += chparam -set EXTENSION_C $(EXTENSION_C) $(TOP); +SYNTH_CMD += chparam -set EXTENSION_M $(EXTENSION_M) $(TOP); +SYNTH_CMD += chparam -set DEBUG_SUPPORT $(DEBUG_SUPPORT) $(TOP); +SYNTH_CMD += chparam -set CSR_COUNTER 1 $(TOP); +SYNTH_CMD += chparam -set RESET_VECTOR $(CPU_RESET_VECTOR) $(TOP); +SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP); +SYNTH_CMD += chparam -set MULDIV_UNROLL $(MULDIV_UNROLL) $(TOP); +SYNTH_CMD += chparam -set MUL_FAST $(MUL_FAST) $(TOP); +SYNTH_CMD += prep -flatten -top $(TOP); async2sync; +SYNTH_CMD += write_cxxrtl dut.cpp + +dut.cpp: + yosys -p "$(SYNTH_CMD)" 2>&1 > cxxrtl.log + +clean:: + rm -f dut.cpp cxxrtl.log tb + +tb: dut.cpp + clang++ -O3 -std=c++14 $(addprefix -D,$(CDEFINES)) -I $(shell yosys-config --datdir)/include tb.cpp -o tb diff --git a/test/sim/core_debug/tb.cpp b/test/sim/core_debug/tb.cpp new file mode 100644 index 0000000..8374ad5 --- /dev/null +++ b/test/sim/core_debug/tb.cpp @@ -0,0 +1,349 @@ +#include +#include +#include +#include +#include +// jesus fuck i forgot how bad iostream formatting was, give me printf or give me death +#include + +// Device-under-test model generated by CXXRTL: +#include "dut.cpp" +#include + +static const unsigned int MEM_SIZE = 16 * 1024 * 1024; +uint8_t mem[MEM_SIZE]; + +static const unsigned int IO_BASE = 0x80000000; +enum { + IO_PRINT_CHAR = 0, + IO_PRINT_U32 = 4, + IO_EXIT = 8 +}; + +const char *help_str = +"Usage: tb binfile [vcdfile] [--dump start end] [--cycles n]\n" +" binfile : Binary to load into start of memory\n" +" vcdfile : Path to dump waveforms to\n" +" --dump start end : Print out memory contents between start and end (exclusive)\n" +" after execution finishes. Can be passed multiple times.\n" +" --cycles n : Maximum number of cycles to run before exiting.\n" +; + +void exit_help(std::string errtext = "") { + std::cerr << errtext << help_str; + exit(-1); +} + +struct debug_test_proc { + int step; + debug_test_proc() : step(0) {} + bool operator()(cxxrtl_design::p_hazard3__cpu__2port &top) { + switch (step) { + // Request and wait for halt + case 0: + top.p_dbg__req__halt.set(true); + ++step; + break; + case 1: + if (top.p_dbg__halted.get()) { + printf("Processor halted\n"); + top.p_dbg__req__halt.set(false); + ++step; + } + break; + // Load 123 into data0 + case 2: + top.p_dbg__data0__wdata.set(123); + top.p_dbg__data0__wen.set(true); + ++step; + break; + case 3: + top.p_dbg__data0__wen.set(false); + printf("Read DATA0 CSR: %u\n", top.p_dbg__data0__rdata.get()); + ++step; + break; + // Inject csrr a0, data0 + case 4: + top.p_dbg__instr__data__vld.set(true); + top.p_dbg__instr__data.set(0x7b202573u); + printf(">inject: csrr a0, data0\n"); + ++step; + break; + // Inject addi a0, a0, 456 + case 5: + if (top.p_dbg__instr__data__rdy.get()) { + printf(">inject: addi a0, a0, 456\n"); + top.p_dbg__instr__data.set(0x1c850513u); + ++step; + } + break; + // Inject csrw data0, a0 + case 6: + if (top.p_dbg__instr__data__rdy.get()) { + printf(">inject: csrw data0, a0\n"); + top.p_dbg__instr__data.set(0x7b251073u); + ++step; + } + break; + // Inject illegal instruction (just want to see the wire pulse) + case 7: + if (top.p_dbg__instr__data__rdy.get()) { + printf(">inject: 2x illegal 0000\n"); + top.p_dbg__instr__data.set(0); + ++step; + } + break; + case 8: + if (top.p_dbg__instr__data__rdy.get()) { + top.p_dbg__instr__data__vld.set(false); + ++step; + } + break; + case 9: + if (top.p_dbg__instr__caught__exception.get()) { + printf("Core reported exception during debug execution\n"); + ++step; + } + break; + + // Inject ebreak (just want to see the wire pulse) + case 10: + printf(">inject: ebreak\n"); + top.p_dbg__instr__data__vld.set(true); + top.p_dbg__instr__data.set(0x00100073u); + ++step; + break; + case 11: + if (top.p_dbg__instr__data__rdy.get()) { + top.p_dbg__instr__data__vld.set(false); + ++step; + } + break; + case 12: + if (top.p_dbg__instr__caught__ebreak.get()) { + printf("Core reported ebreak during debug execution\n"); + ++step; + } + break; + + // Print new data0 value (should be 123 + 456 == 579) + case 13: + if (top.p_dbg__instr__data__rdy.get()) { + top.p_dbg__instr__data__vld.set(false); + printf("Read DATA0 CSR: %u\n", top.p_dbg__data0__rdata.get()); + ++step; + } + break; + // Assert resume request + case 14: + top.p_dbg__req__resume.set(true); + ++step; + break; + case 15: + top.p_dbg__req__resume.set(false); + ++step; + break; + + // Exit once request is acknowledged + case 16: + if (top.p_dbg__running.get()) { + printf("Processor resumed\n"); + return true; + } + break; + default: + break; + } + return false; + } +}; + +int main(int argc, char **argv) { + + if (argc < 2) + exit_help(); + + bool dump_waves = false; + std::string waves_path; + std::vector> dump_ranges; + int64_t max_cycles = 100000; + + for (int i = 2; i < argc; ++i) { + std::string s(argv[i]); + if (i == 2 && s.rfind("--", 0) != 0) { + // Optional positional argument: vcdfile + dump_waves = true; + waves_path = s; + } + else if (s == "--dump") { + if (argc - i < 3) + exit_help("Option --dump requires 2 arguments\n"); + dump_ranges.push_back(std::pair( + std::stoul(argv[i + 1], 0, 0), + std::stoul(argv[i + 2], 0, 0) + ));; + i += 2; + } + else if (s == "--cycles") { + if (argc - i < 2) + exit_help("Option --cycles requires an argument\n"); + max_cycles = std::stol(argv[i + 1], 0, 0); + i += 1; + } + else { + std::cerr << "Unrecognised argument " << s << "\n"; + exit_help(""); + } + } + +#ifdef DUAL_PORT + cxxrtl_design::p_hazard3__cpu__2port top; +#else + cxxrtl_design::p_hazard3__cpu__1port top; +#endif + + debug_test_proc test_step; + + std::fill(std::begin(mem), std::end(mem), 0); + + std::ifstream fd(argv[1], std::ios::binary | std::ios::ate); + std::streamsize bin_size = fd.tellg(); + if (bin_size > MEM_SIZE) { + std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n"; + return -1; + } + fd.seekg(0, std::ios::beg); + fd.read((char*)mem, bin_size); + + std::ofstream waves_fd; + cxxrtl::vcd_writer vcd; + if (dump_waves) { + waves_fd.open(waves_path); + cxxrtl::debug_items all_debug_items; + top.debug_info(all_debug_items); + vcd.timescale(1, "us"); + vcd.add(all_debug_items); + } + + bool bus_trans = false; + bool bus_write = false; +#ifdef DUAL_PORT + bool bus_trans_i = false; + uint32_t bus_addr_i = 0; +#endif + uint32_t bus_addr = 0; + uint8_t bus_size = 0; + // Never generate bus stalls +#ifdef DUAL_PORT + top.p_i__hready.set(true); + top.p_d__hready.set(true); +#else + top.p_ahblm__hready.set(true); +#endif + + // Reset + initial clock pulse + top.step(); + top.p_clk.set(true); + top.step(); + top.p_clk.set(false); + top.p_rst__n.set(true); + top.step(); + + for (int64_t cycle = 0; cycle < max_cycles; ++cycle) { + top.p_clk.set(false); + top.step(); + if (dump_waves) + vcd.sample(cycle * 2); + top.p_clk.set(true); + top.step(); + // Handle current data phase, then move current address phase to data phase + uint32_t rdata = 0; + if (bus_trans && bus_write) { +#ifdef DUAL_PORT + uint32_t wdata = top.p_d__hwdata.get(); +#else + uint32_t wdata = top.p_ahblm__hwdata.get(); +#endif + if (bus_addr <= MEM_SIZE) { + unsigned int n_bytes = 1u << bus_size; + // Note we are relying on hazard3's byte lane replication + for (unsigned int i = 0; i < n_bytes; ++i) { + mem[bus_addr + i] = wdata >> (8 * i) & 0xffu; + } + } + else if (bus_addr == IO_BASE + IO_PRINT_CHAR) { + putchar(wdata); + } + else if (bus_addr == IO_BASE + IO_PRINT_U32) { + printf("%08x\n", wdata); + } + else if (bus_addr == IO_BASE + IO_EXIT) { + printf("CPU requested halt. Exit code %d\n", wdata); + printf("Ran for %ld cycles\n", cycle + 1); + break; + } + } + else if (bus_trans && !bus_write) { + if (bus_addr <= MEM_SIZE) { + bus_addr &= ~0x3u; + rdata = + (uint32_t)mem[bus_addr] | + mem[bus_addr + 1] << 8 | + mem[bus_addr + 2] << 16 | + mem[bus_addr + 3] << 24; + } + } +#ifdef DUAL_PORT + top.p_d__hrdata.set(rdata); + if (bus_trans_i) { + bus_addr_i &= ~0x3u; + top.p_i__hrdata.set( + (uint32_t)mem[bus_addr_i] | + mem[bus_addr_i + 1] << 8 | + mem[bus_addr_i + 2] << 16 | + mem[bus_addr_i + 3] << 24 + ); + } +#else + top.p_ahblm__hrdata.set(rdata); +#endif + +#ifdef DUAL_PORT + bus_trans = top.p_d__htrans.get() >> 1; + bus_write = top.p_d__hwrite.get(); + bus_size = top.p_d__hsize.get(); + bus_addr = top.p_d__haddr.get(); + bus_trans_i = top.p_i__htrans.get() >> 1; + bus_addr_i = top.p_i__haddr.get(); +#else + bus_trans = top.p_ahblm__htrans.get() >> 1; + bus_write = top.p_ahblm__hwrite.get(); + bus_size = top.p_ahblm__hsize.get(); + bus_addr = top.p_ahblm__haddr.get(); +#endif + + // Goto next in debugger sequence + bool test_done = test_step(top); + + if (dump_waves) { + // The extra step() is just here to get the bus responses to line up nicely + // in the VCD (hopefully is a quick update) + top.step(); + vcd.sample(cycle * 2 + 1); + waves_fd << vcd.buffer; + vcd.buffer.clear(); + } + + if (test_done) + break; + } + + for (auto r : dump_ranges) { + printf("Dumping memory from %08x to %08x:\n", r.first, r.second); + for (int i = 0; i < r.second - r.first; ++i) + printf("%02x%c", mem[r.first + i], i % 16 == 15 ? '\n' : ' '); + printf("\n"); + } + + return 0; +} diff --git a/test/sim/core_debug/test.gtkw b/test/sim/core_debug/test.gtkw new file mode 100644 index 0000000..abd3b36 --- /dev/null +++ b/test/sim/core_debug/test.gtkw @@ -0,0 +1,57 @@ +[*] +[*] GTKWave Analyzer v3.3.103 (w)1999-2019 BSI +[*] Sat Jul 10 19:30:00 2021 +[*] +[dumpfile] "/home/luke/proj/hazard3/test/sim/core_debug/waves.vcd" +[dumpfile_mtime] "Sat Jul 10 19:27:20 2021" +[dumpfile_size] 23617 +[savefile] "/home/luke/proj/hazard3/test/sim/core_debug/test.gtkw" +[timestart] 0 +[size] 1920 1043 +[pos] -1 -1 +*-2.330973 12 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +[treeopen] core. +[sst_width] 233 +[signals_width] 310 +[sst_expanded] 1 +[sst_vpaned_height] 298 +@28 +dbg_req_halt +dbg_req_resume +dbg_halted +dbg_running +@200 +- +@28 +dbg_data0_wen +@22 +dbg_data0_wdata[31:0] +dbg_data0_rdata[31:0] +@200 +- +@22 +dbg_instr_data[31:0] +@28 +dbg_instr_data_vld +dbg_instr_data_rdy +dbg_instr_caught_ebreak +dbg_instr_caught_exception +@200 +- +@22 +core.frontend.cir[31:0] +@28 +core.frontend.cir_vld[1:0] +core.df_cir_use[1:0] +@200 +- +@28 +core.frontend.mem_addr_rdy +core.frontend.mem_addr_vld +core.frontend.mem_data_vld +@29 +core.frontend.fetch_data_vld +@28 +core.frontend.buf_level_next[1:0] +[pattern_trace] 1 +[pattern_trace] 0 diff --git a/test/sim/core_debug/zero.bin b/test/sim/core_debug/zero.bin new file mode 100644 index 0000000..08e7df1 Binary files /dev/null and b/test/sim/core_debug/zero.bin differ diff --git a/test/sim/tb_cxxrtl/Makefile b/test/sim/tb_cxxrtl/Makefile index dd6dfc0..ba1386b 100644 --- a/test/sim/tb_cxxrtl/Makefile +++ b/test/sim/tb_cxxrtl/Makefile @@ -4,6 +4,7 @@ CDEFINES := DUAL_PORT CPU_RESET_VECTOR := 32'hc0 EXTENSION_C := 1 EXTENSION_M := 1 +DEBUG_SUPPORT := 0 MULDIV_UNROLL := 2 MUL_FAST := 1 REDUCED_BYPASS := 0 @@ -15,6 +16,7 @@ all: tb SYNTH_CMD += read_verilog -I ../../../hdl $(shell listfiles ../../../hdl/hazard3.f); SYNTH_CMD += chparam -set EXTENSION_C $(EXTENSION_C) $(TOP); SYNTH_CMD += chparam -set EXTENSION_M $(EXTENSION_M) $(TOP); +SYNTH_CMD += chparam -set DEBUG_SUPPORT $(DEBUG_SUPPORT) $(TOP); SYNTH_CMD += chparam -set CSR_COUNTER 1 $(TOP); SYNTH_CMD += chparam -set RESET_VECTOR $(CPU_RESET_VECTOR) $(TOP); SYNTH_CMD += chparam -set REDUCED_BYPASS $(REDUCED_BYPASS) $(TOP);