diff --git a/test/sim/tb_cxxrtl/multicore.gtkw b/test/sim/tb_cxxrtl/multicore.gtkw new file mode 100644 index 0000000..5953631 --- /dev/null +++ b/test/sim/tb_cxxrtl/multicore.gtkw @@ -0,0 +1,77 @@ +[*] +[*] GTKWave Analyzer v3.3.103 (w)1999-2019 BSI +[*] Wed Dec 15 09:37:16 2021 +[*] +[dumpfile] "/home/luke/proj/hazard3/test/sim/tb_cxxrtl/waves.vcd" +[dumpfile_mtime] "Wed Dec 15 09:24:58 2021" +[dumpfile_size] 9773165 +[savefile] "/home/luke/proj/hazard3/test/sim/tb_cxxrtl/multicore.gtkw" +[timestart] 0 +[size] 2509 1368 +[pos] -1 -1 +*-13.000000 9780 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +[sst_width] 233 +[signals_width] 238 +[sst_expanded] 1 +[sst_vpaned_height] 410 +@200 +-JTAG +@28 +tck +tdi +tdo +tms +@200 +- +@28 +dm.hartsel +@200 +- +-Core 0 debug +@28 +cpu0.dbg_req_halt +cpu0.dbg_req_resume +cpu0.dbg_halted +@200 +- +@28 +cpu0.dbg_instr_data_vld +cpu0.dbg_instr_data_rdy +@22 +cpu0.dbg_instr_data[31:0] +@28 +cpu0.dbg_instr_caught_ebreak +cpu0.dbg_instr_caught_exception +@200 +- +@22 +cpu0.dbg_data0_rdata[31:0] +cpu0.dbg_data0_wdata[31:0] +@28 +cpu0.dbg_data0_wen +@200 +- +-Core 1 debug +@28 +cpu1.dbg_req_halt +cpu1.dbg_req_resume +cpu1.dbg_halted +@200 +- +@28 +cpu1.dbg_instr_data_vld +cpu1.dbg_instr_data_rdy +@22 +cpu1.dbg_instr_data[31:0] +@28 +cpu1.dbg_instr_caught_ebreak +cpu1.dbg_instr_caught_exception +@200 +- +@22 +cpu1.dbg_data0_rdata[31:0] +cpu1.dbg_data0_wdata[31:0] +@28 +cpu1.dbg_data0_wen +[pattern_trace] 1 +[pattern_trace] 0 diff --git a/test/sim/tb_cxxrtl/tb.cpp b/test/sim/tb_cxxrtl/tb.cpp index 83441c4..bff9819 100644 --- a/test/sim/tb_cxxrtl/tb.cpp +++ b/test/sim/tb_cxxrtl/tb.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include @@ -13,8 +12,9 @@ #include "dut.cpp" #include +// ----------------------------------------------------------------------------- + static const unsigned int MEM_SIZE = 16 * 1024 * 1024; -uint8_t mem[MEM_SIZE]; static const unsigned int IO_BASE = 0x80000000; enum { @@ -31,7 +31,143 @@ enum { IO_MTIMECMPH = 0x10c }; -static const int TCP_BUF_SIZE = 256; +struct mem_io_state { + uint64_t mtime; + uint64_t mtimecmp; + + bool exit_req; + uint32_t exit_code; + + uint8_t *mem; + + mem_io_state() { + mtime = 0; + mtimecmp = 0; + exit_req = false; + exit_code = 0; + mem = new uint8_t[MEM_SIZE]; + for (size_t i = 0; i < MEM_SIZE; ++i) + mem[i] = 0; + } + + // Where we're going we don't need a destructor B-) + + void step(cxxrtl_design::p_tb &tb) { + // Default update logic for mtime, mtimecmp + ++mtime; + tb.p_timer__irq.set(mtime >= mtimecmp); + } +}; + +typedef enum { + SIZE_BYTE = 0, + SIZE_HWORD = 1, + SIZE_WORD = 2 +} bus_size_t; + +struct bus_request { + uint32_t addr; + bus_size_t size; + bool write; + bool excl; + uint32_t wdata; + bus_request(): addr(0), size(SIZE_BYTE), write(0), excl(0), wdata(0) {} +}; + +struct bus_response { + uint32_t rdata; + int stall_cycles; + bool err; + bool exokay; + bus_response(): rdata(0), stall_cycles(0), err(false), exokay(true) {} +}; + +bus_response mem_access(cxxrtl_design::p_tb &tb, mem_io_state &memio, bus_request req) { + bus_response resp; + + if (req.write) { + if (req.addr <= MEM_SIZE - 4u) { + unsigned int n_bytes = 1u << (int)req.size; + // Note we are relying on hazard3's byte lane replication + for (unsigned int i = 0; i < n_bytes; ++i) { + memio.mem[req.addr + i] = req.wdata >> (8 * i) & 0xffu; + } + } + else if (req.addr == IO_BASE + IO_PRINT_CHAR) { + putchar(req.wdata); + } + else if (req.addr == IO_BASE + IO_PRINT_U32) { + printf("%08x\n", req.wdata); + } + else if (req.addr == IO_BASE + IO_EXIT) { + if (!memio.exit_req) { + memio.exit_req = true; + memio.exit_code = req.wdata; + } + } + else if (req.addr == IO_BASE + IO_SET_SOFTIRQ) { + tb.p_soft__irq.set(true); + } + else if (req.addr == IO_BASE + IO_CLR_SOFTIRQ) { + tb.p_soft__irq.set(false); + } + else if (req.addr == IO_BASE + IO_SET_IRQ) { + tb.p_irq.set(tb.p_irq.get() | req.wdata); + } + else if (req.addr == IO_BASE + IO_CLR_IRQ) { + tb.p_irq.set(tb.p_irq.get() & ~req.wdata); + } + else if (req.addr == IO_BASE + IO_MTIME) { + memio.mtime = (memio.mtime & 0xffffffff00000000u) | req.wdata; + } + else if (req.addr == IO_BASE + IO_MTIMEH) { + memio.mtime = (memio.mtime & 0x00000000ffffffffu) | ((uint64_t)req.wdata << 32); + } + else if (req.addr == IO_BASE + IO_MTIMECMP) { + memio.mtimecmp = (memio.mtimecmp & 0xffffffff00000000u) | req.wdata; + } + else if (req.addr == IO_BASE + IO_MTIMECMPH) { + memio.mtimecmp = (memio.mtimecmp & 0x00000000ffffffffu) | ((uint64_t)req.wdata << 32); + } + else { + resp.err = true; + } + } + else { + if (req.addr <= MEM_SIZE - (1u << (int)req.size)) { + req.addr &= ~0x3u; + resp.rdata = + (uint32_t)memio.mem[req.addr] | + memio.mem[req.addr + 1] << 8 | + memio.mem[req.addr + 2] << 16 | + memio.mem[req.addr + 3] << 24; + } + else if (req.addr == IO_BASE + IO_SET_SOFTIRQ || req.addr == IO_BASE + IO_CLR_SOFTIRQ) { + resp.rdata = tb.p_soft__irq.get(); + } + else if (req.addr == IO_BASE + IO_SET_IRQ || req.addr == IO_BASE + IO_CLR_IRQ) { + resp.rdata = tb.p_irq.get(); + } + else if (req.addr == IO_BASE + IO_MTIME) { + resp.rdata = memio.mtime; + } + else if (req.addr == IO_BASE + IO_MTIMEH) { + resp.rdata = memio.mtime >> 32; + } + else if (req.addr == IO_BASE + IO_MTIMECMP) { + resp.rdata = memio.mtimecmp; + } + else if (req.addr == IO_BASE + IO_MTIMECMPH) { + resp.rdata = memio.mtimecmp >> 32; + } + else { + resp.err = true; + } + } + return resp; +} + +// ----------------------------------------------------------------------------- const char *help_str = "Usage: tb [--bin x.bin] [--vcd x.vcd] [--dump start end] [--cycles n] [--port n]\n" @@ -50,6 +186,8 @@ void exit_help(std::string errtext = "") { exit(-1); } +static const int TCP_BUF_SIZE = 256; + int main(int argc, char **argv) { bool load_bin = false; @@ -109,22 +247,6 @@ int main(int argc, char **argv) { if (!(load_bin || port != 0)) exit_help("At least one of --bin or --port must be specified.\n"); - std::fill(std::begin(mem), std::end(mem), 0); - if (load_bin) { - std::ifstream fd(bin_path, std::ios::binary | std::ios::ate); - if (!fd){ - std::cerr << "Failed to open \"" << bin_path << "\"\n"; - return -1; - } - std::streamsize bin_size = fd.tellg(); - if (bin_size > MEM_SIZE) { - std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n"; - return -1; - } - fd.seekg(0, std::ios::beg); - fd.read((char*)mem, bin_size); - } - int server_fd, sock_fd; struct sockaddr_in sock_addr; int sock_opt = 1; @@ -170,6 +292,23 @@ int main(int argc, char **argv) { printf("Connected\n"); } + mem_io_state memio; + + if (load_bin) { + std::ifstream fd(bin_path, std::ios::binary | std::ios::ate); + if (!fd){ + std::cerr << "Failed to open \"" << bin_path << "\"\n"; + return -1; + } + std::streamsize bin_size = fd.tellg(); + if (bin_size > MEM_SIZE) { + std::cerr << "Binary file (" << bin_size << " bytes) is larger than memory (" << MEM_SIZE << " bytes)\n"; + return -1; + } + fd.seekg(0, std::ios::beg); + fd.read((char*)memio.mem, bin_size); + } + cxxrtl_design::p_tb top; std::ofstream waves_fd; @@ -182,21 +321,18 @@ int main(int argc, char **argv) { vcd.add(all_debug_items); } - bool bus_trans = false; - bool bus_write = false; - bool bus_trans_i = false; - uint32_t bus_addr_i = 0; - uint32_t bus_addr = 0; - uint8_t bus_size = 0; - // Never generate bus stalls + // Loop-carried address-phase requests + bus_request req_i; + bus_request req_d; + bool req_i_vld = false; + bool req_d_vld = false; + + // Set bus interfaces to generate good IDLE responses at first top.p_i__hready.set(true); top.p_d__hready.set(true); - top.p_d__hexokay.set(true); - - uint64_t mtime = 0; - uint64_t mtimecmp = 0; // Reset + initial clock pulse + top.step(); top.p_clk.set(true); top.p_tck.set(true); @@ -274,107 +410,38 @@ int main(int argc, char **argv) { } } - // Default update logic for mtime, mtimecmp - ++mtime; - top.p_timer__irq.set(mtime >= mtimecmp); + memio.step(top); + + // The two bus ports are handled identically. This enables swapping out of + // various `tb.v` hardware integration files containing: + // + // - A single, dual-ported processor (instruction fetch, load/store ports) + // - A single, single-ported processor (instruction fetch + load/store muxed internally) + // - A pair of single-ported processors, for dual-core debug tests if (top.p_d__hready.get()) { // Clear bus error by default top.p_d__hresp.set(false); + // Handle current data phase - uint32_t rdata = 0; - bool bus_err = false; - if (bus_trans && bus_write) { - uint32_t wdata = top.p_d__hwdata.get(); - if (bus_addr <= MEM_SIZE - 4u) { - unsigned int n_bytes = 1u << bus_size; - // Note we are relying on hazard3's byte lane replication - for (unsigned int i = 0; i < n_bytes; ++i) { - mem[bus_addr + i] = wdata >> (8 * i) & 0xffu; - } - } - else if (bus_addr == IO_BASE + IO_PRINT_CHAR) { - putchar(wdata); - } - else if (bus_addr == IO_BASE + IO_PRINT_U32) { - printf("%08x\n", wdata); - } - else if (bus_addr == IO_BASE + IO_EXIT) { - printf("CPU requested halt. Exit code %d\n", wdata); - printf("Ran for %ld cycles\n", cycle + 1); - break; - } - else if (bus_addr == IO_BASE + IO_SET_SOFTIRQ) { - top.p_soft__irq.set(true); - } - else if (bus_addr == IO_BASE + IO_CLR_SOFTIRQ) { - top.p_soft__irq.set(false); - } - else if (bus_addr == IO_BASE + IO_SET_IRQ) { - top.p_irq.set(top.p_irq.get() | wdata); - } - else if (bus_addr == IO_BASE + IO_CLR_IRQ) { - top.p_irq.set(top.p_irq.get() & ~wdata); - } - else if (bus_addr == IO_BASE + IO_MTIME) { - mtime = (mtime & 0xffffffff00000000u) | wdata; - } - else if (bus_addr == IO_BASE + IO_MTIMEH) { - mtime = (mtime & 0x00000000ffffffffu) | ((uint64_t)wdata << 32); - } - else if (bus_addr == IO_BASE + IO_MTIMECMP) { - mtimecmp = (mtimecmp & 0xffffffff00000000u) | wdata; - } - else if (bus_addr == IO_BASE + IO_MTIMECMPH) { - mtimecmp = (mtimecmp & 0x00000000ffffffffu) | ((uint64_t)wdata << 32); - } - else { - bus_err = true; - } - } - else if (bus_trans && !bus_write) { - if (bus_addr <= MEM_SIZE - (1u << bus_size)) { - bus_addr &= ~0x3u; - rdata = - (uint32_t)mem[bus_addr] | - mem[bus_addr + 1] << 8 | - mem[bus_addr + 2] << 16 | - mem[bus_addr + 3] << 24; - } - else if (bus_addr == IO_BASE + IO_SET_SOFTIRQ || bus_addr == IO_BASE + IO_CLR_SOFTIRQ) { - rdata = top.p_soft__irq.get(); - } - else if (bus_addr == IO_BASE + IO_SET_IRQ || bus_addr == IO_BASE + IO_CLR_IRQ) { - rdata = top.p_irq.get(); - } - else if (bus_addr == IO_BASE + IO_MTIME) { - rdata = mtime; - } - else if (bus_addr == IO_BASE + IO_MTIMEH) { - rdata = mtime >> 32; - } - else if (bus_addr == IO_BASE + IO_MTIMECMP) { - rdata = mtimecmp; - } - else if (bus_addr == IO_BASE + IO_MTIMECMPH) { - rdata = mtimecmp >> 32; - } - else { - bus_err = true; - } - } - if (bus_err) { + req_d.wdata = top.p_d__hwdata.get(); + bus_response resp; + if (req_d_vld) + resp = mem_access(top, memio, req_d); + if (resp.err) { // Phase 1 of error response top.p_d__hready.set(false); top.p_d__hresp.set(true); } - top.p_d__hrdata.set(rdata); + top.p_d__hrdata.set(resp.rdata); + top.p_d__hexokay.set(resp.exokay); // Progress current address phase to data phase - bus_trans = top.p_d__htrans.get() >> 1; - bus_write = top.p_d__hwrite.get(); - bus_size = top.p_d__hsize.get(); - bus_addr = top.p_d__haddr.get(); + req_d_vld = top.p_d__htrans.get() >> 1; + req_d.write = top.p_d__hwrite.get(); + req_d.size = (bus_size_t)top.p_d__hsize.get(); + req_d.addr = top.p_d__haddr.get(); + req_d.excl = top.p_d__hexcl.get(); } else { // hready=0. Currently this only happens when we're in the first @@ -382,27 +449,32 @@ int main(int argc, char **argv) { top.p_d__hready.set(true); } + if (top.p_i__hready.get()) { top.p_i__hresp.set(false); - if (bus_trans_i) { - bus_addr_i &= ~0x3u; - if (bus_addr_i < MEM_SIZE) { - top.p_i__hrdata.set( - (uint32_t)mem[bus_addr_i] | - mem[bus_addr_i + 1] << 8 | - mem[bus_addr_i + 2] << 16 | - mem[bus_addr_i + 3] << 24 - ); - } - else { - top.p_i__hready.set(false); - top.p_i__hresp.set(true); - } + + req_i.wdata = top.p_i__hwdata.get(); + bus_response resp; + if (req_i_vld) + resp = mem_access(top, memio, req_i); + if (resp.err) { + // Phase 1 of error response + top.p_i__hready.set(false); + top.p_i__hresp.set(true); } - bus_trans_i = top.p_i__htrans.get() >> 1; - bus_addr_i = top.p_i__haddr.get(); + top.p_i__hrdata.set(resp.rdata); + top.p_i__hexokay.set(resp.exokay); + + // Progress current address phase to data phase + req_i_vld = top.p_i__htrans.get() >> 1; + req_i.write = top.p_i__hwrite.get(); + req_i.size = (bus_size_t)top.p_i__hsize.get(); + req_i.addr = top.p_i__haddr.get(); + req_i.excl = top.p_i__hexcl.get(); } else { + // hready=0. Currently this only happens when we're in the first + // phase of an error response, so go to phase 2. top.p_i__hready.set(true); } @@ -415,6 +487,11 @@ int main(int argc, char **argv) { vcd.buffer.clear(); } + if (memio.exit_req) { + printf("CPU requested halt. Exit code %d\n", memio.exit_code); + printf("Ran for %ld cycles\n", cycle + 1); + break; + } if (cycle + 1 == max_cycles) printf("Max cycles reached\n"); if (got_exit_cmd) @@ -426,7 +503,7 @@ int main(int argc, char **argv) { for (auto r : dump_ranges) { printf("Dumping memory from %08x to %08x:\n", r.first, r.second); for (int i = 0; i < r.second - r.first; ++i) - printf("%02x%c", mem[r.first + i], i % 16 == 15 ? '\n' : ' '); + printf("%02x%c", memio.mem[r.first + i], i % 16 == 15 ? '\n' : ' '); printf("\n"); } diff --git a/test/sim/tb_cxxrtl/tb.v b/test/sim/tb_cxxrtl/tb.v index ad0041f..c50af30 100644 --- a/test/sim/tb_cxxrtl/tb.v +++ b/test/sim/tb_cxxrtl/tb.v @@ -21,12 +21,14 @@ module tb #( output wire [W_ADDR-1:0] i_haddr, output wire i_hwrite, output wire [1:0] i_htrans, + output wire i_hexcl, output wire [2:0] i_hsize, output wire [2:0] i_hburst, output wire [3:0] i_hprot, output wire i_hmastlock, input wire i_hready, input wire i_hresp, + input wire i_hexokay, output wire [W_DATA-1:0] i_hwdata, input wire [W_DATA-1:0] i_hrdata, @@ -233,4 +235,6 @@ hazard3_cpu_2port #( .timer_irq (timer_irq) ); +assign i_hexcl = 1'b0; + endmodule