From e68d8a6cd69958df072ade9248f53cc0a1ed3b5f Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Mon, 13 Jun 2022 01:23:32 +0100 Subject: [PATCH] Fix two frontend bugs: possibility for fetch to be blocked at CIR whilst also not going to FIFO (fixed by making those signals the complement of each other) and typo in the shift value for shifting into a CIR with 32 bits of contents, which is only reachable via a CIR-locked branch to an unaligned address. --- doc/sections/csr.adoc | 34 +++++++++++++++++ example_soc/soc/example_soc.v | 37 +++++++++++-------- example_soc/soc/soc.f | 2 - hdl/hazard3_decode.v | 19 ++++------ hdl/hazard3_frontend.v | 19 ++++------ test/formal/frontend_fetch_match/tb.v | 9 +++-- .../riscv-formal/tb/hazard3_rvfi_monitor.vh | 12 ++++-- 7 files changed, 85 insertions(+), 47 deletions(-) diff --git a/doc/sections/csr.adoc b/doc/sections/csr.adoc index 9e50700..077c6c1 100644 --- a/doc/sections/csr.adoc +++ b/doc/sections/csr.adoc @@ -466,3 +466,37 @@ Lowest external interrupt. Contains the index of the lowest-numbered external in | 6:2 | - | Index of the lowest-numbered active external interrupt. A LSB-first priority encode of `meip0 & meie0`. Zero when no external interrupts are both pending and enabled. | 1:0 | - | RES0 |=== + +==== msleep + +Address: `0xbf0` + +M-mode sleep control register. + +[cols="10h,20h,~", options="header"] +|=== +| Bits | Name | Description +| 31:3 | - | RES0 +| 2 | `deepsleep` | Deassert the clock request signal when entering the block or WFI state, and wait for clock acknowledge to reassert before leaving the block or WFI state. +| 1 | `block` | Write 1 to enter a WFI sleep state until either an unblock signal is received, or an interrupt is asserted that would cause a WFI to exit. Clears automatically when leaving the blocked state. + +If an unblock signal has been received since the last time `block` was written to 1, the write is ignored. In other words, the blocked state falls through immediately in this case. + +An unblock signal is received when another hart writes 1 to its `unblock` register, or for some other platform-specified reason. +| 0 | `unblock` | Write 1 to post an unblock signal to other harts in the system. Always reads back as 0. +|=== + + +==== sleep + +Address: `0x8f0` + +U-mode sleep control register. A subset of the fields in `msleep`. If `mstatus.tw` is 1, then attempting to access this register from U-mode causes an illegal opcode trap. + +[cols="10h,20h,~", options="header"] +|=== +| Bits | Name | Description +| 31:2 | - | RES0 +| 1 | `block` | U-mode access to the `msleep.block` bit +| 0 | `unblock` | U-mode access to the `msleep.unblock` bit +|=== diff --git a/example_soc/soc/example_soc.v b/example_soc/soc/example_soc.v index 72d5814..a294612 100644 --- a/example_soc/soc/example_soc.v +++ b/example_soc/soc/example_soc.v @@ -484,25 +484,30 @@ uart_mini uart_u ( .dreq (/* unused */) ); -hazard3_riscv_timer timer_u ( - .clk (clk), - .rst_n (rst_n), +// hazard3_riscv_timer timer_u ( +// .clk (clk), +// .rst_n (rst_n), - .psel (timer_psel), - .penable (timer_penable), - .pwrite (timer_pwrite), - .paddr (timer_paddr), - .pwdata (timer_pwdata), - .prdata (timer_prdata), - .pready (timer_pready), - .pslverr (timer_pslverr), +// .psel (timer_psel), +// .penable (timer_penable), +// .pwrite (timer_pwrite), +// .paddr (timer_paddr), +// .pwdata (timer_pwdata), +// .prdata (timer_prdata), +// .pready (timer_pready), +// .pslverr (timer_pslverr), - .dbg_halt (&hart_halted), +// .dbg_halt (&hart_halted), - // Tie high for 64-cycle timebase: - .tick (1'b1), +// // Tie high for 64-cycle timebase: +// .tick (1'b1), - .timer_irq (timer_irq) -); +// .timer_irq (timer_irq) +// ); + +assign timer_pslverr = 1'b0; +assign timer_pready = 1'b1; +assign timer_prdata = 32'h0; +assign timer_irq = 1'b0; endmodule diff --git a/example_soc/soc/soc.f b/example_soc/soc/soc.f index 6b108c3..8c4ab70 100644 --- a/example_soc/soc/soc.f +++ b/example_soc/soc/soc.f @@ -8,8 +8,6 @@ list $HDL/hazard3.f list $HDL/debug/dtm/hazard3_jtag_dtm.f list $HDL/debug/dm/hazard3_dm.f -list $HDL/peri/hazard3_riscv_timer.f - # Generic SoC components from libfpga file ../libfpga/common/reset_sync.v diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 0c76afb..de6e4e9 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -80,7 +80,6 @@ wire [31:0] d_imm_b = {{20{d_instr[31]}}, d_instr[7], d_instr[30:25], d_instr[11 wire [31:0] d_imm_u = {d_instr[31:12], {12{1'b0}}}; wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[30:21], 1'b0}; - // ---------------------------------------------------------------------------- // PC/CIR control @@ -98,16 +97,14 @@ assign df_cir_use = d_starved || d_stall ? 2'h0 : d_instr_is_32bit ? 2'h2 : 2'h1; -// CIR Locking is required if we successfully assert a jump request, but decode is stalled. -// (This only happens if decode stall is caused by X stall, not if fetch is starved!) -// The reason for this is that, if the CIR is not locked in, it can be trashed by -// incoming fetch data before the roadblock clears ahead of us, which will squash any other -// side effects this instruction may have besides jumping! This includes: -// - Linking for JAL -// - Mispredict recovery for branches -// Note that it is not possible to simply gate the jump request based on X stalling, -// because X stall is a function of hready, and jump request feeds haddr htrans etc. - +// CIR Locking is required if we successfully assert a jump request, but +// decode is stalled. It is not possible to gate the jump request if the +// stall depends on bus stall (as this would create a through-path from bus +// stall to bus request) so instead we instruct the frontent to preserve the +// stalled instruction when flushing, and fill in behind it. +// +// Once the stall clears, the stalled instruction can execute its remaining +// side effects e.g. writing a link value to the register file. wire jump_caused_by_d = f_jump_now && x_jump_not_except; wire assert_cir_lock = jump_caused_by_d && d_stall; wire deassert_cir_lock = !d_stall; diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 99e9cfb..ec18ad0 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -159,9 +159,9 @@ wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_h // Debugger only injects instructions when the frontend is at rest and empty. assign dbg_instr_data_rdy = DEBUG_SUPPORT && !fifo_valid[0] && ~|ctr_flush_pending; -wire cir_must_refill; +wire cir_room_for_fetch; // If fetch data is forwarded past the FIFO, ensure it is not also written to it. -assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_must_refill && fifo_empty) +assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_room_for_fetch && fifo_empty) && !(DEBUG_SUPPORT && debug_mode); always @ (posedge clk or negedge rst_n) begin @@ -306,12 +306,12 @@ wire [1:0] level_next_no_fetch = buf_level - cir_use; // Overlay fresh fetch data onto the shifted/recycled instruction data // Again, if something won't be looked at, generate cheapest possible garbage. -wire instr_fetch_overlay_blocked = level_next_no_fetch[1] && (~|EXTENSION_C || &fetch_data_hwvld); - +assign cir_room_for_fetch = level_next_no_fetch <= (|EXTENSION_C && ~&fetch_data_hwvld ? 2'h2 : 2'h1); +assign fifo_pop = cir_room_for_fetch && !fifo_empty; wire [3*W_BUNDLE-1:0] instr_data_plus_fetch = - instr_fetch_overlay_blocked ? instr_data_shifted : - level_next_no_fetch[1] && |EXTENSION_C ? {fetch_data_aligned[0 +: W_BUNDLE], instr_data_shifted} : + !cir_room_for_fetch ? instr_data_shifted : + level_next_no_fetch[1] && |EXTENSION_C ? {fetch_data_aligned[0 +: W_BUNDLE], instr_data_shifted[0 +: 2 * W_BUNDLE]} : level_next_no_fetch[0] && |EXTENSION_C ? {fetch_data_aligned, instr_data_shifted[0 +: W_BUNDLE]} : {instr_data_shifted[2 * W_BUNDLE +: W_BUNDLE], fetch_data_aligned}; @@ -327,15 +327,12 @@ wire [2:0] cir_bus_err_shifted = cir_use[0] && EXTENSION_C ? cir_bus_err >> 1 : cir_bus_err; wire [2:0] cir_bus_err_plus_fetch = - instr_fetch_overlay_blocked ? cir_bus_err_shifted : + !cir_room_for_fetch ? cir_bus_err_shifted : level_next_no_fetch[1] && |EXTENSION_C ? {fetch_bus_err, cir_bus_err_shifted[1:0]} : level_next_no_fetch[0] && |EXTENSION_C ? {{2{fetch_bus_err}}, cir_bus_err_shifted[0]} : {cir_bus_err_shifted[2], {2{fetch_bus_err}}}; -assign cir_must_refill = !level_next_no_fetch[1]; -assign fifo_pop = cir_must_refill && !fifo_empty; - -wire [1:0] fetch_fill_amount = cir_must_refill && fetch_data_vld ? ( +wire [1:0] fetch_fill_amount = cir_room_for_fetch && fetch_data_vld ? ( &fetch_data_hwvld ? 2'h2 : 2'h1 ) : 2'h0; diff --git a/test/formal/frontend_fetch_match/tb.v b/test/formal/frontend_fetch_match/tb.v index c6ee61a..cbfc9d2 100644 --- a/test/formal/frontend_fetch_match/tb.v +++ b/test/formal/frontend_fetch_match/tb.v @@ -41,7 +41,7 @@ always @ (posedge clk) (*keep*) wire [1:0] cir_vld; (*keep*) wire [1:0] cir_use; (*keep*) wire [1:0] cir_err; -(*keep*) reg cir_lock; +(*keep*) reg cir_flush_behind; (*keep*) wire [4:0] predecode_rs1_coarse; (*keep*) wire [4:0] predecode_rs2_coarse; @@ -78,7 +78,7 @@ hazard3_frontend #( .cir_vld (cir_vld), .cir_use (cir_use), .cir_err (cir_err), - .cir_lock (cir_lock), + .cir_flush_behind (cir_flush_behind), .predecode_rs1_coarse (predecode_rs1_coarse), .predecode_rs2_coarse (predecode_rs2_coarse), @@ -120,7 +120,7 @@ always @ (posedge clk or negedge rst_n) begin end end -assign cir_lock = 1'b0; // TODO +assign cir_flush_behind = 1'b0; // TODO assign debug_mode = 1'b0; assign dbg_instr_data_vld = 1'b0; @@ -134,12 +134,13 @@ assign jump_target[0] = 1'b0; // assert on it inside the frontend. always @ (posedge clk) assume(!(jump_target_vld && !$past(rst_n))); - // ---------------------------------------------------------------------------- // Properties reg [31:0] pc; +always if (!EXTENSION_C) assume(!pc[1]); + always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin pc <= RESET_VECTOR; diff --git a/test/formal/riscv-formal/tb/hazard3_rvfi_monitor.vh b/test/formal/riscv-formal/tb/hazard3_rvfi_monitor.vh index 3878fc5..019d419 100644 --- a/test/formal/riscv-formal/tb/hazard3_rvfi_monitor.vh +++ b/test/formal/riscv-formal/tb/hazard3_rvfi_monitor.vh @@ -136,8 +136,14 @@ always @ (posedge clk or negedge rst_n) begin rvfm_xm_rdata1 <= 32'h0; rvfm_xm_rdata2 <= 32'h0; end else if (!x_stall) begin - rvfm_xm_rdata1 <= x_rs1_bypass; - rvfm_xm_rdata2 <= x_rs2_bypass; + // rs*_bypass may have garbage on them for instructions with *no* + // register operands (due to some interesting optimisations), though + // d_rs* is still driven to 0 to disable stalling on that register + // lane. riscv-formal still likes to see zeroes from x0, so fix that + // up here. This shouldn't cover up any bugs, since a + // register-operand instruction would still *use* the garbage value. + rvfm_xm_rdata1 <= |d_rs1 ? x_rs1_bypass : 32'h0; + rvfm_xm_rdata2 <= |d_rs2 ? x_rs2_bypass : 32'h0; end end @@ -161,7 +167,7 @@ always @ (posedge clk or negedge rst_n) begin rvfi_rs1_addr_r <= m_stall ? 5'h0 : xm_rs1; rvfi_rs2_addr_r <= m_stall ? 5'h0 : xm_rs2; rvfi_rs1_rdata_r <= rvfm_xm_rdata1; - rvfi_rs2_rdata_r <= rvfm_xm_rdata2; + rvfi_rs2_rdata_r <= xm_rs2 == mw_rd && |xm_rs2 ? m_wdata : rvfm_xm_rdata2; end end