From f8aad6d2f394f6ffec7069928b50e69ce6abd5ef Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Wed, 15 Jun 2022 04:05:31 +0100 Subject: [PATCH] Fix some bugs, too tired to list them, look at the diff --- hdl/hazard3_core.v | 7 +- hdl/hazard3_decode.v | 39 ++++++----- hdl/hazard3_frontend.v | 66 ++++++++++++------- .../hazard3_formal_regression.vh | 7 +- 4 files changed, 80 insertions(+), 39 deletions(-) diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index e9bb2f1..01c78a7 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -108,6 +108,8 @@ wire [W_ADDR-1:0] x_btb_set_src_addr; wire [W_ADDR-1:0] x_btb_set_target_addr; wire x_btb_clear; +wire [W_ADDR-1:0] d_btb_target_addr; + assign bus_aph_panic_i = 1'b0; wire f_mem_size; @@ -138,6 +140,7 @@ hazard3_frontend #( .btb_set_src_addr (x_btb_set_src_addr), .btb_set_target_addr (x_btb_set_target_addr), .btb_clear (x_btb_clear), + .btb_target_addr_out (d_btb_target_addr), .cir (fd_cir), .cir_err (fd_cir_err), @@ -226,6 +229,7 @@ hazard3_decode #( .x_stall (x_stall), .f_jump_now (f_jump_now), .f_jump_target (f_jump_target), + .d_btb_target_addr (d_btb_target_addr), .d_imm (d_imm), .d_rs1 (d_rs1), @@ -738,7 +742,8 @@ wire x_jump_req_unchecked = !x_stall_on_raw && ( assign x_jump_req = x_jump_req_unchecked && !x_jump_misaligned && !x_exec_pmp_fail; assign x_btb_set = |BRANCH_PREDICTOR && ( - x_jump_req_unchecked && d_addr_offs[W_ADDR - 1] && !x_branch_was_predicted + x_jump_req_unchecked && d_addr_offs[W_ADDR - 1] && !x_branch_was_predicted && + (d_branchcond == BCOND_NZERO || d_branchcond == BCOND_ZERO) ); assign x_btb_clear = d_fence_i || (m_trap_enter_vld && m_trap_enter_rdy) || (|BRANCH_PREDICTOR && ( diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 1a1365e..9fe7a90 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -30,6 +30,7 @@ module hazard3_decode #( input wire f_jump_now, input wire [W_ADDR-1:0] f_jump_target, input wire x_jump_not_except, + input wire [W_ADDR-1:0] d_btb_target_addr, output reg [W_DATA-1:0] d_imm, output reg [W_REGADDR-1:0] d_rs1, @@ -123,31 +124,39 @@ always @ (posedge clk or negedge rst_n) begin end end -reg [W_ADDR-1:0] pc; -wire [W_ADDR-1:0] pc_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2); +reg [W_ADDR-1:0] pc; +wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2); assign d_pc = pc; +wire predicted_branch = fd_cir_predbranch[d_instr_is_32bit] && |BRANCH_PREDICTOR; + always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin pc <= RESET_VECTOR; end else begin if ((f_jump_now && !assert_cir_lock) || (cir_lock_prev && deassert_cir_lock)) begin pc <= f_jump_target; -`ifdef FORMAL - // Being cheeky above to save a 32 bit mux. Check that we never get an M target by mistake. - - // FIXME disabled this for now -- we do sometimes see an exception taking - // place during the stall, which then leads to a different branch target - // appearing. (i.e. f_jump_now is asserted for two cycles, the first one - // from this instruction and the second from the exception; this is ok, - // because the exception will return to this branch when done.) - - // if (cir_lock_prev && deassert_cir_lock) - // assert(f_jump_target == d_jump_target); -`endif end else if (!d_stall && !cir_lock) begin - pc <= pc_next; + // If this instruction is a predicted-taken branch (and has not + // generated a mispredict recovery jump) then set PC to the + // prediction target instead of the sequentially next PC + pc <= predicted_branch ? d_btb_target_addr : pc_seq_next; end + // If the current instruction is marked as being a predicted-taken + // branch, it *must* be a valid branch instruction, because executing + // the branch is how we recover from misprediction +`ifdef FORMAL + // This can be defeated if you branch backward halfway into a 32-bit instruction that immediately precedes the branch. + if (predicted_branch && !d_starved && !debug_mode) begin + assert(!d_invalid); + assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO); + end + if (&fd_cir_predbranch && !d_instr_is_32bit) begin + // Only way to get two back-to-back predicted-taken instructions + // is if they are the same instruction + assert(d_btb_target_addr == pc); + end +`endif end end diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 26652a0..51b1e20 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -43,6 +43,7 @@ module hazard3_frontend #( input wire [W_ADDR-1:0] btb_set_src_addr, input wire [W_ADDR-1:0] btb_set_target_addr, input wire btb_clear, + output wire [W_ADDR-1:0] btb_target_addr_out, // Interface to Decode // Note reg/wire distinction @@ -181,12 +182,14 @@ if (BRANCH_PREDICTOR) begin: have_btb btb_src_addr <= {W_ADDR{1'b0}}; btb_target_addr <= {W_ADDR{1'b0}}; btb_valid <= 1'b0; + end else if (btb_clear) begin + // Clear takes precedences over set. E.g. if a taken branch is in + // stage 2 and an exception is in stage 3, we must clear the BTB. + btb_valid <= 1'b0; end else if (btb_set) begin btb_src_addr <= btb_set_src_addr; btb_target_addr <= btb_set_target_addr; btb_valid <= 1'b1; - end else if (btb_clear) begin - btb_valid <= 1'b0; end end end else begin: no_btb @@ -198,6 +201,17 @@ end else begin: no_btb end endgenerate +// Decode uses the target address to set the PC to the correct branch target +// value following a predicted-taken branch (as normally it would update PC +// by following an X jump request, and in this case there is none). +// +// Note this assumes the BTB target has not changed by the time the predicted +// branch arrives at decode! This is always true because the only way for the +// target address to change is when an older branch is taken, which would +// flush the younger predicted-taken branch before it reaches decode. + +assign btb_target_addr_out = btb_target_addr; + // ---------------------------------------------------------------------------- // Fetch request generation @@ -330,25 +344,30 @@ always @ (posedge clk or negedge rst_n) begin mem_data_hwvld <= 2'b11; mem_aph_hwvld <= 2'b11; mem_data_predbranch <= 1'b0; - end else if (EXTENSION_C) begin + end else begin if (jump_now) begin - if (mem_addr_rdy) begin - mem_data_hwvld <= {1'b1, !jump_target[1]}; - end else begin - mem_aph_hwvld <= {1'b1, !jump_target[1]}; + if (|EXTENSION_C) begin + if (mem_addr_rdy) begin + mem_data_hwvld <= {1'b1, !jump_target[1]}; + end else begin + mem_aph_hwvld <= {1'b1, !jump_target[1]}; + end end + mem_data_predbranch <= 1'b0; end else if (mem_addr_vld && mem_addr_rdy) begin - // If a predicted-taken branch instruction only spans the first - // half of a word, need to flag the second half as invalid. - mem_data_hwvld <= mem_aph_hwvld & { - !(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]), - 1'b1 - }; - // Also need to take the alignment of the destination into account. - mem_aph_hwvld <= { - 1'b1, - !(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1]) - }; + if (|EXTENSION_C) begin + // If a predicted-taken branch instruction only spans the first + // half of a word, need to flag the second half as invalid. + mem_data_hwvld <= mem_aph_hwvld & { + !(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]), + 1'b1 + }; + // Also need to take the alignment of the destination into account. + mem_aph_hwvld <= { + 1'b1, + !(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1]) + }; + end mem_data_predbranch <= |BRANCH_PREDICTOR && btb_match_now; end end @@ -397,7 +416,6 @@ wire [3*W_BUNDLE-1:0] instr_data_plus_fetch = // may be buffered in the prefetch queue. wire fetch_bus_err = fifo_empty ? mem_data_err : fifo_err[0]; -wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0]; reg [2:0] cir_bus_err; wire [2:0] cir_bus_err_shifted = @@ -413,6 +431,10 @@ wire [2:0] cir_bus_err_plus_fetch = // And the same thing again for whether CIR contains a predicted-taken branch. // One day I should clean up this copy/paste. +wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0]; +// We mark only the last halfword of the branch instruction as being predicted. +wire [1:0] fetch_predbranch_hw = &fetch_data_hwvld ? {fetch_predbranch, 1'b0} : {1'b0, fetch_predbranch}; + reg [2:0] cir_predbranch_reg; wire [2:0] cir_predbranch_shifted = cir_use[1] ? cir_predbranch_reg >> 2 : @@ -420,9 +442,9 @@ wire [2:0] cir_predbranch_shifted = wire [2:0] cir_predbranch_plus_fetch = !cir_room_for_fetch ? cir_predbranch_shifted : - level_next_no_fetch[1] && |EXTENSION_C ? {fetch_predbranch, cir_predbranch_shifted[1:0]} : - level_next_no_fetch[0] && |EXTENSION_C ? {{2{fetch_predbranch}}, cir_predbranch_shifted[0]} : - {cir_predbranch_shifted[2], {2{fetch_predbranch}}}; + level_next_no_fetch[1] && |EXTENSION_C ? {fetch_predbranch_hw[0], cir_predbranch_shifted[1:0]} : + level_next_no_fetch[0] && |EXTENSION_C ? {fetch_predbranch_hw, cir_predbranch_shifted[0]} : + {cir_predbranch_shifted[2], fetch_predbranch_hw}; wire [1:0] fetch_fill_amount = cir_room_for_fetch && fetch_data_vld ? ( &fetch_data_hwvld ? 2'h2 : 2'h1 diff --git a/test/formal/instruction_fetch_match/hazard3_formal_regression.vh b/test/formal/instruction_fetch_match/hazard3_formal_regression.vh index e239a81..999d7b4 100644 --- a/test/formal/instruction_fetch_match/hazard3_formal_regression.vh +++ b/test/formal/instruction_fetch_match/hazard3_formal_regression.vh @@ -59,7 +59,12 @@ assign expect_cir[31:16] = instr_mem[(d_pc + 2) / 4] >> (d_pc[1] ? 0 : 16); // Note we can get a mismatch in the upper halfword during a CIR lock of a // jump in the lower halfword -- the fetch will tumble into the top and this // is fine as long as we correctly update the PC when the lock clears. -wire allow_upper_half_mismatch = fd_cir[15:0] == expect_cir[15:0] && fd_cir[1:0] != 2'b11; + +// Note also following a predicted branch the upper half of CIR may be +// nonsequential, so we can't check it. +wire allow_upper_half_mismatch = fd_cir[15:0] == expect_cir[15:0] && ( + fd_cir[1:0] != 2'b11 || fd_cir_predbranch[0] +); always @ (posedge clk) if (rst_n) begin if (fd_cir_vld >= 2'd1)