From 979e80be99ac47f052778635589a4f92c987b9c1 Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Fri, 24 Jun 2022 19:57:45 +0100 Subject: [PATCH] Attempt to fix fetch mismatch caused by jumping halfway into a 32-bit instruction that precedes a taken branch. The bookkeeping in the frontend has been tightened up so that the entire branch instruction, and nothing but the branch instruction, is marked as a taken branch. This required some extra state, e.g. remembering the size of the taken branch instruction, but saved an incrementer on the BTB source address value. --- hdl/hazard3_core.v | 7 +++--- hdl/hazard3_decode.v | 32 +++++++++++++++++--------- hdl/hazard3_frontend.v | 51 +++++++++++++++++++++++++++++------------- 3 files changed, 61 insertions(+), 29 deletions(-) diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index 01c78a7..2829d79 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -105,6 +105,7 @@ wire df_cir_flush_behind; wire x_btb_set; wire [W_ADDR-1:0] x_btb_set_src_addr; +wire x_btb_set_src_size; wire [W_ADDR-1:0] x_btb_set_target_addr; wire x_btb_clear; @@ -138,6 +139,7 @@ hazard3_frontend #( .btb_set (x_btb_set), .btb_set_src_addr (x_btb_set_src_addr), + .btb_set_src_size (x_btb_set_src_size), .btb_set_target_addr (x_btb_set_target_addr), .btb_clear (x_btb_clear), .btb_target_addr_out (d_btb_target_addr), @@ -750,10 +752,9 @@ assign x_btb_clear = d_fence_i || (m_trap_enter_vld && m_trap_enter_rdy) || (|BR x_branch_was_predicted && x_jump_req_unchecked )); -// Match address is the address of the final halfword of the branch -// instruction. TODO can we save this adder? -assign x_btb_set_src_addr = d_pc + {30'h0, fd_cir[1:0] == 2'b11, 1'b0}; +assign x_btb_set_src_addr = d_pc; assign x_btb_set_target_addr = x_jump_target; +assign x_btb_set_src_size = &fd_cir[1:0]; // Memory protection diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index 4db61ab..d6e67ca 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -128,7 +128,16 @@ reg [W_ADDR-1:0] pc; wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2); assign d_pc = pc; -wire predicted_branch = fd_cir_predbranch[d_instr_is_32bit] && |BRANCH_PREDICTOR; +// Frontend should mark the whole instruction, and nothing but the +// instruction, as a predicted branch. This goes wrong when we execute the +// address containing the predicted branch twice with different 16-bit +// alignments! We don't care about performance in this case(it took BMC to +// find it), but need to issue a branch-to-self to get back on a linear path, +// otherwise PC and CIR will diverge and we will misexecute. +wire partial_predicted_branch = !d_starved && + |BRANCH_PREDICTOR && d_instr_is_32bit && ^fd_cir_predbranch; + +wire predicted_branch = |BRANCH_PREDICTOR && fd_cir_predbranch[0]; always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin @@ -148,15 +157,10 @@ always @ (posedge clk or negedge rst_n) begin `ifdef FORMAL // This can be defeated if you branch backward halfway into a 32-bit // instruction that immediately precedes the branch. - if (predicted_branch && !d_starved && !debug_mode && !d_except_instr_bus_fault) begin - assert(!d_invalid); - assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO); - end - if (&fd_cir_predbranch && !d_instr_is_32bit) begin - // Only way to get two back-to-back predicted-taken instructions - // is if they are the same instruction - assert(d_btb_target_addr == pc); - end + // if (predicted_branch && !d_starved && !debug_mode && !d_except_instr_bus_fault) begin + // assert(!d_invalid); + // assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO); + // end `endif end end @@ -176,6 +180,9 @@ always @ (*) begin {1'bz, 1'b1, 5'b00011}: d_addr_offs = 32'h0000_0004; // Zifencei default: d_addr_offs = 32'hxxxx_xxxx; endcase + if (partial_predicted_branch) begin + d_addr_offs = 32'h0000_0000; + end end // ---------------------------------------------------------------------------- @@ -323,7 +330,7 @@ always @ (*) begin default: begin d_invalid_32bit = 1'b1; end endcase - if (d_invalid || d_starved || d_except_instr_bus_fault) begin + if (d_invalid || d_starved || d_except_instr_bus_fault || partial_predicted_branch) begin d_rs1 = {W_REGADDR{1'b0}}; d_rs2 = {W_REGADDR{1'b0}}; d_rd = {W_REGADDR{1'b0}}; @@ -343,6 +350,9 @@ always @ (*) begin end if (cir_lock_prev) begin d_branchcond = BCOND_NEVER; + end else if (partial_predicted_branch) begin + d_addr_is_regoffs = 1'b0; + d_branchcond = BCOND_ALWAYS; end end diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index 2044e96..044a7a1 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -41,6 +41,7 @@ module hazard3_frontend #( // such that `src_addr` appears to be sequentially followed by `target`. input wire btb_set, input wire [W_ADDR-1:0] btb_set_src_addr, + input wire btb_set_src_size, input wire [W_ADDR-1:0] btb_set_target_addr, input wire btb_clear, output wire [W_ADDR-1:0] btb_target_addr_out, @@ -95,16 +96,17 @@ localparam FIFO_DEPTH = 2; wire jump_now = jump_target_vld && jump_target_rdy; reg [1:0] mem_data_hwvld; // Mark data as containing a predicted-taken branch instruction so that -// mispredicts can be recovered: -reg mem_data_predbranch; +// mispredicts can be recovered -- need to track both halfwords so that we +// can mark the entire instruction, and nothing but the instruction: +reg [1:0] mem_data_predbranch; // Bus errors travel alongside data. They cause an exception if the core tries // to decode the instruction, but until then can be flushed harmlessly. -reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH]; +reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH]; reg [FIFO_DEPTH:0] fifo_err; -reg [FIFO_DEPTH:0] fifo_predbranch; -reg [1:0] fifo_valid_hw [0:FIFO_DEPTH]; +reg [1:0] fifo_predbranch [0:FIFO_DEPTH]; +reg [1:0] fifo_valid_hw [0:FIFO_DEPTH]; reg [FIFO_DEPTH:-1] fifo_valid; wire [W_DATA-1:0] fifo_rdata = fifo_mem[0]; @@ -119,6 +121,7 @@ wire fifo_dbg_inject = DEBUG_SUPPORT && dbg_instr_data_vld && dbg_i always @ (*) begin: boundary_conditions integer i; fifo_mem[FIFO_DEPTH] = mem_data; + fifo_predbranch[FIFO_DEPTH] = 2'b00; fifo_valid_hw[FIFO_DEPTH] = 2'b00; fifo_valid[FIFO_DEPTH] = 1'b0; fifo_valid[-1] = 1'b1; @@ -134,7 +137,7 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update fifo_valid_hw[i] <= 2'b00; fifo_mem[i] <= 32'h0; fifo_err[i] <= 1'b0; - fifo_predbranch[i] <= 1'b0; + fifo_predbranch[i] <= 2'b00; end end else begin for (i = 0; i < FIFO_DEPTH; i = i + 1) begin @@ -156,7 +159,7 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update if (fifo_dbg_inject) begin fifo_mem[0] <= dbg_instr_data; fifo_err[0] <= 1'b0; - fifo_predbranch[0] <= 1'b0; + fifo_predbranch[0] <= 2'b00; fifo_valid_hw[0] <= 2'b11; end `ifdef FORMAL @@ -172,6 +175,7 @@ end // Branch target buffer reg [W_ADDR-1:0] btb_src_addr; +reg btb_src_size; reg [W_ADDR-1:0] btb_target_addr; reg btb_valid; @@ -180,6 +184,7 @@ if (BRANCH_PREDICTOR) begin: have_btb always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin btb_src_addr <= {W_ADDR{1'b0}}; + btb_src_size <= 1'b0; btb_target_addr <= {W_ADDR{1'b0}}; btb_valid <= 1'b0; end else if (btb_clear) begin @@ -188,6 +193,7 @@ if (BRANCH_PREDICTOR) begin: have_btb btb_valid <= 1'b0; end else if (btb_set) begin btb_src_addr <= btb_set_src_addr; + btb_src_size <= btb_set_src_size; btb_target_addr <= btb_set_target_addr; btb_valid <= 1'b1; end @@ -218,27 +224,37 @@ assign btb_target_addr_out = btb_target_addr; // Fetch addr runs ahead of the PC, in word increments. reg [W_ADDR-1:0] fetch_addr; reg fetch_priv; +reg btb_prev_start_of_overhanging; -wire btb_match_now = |BRANCH_PREDICTOR && btb_valid && ( +wire btb_match_word = |BRANCH_PREDICTOR && btb_valid && ( fetch_addr[W_ADDR-1:2] == btb_src_addr[W_ADDR-1:2] ); +wire btb_src_overhanging = btb_src_size && btb_src_addr[1]; +wire btb_match_current_addr = btb_match_word && !btb_src_overhanging; +wire btb_match_next_addr = btb_match_word && btb_src_overhanging; + +wire btb_match_now = btb_match_current_addr || btb_prev_start_of_overhanging; + always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin fetch_addr <= RESET_VECTOR; // M-mode at reset: fetch_priv <= 1'b1; + btb_prev_start_of_overhanging <= 1'b0; end else begin if (jump_now) begin // Post-increment if jump request is going straight through fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00}; fetch_priv <= jump_priv || !U_MODE; + btb_prev_start_of_overhanging <= 1'b0; end else if (mem_addr_vld && mem_addr_rdy) begin if (btb_match_now && |BRANCH_PREDICTOR) begin fetch_addr <= {btb_target_addr[W_ADDR-1:2], 2'b00}; end else begin fetch_addr <= fetch_addr + 32'h4; end + btb_prev_start_of_overhanging <= btb_match_next_addr; end end end @@ -343,7 +359,7 @@ always @ (posedge clk or negedge rst_n) begin if (!rst_n) begin mem_data_hwvld <= 2'b11; mem_aph_hwvld <= 2'b11; - mem_data_predbranch <= 1'b0; + mem_data_predbranch <= 2'b00; end else begin if (jump_now) begin if (|EXTENSION_C) begin @@ -354,13 +370,13 @@ always @ (posedge clk or negedge rst_n) begin mem_aph_hwvld <= {1'b1, !jump_target[1]}; end end - mem_data_predbranch <= 1'b0; + mem_data_predbranch <= 2'b00; end else if (mem_addr_vld && mem_addr_rdy) begin if (|EXTENSION_C) begin // If a predicted-taken branch instruction only spans the first // half of a word, need to flag the second half as invalid. mem_data_hwvld <= mem_aph_hwvld & { - !(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]), + !(|BRANCH_PREDICTOR && btb_match_now && (btb_src_addr[1] == btb_src_size)), 1'b1 }; // Also need to take the alignment of the destination into account. @@ -369,7 +385,14 @@ always @ (posedge clk or negedge rst_n) begin !(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1]) }; end - mem_data_predbranch <= |BRANCH_PREDICTOR && btb_match_now; + mem_data_predbranch <= + |BRANCH_PREDICTOR && btb_match_current_addr ? ( + btb_src_addr[1] ? 2'b10 : + btb_src_size ? 2'b11 : 2'b01 + ) : + |BRANCH_PREDICTOR && btb_prev_start_of_overhanging ? ( + 2'b01 + ) : 2'b00; end end end @@ -432,9 +455,7 @@ wire [2:0] cir_bus_err_plus_fetch = // And the same thing again for whether CIR contains a predicted-taken branch. // One day I should clean up this copy/paste. -wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0]; -// We mark only the last halfword of the branch instruction as being predicted. -wire [1:0] fetch_predbranch_hw = &fetch_data_hwvld ? {fetch_predbranch, 1'b0} : {1'b0, fetch_predbranch}; +wire [1:0] fetch_predbranch_hw = fifo_empty ? mem_data_predbranch : fifo_predbranch[0]; reg [2:0] cir_predbranch_reg; wire [2:0] cir_predbranch_shifted =