Attempt to fix fetch mismatch caused by jumping halfway into a 32-bit

instruction that precedes a taken branch.

The bookkeeping in the frontend has been tightened up so that the entire
branch instruction, and nothing but the branch instruction, is marked as a
taken branch. This required some extra state, e.g. remembering the size of
the taken branch instruction, but saved an incrementer on the BTB source
address value.
This commit is contained in:
Luke Wren 2022-06-24 19:57:45 +01:00
parent d9389fb23e
commit 979e80be99
3 changed files with 61 additions and 29 deletions

View File

@ -105,6 +105,7 @@ wire df_cir_flush_behind;
wire x_btb_set;
wire [W_ADDR-1:0] x_btb_set_src_addr;
wire x_btb_set_src_size;
wire [W_ADDR-1:0] x_btb_set_target_addr;
wire x_btb_clear;
@ -138,6 +139,7 @@ hazard3_frontend #(
.btb_set (x_btb_set),
.btb_set_src_addr (x_btb_set_src_addr),
.btb_set_src_size (x_btb_set_src_size),
.btb_set_target_addr (x_btb_set_target_addr),
.btb_clear (x_btb_clear),
.btb_target_addr_out (d_btb_target_addr),
@ -750,10 +752,9 @@ assign x_btb_clear = d_fence_i || (m_trap_enter_vld && m_trap_enter_rdy) || (|BR
x_branch_was_predicted && x_jump_req_unchecked
));
// Match address is the address of the final halfword of the branch
// instruction. TODO can we save this adder?
assign x_btb_set_src_addr = d_pc + {30'h0, fd_cir[1:0] == 2'b11, 1'b0};
assign x_btb_set_src_addr = d_pc;
assign x_btb_set_target_addr = x_jump_target;
assign x_btb_set_src_size = &fd_cir[1:0];
// Memory protection

View File

@ -128,7 +128,16 @@ reg [W_ADDR-1:0] pc;
wire [W_ADDR-1:0] pc_seq_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
assign d_pc = pc;
wire predicted_branch = fd_cir_predbranch[d_instr_is_32bit] && |BRANCH_PREDICTOR;
// Frontend should mark the whole instruction, and nothing but the
// instruction, as a predicted branch. This goes wrong when we execute the
// address containing the predicted branch twice with different 16-bit
// alignments! We don't care about performance in this case(it took BMC to
// find it), but need to issue a branch-to-self to get back on a linear path,
// otherwise PC and CIR will diverge and we will misexecute.
wire partial_predicted_branch = !d_starved &&
|BRANCH_PREDICTOR && d_instr_is_32bit && ^fd_cir_predbranch;
wire predicted_branch = |BRANCH_PREDICTOR && fd_cir_predbranch[0];
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
@ -148,15 +157,10 @@ always @ (posedge clk or negedge rst_n) begin
`ifdef FORMAL
// This can be defeated if you branch backward halfway into a 32-bit
// instruction that immediately precedes the branch.
if (predicted_branch && !d_starved && !debug_mode && !d_except_instr_bus_fault) begin
assert(!d_invalid);
assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO);
end
if (&fd_cir_predbranch && !d_instr_is_32bit) begin
// Only way to get two back-to-back predicted-taken instructions
// is if they are the same instruction
assert(d_btb_target_addr == pc);
end
// if (predicted_branch && !d_starved && !debug_mode && !d_except_instr_bus_fault) begin
// assert(!d_invalid);
// assert(d_branchcond == BCOND_ZERO || d_branchcond == BCOND_NZERO);
// end
`endif
end
end
@ -176,6 +180,9 @@ always @ (*) begin
{1'bz, 1'b1, 5'b00011}: d_addr_offs = 32'h0000_0004; // Zifencei
default: d_addr_offs = 32'hxxxx_xxxx;
endcase
if (partial_predicted_branch) begin
d_addr_offs = 32'h0000_0000;
end
end
// ----------------------------------------------------------------------------
@ -323,7 +330,7 @@ always @ (*) begin
default: begin d_invalid_32bit = 1'b1; end
endcase
if (d_invalid || d_starved || d_except_instr_bus_fault) begin
if (d_invalid || d_starved || d_except_instr_bus_fault || partial_predicted_branch) begin
d_rs1 = {W_REGADDR{1'b0}};
d_rs2 = {W_REGADDR{1'b0}};
d_rd = {W_REGADDR{1'b0}};
@ -343,6 +350,9 @@ always @ (*) begin
end
if (cir_lock_prev) begin
d_branchcond = BCOND_NEVER;
end else if (partial_predicted_branch) begin
d_addr_is_regoffs = 1'b0;
d_branchcond = BCOND_ALWAYS;
end
end

View File

@ -41,6 +41,7 @@ module hazard3_frontend #(
// such that `src_addr` appears to be sequentially followed by `target`.
input wire btb_set,
input wire [W_ADDR-1:0] btb_set_src_addr,
input wire btb_set_src_size,
input wire [W_ADDR-1:0] btb_set_target_addr,
input wire btb_clear,
output wire [W_ADDR-1:0] btb_target_addr_out,
@ -95,16 +96,17 @@ localparam FIFO_DEPTH = 2;
wire jump_now = jump_target_vld && jump_target_rdy;
reg [1:0] mem_data_hwvld;
// Mark data as containing a predicted-taken branch instruction so that
// mispredicts can be recovered:
reg mem_data_predbranch;
// mispredicts can be recovered -- need to track both halfwords so that we
// can mark the entire instruction, and nothing but the instruction:
reg [1:0] mem_data_predbranch;
// Bus errors travel alongside data. They cause an exception if the core tries
// to decode the instruction, but until then can be flushed harmlessly.
reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH];
reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH];
reg [FIFO_DEPTH:0] fifo_err;
reg [FIFO_DEPTH:0] fifo_predbranch;
reg [1:0] fifo_valid_hw [0:FIFO_DEPTH];
reg [1:0] fifo_predbranch [0:FIFO_DEPTH];
reg [1:0] fifo_valid_hw [0:FIFO_DEPTH];
reg [FIFO_DEPTH:-1] fifo_valid;
wire [W_DATA-1:0] fifo_rdata = fifo_mem[0];
@ -119,6 +121,7 @@ wire fifo_dbg_inject = DEBUG_SUPPORT && dbg_instr_data_vld && dbg_i
always @ (*) begin: boundary_conditions
integer i;
fifo_mem[FIFO_DEPTH] = mem_data;
fifo_predbranch[FIFO_DEPTH] = 2'b00;
fifo_valid_hw[FIFO_DEPTH] = 2'b00;
fifo_valid[FIFO_DEPTH] = 1'b0;
fifo_valid[-1] = 1'b1;
@ -134,7 +137,7 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
fifo_valid_hw[i] <= 2'b00;
fifo_mem[i] <= 32'h0;
fifo_err[i] <= 1'b0;
fifo_predbranch[i] <= 1'b0;
fifo_predbranch[i] <= 2'b00;
end
end else begin
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
@ -156,7 +159,7 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
if (fifo_dbg_inject) begin
fifo_mem[0] <= dbg_instr_data;
fifo_err[0] <= 1'b0;
fifo_predbranch[0] <= 1'b0;
fifo_predbranch[0] <= 2'b00;
fifo_valid_hw[0] <= 2'b11;
end
`ifdef FORMAL
@ -172,6 +175,7 @@ end
// Branch target buffer
reg [W_ADDR-1:0] btb_src_addr;
reg btb_src_size;
reg [W_ADDR-1:0] btb_target_addr;
reg btb_valid;
@ -180,6 +184,7 @@ if (BRANCH_PREDICTOR) begin: have_btb
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
btb_src_addr <= {W_ADDR{1'b0}};
btb_src_size <= 1'b0;
btb_target_addr <= {W_ADDR{1'b0}};
btb_valid <= 1'b0;
end else if (btb_clear) begin
@ -188,6 +193,7 @@ if (BRANCH_PREDICTOR) begin: have_btb
btb_valid <= 1'b0;
end else if (btb_set) begin
btb_src_addr <= btb_set_src_addr;
btb_src_size <= btb_set_src_size;
btb_target_addr <= btb_set_target_addr;
btb_valid <= 1'b1;
end
@ -218,27 +224,37 @@ assign btb_target_addr_out = btb_target_addr;
// Fetch addr runs ahead of the PC, in word increments.
reg [W_ADDR-1:0] fetch_addr;
reg fetch_priv;
reg btb_prev_start_of_overhanging;
wire btb_match_now = |BRANCH_PREDICTOR && btb_valid && (
wire btb_match_word = |BRANCH_PREDICTOR && btb_valid && (
fetch_addr[W_ADDR-1:2] == btb_src_addr[W_ADDR-1:2]
);
wire btb_src_overhanging = btb_src_size && btb_src_addr[1];
wire btb_match_current_addr = btb_match_word && !btb_src_overhanging;
wire btb_match_next_addr = btb_match_word && btb_src_overhanging;
wire btb_match_now = btb_match_current_addr || btb_prev_start_of_overhanging;
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
fetch_addr <= RESET_VECTOR;
// M-mode at reset:
fetch_priv <= 1'b1;
btb_prev_start_of_overhanging <= 1'b0;
end else begin
if (jump_now) begin
// Post-increment if jump request is going straight through
fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00};
fetch_priv <= jump_priv || !U_MODE;
btb_prev_start_of_overhanging <= 1'b0;
end else if (mem_addr_vld && mem_addr_rdy) begin
if (btb_match_now && |BRANCH_PREDICTOR) begin
fetch_addr <= {btb_target_addr[W_ADDR-1:2], 2'b00};
end else begin
fetch_addr <= fetch_addr + 32'h4;
end
btb_prev_start_of_overhanging <= btb_match_next_addr;
end
end
end
@ -343,7 +359,7 @@ always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
mem_data_hwvld <= 2'b11;
mem_aph_hwvld <= 2'b11;
mem_data_predbranch <= 1'b0;
mem_data_predbranch <= 2'b00;
end else begin
if (jump_now) begin
if (|EXTENSION_C) begin
@ -354,13 +370,13 @@ always @ (posedge clk or negedge rst_n) begin
mem_aph_hwvld <= {1'b1, !jump_target[1]};
end
end
mem_data_predbranch <= 1'b0;
mem_data_predbranch <= 2'b00;
end else if (mem_addr_vld && mem_addr_rdy) begin
if (|EXTENSION_C) begin
// If a predicted-taken branch instruction only spans the first
// half of a word, need to flag the second half as invalid.
mem_data_hwvld <= mem_aph_hwvld & {
!(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]),
!(|BRANCH_PREDICTOR && btb_match_now && (btb_src_addr[1] == btb_src_size)),
1'b1
};
// Also need to take the alignment of the destination into account.
@ -369,7 +385,14 @@ always @ (posedge clk or negedge rst_n) begin
!(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1])
};
end
mem_data_predbranch <= |BRANCH_PREDICTOR && btb_match_now;
mem_data_predbranch <=
|BRANCH_PREDICTOR && btb_match_current_addr ? (
btb_src_addr[1] ? 2'b10 :
btb_src_size ? 2'b11 : 2'b01
) :
|BRANCH_PREDICTOR && btb_prev_start_of_overhanging ? (
2'b01
) : 2'b00;
end
end
end
@ -432,9 +455,7 @@ wire [2:0] cir_bus_err_plus_fetch =
// And the same thing again for whether CIR contains a predicted-taken branch.
// One day I should clean up this copy/paste.
wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0];
// We mark only the last halfword of the branch instruction as being predicted.
wire [1:0] fetch_predbranch_hw = &fetch_data_hwvld ? {fetch_predbranch, 1'b0} : {1'b0, fetch_predbranch};
wire [1:0] fetch_predbranch_hw = fifo_empty ? mem_data_predbranch : fifo_predbranch[0];
reg [2:0] cir_predbranch_reg;
wire [2:0] cir_predbranch_shifted =