First pass at adding branch prediction
This commit is contained in:
parent
3703b1fc4c
commit
0766ec6f8a
|
@ -169,13 +169,21 @@ parameter FAST_BRANCHCMP = 1,
|
|||
// disabled e.g. to permit block-RAM inference on FPGA.
|
||||
parameter RESET_REGFILE = 1,
|
||||
|
||||
// MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by
|
||||
// making trap vector base partly fixed (legal, as it's WARL).
|
||||
// BRANCH_PREDICTOR: enable branch prediction. The branch predictor consists
|
||||
// of a single BTB entry which is allocated on a taken backward branch, and
|
||||
// cleared on a mispredicted nontaken branch, a fence.i or a trap. Successful
|
||||
// prediction eliminates the 1-cyle fetch bubble on a taken branch, usually
|
||||
// making tight loops faster.
|
||||
parameter BRANCH_PREDICTOR = 1,
|
||||
|
||||
// MTVEC_WMASK: Mask of which bits in mtvec are writable. Full writability is
|
||||
// recommended, because a common idiom in setup code is to set mtvec just
|
||||
// past code that may trap, as a hardware "try {...} catch" block.
|
||||
//
|
||||
// - The vectoring mode can be made fixed by clearing the LSB of MTVEC_WMASK
|
||||
//
|
||||
// - Note the entire vector table must always be aligned to its size, rounded
|
||||
// up to a power of two, so careful with the low-order bits.
|
||||
// - In vectored mode, the vector table must be aligned to its size, rounded
|
||||
// up to a power of two.
|
||||
parameter MTVEC_WMASK = 32'hfffffffd,
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
.MUL_FAST (MUL_FAST),
|
||||
.MULH_FAST (MULH_FAST),
|
||||
.FAST_BRANCHCMP (FAST_BRANCHCMP),
|
||||
.BRANCH_PREDICTOR (BRANCH_PREDICTOR),
|
||||
.MTVEC_WMASK (MTVEC_WMASK),
|
||||
.RESET_REGFILE (RESET_REGFILE),
|
||||
.W_ADDR (W_ADDR),
|
||||
|
|
|
@ -98,10 +98,16 @@ wire [W_REGADDR-1:0] f_rs2_fine;
|
|||
|
||||
wire [31:0] fd_cir;
|
||||
wire [1:0] fd_cir_err;
|
||||
wire [1:0] fd_cir_predbranch;
|
||||
wire [1:0] fd_cir_vld;
|
||||
wire [1:0] df_cir_use;
|
||||
wire df_cir_flush_behind;
|
||||
|
||||
wire x_btb_set;
|
||||
wire [W_ADDR-1:0] x_btb_set_src_addr;
|
||||
wire [W_ADDR-1:0] x_btb_set_target_addr;
|
||||
wire x_btb_clear;
|
||||
|
||||
assign bus_aph_panic_i = 1'b0;
|
||||
|
||||
wire f_mem_size;
|
||||
|
@ -128,8 +134,14 @@ hazard3_frontend #(
|
|||
.jump_target_vld (f_jump_req),
|
||||
.jump_target_rdy (f_jump_rdy),
|
||||
|
||||
.btb_set (x_btb_set),
|
||||
.btb_set_src_addr (x_btb_set_src_addr),
|
||||
.btb_set_target_addr (x_btb_set_target_addr),
|
||||
.btb_clear (x_btb_clear),
|
||||
|
||||
.cir (fd_cir),
|
||||
.cir_err (fd_cir_err),
|
||||
.cir_predbranch (fd_cir_predbranch),
|
||||
.cir_vld (fd_cir_vld),
|
||||
.cir_use (df_cir_use),
|
||||
.cir_flush_behind (df_cir_flush_behind),
|
||||
|
@ -181,6 +193,7 @@ wire d_addr_is_regoffs;
|
|||
wire [W_ADDR-1:0] d_pc;
|
||||
wire [W_EXCEPT-1:0] d_except;
|
||||
wire d_wfi;
|
||||
wire d_fence_i;
|
||||
wire d_csr_ren;
|
||||
wire d_csr_wen;
|
||||
wire [1:0] d_csr_wtype;
|
||||
|
@ -198,6 +211,7 @@ hazard3_decode #(
|
|||
|
||||
.fd_cir (fd_cir),
|
||||
.fd_cir_err (fd_cir_err),
|
||||
.fd_cir_predbranch (fd_cir_predbranch),
|
||||
.fd_cir_vld (fd_cir_vld),
|
||||
.df_cir_use (df_cir_use),
|
||||
.df_cir_flush_behind (df_cir_flush_behind),
|
||||
|
@ -230,7 +244,8 @@ hazard3_decode #(
|
|||
.d_addr_offs (d_addr_offs),
|
||||
.d_addr_is_regoffs (d_addr_is_regoffs),
|
||||
.d_except (d_except),
|
||||
.d_wfi (d_wfi)
|
||||
.d_wfi (d_wfi),
|
||||
.d_fence_i (d_fence_i)
|
||||
);
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
@ -689,12 +704,15 @@ endgenerate
|
|||
// For JALR, the LSB of the result must be cleared by hardware
|
||||
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;
|
||||
wire x_jump_misaligned = ~|EXTENSION_C && x_addr_sum[1];
|
||||
wire x_branch_cmp;
|
||||
wire x_branch_cmp_noinvert;
|
||||
|
||||
wire x_branch_was_predicted = |BRANCH_PREDICTOR && fd_cir_predbranch[fd_cir[1:0] == 2'b11];
|
||||
wire x_branch_cmp = x_branch_cmp_noinvert ^ x_branch_was_predicted;
|
||||
|
||||
generate
|
||||
if (~|FAST_BRANCHCMP) begin: alu_branchcmp
|
||||
|
||||
assign x_branch_cmp = x_alu_cmp;
|
||||
assign x_branch_cmp_noinvert = x_alu_cmp;
|
||||
|
||||
end else begin: fast_branchcmp
|
||||
|
||||
|
@ -704,7 +722,7 @@ end else begin: fast_branchcmp
|
|||
.aluop (d_aluop),
|
||||
.op_a (x_rs1_bypass),
|
||||
.op_b (x_rs2_bypass),
|
||||
.cmp (x_branch_cmp)
|
||||
.cmp (x_branch_cmp_noinvert)
|
||||
);
|
||||
|
||||
end
|
||||
|
@ -719,6 +737,19 @@ wire x_jump_req_unchecked = !x_stall_on_raw && (
|
|||
|
||||
assign x_jump_req = x_jump_req_unchecked && !x_jump_misaligned && !x_exec_pmp_fail;
|
||||
|
||||
assign x_btb_set = |BRANCH_PREDICTOR && (
|
||||
x_jump_req_unchecked && d_addr_offs[W_ADDR - 1] && !x_branch_was_predicted
|
||||
);
|
||||
|
||||
assign x_btb_clear = d_fence_i || (m_trap_enter_vld && m_trap_enter_rdy) || (|BRANCH_PREDICTOR && (
|
||||
x_branch_was_predicted && x_jump_req_unchecked
|
||||
));
|
||||
|
||||
// Match address is the address of the final halfword of the branch
|
||||
// instruction. TODO can we save this adder?
|
||||
assign x_btb_set_src_addr = d_pc + {30'h0, fd_cir[1:0] == 2'b11, 1'b0};
|
||||
assign x_btb_set_target_addr = x_jump_target;
|
||||
|
||||
// Memory protection
|
||||
|
||||
wire [11:0] x_pmp_cfg_addr;
|
||||
|
|
|
@ -15,6 +15,7 @@ module hazard3_decode #(
|
|||
|
||||
input wire [31:0] fd_cir,
|
||||
input wire [1:0] fd_cir_err,
|
||||
input wire [1:0] fd_cir_predbranch,
|
||||
input wire [1:0] fd_cir_vld,
|
||||
output wire [1:0] df_cir_use,
|
||||
output wire df_cir_flush_behind,
|
||||
|
@ -47,7 +48,8 @@ module hazard3_decode #(
|
|||
output reg [W_ADDR-1:0] d_addr_offs,
|
||||
output reg d_addr_is_regoffs,
|
||||
output reg [W_EXCEPT-1:0] d_except,
|
||||
output reg d_wfi
|
||||
output reg d_wfi,
|
||||
output reg d_fence_i
|
||||
);
|
||||
|
||||
`include "rv_opcodes.vh"
|
||||
|
@ -149,11 +151,14 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
end
|
||||
end
|
||||
|
||||
wire [W_ADDR-1:0] branch_offs =
|
||||
!d_instr_is_32bit && fd_cir_predbranch[0] && |BRANCH_PREDICTOR ? 32'h2 :
|
||||
d_instr_is_32bit && fd_cir_predbranch[1] && |BRANCH_PREDICTOR ? 32'h4 : d_imm_b;
|
||||
|
||||
always @ (*) begin
|
||||
casez ({|EXTENSION_A, |EXTENSION_ZIFENCEI, d_instr[6:2]})
|
||||
{1'bz, 1'bz, 5'b11011}: d_addr_offs = d_imm_j ; // JAL
|
||||
{1'bz, 1'bz, 5'b11000}: d_addr_offs = d_imm_b ; // Branches
|
||||
{1'bz, 1'bz, 5'b11000}: d_addr_offs = branch_offs ; // Branches
|
||||
{1'bz, 1'bz, 5'b01000}: d_addr_offs = d_imm_s ; // Store
|
||||
{1'bz, 1'bz, 5'b11001}: d_addr_offs = d_imm_i ; // JALR
|
||||
{1'bz, 1'bz, 5'b00000}: d_addr_offs = d_imm_i ; // Loads
|
||||
|
@ -188,6 +193,7 @@ always @ (*) begin
|
|||
d_invalid_32bit = 1'b0;
|
||||
d_except = EXCEPT_NONE;
|
||||
d_wfi = 1'b0;
|
||||
d_fence_i = 1'b0;
|
||||
|
||||
casez (d_instr)
|
||||
RV_BEQ: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_ZERO; end
|
||||
|
@ -292,7 +298,7 @@ always @ (*) begin
|
|||
RV_ZIP: if (EXTENSION_ZBKB) begin d_aluop = ALUOP_ZIP; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end
|
||||
|
||||
RV_FENCE: begin d_rs2 = X0; end // NOP, note rs1/rd are zero in instruction
|
||||
RV_FENCE_I: if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; end else begin d_invalid_32bit = 1'b1; end // note rs1/rs2/rd are zero in instruction
|
||||
RV_FENCE_I: if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_fence_i = 1'b1; end else begin d_invalid_32bit = 1'b1; end // note rs1/rs2/rd are zero in instruction
|
||||
RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end
|
||||
RV_CSRRS: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end
|
||||
RV_CSRRC: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end
|
||||
|
@ -331,3 +337,7 @@ always @ (*) begin
|
|||
end
|
||||
|
||||
endmodule
|
||||
|
||||
`ifndef YOSYS
|
||||
`default_nettype wire
|
||||
`endif
|
|
@ -36,15 +36,24 @@ module hazard3_frontend #(
|
|||
input wire jump_target_vld,
|
||||
output wire jump_target_rdy,
|
||||
|
||||
// Interface to the branch target buffer. `src_addr` is the address of the
|
||||
// last halfword of a taken backward branch. The frontend redirects fetch
|
||||
// such that `src_addr` appears to be sequentially followed by `target`.
|
||||
input wire btb_set,
|
||||
input wire [W_ADDR-1:0] btb_set_src_addr,
|
||||
input wire [W_ADDR-1:0] btb_set_target_addr,
|
||||
input wire btb_clear,
|
||||
|
||||
// Interface to Decode
|
||||
// Note reg/wire distinction
|
||||
// => decode is providing live feedback on the CIR it is decoding,
|
||||
// which we fetched previously
|
||||
output reg [31:0] cir,
|
||||
output reg [1:0] cir_vld, // number of valid halfwords in CIR
|
||||
input wire [1:0] cir_use, // number of halfwords D intends to consume
|
||||
// *may* be a function of hready
|
||||
output wire [1:0] cir_err, // Bus error on upper/lower halfword of CIR.
|
||||
output reg [1:0] cir_vld, // number of valid halfwords in CIR
|
||||
input wire [1:0] cir_use, // number of halfwords D intends to consume
|
||||
// *may* be a function of hready
|
||||
output wire [1:0] cir_err, // Bus error on upper/lower halfword of CIR.
|
||||
output wire [1:0] cir_predbranch, // Set for last halfword of a predicted-taken branch
|
||||
|
||||
// "flush_behind": do not flush the oldest instruction when accepting a
|
||||
// jump request (but still flush younger instructions). Sometimes a
|
||||
|
@ -80,16 +89,20 @@ localparam W_BUNDLE = 16;
|
|||
localparam FIFO_DEPTH = 2;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Fetch Queue (FIFO)
|
||||
// Fetch queue
|
||||
|
||||
wire jump_now = jump_target_vld && jump_target_rdy;
|
||||
reg [1:0] mem_data_hwvld;
|
||||
// Mark data as containing a predicted-taken branch instruction so that
|
||||
// mispredicts can be recovered:
|
||||
reg mem_data_predbranch;
|
||||
|
||||
// Bus errors travel alongside data. They cause an exception if the core tries
|
||||
// to decode the instruction, but until then can be flushed harmlessly.
|
||||
|
||||
reg [W_DATA-1:0] fifo_mem [0:FIFO_DEPTH];
|
||||
reg [FIFO_DEPTH:0] fifo_err;
|
||||
reg [FIFO_DEPTH:0] fifo_predbranch;
|
||||
reg [1:0] fifo_valid_hw [0:FIFO_DEPTH];
|
||||
reg [FIFO_DEPTH:-1] fifo_valid;
|
||||
|
||||
|
@ -102,7 +115,6 @@ wire fifo_push;
|
|||
wire fifo_pop;
|
||||
wire fifo_dbg_inject = DEBUG_SUPPORT && dbg_instr_data_vld && dbg_instr_data_rdy;
|
||||
|
||||
|
||||
always @ (*) begin: boundary_conditions
|
||||
integer i;
|
||||
fifo_mem[FIFO_DEPTH] = mem_data;
|
||||
|
@ -121,12 +133,14 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
|
|||
fifo_valid_hw[i] <= 2'b00;
|
||||
fifo_mem[i] <= 32'h0;
|
||||
fifo_err[i] <= 1'b0;
|
||||
fifo_predbranch[i] <= 1'b0;
|
||||
end
|
||||
end else begin
|
||||
for (i = 0; i < FIFO_DEPTH; i = i + 1) begin
|
||||
if (fifo_pop || (fifo_push && !fifo_valid[i])) begin
|
||||
fifo_mem[i] <= fifo_valid[i + 1] ? fifo_mem[i + 1] : mem_data;
|
||||
fifo_err[i] <= fifo_valid[i + 1] ? fifo_err[i + 1] : mem_data_err;
|
||||
fifo_mem[i] <= fifo_valid[i + 1] ? fifo_mem[i + 1] : mem_data;
|
||||
fifo_err[i] <= fifo_valid[i + 1] ? fifo_err[i + 1] : mem_data_err;
|
||||
fifo_predbranch[i] <= fifo_valid[i + 1] ? fifo_predbranch[i + 1] : mem_data_predbranch;
|
||||
end
|
||||
fifo_valid_hw[i] <=
|
||||
jump_now ? 2'h0 :
|
||||
|
@ -141,54 +155,60 @@ always @ (posedge clk or negedge rst_n) begin: fifo_update
|
|||
if (fifo_dbg_inject) begin
|
||||
fifo_mem[0] <= dbg_instr_data;
|
||||
fifo_err[0] <= 1'b0;
|
||||
fifo_predbranch[0] <= 1'b0;
|
||||
fifo_valid_hw[0] <= 2'b11;
|
||||
end
|
||||
`ifdef FORMAL
|
||||
// FIFO validity must be compact, so we can always consume from the end
|
||||
if (!fifo_valid[0]) begin
|
||||
assert(!fifo_valid[1]);
|
||||
end
|
||||
`endif
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Fetch Request + State Logic
|
||||
// Branch target buffer
|
||||
|
||||
// Keep track of some useful state of the memory interface
|
||||
reg [W_ADDR-1:0] btb_src_addr;
|
||||
reg [W_ADDR-1:0] btb_target_addr;
|
||||
reg btb_valid;
|
||||
|
||||
reg mem_addr_hold;
|
||||
reg [1:0] pending_fetches;
|
||||
reg [1:0] ctr_flush_pending;
|
||||
wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_hold) - mem_data_vld;
|
||||
|
||||
// Debugger only injects instructions when the frontend is at rest and empty.
|
||||
assign dbg_instr_data_rdy = DEBUG_SUPPORT && !fifo_valid[0] && ~|ctr_flush_pending;
|
||||
|
||||
wire cir_room_for_fetch;
|
||||
// If fetch data is forwarded past the FIFO, ensure it is not also written to it.
|
||||
assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_room_for_fetch && fifo_empty)
|
||||
&& !(DEBUG_SUPPORT && debug_mode);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mem_addr_hold <= 1'b0;
|
||||
pending_fetches <= 2'h0;
|
||||
ctr_flush_pending <= 2'h0;
|
||||
end else begin
|
||||
`ifdef FORMAL
|
||||
assert(ctr_flush_pending <= pending_fetches);
|
||||
assert(pending_fetches < 2'd3);
|
||||
assert(!(mem_data_vld && !pending_fetches));
|
||||
`endif
|
||||
mem_addr_hold <= mem_addr_vld && !mem_addr_rdy;
|
||||
pending_fetches <= pending_fetches_next;
|
||||
if (jump_now) begin
|
||||
ctr_flush_pending <= pending_fetches - mem_data_vld;
|
||||
end else if (|ctr_flush_pending && mem_data_vld) begin
|
||||
ctr_flush_pending <= ctr_flush_pending - 1'b1;
|
||||
generate
|
||||
if (BRANCH_PREDICTOR) begin: have_btb
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
btb_src_addr <= {W_ADDR{1'b0}};
|
||||
btb_target_addr <= {W_ADDR{1'b0}};
|
||||
btb_valid <= 1'b0;
|
||||
end else if (btb_set) begin
|
||||
btb_src_addr <= btb_set_src_addr;
|
||||
btb_target_addr <= btb_set_target_addr;
|
||||
btb_valid <= 1'b1;
|
||||
end else if (btb_clear) begin
|
||||
btb_valid <= 1'b0;
|
||||
end
|
||||
end
|
||||
end else begin: no_btb
|
||||
always @ (*) begin
|
||||
btb_src_addr = {W_ADDR{1'b0}};
|
||||
btb_target_addr = {W_ADDR{1'b0}};
|
||||
btb_valid = 1'b0;
|
||||
end
|
||||
end
|
||||
endgenerate
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Fetch request generation
|
||||
|
||||
// Fetch addr runs ahead of the PC, in word increments.
|
||||
reg [W_ADDR-1:0] fetch_addr;
|
||||
reg fetch_priv;
|
||||
|
||||
wire btb_match_now = |BRANCH_PREDICTOR && btb_valid && (
|
||||
fetch_addr[W_ADDR-1:2] == btb_src_addr[W_ADDR-1:2]
|
||||
);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
fetch_addr <= RESET_VECTOR;
|
||||
|
@ -200,27 +220,11 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
fetch_addr <= {jump_target[W_ADDR-1:2] + (mem_addr_rdy && !mem_addr_hold), 2'b00};
|
||||
fetch_priv <= jump_priv || !U_MODE;
|
||||
end else if (mem_addr_vld && mem_addr_rdy) begin
|
||||
fetch_addr <= fetch_addr + 32'h4;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [1:0] mem_aph_hwvld;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mem_data_hwvld <= 2'b11;
|
||||
mem_aph_hwvld <= 2'b11;
|
||||
end else if (EXTENSION_C) begin
|
||||
if (jump_now) begin
|
||||
if (mem_addr_rdy) begin
|
||||
mem_data_hwvld <= {1'b1, !jump_target[1]};
|
||||
if (btb_match_now && |BRANCH_PREDICTOR) begin
|
||||
fetch_addr <= {btb_target_addr[W_ADDR-1:2], 2'b00};
|
||||
end else begin
|
||||
mem_aph_hwvld <= {1'b1, !jump_target[1]};
|
||||
fetch_addr <= fetch_addr + 32'h4;
|
||||
end
|
||||
end else if (mem_addr_vld && mem_addr_rdy) begin
|
||||
mem_data_hwvld <= mem_aph_hwvld;
|
||||
mem_aph_hwvld <= 2'b11;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -252,12 +256,7 @@ assign mem_priv = mem_priv_r;
|
|||
assign mem_addr_vld = mem_addr_vld_r && !reset_holdoff;
|
||||
assign mem_size = 1'b1;
|
||||
|
||||
// Using the non-registered version of pending_fetches would improve FIFO
|
||||
// utilisation, but create a combinatorial path from hready to address phase!
|
||||
// This means at least a 2-word FIFO is required for full fetch throughput.
|
||||
wire fetch_stall = fifo_full
|
||||
|| fifo_almost_full && |pending_fetches
|
||||
|| pending_fetches > 2'h1;
|
||||
wire fetch_stall;
|
||||
|
||||
always @ (*) begin
|
||||
mem_addr_r = fetch_addr;
|
||||
|
@ -277,6 +276,84 @@ end
|
|||
|
||||
assign jump_target_rdy = !mem_addr_hold;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Bus Pipeline Tracking
|
||||
|
||||
// Keep track of some useful state of the memory interface
|
||||
|
||||
reg mem_addr_hold;
|
||||
reg [1:0] pending_fetches;
|
||||
reg [1:0] ctr_flush_pending;
|
||||
|
||||
wire [1:0] pending_fetches_next = pending_fetches + (mem_addr_vld && !mem_addr_hold) - mem_data_vld;
|
||||
|
||||
// Using the non-registered version of pending_fetches would improve FIFO
|
||||
// utilisation, but create a combinatorial path from hready to address phase!
|
||||
// This means at least a 2-word FIFO is required for full fetch throughput.
|
||||
assign fetch_stall = fifo_full
|
||||
|| fifo_almost_full && |pending_fetches
|
||||
|| pending_fetches > 2'h1;
|
||||
|
||||
// Debugger only injects instructions when the frontend is at rest and empty.
|
||||
assign dbg_instr_data_rdy = DEBUG_SUPPORT && !fifo_valid[0] && ~|ctr_flush_pending;
|
||||
|
||||
wire cir_room_for_fetch;
|
||||
// If fetch data is forwarded past the FIFO, ensure it is not also written to it.
|
||||
assign fifo_push = mem_data_vld && ~|ctr_flush_pending && !(cir_room_for_fetch && fifo_empty)
|
||||
&& !(DEBUG_SUPPORT && debug_mode);
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mem_addr_hold <= 1'b0;
|
||||
pending_fetches <= 2'h0;
|
||||
ctr_flush_pending <= 2'h0;
|
||||
end else begin
|
||||
`ifdef FORMAL
|
||||
assert(ctr_flush_pending <= pending_fetches);
|
||||
assert(pending_fetches < 2'd3);
|
||||
assert(!(mem_data_vld && !pending_fetches));
|
||||
`endif
|
||||
mem_addr_hold <= mem_addr_vld && !mem_addr_rdy;
|
||||
pending_fetches <= pending_fetches_next;
|
||||
if (jump_now) begin
|
||||
ctr_flush_pending <= pending_fetches - mem_data_vld;
|
||||
end else if (|ctr_flush_pending && mem_data_vld) begin
|
||||
ctr_flush_pending <= ctr_flush_pending - 1'b1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
reg [1:0] mem_aph_hwvld;
|
||||
|
||||
always @ (posedge clk or negedge rst_n) begin
|
||||
if (!rst_n) begin
|
||||
mem_data_hwvld <= 2'b11;
|
||||
mem_aph_hwvld <= 2'b11;
|
||||
mem_data_predbranch <= 1'b0;
|
||||
end else if (EXTENSION_C) begin
|
||||
if (jump_now) begin
|
||||
if (mem_addr_rdy) begin
|
||||
mem_data_hwvld <= {1'b1, !jump_target[1]};
|
||||
end else begin
|
||||
mem_aph_hwvld <= {1'b1, !jump_target[1]};
|
||||
end
|
||||
end else if (mem_addr_vld && mem_addr_rdy) begin
|
||||
// If a predicted-taken branch instruction only spans the first
|
||||
// half of a word, need to flag the second half as invalid.
|
||||
mem_data_hwvld <= mem_aph_hwvld & {
|
||||
!(|BRANCH_PREDICTOR && btb_match_now && !btb_src_addr[1]),
|
||||
1'b1
|
||||
};
|
||||
// Also need to take the alignment of the destination into account.
|
||||
mem_aph_hwvld <= {
|
||||
1'b1,
|
||||
!(|BRANCH_PREDICTOR && btb_match_now && btb_target_addr[1])
|
||||
};
|
||||
mem_data_predbranch <= |BRANCH_PREDICTOR && btb_match_now;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Instruction assembly yard
|
||||
|
||||
|
@ -320,6 +397,7 @@ wire [3*W_BUNDLE-1:0] instr_data_plus_fetch =
|
|||
// may be buffered in the prefetch queue.
|
||||
|
||||
wire fetch_bus_err = fifo_empty ? mem_data_err : fifo_err[0];
|
||||
wire fetch_predbranch = fifo_empty ? mem_data_predbranch : fifo_predbranch[0];
|
||||
|
||||
reg [2:0] cir_bus_err;
|
||||
wire [2:0] cir_bus_err_shifted =
|
||||
|
@ -332,6 +410,20 @@ wire [2:0] cir_bus_err_plus_fetch =
|
|||
level_next_no_fetch[0] && |EXTENSION_C ? {{2{fetch_bus_err}}, cir_bus_err_shifted[0]} :
|
||||
{cir_bus_err_shifted[2], {2{fetch_bus_err}}};
|
||||
|
||||
// And the same thing again for whether CIR contains a predicted-taken branch.
|
||||
// One day I should clean up this copy/paste.
|
||||
|
||||
reg [2:0] cir_predbranch_reg;
|
||||
wire [2:0] cir_predbranch_shifted =
|
||||
cir_use[1] ? cir_predbranch_reg >> 2 :
|
||||
cir_use[0] && EXTENSION_C ? cir_predbranch_reg >> 1 : cir_predbranch_reg;
|
||||
|
||||
wire [2:0] cir_predbranch_plus_fetch =
|
||||
!cir_room_for_fetch ? cir_predbranch_shifted :
|
||||
level_next_no_fetch[1] && |EXTENSION_C ? {fetch_predbranch, cir_predbranch_shifted[1:0]} :
|
||||
level_next_no_fetch[0] && |EXTENSION_C ? {{2{fetch_predbranch}}, cir_predbranch_shifted[0]} :
|
||||
{cir_predbranch_shifted[2], {2{fetch_predbranch}}};
|
||||
|
||||
wire [1:0] fetch_fill_amount = cir_room_for_fetch && fetch_data_vld ? (
|
||||
&fetch_data_hwvld ? 2'h2 : 2'h1
|
||||
) : 2'h0;
|
||||
|
@ -347,6 +439,7 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
hwbuf <= 16'h0;
|
||||
cir <= 16'h0;
|
||||
cir_bus_err <= 3'h0;
|
||||
cir_predbranch_reg <= 3'h0;
|
||||
end else begin
|
||||
`ifdef FORMAL
|
||||
assert(cir_vld <= 2);
|
||||
|
@ -357,6 +450,7 @@ always @ (posedge clk or negedge rst_n) begin
|
|||
buf_level <= buf_level_next;
|
||||
cir_vld <= buf_level_next & ~(buf_level_next >> 1'b1);
|
||||
cir_bus_err <= cir_bus_err_plus_fetch;
|
||||
cir_predbranch_reg <= cir_predbranch_plus_fetch;
|
||||
{hwbuf, cir} <= instr_data_plus_fetch;
|
||||
end
|
||||
end
|
||||
|
@ -376,6 +470,7 @@ end
|
|||
`endif
|
||||
|
||||
assign cir_err = cir_bus_err[1:0];
|
||||
assign cir_predbranch = cir_predbranch_reg[1:0];
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Register number predecode
|
||||
|
|
|
@ -37,6 +37,11 @@ always @ (posedge clk)
|
|||
(*keep*) wire jump_target_vld;
|
||||
(*keep*) wire jump_target_rdy;
|
||||
|
||||
(*keep*) wire btb_set;
|
||||
(*keep*) wire [31:0] btb_set_src_addr;
|
||||
(*keep*) wire [31:0] btb_set_target_addr;
|
||||
(*keep*) wire btb_clear;
|
||||
|
||||
(*keep*) wire [31:0] cir;
|
||||
(*keep*) wire [1:0] cir_vld;
|
||||
(*keep*) wire [1:0] cir_use;
|
||||
|
@ -74,6 +79,11 @@ hazard3_frontend #(
|
|||
.jump_target_vld (jump_target_vld),
|
||||
.jump_target_rdy (jump_target_rdy),
|
||||
|
||||
.btb_set (btb_set),
|
||||
.btb_set_src_addr (btb_set_src_addr),
|
||||
.btb_set_target_addr (btb_set_target_addr),
|
||||
.btb_clear (btb_clear),
|
||||
|
||||
.cir (cir),
|
||||
.cir_vld (cir_vld),
|
||||
.cir_use (cir_use),
|
||||
|
@ -125,8 +135,9 @@ assign dbg_instr_data_vld = 1'b0;
|
|||
|
||||
// Don't consume nonexistent data
|
||||
always assume(cir_use <= cir_vld);
|
||||
// Consume an amount consistent with the instruction size (as the frontend
|
||||
// also considers instruction size when a cir_flush_behind occurs)
|
||||
|
||||
// Always consume an amount consistent with the instruction size (as the
|
||||
// frontend also considers instruction size when a cir_flush_behind occurs)
|
||||
always assume(cir_use == 0 || cir_use == (cir[1:0] == 2'b11 ? 2'd2 : 2'd1));
|
||||
|
||||
assign jump_target[0] = 1'b0;
|
||||
|
@ -158,6 +169,10 @@ end
|
|||
// assert on it inside the frontend.
|
||||
always @ (posedge clk) assume(!(jump_target_vld && !$past(rst_n)));
|
||||
|
||||
// TODO would be nice to cover this here as well as in the full-core test
|
||||
assign btb_set = 1'b0;
|
||||
assign btb_clear = 1'b0;
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Properties
|
||||
|
||||
|
|
Loading…
Reference in New Issue