Use branch target adder for JALR target, and use ALU for JAL/JALR linkaddr instead of muxing in next_pc

This commit is contained in:
Luke Wren 2021-05-23 09:12:50 +01:00
parent 90acfdcbe8
commit 5e61c9f9ac
2 changed files with 56 additions and 73 deletions

View File

@ -153,8 +153,6 @@ end
//synthesis translate_on
// To X
wire d_jump_req;
wire [W_ADDR-1:0] d_jump_target;
wire [W_DATA-1:0] d_imm;
wire [W_REGADDR-1:0] d_rs1;
wire [W_REGADDR-1:0] d_rs2;
@ -165,16 +163,16 @@ wire [W_ALUOP-1:0] d_aluop;
wire [W_MEMOP-1:0] d_memop;
wire [W_MULOP-1:0] d_mulop;
wire [W_BCOND-1:0] d_branchcond;
wire [W_ADDR-1:0] d_jump_offs;
wire d_jump_is_regoffs;
wire d_result_is_linkaddr;
wire [W_ADDR-1:0] d_pc;
wire [W_ADDR-1:0] d_mispredict_addr;
wire [W_EXCEPT-1:0] d_except;
wire d_csr_ren;
wire d_csr_wen;
wire [1:0] d_csr_wtype;
wire d_csr_w_imm;
wire x_jump_not_except;
hazard3_decode #(
`include "hazard3_config_inst.vh"
@ -186,9 +184,8 @@ hazard3_decode #(
.fd_cir_vld (fd_cir_vld),
.df_cir_use (df_cir_use),
.df_cir_lock (df_cir_lock),
.d_jump_req (d_jump_req),
.d_jump_target (d_jump_target),
.d_pc (d_pc),
.x_jump_not_except (x_jump_not_except),
.d_stall (d_stall),
.x_stall (x_stall),
@ -210,11 +207,9 @@ hazard3_decode #(
.d_csr_wtype (d_csr_wtype),
.d_csr_w_imm (d_csr_w_imm),
.d_branchcond (d_branchcond),
.d_jump_target (d_jump_target),
.d_jump_offs (d_jump_offs),
.d_jump_is_regoffs (d_jump_is_regoffs),
.d_result_is_linkaddr (d_result_is_linkaddr),
.d_pc (d_pc),
.d_mispredict_addr (d_mispredict_addr),
.d_except (d_except)
);
@ -283,33 +278,8 @@ always @ (*) begin
end
end
// AHB transaction request
// ALU, operand muxes and bypass
wire x_memop_vld = !d_memop[3];
wire x_memop_write = d_memop == MEMOP_SW || d_memop == MEMOP_SH || d_memop == MEMOP_SB;
wire x_unaligned_addr =
bus_hsize_d == HSIZE_WORD && |bus_haddr_d[1:0] ||
bus_hsize_d == HSIZE_HWORD && bus_haddr_d[0];
wire x_except_load_misaligned = x_memop_vld && x_unaligned_addr && !x_memop_write;
wire x_except_store_misaligned = x_memop_vld && x_unaligned_addr && x_memop_write;
always @ (*) begin
// Need to be careful not to use anything hready-sourced to gate htrans!
bus_haddr_d = x_alu_add;
bus_hwrite_d = x_memop_write;
case (d_memop)
MEMOP_LW: bus_hsize_d = HSIZE_WORD;
MEMOP_SW: bus_hsize_d = HSIZE_WORD;
MEMOP_LH: bus_hsize_d = HSIZE_HWORD;
MEMOP_LHU: bus_hsize_d = HSIZE_HWORD;
MEMOP_SH: bus_hsize_d = HSIZE_HWORD;
default: bus_hsize_d = HSIZE_BYTE;
endcase
bus_aph_req_d = x_memop_vld && !(x_stall_raw || x_trap_enter);
end
// ALU operand muxes and bypass
always @ (*) begin
if (~|d_rs1) begin
x_rs1_bypass = {W_DATA{1'b0}};
@ -341,6 +311,41 @@ always @ (*) begin
x_op_b = x_rs2_bypass;
end
// ALU instance for this stage. d_aluop selects the operation and the operands
// are the x_op_a / x_op_b values chosen by the operand muxes.
// Three outputs are exposed separately:
//   result     -> x_alu_result, the general result registered into xm_result
//   result_add -> x_alu_add, the adder output, used as the data-bus address
//   cmp        -> x_alu_cmp, the comparison flag tested by the branch logic
hazard3_alu alu (
.aluop (d_aluop),
.op_a (x_op_a),
.op_b (x_op_b),
.result (x_alu_result),
.result_add (x_alu_add),
.cmp (x_alu_cmp)
);
// AHB transaction request
// Builds the data-bus address-phase signals (bus_haddr_d, bus_hwrite_d,
// bus_hsize_d, bus_aph_req_d) from d_memop and the ALU adder output, and
// derives the load/store misalignment exception flags.
// A memory operation is treated as valid when bit 3 of d_memop is clear.
wire x_memop_vld = !d_memop[3];
// Only the three store memops count as writes; everything else is a read.
wire x_memop_write = d_memop == MEMOP_SW || d_memop == MEMOP_SH || d_memop == MEMOP_SB;
// Word transfers require both low address bits to be zero, halfword transfers
// require bit 0 to be zero. Byte transfers never match either term.
wire x_unaligned_addr =
bus_hsize_d == HSIZE_WORD && |bus_haddr_d[1:0] ||
bus_hsize_d == HSIZE_HWORD && bus_haddr_d[0];
// Misalignment is reported separately for reads and writes, and only when
// the memop is valid.
wire x_except_load_misaligned = x_memop_vld && x_unaligned_addr && !x_memop_write;
wire x_except_store_misaligned = x_memop_vld && x_unaligned_addr && x_memop_write;
always @ (*) begin
// Need to be careful not to use anything hready-sourced to gate htrans!
// The address comes straight from the ALU adder output.
bus_haddr_d = x_alu_add;
bus_hwrite_d = x_memop_write;
// Transfer size follows the memop; any memop not listed here falls through
// to byte size.
case (d_memop)
MEMOP_LW: bus_hsize_d = HSIZE_WORD;
MEMOP_SW: bus_hsize_d = HSIZE_WORD;
MEMOP_LH: bus_hsize_d = HSIZE_HWORD;
MEMOP_LHU: bus_hsize_d = HSIZE_HWORD;
MEMOP_SH: bus_hsize_d = HSIZE_HWORD;
default: bus_hsize_d = HSIZE_BYTE;
endcase
// The request is suppressed while x_stall_raw or x_trap_enter is asserted.
bus_aph_req_d = x_memop_vld && !(x_stall_raw || x_trap_enter);
end
// CSRs and Trap Handling
wire x_except_ecall = d_except == EXCEPT_ECALL;
@ -519,7 +524,6 @@ end
always @ (posedge clk)
if (!m_stall) begin
xm_result <=
d_result_is_linkaddr ? d_mispredict_addr :
d_csr_ren ? x_csr_rdata :
EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
x_alu_result;
@ -529,7 +533,8 @@ always @ (posedge clk)
// Branch handling
// For JALR, the LSB of the result must be cleared by hardware
wire [W_ADDR-1:0] x_taken_jump_target = d_jump_is_regoffs ? x_alu_add & ~32'h1 : d_jump_target;
// Jump target from a dedicated adder: the base is the bypassed rs1 value when
// d_jump_is_regoffs is set, otherwise d_pc, and d_jump_offs is added to it.
// Bit 0 of the sum is masked off for every jump source, not only the
// register-relative case.
wire [W_ADDR-1:0] x_taken_jump_target = ((d_jump_is_regoffs ? x_rs1_bypass : d_pc) + d_jump_offs) & ~32'h1;
wire [W_ADDR-1:0] x_jump_target =
x_trap_exit ? x_mepc : // Note precedence -- it's possible to have enter && exit, but in this case enter_rdy is false.
x_trap_enter ? x_trap_addr :
@ -542,19 +547,11 @@ wire x_jump_req = x_trap_enter || x_trap_exit || !x_stall_raw && (
d_branchcond == BCOND_NZERO && x_alu_cmp
);
assign f_jump_req = d_jump_req || x_jump_req;
assign f_jump_req = x_jump_req;
assign f_jump_target = x_jump_target;
assign x_jump_not_except = !(x_trap_enter || x_trap_exit);
hazard3_alu alu (
.aluop (d_aluop),
.op_a (x_op_a),
.op_b (x_op_b),
.result (x_alu_result),
.result_add (x_alu_add),
.cmp (x_alu_cmp)
);
// ----------------------------------------------------------------------------
// Pipe Stage M

View File

@ -27,8 +27,6 @@ module hazard3_decode #(
input wire [1:0] fd_cir_vld,
output wire [1:0] df_cir_use,
output wire df_cir_lock,
output reg d_jump_req,
output reg [W_ADDR-1:0] d_jump_target,
output wire [W_ADDR-1:0] d_pc,
output wire d_stall,
@ -36,6 +34,7 @@ module hazard3_decode #(
input wire f_jump_rdy,
input wire f_jump_now,
input wire [W_ADDR-1:0] f_jump_target,
input wire x_jump_not_except,
output reg [W_DATA-1:0] d_imm,
output reg [W_REGADDR-1:0] d_rs1,
@ -51,10 +50,8 @@ module hazard3_decode #(
output reg [1:0] d_csr_wtype,
output reg d_csr_w_imm,
output reg [W_BCOND-1:0] d_branchcond,
output reg [W_ADDR-1:0] d_jump_target,
output reg [W_ADDR-1:0] d_jump_offs,
output reg d_jump_is_regoffs,
output reg d_result_is_linkaddr,
output reg [W_ADDR-1:0] d_mispredict_addr,
output reg [2:0] d_except
);
@ -93,8 +90,8 @@ wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[3
// PC/CIR control
wire d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit;
assign d_stall = x_stall ||
d_starved || (d_jump_req && !f_jump_rdy);
assign d_stall = x_stall || d_starved;
assign df_cir_use =
d_starved || d_stall ? 2'h0 :
d_instr_is_32bit ? 2'h2 : 2'h1;
@ -109,9 +106,7 @@ assign df_cir_use =
// Note that it is not possible to simply gate the jump request based on X stalling,
// because X stall is a function of hready, and jump request feeds haddr htrans etc.
// Note it is possible for d_jump_req and m_jump_req to be asserted
// simultaneously, hence checking flush:
wire jump_caused_by_d = d_jump_req && f_jump_rdy; /// FIXME what about JALR?
// A jump is attributed to the instruction held in the CIR only when it is
// not a trap entry/exit. That qualifier reaches this module on the
// x_jump_not_except input, so use that name here: x_jump_from_instr is not
// a port or declared net of this module, and referencing it would either
// fail elaboration or create an undriven implicit net.
// NOTE(review): f_jump_now is presumably "the frontend takes a jump this
// cycle" -- confirm against the frontend's definition.
wire jump_caused_by_d = f_jump_now && x_jump_not_except;
wire assert_cir_lock = jump_caused_by_d && d_stall;
wire deassert_cir_lock = !d_stall;
reg cir_lock_prev;
@ -154,25 +149,17 @@ end
// If the current CIR is there due to locking, it is a jump which has already had primary effect.
wire jump_enable = !d_starved && !cir_lock_prev && !d_invalid;
reg [W_ADDR-1:0] d_jump_offs;
always @ (*) begin
// JAL is major opcode 1101111,
// JALR is 1100111,
// branches are 1100011.
case (d_instr[3])
1'b1: d_jump_offs = d_imm_j;
casez (d_instr[3:2])
2'b1z: d_jump_offs = d_imm_j;
2'b01: d_jump_offs = d_imm_i;
default: d_jump_offs = d_imm_b;
endcase
d_jump_target = pc + d_jump_offs;
casez (d_instr)
RV_JAL: d_jump_req = jump_enable;
default: d_jump_req = 1'b0;
endcase
d_mispredict_addr = pc_next;
end
// ----------------------------------------------------------------------------
@ -197,7 +184,6 @@ always @ (*) begin
d_csr_w_imm = 1'b0;
d_branchcond = BCOND_NEVER;
d_jump_is_regoffs = 1'b0;
d_result_is_linkaddr = 1'b0;
d_invalid_32bit = 1'b0;
d_except = EXCEPT_NONE;
@ -208,8 +194,8 @@ always @ (*) begin
RV_BGE: begin d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_ZERO; end
RV_BLTU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_NZERO; end
RV_BGEU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
RV_JALR: begin d_result_is_linkaddr = 1'b1; d_jump_is_regoffs = 1'b1; d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_branchcond = BCOND_ALWAYS; end
RV_JAL: begin d_result_is_linkaddr = 1'b1; d_rs2 = X0; d_rs1 = X0; end
RV_JALR: begin d_branchcond = BCOND_ALWAYS; d_jump_is_regoffs = 1'b1; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
RV_JAL: begin d_branchcond = BCOND_ALWAYS; d_rs1 = X0; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
RV_LUI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
RV_AUIPC: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC; d_rs1 = X0; end
RV_ADDI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
@ -248,7 +234,7 @@ always @ (*) begin
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
RV_FENCE: begin d_rd = X0; end // NOP
RV_FENCE_I: begin d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // Pretend we are recovering from a mispredicted-taken backward branch. Mispredict recovery flushes frontend.
RV_FENCE_I: begin d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // FIXME this is probably busted now. Maybe implement as an exception?
RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRS: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRC: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end