Use the branch target adder for load/store addresses. Preparing for AMO ALU deletion

This commit is contained in:
Luke Wren 2021-12-17 22:36:40 +00:00
parent a35739baf1
commit 7485269ddf
3 changed files with 48 additions and 44 deletions

View File

@ -14,7 +14,6 @@ module hazard3_alu #(
input wire [W_DATA-1:0] op_a,
input wire [W_DATA-1:0] op_b,
output reg [W_DATA-1:0] result,
output wire [W_DATA-1:0] result_add,
output wire cmp
);
@ -23,9 +22,6 @@ module hazard3_alu #(
// ----------------------------------------------------------------------------
// Fiddle around with add/sub, comparisons etc (all related).
// This adder is exposed directly on the result_add port, since it may be used
// for load/store addresses.
function msb;
input [W_DATA-1:0] x;
begin
@ -60,7 +56,6 @@ wire lt = msb(op_a) == msb(op_b) ? msb(sum) :
msb(op_a) ;
assign cmp = aluop == ALUOP_SUB ? |op_xor : lt;
assign result_add = sum;
// ----------------------------------------------------------------------------

View File

@ -171,8 +171,8 @@ wire [W_ALUOP-1:0] d_aluop;
wire [W_MEMOP-1:0] d_memop;
wire [W_MULOP-1:0] d_mulop;
wire [W_BCOND-1:0] d_branchcond;
wire [W_ADDR-1:0] d_jump_offs;
wire d_jump_is_regoffs;
wire [W_ADDR-1:0] d_addr_offs;
wire d_addr_is_regoffs;
wire [W_ADDR-1:0] d_pc;
wire [W_EXCEPT-1:0] d_except;
wire d_wfi;
@ -218,8 +218,8 @@ hazard3_decode #(
.d_csr_wtype (d_csr_wtype),
.d_csr_w_imm (d_csr_w_imm),
.d_branchcond (d_branchcond),
.d_jump_offs (d_jump_offs),
.d_jump_is_regoffs (d_jump_is_regoffs),
.d_addr_offs (d_addr_offs),
.d_addr_is_regoffs (d_addr_is_regoffs),
.d_except (d_except),
.d_wfi (d_wfi)
);
@ -243,7 +243,6 @@ reg [W_DATA-1:0] x_rs2_bypass;
reg [W_DATA-1:0] x_op_a;
reg [W_DATA-1:0] x_op_b;
wire [W_DATA-1:0] x_alu_result;
wire [W_DATA-1:0] x_alu_add;
wire x_alu_cmp;
wire [W_DATA-1:0] m_trap_addr;
@ -257,6 +256,7 @@ reg [W_REGADDR-1:0] xm_rs2;
reg [W_REGADDR-1:0] xm_rd;
reg [W_DATA-1:0] xm_result;
reg [W_DATA-1:0] xm_store_data;
reg [1:0] xm_addr_align;
reg [W_MEMOP-1:0] xm_memop;
reg [W_EXCEPT-1:0] xm_except;
reg xm_wfi;
@ -385,7 +385,6 @@ hazard3_alu #(
.op_a (x_op_a),
.op_b (x_op_b),
.result (x_alu_result),
.result_add (x_alu_add),
.cmp (x_alu_cmp)
);
@ -493,9 +492,15 @@ assign bus_aph_excl_d = |EXTENSION_A && (
d_memop_is_amo
);
// This adder is used for both branch targets and load/store addresses.
// Supporting all branch types already requires rs1 + I-fmt, and pc + B-fmt.
// B-fmt is almost identical to S-fmt, so rs1 + S-fmt is almost free.
wire [W_ADDR-1:0] x_addr_sum = (d_addr_is_regoffs ? x_rs1_bypass : d_pc) + d_addr_offs;
always @ (*) begin
// Need to be careful not to use anything hready-sourced to gate htrans!
bus_haddr_d = x_alu_add;
bus_haddr_d = x_addr_sum;
bus_hwrite_d = x_memop_write;
case (d_memop)
MEMOP_LW: bus_hsize_d = HSIZE_WORD;
@ -766,6 +771,7 @@ always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
xm_result <= {W_DATA{1'b0}};
xm_store_data <= {W_DATA{1'b0}};
xm_addr_align <= 2'b00;
end else if (!m_stall) begin
xm_result <=
d_csr_ren ? x_csr_rdata :
@ -773,6 +779,7 @@ always @ (posedge clk or negedge rst_n) begin
|EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result :
x_alu_result;
xm_store_data <= x_rs2_bypass;
xm_addr_align <= x_addr_sum[1:0];
end else if (d_memop_is_amo && x_amo_phase == 3'h1 && bus_dph_ready_d) begin
xm_store_data <= x_rs2_bypass;
@ -782,7 +789,7 @@ end
// Branch handling
// For JALR, the LSB of the result must be cleared by hardware
wire [W_ADDR-1:0] x_jump_target = ((d_jump_is_regoffs ? x_rs1_bypass : d_pc) + d_jump_offs) & ~32'h1;
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;
// Be careful not to take branches whose comparisons depend on a load result
assign x_jump_req = !x_stall_on_raw && (
@ -886,7 +893,7 @@ always @ (*) begin
if (|EXTENSION_A && m_amo_wdata_valid)
bus_wdata_d = m_amo_wdata;
casez ({xm_memop, xm_result[1:0]})
casez ({xm_memop, xm_addr_align[1:0]})
{MEMOP_LH , 2'b0z}: m_rdata_pick_sext = {{16{bus_rdata_d[15]}}, bus_rdata_d[15: 0]};
{MEMOP_LH , 2'b1z}: m_rdata_pick_sext = {{16{bus_rdata_d[31]}}, bus_rdata_d[31:16]};
{MEMOP_LHU , 2'b0z}: m_rdata_pick_sext = {{16{1'b0 }}, bus_rdata_d[15: 0]};

View File

@ -42,8 +42,8 @@ module hazard3_decode #(
output reg [1:0] d_csr_wtype,
output reg d_csr_w_imm,
output reg [W_BCOND-1:0] d_branchcond,
output reg [W_ADDR-1:0] d_jump_offs,
output reg d_jump_is_regoffs,
output reg [W_ADDR-1:0] d_addr_offs,
output reg d_addr_is_regoffs,
output reg [W_EXCEPT-1:0] d_except,
output reg d_wfi
);
@ -147,14 +147,16 @@ always @ (posedge clk or negedge rst_n) begin
end
end
always @ (*) begin
// JAL is major opcode 1101111,
// JALR is 1100111,
// branches are 1100011.
casez (d_instr[3:2])
2'b1z: d_jump_offs = d_imm_j;
2'b01: d_jump_offs = d_imm_i;
default: d_jump_offs = d_imm_b;
casez ({|EXTENSION_A, d_instr[6:2]})
{1'bz, 5'b11011}: d_addr_offs = d_imm_j ; // JAL
{1'bz, 5'b11000}: d_addr_offs = d_imm_b ; // Branches
{1'bz, 5'b01000}: d_addr_offs = d_imm_s ; // Store
{1'bz, 5'b11001}: d_addr_offs = d_imm_i ; // JALR
{1'bz, 5'b00000}: d_addr_offs = d_imm_i ; // Loads
{1'b1, 5'b01011}: d_addr_offs = 32'h0000_0000; // Atomics
default: d_addr_offs = 32'hxxxx_xxxx;
endcase
end
@ -179,7 +181,7 @@ always @ (*) begin
d_csr_wtype = CSR_WTYPE_W;
d_csr_w_imm = 1'b0;
d_branchcond = BCOND_NEVER;
d_jump_is_regoffs = 1'b0;
d_addr_is_regoffs = 1'b0;
d_invalid_32bit = 1'b0;
d_except = EXCEPT_NONE;
d_wfi = 1'b0;
@ -191,7 +193,7 @@ always @ (*) begin
RV_BGE: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_ZERO; end
RV_BLTU: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_NZERO; end
RV_BGEU: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
RV_JALR: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_jump_is_regoffs = 1'b1; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
RV_JALR: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
RV_JAL: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_rs1 = X0; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
RV_LUI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
RV_AUIPC: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC; d_rs1 = X0; end
@ -214,14 +216,14 @@ always @ (*) begin
RV_SRA: begin d_aluop = ALUOP_SRA; end
RV_OR: begin d_aluop = ALUOP_OR; end
RV_AND: begin d_aluop = ALUOP_AND; end
RV_LB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LB; end
RV_LH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LH; end
RV_LW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LW; end
RV_LBU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LBU; end
RV_LHU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LHU; end
RV_SB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SB; d_rd = X0; end
RV_SH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SH; d_rd = X0; end
RV_SW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SW; d_rd = X0; end
RV_LB: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LB; end
RV_LH: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LH; end
RV_LW: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LW; end
RV_LBU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LBU; end
RV_LHU: begin d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_memop = MEMOP_LHU; end
RV_SB: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SB; d_rd = X0; end
RV_SH: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SH; d_rd = X0; end
RV_SW: begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SW; d_rd = X0; end
RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end
RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end
@ -232,17 +234,17 @@ always @ (*) begin
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
RV_LR_W: if (EXTENSION_A) begin d_rs2 = X0; d_memop = MEMOP_LR_W; end else begin d_invalid_32bit = 1'b1; end
RV_SC_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SC_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOSWAP_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOSWAP_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOADD_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOADD_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOXOR_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOXOR_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOAND_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOAND_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOOR_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOOR_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMIN_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOMIN_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMAX_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOMAX_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMINU_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOMINU_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMAXU_W: if (EXTENSION_A) begin d_imm = {W_DATA{1'b0}}; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_AMOMAXU_W; end else begin d_invalid_32bit = 1'b1; end
RV_LR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_LR_W; d_rs2 = X0; end else begin d_invalid_32bit = 1'b1; end
RV_SC_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_SC_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOSWAP_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOSWAP_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOADD_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOADD_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOXOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOXOR_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOAND_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOAND_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOOR_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOOR_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMIN_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMIN_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMAX_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAX_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMINU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMINU_W; end else begin d_invalid_32bit = 1'b1; end
RV_AMOMAXU_W: if (EXTENSION_A) begin d_addr_is_regoffs = 1'b1; d_memop = MEMOP_AMOMAXU_W; end else begin d_invalid_32bit = 1'b1; end
RV_SH1ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH1ADD; end else begin d_invalid_32bit = 1'b1; end
RV_SH2ADD: if (EXTENSION_ZBA) begin d_aluop = ALUOP_SH2ADD; end else begin d_invalid_32bit = 1'b1; end