From 887c93dbf011551ba72143db605078f0607fbf81 Mon Sep 17 00:00:00 2001
From: Luke Wren
Date: Wed, 2 Mar 2022 18:35:16 +0000
Subject: [PATCH] Reuse predecoded regnums for bypass mux (though can't be used for zeroing unfortunately)

---

Reviewer notes (free-text area of the patch; not part of the commit message
and not part of any hunk):

* Formatting: this copy of the patch had its newlines collapsed. Line breaks
  below were restored from the hunk line counts and the diffstat; indentation
  and column alignment inside each line were NOT recoverable and are
  approximate. Apply with whitespace-tolerant options, or regenerate the patch
  from the commit named in the first line.

* hdl/hazard3_core.v: removes the f_regnum_vld wire and its connection to the
  frontend's next_regs_vld port. Adds two registers, d_rs1_predecoded and
  d_rs2_predecoded, which reset to zero and capture f_rs1 / f_rs2 on a clock
  edge when x_stall is low. The bypass mux now compares xm_rd and mw_rd
  against these registers instead of d_rs1 / d_rs2. The force-to-zero test
  (~|d_rs1, ~|d_rs2) still uses the stage 2 decoded register numbers.
  NOTE(review): the capture condition is !x_stall only; presumably this is
  the same condition under which the instruction held in stage 2 is replaced
  and the register file read address advances -- confirm against the frontend
  and register file enables, which are not visible in this patch.

* hdl/hazard3_decode.v: RV_LUI changes d_aluop from ALUOP_ADD to ALUOP_RS2;
  the other assignments on that line are unchanged.
  NOTE(review): presumably ALUOP_RS2 passes ALU operand B (here the immediate)
  straight through -- confirm against the ALU implementation.

* hdl/hazard3_frontend.v: removes the next_regs_vld output port and the
  assign that drove it. The last hunk's final context line ends mid-statement;
  the remainder of that assign lives in the target file.
  NOTE(review): the split of the "assign next_regs_rs1 =" context across lines
  is not fixed by the hunk counts; a two-line reading with a truncated final
  context line fits equally well.

 hdl/hazard3_core.v     | 30 ++++++++++++++++++++++++------
 hdl/hazard3_decode.v   |  2 +-
 hdl/hazard3_frontend.v |  3 ---
 3 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v
index 6e87b3a..2596c73 100644
--- a/hdl/hazard3_core.v
+++ b/hdl/hazard3_core.v
@@ -89,7 +89,6 @@ wire f_jump_rdy;
 wire f_jump_now = f_jump_req && f_jump_rdy;
 
 // Predecoded register numbers, for register file access
-wire f_regnum_vld;
 wire [W_REGADDR-1:0] f_rs1;
 wire [W_REGADDR-1:0] f_rs2;
 
@@ -132,7 +131,6 @@ hazard3_frontend #(
 
 	.next_regs_rs1 (f_rs1),
 	.next_regs_rs2 (f_rs2),
-	.next_regs_vld (f_regnum_vld),
 
 	.debug_mode (debug_mode),
 	.dbg_instr_data (dbg_instr_data),
@@ -344,21 +342,41 @@ wire m_wfi_stall_clear;
 
 // ALU, operand muxes and bypass
 
+// Approximate regnums were predecoded in stage 1, for regfile read.
+// (Approximate in the sense that they are invalid when the instruction
+// doesn't *have* a register operand on that port.) These aren't usable for
+// hazard checking but are fine for bypass, and make the bypass mux
+// independent of stage 2 decode.
+
+reg [W_REGADDR-1:0] d_rs1_predecoded;
+reg [W_REGADDR-1:0] d_rs2_predecoded;
+
+always @ (posedge clk or negedge rst_n) begin
+	if (!rst_n) begin
+		d_rs1_predecoded <= {W_REGADDR{1'b0}};
+		d_rs2_predecoded <= {W_REGADDR{1'b0}};
+	end else if (!x_stall) begin
+		d_rs1_predecoded <= f_rs1;
+		d_rs2_predecoded <= f_rs2;
+	end
+end
+
 always @ (*) begin
 	if (~|d_rs1) begin
+		// Note the predecoded version is not sufficiently precise for zeroing
 		x_rs1_bypass = {W_DATA{1'b0}};
-	end else if (xm_rd == d_rs1) begin
+	end else if (xm_rd == d_rs1_predecoded) begin
 		x_rs1_bypass = xm_result;
-	end else if (mw_rd == d_rs1 && !REDUCED_BYPASS) begin
+	end else if (mw_rd == d_rs1_predecoded && !REDUCED_BYPASS) begin
 		x_rs1_bypass = mw_result;
 	end else begin
 		x_rs1_bypass = x_rdata1;
 	end
 	if (~|d_rs2) begin
 		x_rs2_bypass = {W_DATA{1'b0}};
-	end else if (xm_rd == d_rs2) begin
+	end else if (xm_rd == d_rs2_predecoded) begin
 		x_rs2_bypass = xm_result;
-	end else if (mw_rd == d_rs2 && !REDUCED_BYPASS) begin
+	end else if (mw_rd == d_rs2_predecoded && !REDUCED_BYPASS) begin
 		x_rs2_bypass = mw_result;
 	end else begin
 		x_rs2_bypass = x_rdata2;
diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v
index 0f69b8f..feed576 100644
--- a/hdl/hazard3_decode.v
+++ b/hdl/hazard3_decode.v
@@ -195,7 +195,7 @@ always @ (*) begin
 	RV_BGEU: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
 	RV_JALR: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_addr_is_regoffs = 1'b1; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
 	RV_JAL: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; d_rs1 = X0; d_rs2 = X0; d_aluop = ALUOP_ADD; d_alusrc_a = ALUSRCA_PC; d_alusrc_b = ALUSRCB_IMM; d_imm = d_instr_is_32bit ? 32'h4 : 32'h2; end
-	RV_LUI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
+	RV_LUI: begin d_aluop = ALUOP_RS2; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
 	RV_AUIPC: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC; d_rs1 = X0; end
 	RV_ADDI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
 	RV_SLLI: begin d_aluop = ALUOP_SLL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v
index 4bf9449..d368b03 100644
--- a/hdl/hazard3_frontend.v
+++ b/hdl/hazard3_frontend.v
@@ -55,7 +55,6 @@ module hazard3_frontend #(
 	// cycle. These go straight to the register file read ports.
 	output wire [4:0] next_regs_rs1,
 	output wire [4:0] next_regs_rs2,
-	output wire next_regs_vld,
 
 	// Debugger instruction injection: instruction fetch is suppressed when in
 	// debug halt state, and the DM can then inject instructions into the last
@@ -389,8 +388,6 @@ assign cir_err = cir_bus_err[1:0];
 wire [31:0] next_instr = instr_data_plus_fetch[31:0];
 wire next_instr_is_32bit = next_instr[1:0] == 2'b11;
 
-assign next_regs_vld = next_instr_is_32bit ? buf_level_next[1] : |buf_level_next;
-
 assign next_regs_rs1 =
 	next_instr_is_32bit ? next_instr[19:15] : // 32-bit R, S, B formats
 	next_instr[1:0] == 2'b00 && next_instr[15:13] == 3'b000 ? 5'd2 : // c.addi4spn