From 20cf408632fa4e85c9678249d5ab7f1d64db171c Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Mon, 4 Apr 2022 20:16:19 +0100 Subject: [PATCH] Add fine (as well as coarse) register predecode, so that predecoded regnum can be used in bypass zeroing. --- hdl/hazard3_core.v | 67 ++++++++++++++++++++------------------- hdl/hazard3_frontend.v | 72 +++++++++++++++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 47 deletions(-) diff --git a/hdl/hazard3_core.v b/hdl/hazard3_core.v index 6c48ca2..5b8489c 100644 --- a/hdl/hazard3_core.v +++ b/hdl/hazard3_core.v @@ -89,8 +89,10 @@ wire f_jump_rdy; wire f_jump_now = f_jump_req && f_jump_rdy; // Predecoded register numbers, for register file access -wire [W_REGADDR-1:0] f_rs1; -wire [W_REGADDR-1:0] f_rs2; +wire [W_REGADDR-1:0] f_rs1_coarse; +wire [W_REGADDR-1:0] f_rs2_coarse; +wire [W_REGADDR-1:0] f_rs1_fine; +wire [W_REGADDR-1:0] f_rs2_fine; wire [31:0] fd_cir; wire [1:0] fd_cir_err; @@ -107,35 +109,37 @@ hazard3_frontend #( .FIFO_DEPTH(2), `include "hazard3_config_inst.vh" ) frontend ( - .clk (clk), - .rst_n (rst_n), + .clk (clk), + .rst_n (rst_n), - .mem_size (f_mem_size), - .mem_addr (bus_haddr_i), - .mem_addr_vld (bus_aph_req_i), - .mem_addr_rdy (bus_aph_ready_i), + .mem_size (f_mem_size), + .mem_addr (bus_haddr_i), + .mem_addr_vld (bus_aph_req_i), + .mem_addr_rdy (bus_aph_ready_i), - .mem_data (bus_rdata_i), - .mem_data_err (bus_dph_err_i), - .mem_data_vld (bus_dph_ready_i), + .mem_data (bus_rdata_i), + .mem_data_err (bus_dph_err_i), + .mem_data_vld (bus_dph_ready_i), - .jump_target (f_jump_target), - .jump_target_vld (f_jump_req), - .jump_target_rdy (f_jump_rdy), + .jump_target (f_jump_target), + .jump_target_vld (f_jump_req), + .jump_target_rdy (f_jump_rdy), - .cir (fd_cir), - .cir_err (fd_cir_err), - .cir_vld (fd_cir_vld), - .cir_use (df_cir_use), - .cir_lock (df_cir_lock), + .cir (fd_cir), + .cir_err (fd_cir_err), + .cir_vld (fd_cir_vld), + .cir_use (df_cir_use), + .cir_lock (df_cir_lock), - .next_regs_rs1 
(f_rs1), - .next_regs_rs2 (f_rs2), + .predecode_rs1_coarse (f_rs1_coarse), + .predecode_rs2_coarse (f_rs2_coarse), + .predecode_rs1_fine (f_rs1_fine), + .predecode_rs2_fine (f_rs2_fine), - .debug_mode (debug_mode), - .dbg_instr_data (dbg_instr_data), - .dbg_instr_data_vld (dbg_instr_data_vld), - .dbg_instr_data_rdy (dbg_instr_data_rdy) + .debug_mode (debug_mode), + .dbg_instr_data (dbg_instr_data), + .dbg_instr_data_vld (dbg_instr_data_vld), + .dbg_instr_data_rdy (dbg_instr_data_rdy) ); // ---------------------------------------------------------------------------- @@ -356,8 +360,8 @@ always @ (posedge clk or negedge rst_n) begin d_rs1_predecoded <= {W_REGADDR{1'b0}}; d_rs2_predecoded <= {W_REGADDR{1'b0}}; end else if (d_starved || !x_stall) begin - d_rs1_predecoded <= f_rs1; - d_rs2_predecoded <= f_rs2; + d_rs1_predecoded <= f_rs1_fine; + d_rs2_predecoded <= f_rs2_fine; end end @@ -373,8 +377,7 @@ end `endif always @ (*) begin - if (~|d_rs1) begin - // Note the predecoded version is not sufficiently precise for zeroing + if (~|d_rs1_predecoded) begin x_rs1_bypass = {W_DATA{1'b0}}; end else if (xm_rd == d_rs1_predecoded) begin x_rs1_bypass = xm_result; @@ -383,7 +386,7 @@ always @ (*) begin end else begin x_rs1_bypass = x_rdata1; end - if (~|d_rs2) begin + if (~|d_rs2_predecoded) begin x_rs2_bypass = {W_DATA{1'b0}}; end else if (xm_rd == d_rs2_predecoded) begin x_rs2_bypass = xm_result; @@ -1041,9 +1044,9 @@ hazard3_regfile_1w2r #( .rst_n (rst_n), // On downstream stall, we feed D's addresses back into regfile // so that output does not change. - .raddr1 (x_stall && !d_starved ? d_rs1 : f_rs1), + .raddr1 (x_stall && !d_starved ? d_rs1 : f_rs1_coarse), .rdata1 (x_rdata1), - .raddr2 (x_stall && !d_starved ? d_rs2 : f_rs2), + .raddr2 (x_stall && !d_starved ? 
d_rs2 : f_rs2_coarse), .rdata2 (x_rdata2), .waddr (xm_rd), diff --git a/hdl/hazard3_frontend.v b/hdl/hazard3_frontend.v index b206305..aef1165 100644 --- a/hdl/hazard3_frontend.v +++ b/hdl/hazard3_frontend.v @@ -51,10 +51,16 @@ module hazard3_frontend #( // from being trashed by incoming fetch data; // jump instructions have other side effects besides jumping! - // Provide the rs1/rs2 register numbers which will be in CIR on the next - // cycle. These go straight to the register file read ports. - output wire [4:0] next_regs_rs1, - output wire [4:0] next_regs_rs2, + // Provide the rs1/rs2 register numbers which will be in CIR next cycle. + // Coarse: valid if this instruction has a nonzero register operand. + // (suitable for regfile read) + output reg [4:0] predecode_rs1_coarse, + output reg [4:0] predecode_rs2_coarse, + // Fine: same as coarse, but more accurate zeroing when e.g. the operand is implicit. + // (suitable for bypass) + output reg [4:0] predecode_rs1_fine, + output reg [4:0] predecode_rs2_fine, + // Debugger instruction injection: instruction fetch is suppressed when in // debug halt state, and the DM can then inject instructions into the last @@ -65,6 +71,8 @@ module hazard3_frontend #( output wire dbg_instr_data_rdy ); +`include "rv_opcodes.vh" + localparam W_BUNDLE = W_DATA / 2; parameter W_FIFO_LEVEL = $clog2(FIFO_DEPTH + 1); @@ -385,19 +393,53 @@ assign cir_err = cir_bus_err[1:0]; // Register number predecode wire [31:0] next_instr = instr_data_plus_fetch[31:0]; -wire next_instr_is_32bit = next_instr[1:0] == 2'b11; +wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C; -assign next_regs_rs1 = - next_instr_is_32bit ? next_instr[19:15] : // 32-bit R, S, B formats - next_instr[1:0] == 2'b00 && next_instr[14:13] == 2'b00 ? 5'd2 : // c.addi4spn + don't care - next_instr[1:0] == 2'b01 && next_instr[15 ] == 1'b0 ? next_instr[11:7] : // c.addi, c.addi16sp + don't care (jal, li) - next_instr[1:0] == 2'b10 && next_instr[14 ] == 1'b1 ? 
5'd2 : // c.lwsp, c.lwsp + don't care - next_instr[1:0] == 2'b10 ? next_instr[11:7] : - {2'b01, next_instr[9:7]}; +always @ (*) begin -assign next_regs_rs2 = - next_instr_is_32bit ? next_instr[24:20] : - next_instr[1:0] == 2'b10 ? next_instr[6:2] : {2'b01, next_instr[4:2]}; + casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]}) + {1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats + {1'b0, 2'b00, 3'bz00}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care + {1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li) + {1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.swsp + don't care + {1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7]; + default: predecode_rs1_coarse = {2'b01, next_instr[9:7]}; + endcase + + casez ({next_instr_is_32bit, next_instr[1:0]}) + {1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20]; + {1'b0, 2'b10}: predecode_rs2_coarse = next_instr[6:2]; + default: predecode_rs2_coarse = {2'b01, next_instr[4:2]}; + endcase + + // The "fine" predecode targets those instructions which either: + // - Have an implicit zero-register operand in their expanded form (e.g. c.beqz) + // - Do not have a register operand on that port, but rely on the port being 0 + // We don't care about instructions which ignore the reg ports, e.g.
ebreak + + casez ({|EXTENSION_C, next_instr}) + // -> addi rd, x0, imm: + {1'b1, 16'hzzzz, RV_C_LI }: predecode_rs1_fine = 5'd0; + {1'b1, 16'hzzzz, RV_C_MV }: begin + if (next_instr[6:2] == 5'd0) begin + // c.jr has rs1 as normal + predecode_rs1_fine = predecode_rs1_coarse; + end else begin + // -> add rd, x0, rs2: + predecode_rs1_fine = 5'd0; + end + end + default: predecode_rs1_fine = predecode_rs1_coarse; + endcase + + casez ({|EXTENSION_C, next_instr}) + {1'b1, 16'hzzzz, RV_C_BEQZ}: predecode_rs2_fine = 5'd0; // -> beq rs1, x0, label + {1'b1, 16'hzzzz, RV_C_BNEZ}: predecode_rs2_fine = 5'd0; // -> bne rs1, x0, label + default: predecode_rs2_fine = predecode_rs2_coarse; + endcase + + +end endmodule