Add fine (as well as coarse) register predecode, so that predecoded regnum can be used in bypass zeroing.

This commit is contained in:
Luke Wren 2022-04-04 20:16:19 +01:00
parent 357efac66e
commit 20cf408632
2 changed files with 92 additions and 47 deletions

View File

@ -89,8 +89,10 @@ wire f_jump_rdy;
wire f_jump_now = f_jump_req && f_jump_rdy; wire f_jump_now = f_jump_req && f_jump_rdy;
// Predecoded register numbers, for register file access // Predecoded register numbers, for register file access
wire [W_REGADDR-1:0] f_rs1; wire [W_REGADDR-1:0] f_rs1_coarse;
wire [W_REGADDR-1:0] f_rs2; wire [W_REGADDR-1:0] f_rs2_coarse;
wire [W_REGADDR-1:0] f_rs1_fine;
wire [W_REGADDR-1:0] f_rs2_fine;
wire [31:0] fd_cir; wire [31:0] fd_cir;
wire [1:0] fd_cir_err; wire [1:0] fd_cir_err;
@ -107,35 +109,37 @@ hazard3_frontend #(
.FIFO_DEPTH(2), .FIFO_DEPTH(2),
`include "hazard3_config_inst.vh" `include "hazard3_config_inst.vh"
) frontend ( ) frontend (
.clk (clk), .clk (clk),
.rst_n (rst_n), .rst_n (rst_n),
.mem_size (f_mem_size), .mem_size (f_mem_size),
.mem_addr (bus_haddr_i), .mem_addr (bus_haddr_i),
.mem_addr_vld (bus_aph_req_i), .mem_addr_vld (bus_aph_req_i),
.mem_addr_rdy (bus_aph_ready_i), .mem_addr_rdy (bus_aph_ready_i),
.mem_data (bus_rdata_i), .mem_data (bus_rdata_i),
.mem_data_err (bus_dph_err_i), .mem_data_err (bus_dph_err_i),
.mem_data_vld (bus_dph_ready_i), .mem_data_vld (bus_dph_ready_i),
.jump_target (f_jump_target), .jump_target (f_jump_target),
.jump_target_vld (f_jump_req), .jump_target_vld (f_jump_req),
.jump_target_rdy (f_jump_rdy), .jump_target_rdy (f_jump_rdy),
.cir (fd_cir), .cir (fd_cir),
.cir_err (fd_cir_err), .cir_err (fd_cir_err),
.cir_vld (fd_cir_vld), .cir_vld (fd_cir_vld),
.cir_use (df_cir_use), .cir_use (df_cir_use),
.cir_lock (df_cir_lock), .cir_lock (df_cir_lock),
.next_regs_rs1 (f_rs1), .predecode_rs1_coarse (f_rs1_coarse),
.next_regs_rs2 (f_rs2), .predecode_rs2_coarse (f_rs2_coarse),
.predecode_rs1_fine (f_rs1_fine),
.predecode_rs2_fine (f_rs2_fine),
.debug_mode (debug_mode), .debug_mode (debug_mode),
.dbg_instr_data (dbg_instr_data), .dbg_instr_data (dbg_instr_data),
.dbg_instr_data_vld (dbg_instr_data_vld), .dbg_instr_data_vld (dbg_instr_data_vld),
.dbg_instr_data_rdy (dbg_instr_data_rdy) .dbg_instr_data_rdy (dbg_instr_data_rdy)
); );
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
@ -356,8 +360,8 @@ always @ (posedge clk or negedge rst_n) begin
d_rs1_predecoded <= {W_REGADDR{1'b0}}; d_rs1_predecoded <= {W_REGADDR{1'b0}};
d_rs2_predecoded <= {W_REGADDR{1'b0}}; d_rs2_predecoded <= {W_REGADDR{1'b0}};
end else if (d_starved || !x_stall) begin end else if (d_starved || !x_stall) begin
d_rs1_predecoded <= f_rs1; d_rs1_predecoded <= f_rs1_fine;
d_rs2_predecoded <= f_rs2; d_rs2_predecoded <= f_rs2_fine;
end end
end end
@ -373,8 +377,7 @@ end
`endif `endif
always @ (*) begin always @ (*) begin
if (~|d_rs1) begin if (~|d_rs1_predecoded) begin
// Note the predecoded version is not sufficiently precise for zeroing
x_rs1_bypass = {W_DATA{1'b0}}; x_rs1_bypass = {W_DATA{1'b0}};
end else if (xm_rd == d_rs1_predecoded) begin end else if (xm_rd == d_rs1_predecoded) begin
x_rs1_bypass = xm_result; x_rs1_bypass = xm_result;
@ -383,7 +386,7 @@ always @ (*) begin
end else begin end else begin
x_rs1_bypass = x_rdata1; x_rs1_bypass = x_rdata1;
end end
if (~|d_rs2) begin if (~|d_rs2_predecoded) begin
x_rs2_bypass = {W_DATA{1'b0}}; x_rs2_bypass = {W_DATA{1'b0}};
end else if (xm_rd == d_rs2_predecoded) begin end else if (xm_rd == d_rs2_predecoded) begin
x_rs2_bypass = xm_result; x_rs2_bypass = xm_result;
@ -1041,9 +1044,9 @@ hazard3_regfile_1w2r #(
.rst_n (rst_n), .rst_n (rst_n),
// On downstream stall, we feed D's addresses back into regfile // On downstream stall, we feed D's addresses back into regfile
// so that output does not change. // so that output does not change.
.raddr1 (x_stall && !d_starved ? d_rs1 : f_rs1), .raddr1 (x_stall && !d_starved ? d_rs1 : f_rs1_coarse),
.rdata1 (x_rdata1), .rdata1 (x_rdata1),
.raddr2 (x_stall && !d_starved ? d_rs2 : f_rs2), .raddr2 (x_stall && !d_starved ? d_rs2 : f_rs2_coarse),
.rdata2 (x_rdata2), .rdata2 (x_rdata2),
.waddr (xm_rd), .waddr (xm_rd),

View File

@ -51,10 +51,16 @@ module hazard3_frontend #(
// from being trashed by incoming fetch data; // from being trashed by incoming fetch data;
// jump instructions have other side effects besides jumping! // jump instructions have other side effects besides jumping!
// Provide the rs1/rs2 register numbers which will be in CIR on the next // Provide the rs1/rs2 register numbers which will be in CIR next cycle.
// cycle. These go straight to the register file read ports. // Coarse: valid if this instruction has a nonzero register operand.
output wire [4:0] next_regs_rs1, // (suitable for regfile read)
output wire [4:0] next_regs_rs2, output reg [4:0] predecode_rs1_coarse,
output reg [4:0] predecode_rs2_coarse,
// Fine: same as coarse, but more accurate zeroing when e.g. the operand is implicit.
// (suitable for bypass)
output reg [4:0] predecode_rs1_fine,
output reg [4:0] predecode_rs2_fine,
// Debugger instruction injection: instruction fetch is suppressed when in // Debugger instruction injection: instruction fetch is suppressed when in
// debug halt state, and the DM can then inject instructions into the last // debug halt state, and the DM can then inject instructions into the last
@ -65,6 +71,8 @@ module hazard3_frontend #(
output wire dbg_instr_data_rdy output wire dbg_instr_data_rdy
); );
`include "rv_opcodes.vh"
localparam W_BUNDLE = W_DATA / 2; localparam W_BUNDLE = W_DATA / 2;
parameter W_FIFO_LEVEL = $clog2(FIFO_DEPTH + 1); parameter W_FIFO_LEVEL = $clog2(FIFO_DEPTH + 1);
@ -385,19 +393,53 @@ assign cir_err = cir_bus_err[1:0];
// Register number predecode // Register number predecode
wire [31:0] next_instr = instr_data_plus_fetch[31:0]; wire [31:0] next_instr = instr_data_plus_fetch[31:0];
wire next_instr_is_32bit = next_instr[1:0] == 2'b11; wire next_instr_is_32bit = next_instr[1:0] == 2'b11 || ~|EXTENSION_C;
assign next_regs_rs1 = always @ (*) begin
next_instr_is_32bit ? next_instr[19:15] : // 32-bit R, S, B formats
next_instr[1:0] == 2'b00 && next_instr[14:13] == 2'b00 ? 5'd2 : // c.addi4spn + don't care
next_instr[1:0] == 2'b01 && next_instr[15 ] == 1'b0 ? next_instr[11:7] : // c.addi, c.addi16sp + don't care (jal, li)
next_instr[1:0] == 2'b10 && next_instr[14 ] == 1'b1 ? 5'd2 : // c.lwsp, c.swsp + don't care
next_instr[1:0] == 2'b10 ? next_instr[11:7] :
{2'b01, next_instr[9:7]};
assign next_regs_rs2 = casez ({next_instr_is_32bit, next_instr[1:0], next_instr[15:13]})
next_instr_is_32bit ? next_instr[24:20] : {1'b1, 2'bzz, 3'bzzz}: predecode_rs1_coarse = next_instr[19:15]; // 32-bit R, S, B formats
next_instr[1:0] == 2'b10 ? next_instr[6:2] : {2'b01, next_instr[4:2]}; {1'b0, 2'b00, 3'bz00}: predecode_rs1_coarse = 5'd2; // c.addi4spn + don't care
{1'b0, 2'b01, 3'b0zz}: predecode_rs1_coarse = next_instr[11:7]; // c.addi, c.addi16sp + don't care (jal, li)
{1'b0, 2'b10, 3'bz1z}: predecode_rs1_coarse = 5'd2; // c.lwsp, c.swsp + don't care
{1'b0, 2'b10, 3'bz0z}: predecode_rs1_coarse = next_instr[11:7];
default: predecode_rs1_coarse = {2'b01, next_instr[9:7]};
endcase
casez ({next_instr_is_32bit, next_instr[1:0]})
{1'b1, 2'bzz}: predecode_rs2_coarse = next_instr[24:20];
{1'b0, 2'b10}: predecode_rs2_coarse = next_instr[6:2];
default: predecode_rs2_coarse = {2'b01, next_instr[4:2]};
endcase
// The "fine" predecode targets those instructions which either:
// - Have an implicit zero-register operand in their expanded form (e.g. c.beqz)
// - Do not have a register operand on that port, but rely on the port being 0
// We don't care about instructions which ignore the reg ports, e.g. ebreak
casez ({|EXTENSION_C, next_instr})
// -> addi rd, x0, imm:
{1'b1, 16'hzzzz, RV_C_LI }: predecode_rs1_fine = 5'd0;
{1'b1, 16'hzzzz, RV_C_MV }: begin
if (next_instr[6:2] == 5'd0) begin
// c.jr has rs1 as normal
predecode_rs1_fine = predecode_rs1_coarse;
end else begin
// -> add rd, x0, rs2:
predecode_rs1_fine = 5'd0;
end
end
default: predecode_rs1_fine = predecode_rs1_coarse;
endcase
casez ({|EXTENSION_C, next_instr})
{1'b1, 16'hzzzz, RV_C_BEQZ}: predecode_rs2_fine = 5'd0; // -> beq rs1, x0, label
{1'b1, 16'hzzzz, RV_C_BNEZ}: predecode_rs2_fine = 5'd0; // -> bne rs1, x0, label
default: predecode_rs2_fine = predecode_rs2_coarse;
endcase
end
endmodule endmodule