Hazard3/hdl/hazard3_decode.v

369 lines
17 KiB
Verilog

/******************************************************************************
* DO WHAT THE FUCK YOU WANT TO AND DON'T BLAME US PUBLIC LICENSE *
* Version 3, April 2008 *
* *
* Copyright (C) 2019 Luke Wren *
* *
* Everyone is permitted to copy and distribute verbatim or modified *
* copies of this license document and accompanying software, and *
* changing either is allowed. *
* *
* TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION *
* *
* 0. You just DO WHAT THE FUCK YOU WANT TO. *
* 1. We're NOT RESPONSIBLE WHEN IT DOESN'T FUCKING WORK. *
* *
*****************************************************************************/
// ----------------------------------------------------------------------------
// hazard3_decode: instruction decode (D) stage.
//
// What this module does, as visible in this file:
// - Passes the current instruction register (fd_cir) through
// hazard3_instr_decompress to obtain a 32-bit instruction, d_instr.
// - Maintains the decode-stage program counter (pc, also exported as d_pc).
// - Raises an early jump request (d_jump_req / d_jump_target) for JAL, and
// for conditional branches whose instruction bit 31 (the immediate sign
// bit) is set, i.e. branches with a negative offset.
// - Decodes register numbers, the immediate and the X-stage control signals
// combinatorially (d_*), then registers them into the dx_* outputs. A
// bubble is inserted on flush_d_x, or when decode stalls while X does not.
// ----------------------------------------------------------------------------
module hazard3_decode #(
// Configuration parameters and width constants come from the two headers
// below; they supply names such as EXTENSION_C, EXTENSION_M, RESET_VECTOR,
// W_ADDR, W_DATA and W_REGADDR used throughout this module.
`include "hazard3_config.vh"
,
`include "hazard3_width_const.vh"
) (
// Clock and reset. rst_n is used as an asynchronous active-low reset
// (all resettable flops are sensitive to "negedge rst_n").
input wire clk,
input wire rst_n,
// Frontend -> decode: current instruction register and its validity.
// Decode treats fd_cir_vld == 0 as "no instruction available", and
// fd_cir_vld[0] set together with a 32-bit instruction as "not enough data".
// NOTE(review): presumably fd_cir_vld is a count of valid halfwords in
// fd_cir -- confirm against the frontend.
input wire [31:0] fd_cir,
input wire [1:0] fd_cir_vld,
// Decode -> frontend: df_cir_use is 0 while decode is starved/stalled,
// otherwise 2 for a 32-bit instruction and 1 if not. df_cir_lock asks for
// the CIR contents to be held (see "CIR Locking" below).
output wire [1:0] df_cir_use,
output wire df_cir_lock,
// Early jump request and target, both driven combinatorially.
output reg d_jump_req,
output reg [W_ADDR-1:0] d_jump_target,
output wire [W_ADDR-1:0] d_pc, // FIXME only added for riscv-formal
// Stall/flush handshaking with the neighbouring stages.
output wire d_stall,
input wire x_stall,
input wire flush_d_x,
// Frontend jump interface: f_jump_rdy gates acceptance of d_jump_req,
// f_jump_now / f_jump_target are used to reload pc.
input wire f_jump_rdy,
input wire f_jump_now,
input wire [W_ADDR-1:0] f_jump_target,
// Source register numbers of the instruction currently in decode.
output reg [W_REGADDR-1:0] d_rs1, // combinatorial
output reg [W_REGADDR-1:0] d_rs2, // combinatorial
// Everything below with a dx_ prefix is a D/X pipeline register written in
// the clocked blocks at the end of this module.
output reg [W_DATA-1:0] dx_imm,
output reg [W_REGADDR-1:0] dx_rs1,
output reg [W_REGADDR-1:0] dx_rs2,
output reg [W_REGADDR-1:0] dx_rd,
output reg [W_ALUSRC-1:0] dx_alusrc_a,
output reg [W_ALUSRC-1:0] dx_alusrc_b,
output reg [W_ALUOP-1:0] dx_aluop,
output reg [W_MEMOP-1:0] dx_memop,
output reg [W_MULOP-1:0] dx_mulop,
output reg dx_csr_ren,
output reg dx_csr_wen,
output reg [1:0] dx_csr_wtype,
output reg dx_csr_w_imm,
output reg [W_BCOND-1:0] dx_branchcond,
output reg [W_ADDR-1:0] dx_jump_target,
output reg dx_jump_is_regoffs,
output reg dx_result_is_linkaddr,
output reg [W_ADDR-1:0] dx_pc,
output reg [W_ADDR-1:0] dx_mispredict_addr,
// NOTE(review): width is a literal [2:0] here, whereas the combinatorial
// d_except below is declared [W_EXCEPT-1:0] -- confirm that W_EXCEPT == 3.
output reg [2:0] dx_except
);
// Instruction match patterns (RV_*) and operation encodings (ALUOP_*, MEMOP_*,
// BCOND_*, EXCEPT_*, ...) are supplied by these headers.
`include "rv_opcodes.vh"
`include "hazard3_ops.vh"
// True if any CSR-related configuration option is enabled. When false, the CSR
// instructions and ECALL/EBREAK/MRET are decoded as invalid (see decode below).
localparam HAVE_CSR = CSR_M_MANDATORY || CSR_M_TRAP || CSR_COUNTER;
// ----------------------------------------------------------------------------
// Expand compressed instructions
wire [31:0] d_instr;
wire d_instr_is_32bit;
wire d_invalid_16bit;
reg d_invalid_32bit;
// An instruction is invalid if either the decompressor or the 32-bit decoder
// below rejects it.
wire d_invalid = d_invalid_16bit || d_invalid_32bit;
// The decompressor's PASSTHROUGH parameter is set when EXTENSION_C is disabled.
hazard3_instr_decompress #(
.PASSTHROUGH(!EXTENSION_C)
) decomp (
.instr_in (fd_cir),
.instr_is_32bit (d_instr_is_32bit),
.instr_out (d_instr),
.invalid (d_invalid_16bit)
);
// Decode various immediate formats
// Each is assembled to 32 bits. I/S/B/J are sign-extended from d_instr[31];
// U places d_instr[31:12] in the upper bits with the low 12 bits zero.
// B and J have an implicit zero LSB.
wire [31:0] d_imm_i = {{21{d_instr[31]}}, d_instr[30:20]};
wire [31:0] d_imm_s = {{21{d_instr[31]}}, d_instr[30:25], d_instr[11:7]};
wire [31:0] d_imm_b = {{20{d_instr[31]}}, d_instr[7], d_instr[30:25], d_instr[11:8], 1'b0};
wire [31:0] d_imm_u = {d_instr[31:12], {12{1'b0}}};
wire [31:0] d_imm_j = {{12{d_instr[31]}}, d_instr[19:12], d_instr[20], d_instr[30:21], 1'b0};
// ----------------------------------------------------------------------------
// PC/CIR control
// Starved: nothing valid in the CIR, or fd_cir_vld[0] is set while the
// instruction needs 32 bits.
wire d_starved = ~|fd_cir_vld || fd_cir_vld[0] && d_instr_is_32bit;
// Decode stalls when X is stalled, when starved, or while a jump request is
// asserted but f_jump_rdy is low.
assign d_stall = x_stall ||
d_starved || (d_jump_req && !f_jump_rdy);
// Report nothing consumed while stalled; otherwise 2 or 1 depending on
// whether the instruction is 32-bit.
assign df_cir_use =
d_starved || d_stall ? 2'h0 :
d_instr_is_32bit ? 2'h2 : 2'h1;
// CIR Locking is required if we successfully assert a jump request, but decode is stalled.
// (This only happens if decode stall is caused by X stall, not if fetch is starved!)
// The reason for this is that, if the CIR is not locked in, it can be trashed by
// incoming fetch data before the roadblock clears ahead of us, which will squash any other
// side effects this instruction may have besides jumping! This includes:
// - Linking for JAL
// - Mispredict recovery for branches
// Note that it is not possible to simply gate the jump request based on X stalling,
// because X stall is a function of hready, and jump request feeds haddr htrans etc.
// Note it is possible for d_jump_req and m_jump_req to be asserted
// simultaneously, hence checking flush:
wire jump_caused_by_d = d_jump_req && f_jump_rdy && !flush_d_x;
wire assert_cir_lock = jump_caused_by_d && d_stall;
wire deassert_cir_lock = !d_stall;
// Registered copy of the lock. The lock is held until decode is no longer
// stalled, and is (re)asserted combinatorially by assert_cir_lock.
reg cir_lock_prev;
assign df_cir_lock = (cir_lock_prev && !deassert_cir_lock) || assert_cir_lock;
always @ (posedge clk or negedge rst_n)
if (!rst_n)
cir_lock_prev <= 1'b0;
else
cir_lock_prev <= df_cir_lock;
// Decode-stage program counter. pc_next is the address of the sequentially
// following instruction (+4 for a 32-bit instruction, +2 otherwise).
reg [W_ADDR-1:0] pc;
wire [W_ADDR-1:0] pc_next = pc + (d_instr_is_32bit ? 32'h4 : 32'h2);
assign d_pc = pc;
// pc update priority:
// 1. Load f_jump_target when a jump happens now and we are not locking the
// CIR this cycle, or when a previously held CIR lock is being released.
// 2. Otherwise advance to pc_next when decode is not stalled and the CIR is
// not locked.
// 3. Otherwise hold.
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
pc <= RESET_VECTOR;
end else begin
if ((f_jump_now && !assert_cir_lock) || (cir_lock_prev && deassert_cir_lock)) begin
pc <= f_jump_target;
`ifdef FORMAL
// Being cheeky above to save a 32 bit mux. Check that we never get an M target by mistake.
if (cir_lock_prev && deassert_cir_lock)
assert(f_jump_target == d_jump_target);
`endif
end else if (!d_stall && !df_cir_lock) begin
pc <= pc_next;
end
end
end
// If the current CIR is there due to locking, it is a jump which has already had primary effect.
// Jump requests are also suppressed when starved or when the instruction is invalid.
wire jump_enable = !d_starved && !cir_lock_prev && !d_invalid;
reg [W_ADDR-1:0] d_jump_offs;
// Early jump request generation (combinatorial).
always @ (*) begin
// JAL is major opcode 1101111,
// branches are 1100011.
// Bit 3 therefore selects between the J-format and B-format offset.
case (d_instr[3])
1'b1: d_jump_offs = d_imm_j;
default: d_jump_offs = d_imm_b;
endcase
d_jump_target = pc + d_jump_offs;
// The case expression is d_instr with d_instr[31] duplicated on top. The
// branch items require that extra bit to be 1, so only branches with a set
// sign bit (negative offset) raise a request here; the JAL item leaves it as
// a don't-care, so JAL always requests a jump (subject to jump_enable).
casez ({d_instr[31], d_instr})
{1'b1, RV_BEQ }: d_jump_req = jump_enable;
{1'b1, RV_BNE }: d_jump_req = jump_enable;
{1'b1, RV_BLT }: d_jump_req = jump_enable;
{1'b1, RV_BGE }: d_jump_req = jump_enable;
{1'b1, RV_BLTU}: d_jump_req = jump_enable;
{1'b1, RV_BGEU}: d_jump_req = jump_enable;
{1'bz, RV_JAL }: d_jump_req = jump_enable;
default: d_jump_req = 1'b0;
endcase
end
// ----------------------------------------------------------------------------
// Decode X controls
// Combinatorials:
reg [W_REGADDR-1:0] d_rd;
reg [W_DATA-1:0] d_imm;
// NOTE(review): d_branchoffs is assigned a default below but is never read
// anywhere in this module -- looks like a leftover; confirm before removing.
reg [W_DATA-1:0] d_branchoffs;
reg [W_ALUSRC-1:0] d_alusrc_a;
reg [W_ALUSRC-1:0] d_alusrc_b;
reg [W_ALUOP-1:0] d_aluop;
reg [W_MEMOP-1:0] d_memop;
reg [W_MULOP-1:0] d_mulop;
reg [W_BCOND-1:0] d_branchcond;
reg d_jump_is_regoffs;
reg d_result_is_linkaddr;
reg d_csr_ren;
reg d_csr_wen;
reg [1:0] d_csr_wtype;
reg d_csr_w_imm;
reg [W_EXCEPT-1:0] d_except;
// Register number zero, used to neutralise a register field that an
// instruction does not use.
localparam X0 = {W_REGADDR{1'b0}};
// Main instruction decoder (combinatorial). Every output is first given a
// default, so each case item only lists what differs for that instruction.
// Blocking assignments are used, so expressions such as |d_rd and |d_rs1 in
// the CSR items read the default register fields assigned just below.
always @ (*) begin
// Assign some defaults
d_rs1 = d_instr[19:15];
d_rs2 = d_instr[24:20];
d_rd = d_instr[11: 7];
d_imm = d_imm_i;
d_branchoffs = d_imm_i;
d_alusrc_a = ALUSRCA_RS1;
d_alusrc_b = ALUSRCB_RS2;
d_aluop = ALUOP_ADD;
d_memop = MEMOP_NONE;
d_mulop = M_OP_MUL;
d_csr_ren = 1'b0;
d_csr_wen = 1'b0;
d_csr_wtype = CSR_WTYPE_W;
d_csr_w_imm = 1'b0;
d_branchcond = BCOND_NEVER;
d_jump_is_regoffs = 1'b0;
d_result_is_linkaddr = 1'b0;
d_invalid_32bit = 1'b0;
d_except = EXCEPT_NONE;
casez (d_instr)
// Conditional branches: no destination register; an ALU operation plus a
// zero/nonzero branch condition encodes the comparison.
RV_BEQ: begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_ZERO; end
RV_BNE: begin d_rd = X0; d_aluop = ALUOP_SUB; d_branchcond = BCOND_NZERO; end
RV_BLT: begin d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_NZERO; end
RV_BGE: begin d_rd = X0; d_aluop = ALUOP_LT; d_branchcond = BCOND_ZERO; end
RV_BLTU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_NZERO; end
RV_BGEU: begin d_rd = X0; d_aluop = ALUOP_LTU; d_branchcond = BCOND_ZERO; end
// Jumps: both produce the link address as their result. JALR additionally
// marks the jump as register+offset and always "branches"; JAL uses no
// source registers.
RV_JALR: begin d_result_is_linkaddr = 1'b1; d_jump_is_regoffs = 1'b1; d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_branchcond = BCOND_ALWAYS; end
RV_JAL: begin d_result_is_linkaddr = 1'b1; d_rs2 = X0; d_rs1 = X0; end
// Upper-immediate instructions: ADD with the U-format immediate; rs1 is
// forced to X0, and AUIPC takes operand A from the PC.
RV_LUI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_rs1 = X0; end
RV_AUIPC: begin d_aluop = ALUOP_ADD; d_imm = d_imm_u; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_alusrc_a = ALUSRCA_PC; d_rs1 = X0; end
// Register-immediate ALU operations: operand B is the I-format immediate,
// so rs2 is unused and forced to X0.
RV_ADDI: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLLI: begin d_aluop = ALUOP_SLL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLTI: begin d_aluop = ALUOP_LT; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SLTIU: begin d_aluop = ALUOP_LTU; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_XORI: begin d_aluop = ALUOP_XOR; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SRLI: begin d_aluop = ALUOP_SRL; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_SRAI: begin d_aluop = ALUOP_SRA; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_ORI: begin d_aluop = ALUOP_OR; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
RV_ANDI: begin d_aluop = ALUOP_AND; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; end
// Register-register ALU operations: only the ALU operation differs from
// the defaults.
RV_ADD: begin d_aluop = ALUOP_ADD; end
RV_SUB: begin d_aluop = ALUOP_SUB; end
RV_SLL: begin d_aluop = ALUOP_SLL; end
RV_SLT: begin d_aluop = ALUOP_LT; end
RV_SLTU: begin d_aluop = ALUOP_LTU; end
RV_XOR: begin d_aluop = ALUOP_XOR; end
RV_SRL: begin d_aluop = ALUOP_SRL; end
RV_SRA: begin d_aluop = ALUOP_SRA; end
RV_OR: begin d_aluop = ALUOP_OR; end
RV_AND: begin d_aluop = ALUOP_AND; end
// Loads: ALU adds rs1 and the I-format immediate; rs2 is unused.
RV_LB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LB; end
RV_LH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LH; end
RV_LW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LW; end
RV_LBU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LBU; end
RV_LHU: begin d_aluop = ALUOP_ADD; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; d_rs2 = X0; d_memop = MEMOP_LHU; end
// Stores: ALU adds rs1 and the S-format immediate; rs2 is kept (d_rs2 is
// left at its default) and there is no destination register.
RV_SB: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SB; d_rd = X0; end
RV_SH: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SH; d_rd = X0; end
RV_SW: begin d_aluop = ALUOP_ADD; d_imm = d_imm_s; d_alusrc_b = ALUSRCB_IMM; d_memop = MEMOP_SW; d_rd = X0; end
// Multiply/divide: only legal when EXTENSION_M is configured, otherwise
// flagged as an invalid instruction.
RV_MUL: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MUL; end else begin d_invalid_32bit = 1'b1; end
RV_MULH: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULH; end else begin d_invalid_32bit = 1'b1; end
RV_MULHSU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHSU; end else begin d_invalid_32bit = 1'b1; end
RV_MULHU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_MULHU; end else begin d_invalid_32bit = 1'b1; end
RV_DIV: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIV; end else begin d_invalid_32bit = 1'b1; end
RV_DIVU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_DIVU; end else begin d_invalid_32bit = 1'b1; end
RV_REM: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REM; end else begin d_invalid_32bit = 1'b1; end
RV_REMU: if (EXTENSION_M) begin d_aluop = ALUOP_MULDIV; d_mulop = M_OP_REMU; end else begin d_invalid_32bit = 1'b1; end
RV_FENCE: begin d_rd = X0; end // NOP
RV_FENCE_I: begin d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // Pretend we are recovering from a mispredicted-taken backward branch. Mispredict recovery flushes frontend.
// CSR accesses: only legal when HAVE_CSR is true. The write-type forms
// (CSRRW/CSRRWI) always write and read only if rd is nonzero; the set/clear
// forms always read and write only if the rs1 field is nonzero. The *I
// forms additionally set d_csr_w_imm.
RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRS: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRC: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRWI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRSI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
RV_CSRRCI: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; d_csr_w_imm = 1'b1; end else begin d_invalid_32bit = 1'b1; end
// Trap-related instructions: reported through d_except, with all register
// fields neutralised. Also only legal when HAVE_CSR is true.
RV_ECALL: if (HAVE_CSR) begin d_except = EXCEPT_ECALL; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
RV_EBREAK: if (HAVE_CSR) begin d_except = EXCEPT_EBREAK; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
RV_MRET: if (HAVE_CSR) begin d_except = EXCEPT_MRET; d_rs2 = X0; d_rs1 = X0; d_rd = X0; end else begin d_invalid_32bit = 1'b1; end
// Anything not matched above is an illegal instruction.
default: begin d_invalid_32bit = 1'b1; end
endcase
end
// D/X pipeline registers that need a reset: these are the controls which
// decide whether the instruction in X does anything, so they must come up in
// a harmless state.
always @ (posedge clk or negedge rst_n) begin
if (!rst_n) begin
{dx_rs1, dx_rs2, dx_rd} <= {(3 * W_REGADDR){1'b0}};
dx_alusrc_a <= ALUSRCA_RS1;
dx_alusrc_b <= ALUSRCB_RS2;
dx_aluop <= ALUOP_ADD;
dx_memop <= MEMOP_NONE;
dx_mulop <= M_OP_MUL;
dx_csr_ren <= 1'b0;
dx_csr_wen <= 1'b0;
dx_csr_wtype <= CSR_WTYPE_W;
dx_csr_w_imm <= 1'b0;
dx_branchcond <= BCOND_NEVER;
dx_jump_is_regoffs <= 1'b0;
dx_result_is_linkaddr <= 1'b0;
dx_except <= EXCEPT_NONE;
end else if (flush_d_x || (d_stall && !x_stall)) begin
// Bubble insertion
// Taken on a flush, or when X can accept a new instruction but decode has
// nothing to give it. Only the side-effecting controls are cleared; the
// remaining dx_* registers keep their previous values.
// NOTE(review): the register fields are cleared with literal 5'h0 here,
// whereas the rest of the file sizes them with W_REGADDR.
dx_branchcond <= BCOND_NEVER;
dx_memop <= MEMOP_NONE;
dx_rd <= 5'h0;
dx_except <= EXCEPT_NONE;
dx_csr_ren <= 1'b0;
dx_csr_wen <= 1'b0;
// Don't start a multiply in a pipe bubble
if (EXTENSION_M)
dx_aluop <= ALUOP_ADD;
// Also need to clear rs1, rs2, due to a nasty sequence of events:
// Suppose we have a load, followed by a dependent branch, which is predicted taken
// - branch will stall in D until AHB master becomes free
// - on next cycle, prediction causes jump, and bubble is in X
// - if X gets branch's rs1, rs2, it will cause spurious RAW stall
// - on next cycle, branch will not progress into X due to RAW stall, but *will* be replaced in D due to jump
// - branch mispredict now cannot be corrected
dx_rs1 <= 5'h0;
dx_rs2 <= 5'h0;
end else if (!x_stall) begin
// Normal advance: X is not stalled and decode has a valid slot to pass on.
// These ones can have side effects
// so for an invalid instruction they are forced to benign values, and
// dx_except reports EXCEPT_INSTR_ILLEGAL instead of the decoded exception.
dx_rs1 <= d_invalid ? {W_REGADDR{1'b0}} : d_rs1;
dx_rs2 <= d_invalid ? {W_REGADDR{1'b0}} : d_rs2;
dx_rd <= d_invalid ? {W_REGADDR{1'b0}} : d_rd;
dx_memop <= d_invalid ? MEMOP_NONE : d_memop;
dx_branchcond <= d_invalid ? BCOND_NEVER : d_branchcond;
dx_csr_ren <= d_invalid ? 1'b0 : d_csr_ren;
dx_csr_wen <= d_invalid ? 1'b0 : d_csr_wen;
dx_except <= d_invalid ? EXCEPT_INSTR_ILLEGAL : d_except;
// The ALU operation only needs squashing when EXTENSION_M is configured
// (same reason as the bubble case above).
dx_aluop <= d_invalid && EXTENSION_M ? ALUOP_ADD : d_aluop;
// These can't
dx_alusrc_a <= d_alusrc_a;
dx_alusrc_b <= d_alusrc_b;
dx_mulop <= d_mulop;
dx_jump_is_regoffs <= d_jump_is_regoffs;
dx_result_is_linkaddr <= d_result_is_linkaddr;
dx_csr_wtype <= d_csr_wtype;
dx_csr_w_imm <= d_csr_w_imm;
end
end
// No reset required on these; will be masked by the resettable pipeline controls until they're valid
always @ (posedge clk) begin
// Captured whenever X is not stalled (including during bubbles).
if (!x_stall) begin
dx_imm <= d_imm;
dx_jump_target <= d_jump_target;
dx_mispredict_addr <= pc_next;
dx_pc <= pc;
end
// This second assignment to dx_pc comes later in the block, so on a flush it
// takes precedence over the one above, and it applies even when X is stalled.
if (flush_d_x) begin
// The target of a late jump must be propagated *immediately* to X PC, as
// mepc may sample X PC at any time due to IRQ, and must not capture
// misprediction.
// Also required for flush while X stalled (e.g. if a muldiv enters X while
// a 1 cycle bus stall holds off the jump request in M)
dx_pc <= f_jump_target;
`ifdef FORMAL
// This should only be caused by late jumps
assert(f_jump_now);
`endif
end
end
endmodule