From 2c8f3974d099c53f4b0e525401cca1968073a05f Mon Sep 17 00:00:00 2001 From: Luke Wren Date: Sat, 9 Apr 2022 13:49:16 +0100 Subject: [PATCH] Correctly implement fence.i as branch-to-next. Make Zifencei optional. Tighten up decode on fence and fence.i. --- hdl/hazard3_config.vh | 58 +++++++++++++++++++++----------------- hdl/hazard3_config_inst.vh | 53 +++++++++++++++++----------------- hdl/hazard3_decode.v | 21 +++++++------- hdl/rv_opcodes.vh | 4 +-- 4 files changed, 72 insertions(+), 64 deletions(-) diff --git a/hdl/hazard3_config.vh b/hdl/hazard3_config.vh index 4e62a30..7199924 100644 --- a/hdl/hazard3_config.vh +++ b/hdl/hazard3_config.vh @@ -37,44 +37,50 @@ parameter MTVEC_INIT = 32'h00000000, // RISC-V ISA and CSR support // EXTENSION_A: Support for atomic read/modify/write instructions -parameter EXTENSION_A = 1, +parameter EXTENSION_A = 1, // EXTENSION_C: Support for compressed (variable-width) instructions -parameter EXTENSION_C = 1, +parameter EXTENSION_C = 1, // EXTENSION_M: Support for hardware multiply/divide/modulo instructions -parameter EXTENSION_M = 1, +parameter EXTENSION_M = 1, // EXTENSION_ZBA: Support for Zba address generation instructions -parameter EXTENSION_ZBA = 1, +parameter EXTENSION_ZBA = 1, // EXTENSION_ZBB: Support for Zbb basic bit manipulation instructions -parameter EXTENSION_ZBB = 1, +parameter EXTENSION_ZBB = 1, // EXTENSION_ZBC: Support for Zbc carry-less multiplication instructions -parameter EXTENSION_ZBC = 1, +parameter EXTENSION_ZBC = 1, // EXTENSION_ZBS: Support for Zbs single-bit manipulation instructions -parameter EXTENSION_ZBS = 1, +parameter EXTENSION_ZBS = 1, + +// EXTENSION_ZIFENCEI: Support for the fence.i instruction +// Optional, since a plain branch/jump will also flush the prefetch queue. +parameter EXTENSION_ZIFENCEI = 1, + +// Note the Zicsr extension is implied by any of the following CSR support: // CSR_M_MANDATORY: Bare minimum CSR support e.g. misa. 
Spec says must = 1 if // CSRs are present, but I won't tell anyone. -parameter CSR_M_MANDATORY = 1, +parameter CSR_M_MANDATORY = 1, // CSR_M_TRAP: Include M-mode trap-handling CSRs, and enable trap support. -parameter CSR_M_TRAP = 1, +parameter CSR_M_TRAP = 1, // CSR_COUNTER: Include performance counters and relevant M-mode CSRs -parameter CSR_COUNTER = 1, +parameter CSR_COUNTER = 1, // DEBUG_SUPPORT: Support for run/halt and instruction injection from an // external Debug Module, support for Debug Mode, and Debug Mode CSRs. // Requires: CSR_M_MANDATORY, CSR_M_TRAP. -parameter DEBUG_SUPPORT = 0, +parameter DEBUG_SUPPORT = 0, // NUM_IRQ: Number of external IRQs implemented in meie0 and meip0. // Minimum 1 (if CSR_M_TRAP = 1), maximum 128. -parameter NUM_IRQ = 32, +parameter NUM_IRQ = 32, // ---------------------------------------------------------------------------- // ID registers @@ -82,42 +88,42 @@ parameter NUM_IRQ = 32, // JEDEC JEP106-compliant vendor ID, can be left at 0 if "not implemented or // [...] this is a non-commercial implementation" (RISC-V spec). // 31:7 is continuation code count, 6:0 is ID. Parity bit is not stored. -parameter MVENDORID_VAL = 32'h0, +parameter MVENDORID_VAL = 32'h0, // Implementation ID for this specific version of Hazard3. Git hash is perfect. -parameter MIMPID_VAL = 32'h0, +parameter MIMPID_VAL = 32'h0, // Each core has a single hardware thread. Multiple cores should have unique IDs. -parameter MHARTID_VAL = 32'h0, +parameter MHARTID_VAL = 32'h0, // Pointer to configuration structure blob, or all-zeroes. Must be at least // 4-byte-aligned. -parameter MCONFIGPTR_VAL = 32'h0, +parameter MCONFIGPTR_VAL = 32'h0, // ---------------------------------------------------------------------------- // Performance/size options // REDUCED_BYPASS: Remove all forwarding paths except X->X (so back-to-back // ALU ops can still run at 1 CPI), to save area. 
-parameter REDUCED_BYPASS = 0, +parameter REDUCED_BYPASS = 0, // MULDIV_UNROLL: Bits per clock for multiply/divide circuit, if present. Must // be a power of 2. -parameter MULDIV_UNROLL = 1, +parameter MULDIV_UNROLL = 1, // MUL_FAST: Use single-cycle multiply circuit for MUL instructions, retiring // to stage M. The sequential multiply/divide circuit is still used for MULH* -parameter MUL_FAST = 0, +parameter MUL_FAST = 0, // MULH_FAST: extend the fast multiply circuit to also cover MULH*, and remove // the multiply functionality from the sequential multiply/divide circuit. // Requires; MUL_FAST -parameter MULH_FAST = 0, +parameter MULH_FAST = 0, // FAST_BRANCHCMP: Instantiate a separate comparator (eq/lt/ltu) for branch -// resolution, rather than using the ALU. May improve fetch address delay. -// (Especially if Zba extension is enabled) -parameter FAST_BRANCHCMP = 1, +// comparisons, rather than using the ALU. Improves fetch address delay, +// especially if Zba extension is enabled. Disabling may save area. +parameter FAST_BRANCHCMP = 1, // MTVEC_WMASK: Mask of which bits in MTVEC are modifiable. Save gates by // making trap vector base partly fixed (legal, as it's WARL). @@ -126,10 +132,10 @@ parameter FAST_BRANCHCMP = 1, // // - Note the entire vector table must always be aligned to its size, rounded // up to a power of two, so careful with the low-order bits. 
-parameter MTVEC_WMASK = 32'hfffffffd, +parameter MTVEC_WMASK = 32'hfffffffd, // ---------------------------------------------------------------------------- // Port size parameters (do not modify) -parameter W_ADDR = 32, // Do not modify -parameter W_DATA = 32 // Do not modify +parameter W_ADDR = 32, // Do not modify +parameter W_DATA = 32 // Do not modify diff --git a/hdl/hazard3_config_inst.vh b/hdl/hazard3_config_inst.vh index 90c602c..2c995bb 100644 --- a/hdl/hazard3_config_inst.vh +++ b/hdl/hazard3_config_inst.vh @@ -7,29 +7,30 @@ // be set at instantiation rather than editing the config file, and will flow // correctly down through the hierarchy. -.RESET_VECTOR (RESET_VECTOR), -.MTVEC_INIT (MTVEC_INIT), -.EXTENSION_A (EXTENSION_A), -.EXTENSION_C (EXTENSION_C), -.EXTENSION_M (EXTENSION_M), -.EXTENSION_ZBA (EXTENSION_ZBA), -.EXTENSION_ZBB (EXTENSION_ZBB), -.EXTENSION_ZBC (EXTENSION_ZBC), -.EXTENSION_ZBS (EXTENSION_ZBS), -.CSR_M_MANDATORY (CSR_M_MANDATORY), -.CSR_M_TRAP (CSR_M_TRAP), -.CSR_COUNTER (CSR_COUNTER), -.DEBUG_SUPPORT (DEBUG_SUPPORT), -.NUM_IRQ (NUM_IRQ), -.MVENDORID_VAL (MVENDORID_VAL), -.MIMPID_VAL (MIMPID_VAL), -.MHARTID_VAL (MHARTID_VAL), -.MCONFIGPTR_VAL (MCONFIGPTR_VAL), -.REDUCED_BYPASS (REDUCED_BYPASS), -.MULDIV_UNROLL (MULDIV_UNROLL), -.MUL_FAST (MUL_FAST), -.MULH_FAST (MULH_FAST), -.FAST_BRANCHCMP (FAST_BRANCHCMP), -.MTVEC_WMASK (MTVEC_WMASK), -.W_ADDR (W_ADDR), -.W_DATA (W_DATA) +.RESET_VECTOR (RESET_VECTOR), +.MTVEC_INIT (MTVEC_INIT), +.EXTENSION_A (EXTENSION_A), +.EXTENSION_C (EXTENSION_C), +.EXTENSION_M (EXTENSION_M), +.EXTENSION_ZBA (EXTENSION_ZBA), +.EXTENSION_ZBB (EXTENSION_ZBB), +.EXTENSION_ZBC (EXTENSION_ZBC), +.EXTENSION_ZBS (EXTENSION_ZBS), +.EXTENSION_ZIFENCEI (EXTENSION_ZIFENCEI), +.CSR_M_MANDATORY (CSR_M_MANDATORY), +.CSR_M_TRAP (CSR_M_TRAP), +.CSR_COUNTER (CSR_COUNTER), +.DEBUG_SUPPORT (DEBUG_SUPPORT), +.NUM_IRQ (NUM_IRQ), +.MVENDORID_VAL (MVENDORID_VAL), +.MIMPID_VAL (MIMPID_VAL), +.MHARTID_VAL (MHARTID_VAL), +.MCONFIGPTR_VAL 
(MCONFIGPTR_VAL), +.REDUCED_BYPASS (REDUCED_BYPASS), +.MULDIV_UNROLL (MULDIV_UNROLL), +.MUL_FAST (MUL_FAST), +.MULH_FAST (MULH_FAST), +.FAST_BRANCHCMP (FAST_BRANCHCMP), +.MTVEC_WMASK (MTVEC_WMASK), +.W_ADDR (W_ADDR), +.W_DATA (W_DATA) diff --git a/hdl/hazard3_decode.v b/hdl/hazard3_decode.v index feed576..3aa2293 100644 --- a/hdl/hazard3_decode.v +++ b/hdl/hazard3_decode.v @@ -149,14 +149,15 @@ end always @ (*) begin - casez ({|EXTENSION_A, d_instr[6:2]}) - {1'bz, 5'b11011}: d_addr_offs = d_imm_j ; // JAL - {1'bz, 5'b11000}: d_addr_offs = d_imm_b ; // Branches - {1'bz, 5'b01000}: d_addr_offs = d_imm_s ; // Store - {1'bz, 5'b11001}: d_addr_offs = d_imm_i ; // JALR - {1'bz, 5'b00000}: d_addr_offs = d_imm_i ; // Loads - {1'b1, 5'b01011}: d_addr_offs = 32'h0000_0000; // Atomics - default: d_addr_offs = 32'hxxxx_xxxx; + casez ({|EXTENSION_A, |EXTENSION_ZIFENCEI, d_instr[6:2]}) + {1'bz, 1'bz, 5'b11011}: d_addr_offs = d_imm_j ; // JAL + {1'bz, 1'bz, 5'b11000}: d_addr_offs = d_imm_b ; // Branches + {1'bz, 1'bz, 5'b01000}: d_addr_offs = d_imm_s ; // Store + {1'bz, 1'bz, 5'b11001}: d_addr_offs = d_imm_i ; // JALR + {1'bz, 1'bz, 5'b00000}: d_addr_offs = d_imm_i ; // Loads + {1'b1, 1'bz, 5'b01011}: d_addr_offs = 32'h0000_0000; // Atomics + {1'bz, 1'b1, 5'b00011}: d_addr_offs = 32'h0000_0004; // Zifencei: fence.i is decoded as a branch, and this offset of 4 makes its target the next instruction + default: d_addr_offs = 32'hxxxx_xxxx; endcase end @@ -282,8 +283,8 @@ always @ (*) begin RV_BSET: if (EXTENSION_ZBS) begin d_aluop = ALUOP_BSET; end else begin d_invalid_32bit = 1'b1; end RV_BSETI: if (EXTENSION_ZBS) begin d_aluop = ALUOP_BSET; d_rs2 = X0; d_imm = d_imm_i; d_alusrc_b = ALUSRCB_IMM; end else begin d_invalid_32bit = 1'b1; end - RV_FENCE: begin d_rd = X0; end // NOP - RV_FENCE_I: begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_rd = X0; d_rs1 = X0; d_rs2 = X0; d_branchcond = BCOND_NZERO; d_imm[31] = 1'b1; end // FIXME this is probably busted now. Maybe implement as an exception? 
+ RV_FENCE: begin d_rs2 = X0; end // NOP. The RV_FENCE pattern only matches rs1 = rd = 0; rs2 is forced to X0 because instr[24:20] holds fence pred/succ bits, not a register number. NOTE(review): the RISC-V spec asks implementations to ignore the reserved rs1/rd fields of FENCE, so confirm that no longer matching nonzero values is intended. + RV_FENCE_I: if (EXTENSION_ZIFENCEI) begin d_invalid_32bit = DEBUG_SUPPORT && debug_mode; d_branchcond = BCOND_ALWAYS; end else begin d_invalid_32bit = 1'b1; end // Unconditional branch to the next instruction; flagged invalid in debug mode when DEBUG_SUPPORT is set, or always when Zifencei is disabled. rs1/rs2/rd are zero in the matched encoding. RV_CSRRW: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = 1'b1 ; d_csr_ren = |d_rd; d_csr_wtype = CSR_WTYPE_W; end else begin d_invalid_32bit = 1'b1; end RV_CSRRS: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_S; end else begin d_invalid_32bit = 1'b1; end RV_CSRRC: if (HAVE_CSR) begin d_imm = d_imm_i; d_csr_wen = |d_rs1; d_csr_ren = 1'b1 ; d_csr_wtype = CSR_WTYPE_C; end else begin d_invalid_32bit = 1'b1; end diff --git a/hdl/rv_opcodes.vh b/hdl/rv_opcodes.vh index ce11ba8..7cd2523 100644 --- a/hdl/rv_opcodes.vh +++ b/hdl/rv_opcodes.vh @@ -48,8 +48,8 @@ localparam RV_LHU = 32'b?????????????????101?????0000011; localparam RV_SB = 32'b?????????????????000?????0100011; localparam RV_SH = 32'b?????????????????001?????0100011; localparam RV_SW = 32'b?????????????????010?????0100011; -localparam RV_FENCE = 32'b?????????????????000?????0001111; -localparam RV_FENCE_I = 32'b?????????????????001?????0001111; +localparam RV_FENCE = 32'b????????????00000000000000001111; +localparam RV_FENCE_I = 32'b00000000000000000001000000001111; localparam RV_ECALL = 32'b00000000000000000000000001110011; localparam RV_EBREAK = 32'b00000000000100000000000001110011; localparam RV_CSRRW = 32'b?????????????????001?????1110011;