/*****************************************************************************\
|                        Copyright (C) 2021 Luke Wren                          |
|                     SPDX-License-Identifier: Apache-2.0                      |
\*****************************************************************************/

`default_nettype none

module hazard3_core #(
`include "hazard3_config.vh"
,
`include "hazard3_width_const.vh"
) (
    // Global signals
    input  wire                 clk,
    input  wire                 rst_n,

`ifdef RISCV_FORMAL
    `RVFI_OUTPUTS ,
`endif

    // Instruction fetch port
    output wire                 bus_aph_req_i,
    output wire                 bus_aph_panic_i, // e.g. branch mispredict + flush
    input  wire                 bus_aph_ready_i,
    input  wire                 bus_dph_ready_i,
    input  wire                 bus_dph_err_i,
    output wire [2:0]           bus_hsize_i,
    output wire [W_ADDR-1:0]    bus_haddr_i,
    input  wire [W_DATA-1:0]    bus_rdata_i,

    // Load/store port
    output reg                  bus_aph_req_d,
    output wire                 bus_aph_excl_d,
    input  wire                 bus_aph_ready_d,
    input  wire                 bus_dph_ready_d,
    input  wire                 bus_dph_err_d,
    input  wire                 bus_dph_exokay_d,
    output reg  [W_ADDR-1:0]    bus_haddr_d,
    output reg  [2:0]           bus_hsize_d,
    output reg                  bus_hwrite_d,
    output reg  [W_DATA-1:0]    bus_wdata_d,
    input  wire [W_DATA-1:0]    bus_rdata_d,

    // Debugger run/halt control
    input  wire                 dbg_req_halt,
    input  wire                 dbg_req_halt_on_reset,
    input  wire                 dbg_req_resume,
    output wire                 dbg_halted,
    output wire                 dbg_running,
    // Debugger access to data0 CSR
    input  wire [W_DATA-1:0]    dbg_data0_rdata,
    output wire [W_DATA-1:0]    dbg_data0_wdata,
    output wire                 dbg_data0_wen,
    // Debugger instruction injection
    input  wire [W_DATA-1:0]    dbg_instr_data,
    input  wire                 dbg_instr_data_vld,
    output wire                 dbg_instr_data_rdy,
    output wire                 dbg_instr_caught_exception,
    output wire                 dbg_instr_caught_ebreak,

    // Level-sensitive interrupt sources
    input  wire [NUM_IRQ-1:0]   irq,       // -> mip.meip
    input  wire                 soft_irq,  // -> mip.msip
    input  wire                 timer_irq  // -> mip.mtip
);

`include "hazard3_ops.vh"

wire x_stall;
wire m_stall;

localparam HSIZE_WORD  = 3'd2;
localparam HSIZE_HWORD = 3'd1;
localparam HSIZE_BYTE  = 3'd0;

wire debug_mode;
assign dbg_halted  = DEBUG_SUPPORT && debug_mode;
assign dbg_running = DEBUG_SUPPORT && !debug_mode;

// ----------------------------------------------------------------------------
// Pipe Stage F

wire                 f_jump_req;
wire [W_ADDR-1:0]    f_jump_target;
wire                 f_jump_rdy;
wire                 f_jump_now = f_jump_req && f_jump_rdy;

// Predecoded register numbers, for register file access
wire                 f_regnum_vld;
wire [W_REGADDR-1:0] f_rs1;
wire [W_REGADDR-1:0] f_rs2;

wire [31:0]          fd_cir;
wire [1:0]           fd_cir_err;
wire [1:0]           fd_cir_vld;
wire [1:0]           df_cir_use;
wire                 df_cir_lock;

assign bus_aph_panic_i = 1'b0;

wire f_mem_size;
assign bus_hsize_i = f_mem_size ? HSIZE_WORD : HSIZE_HWORD;
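// Illustrative note: the frontend only ever requests word or halfword
// fetches. A halfword fetch is needed, for example, when the fetch address
// is 2-byte but not 4-byte aligned (e.g. immediately after a jump to such an
// address), since bus transfers must be naturally aligned.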
hazard3_frontend #(
    .FIFO_DEPTH(2),
`include "hazard3_config_inst.vh"
) frontend (
    .clk                (clk),
    .rst_n              (rst_n),

    .mem_size           (f_mem_size),
    .mem_addr           (bus_haddr_i),
    .mem_addr_vld       (bus_aph_req_i),
    .mem_addr_rdy       (bus_aph_ready_i),

    .mem_data           (bus_rdata_i),
    .mem_data_err       (bus_dph_err_i),
    .mem_data_vld       (bus_dph_ready_i),

    .jump_target        (f_jump_target),
    .jump_target_vld    (f_jump_req),
    .jump_target_rdy    (f_jump_rdy),

    .cir                (fd_cir),
    .cir_err            (fd_cir_err),
    .cir_vld            (fd_cir_vld),
    .cir_use            (df_cir_use),
    .cir_lock           (df_cir_lock),

    .next_regs_rs1      (f_rs1),
    .next_regs_rs2      (f_rs2),
    .next_regs_vld      (f_regnum_vld),

    .debug_mode         (debug_mode),
    .dbg_instr_data     (dbg_instr_data),
    .dbg_instr_data_vld (dbg_instr_data_vld),
    .dbg_instr_data_rdy (dbg_instr_data_rdy)
);

// ----------------------------------------------------------------------------
// Pipe Stage X (Decode Logic)

// X-check on pieces of instruction which frontend claims are valid
//synthesis translate_off
always @ (posedge clk) begin
    if (rst_n) begin
        if (|fd_cir_vld && (^fd_cir[15:0] === 1'bx)) begin
            $display("CIR LSBs are X, should be valid!");
            $finish;
        end
        if (fd_cir_vld[1] && (^fd_cir === 1'bX)) begin
            $display("CIR contains X, should be fully valid!");
            $finish;
        end
    end
end
//synthesis translate_on

// To X
wire                 d_starved;
wire [W_DATA-1:0]    d_imm;
wire [W_REGADDR-1:0] d_rs1;
wire [W_REGADDR-1:0] d_rs2;
wire [W_REGADDR-1:0] d_rd;
wire [W_ALUSRC-1:0]  d_alusrc_a;
wire [W_ALUSRC-1:0]  d_alusrc_b;
wire [W_ALUOP-1:0]   d_aluop;
wire [W_MEMOP-1:0]   d_memop;
wire [W_MULOP-1:0]   d_mulop;
wire [W_BCOND-1:0]   d_branchcond;
wire [W_ADDR-1:0]    d_addr_offs;
wire                 d_addr_is_regoffs;
wire [W_ADDR-1:0]    d_pc;
wire [W_EXCEPT-1:0]  d_except;
wire                 d_wfi;
wire                 d_csr_ren;
wire                 d_csr_wen;
wire [1:0]           d_csr_wtype;
wire                 d_csr_w_imm;

wire x_jump_not_except;

hazard3_decode #(
`include "hazard3_config_inst.vh"
) inst_hazard3_decode (
    .clk                (clk),
    .rst_n              (rst_n),

    .fd_cir             (fd_cir),
    .fd_cir_err         (fd_cir_err),
    .fd_cir_vld         (fd_cir_vld),
    .df_cir_use         (df_cir_use),
    .df_cir_lock        (df_cir_lock),
    .d_pc               (d_pc),
    .x_jump_not_except  (x_jump_not_except),

    .debug_mode         (debug_mode),

    .d_starved          (d_starved),
    .x_stall            (x_stall),
    .f_jump_now         (f_jump_now),
    .f_jump_target      (f_jump_target),

    .d_imm              (d_imm),
    .d_rs1              (d_rs1),
    .d_rs2              (d_rs2),
    .d_rd               (d_rd),
    .d_alusrc_a         (d_alusrc_a),
    .d_alusrc_b         (d_alusrc_b),
    .d_aluop            (d_aluop),
    .d_memop            (d_memop),
    .d_mulop            (d_mulop),
    .d_csr_ren          (d_csr_ren),
    .d_csr_wen          (d_csr_wen),
    .d_csr_wtype        (d_csr_wtype),
    .d_csr_w_imm        (d_csr_w_imm),
    .d_branchcond       (d_branchcond),
    .d_addr_offs        (d_addr_offs),
    .d_addr_is_regoffs  (d_addr_is_regoffs),
    .d_except           (d_except),
    .d_wfi              (d_wfi)
);

// ----------------------------------------------------------------------------
// Pipe Stage X (Execution Logic)

// Register the write which took place to the regfile on previous cycle, and bypass.
// This is an alternative to a write -> read bypass in the regfile,
// which we can't implement whilst maintaining BRAM inference compatibility (iCE40).
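// Illustrative example (full bypass network): if instruction A writes x5 in
// stage M on cycle t, an instruction whose regfile read address was presented
// on that same cycle t sees stale BRAM data on cycle t+1. mw_rd/mw_result
// below capture A's write so that stage X can bypass it instead.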
reg  [W_REGADDR-1:0] mw_rd;
reg  [W_DATA-1:0]    mw_result;

// From register file:
wire [W_DATA-1:0]    x_rdata1;
wire [W_DATA-1:0]    x_rdata2;

// Combinational regs for muxing
reg  [W_DATA-1:0]    x_rs1_bypass;
reg  [W_DATA-1:0]    x_rs2_bypass;
reg  [W_DATA-1:0]    x_op_a;
reg  [W_DATA-1:0]    x_op_b;
wire [W_DATA-1:0]    x_alu_result;
wire                 x_alu_cmp;

wire [W_DATA-1:0]    m_trap_addr;
wire                 m_trap_is_irq;
wire                 m_trap_enter_vld;
wire                 m_trap_enter_soon;
wire                 m_trap_enter_rdy = f_jump_rdy;

reg  [W_REGADDR-1:0] xm_rs1;
reg  [W_REGADDR-1:0] xm_rs2;
reg  [W_REGADDR-1:0] xm_rd;
reg  [W_DATA-1:0]    xm_result;
reg  [1:0]           xm_addr_align;
reg  [W_MEMOP-1:0]   xm_memop;
reg  [W_EXCEPT-1:0]  xm_except;
reg                  xm_wfi;
reg                  xm_delay_irq_entry;

// ----------------------------------------------------------------------------
// Stall logic

// IRQs squeeze in between the instructions in X and M, so in this case X
// stalls but M can continue. -> X always stalls on M trap, M *may* stall.
wire x_stall_on_trap =
    m_trap_enter_vld && !m_trap_enter_rdy ||
    m_trap_enter_soon && !m_trap_enter_vld;

// Stall inserted to avoid illegal pipelining of exclusive accesses on the bus
// (also gives time to update local monitor on direct lr.w -> sc.w instruction
// sequences). Note we don't check for AMOs in stage M, because AMOs fully
// fence off on their own completion before passing down the pipe.
wire d_memop_is_amo = |EXTENSION_A && d_memop == MEMOP_AMO;
wire x_stall_on_exclusive_overlap = |EXTENSION_A && (
    (d_memop_is_amo || d_memop == MEMOP_SC_W || d_memop == MEMOP_LR_W) &&
    (xm_memop == MEMOP_SC_W || xm_memop == MEMOP_LR_W)
);

// AMOs are issued completely from X. We keep X stalled, and pass bubbles into
// M. Otherwise the exception handling would be even more of a mess. Phases
// 0-3 are read/write address/data phases. Phase 4 is error, due to HRESP or
// due to low HEXOKAY response to read.
// Also need to clear AMO if it follows an excepting instruction. Note we
// still stall on phase 3 when hready is high if hresp is also high, since we
// then proceed to phase 4 for the error response.
reg [2:0] x_amo_phase;
wire x_stall_on_amo = |EXTENSION_A && d_memop_is_amo && !m_trap_enter_soon && (
    x_amo_phase < 3'h3 ||
    (x_amo_phase == 3'h3 && (!bus_dph_ready_d || !bus_dph_exokay_d || bus_dph_err_d))
);

// Read-after-write hazard detection (e.g. load-use)
wire m_fast_mul_result_vld;
wire m_generating_result =
    xm_memop < MEMOP_SW ||
    |EXTENSION_A && xm_memop == MEMOP_LR_W ||
    |EXTENSION_A && xm_memop == MEMOP_SC_W || // sc.w success result is data phase
    |EXTENSION_M && m_fast_mul_result_vld;

reg x_stall_on_raw;
always @ (*) begin
    x_stall_on_raw = 1'b0;
    if (REDUCED_BYPASS) begin
        x_stall_on_raw =
            |xm_rd && (xm_rd == d_rs1 || xm_rd == d_rs2) ||
            |mw_rd && (mw_rd == d_rs1 || mw_rd == d_rs2);
    end else if (m_generating_result) begin
        // With the full bypass network, load-use (or fast multiply-use) is the
        // only RAW stall
        if (|xm_rd && xm_rd == d_rs1) begin
            // Store addresses cannot be bypassed later, so there is no exception here.
            x_stall_on_raw = 1'b1;
        end else if (|xm_rd && xm_rd == d_rs2) begin
            // Store data can be bypassed in M. Any other instructions must stall.
            x_stall_on_raw = !(d_memop == MEMOP_SW || d_memop == MEMOP_SH || d_memop == MEMOP_SB);
        end
    end
end
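// Example of the load-use case handled above (full bypass network):
//
//   lw  x1, 0(x2)    // in M, data phase still in flight
//   add x3, x1, x4   // in X, needs x1 as rs1 -> stalls until the load data
//                    // is available for bypass
//
// whereas  lw x1, 0(x2)  followed by  sw x1, 0(x4)  does not stall, since
// store data (rs2) can be picked up in stage M.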
wire x_stall_muldiv;
wire x_jump_req;

assign x_stall =
    m_stall ||
    x_stall_on_trap ||
    x_stall_on_exclusive_overlap ||
    x_stall_on_amo ||
    x_stall_on_raw ||
    x_stall_muldiv ||
    bus_aph_req_d && !bus_aph_ready_d ||
    x_jump_req && !f_jump_rdy;

wire m_wfi_stall_clear;

// ----------------------------------------------------------------------------
// Execution logic

// ALU, operand muxes and bypass

always @ (*) begin
    if (~|d_rs1) begin
        x_rs1_bypass = {W_DATA{1'b0}};
    end else if (xm_rd == d_rs1) begin
        x_rs1_bypass = xm_result;
    end else if (mw_rd == d_rs1 && !REDUCED_BYPASS) begin
        x_rs1_bypass = mw_result;
    end else begin
        x_rs1_bypass = x_rdata1;
    end
    if (~|d_rs2) begin
        x_rs2_bypass = {W_DATA{1'b0}};
    end else if (xm_rd == d_rs2) begin
        x_rs2_bypass = xm_result;
    end else if (mw_rd == d_rs2 && !REDUCED_BYPASS) begin
        x_rs2_bypass = mw_result;
    end else begin
        x_rs2_bypass = x_rdata2;
    end

    // AMO captures rdata into mw_result at end of read data phase, so we can
    // feed back through the ALU.
    if (|EXTENSION_A && x_amo_phase == 3'h2)
        x_op_a = mw_result;
    else if (|d_alusrc_a)
        x_op_a = d_pc;
    else
        x_op_a = x_rs1_bypass;

    if (|d_alusrc_b)
        x_op_b = d_imm;
    else
        x_op_b = x_rs2_bypass;
end

hazard3_alu #(
`include "hazard3_config_inst.vh"
) alu (
    .aluop  (d_aluop),
    .op_a   (x_op_a),
    .op_b   (x_op_b),
    .result (x_alu_result),
    .cmp    (x_alu_cmp)
);

// AHB transaction request

// AMO stalls the pipe, then generates two bus transfers per 4-cycle
// iteration, unless it bails out due to a bus fault or failed load
// reservation.
always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        x_amo_phase <= 3'h0;
    end else if (|EXTENSION_A && d_memop_is_amo && (
        bus_aph_ready_d || bus_dph_ready_d || m_trap_enter_vld ||
        x_unaligned_addr || x_amo_phase == 3'h4
    )) begin
        if (m_trap_enter_vld) begin
            // Bail out, squash the in-progress AMO.
            x_amo_phase <= 3'h0;
`ifdef FORMAL
            // Should only happen during an address phase, *or* the fault phase.
            assert(x_amo_phase == 3'h0 || x_amo_phase == 3'h2 || x_amo_phase == 3'h4);
            // The fault phase only holds when we have a misaligned AMO directly behind
            // a regular memory access that subsequently excepts, and the AMO has gone
            // straight to fault phase due to misalignment.
            if (x_amo_phase == 3'h4)
                assert(x_unaligned_addr);
`endif
        end else if (x_stall_on_raw || x_stall_on_exclusive_overlap || m_trap_enter_soon) begin
            // First address phase stalled due to address dependency on
            // previous load/mul/etc. Shouldn't be possible in later phases.
            x_amo_phase <= 3'h0;
`ifdef FORMAL
            assert(x_amo_phase == 3'h0);
`endif
        end else if (x_amo_phase == 3'h4) begin
            // Clear fault phase once it goes through to stage 3 and excepts
            if (!x_stall)
                x_amo_phase <= 3'h0;
`ifdef FORMAL
            // This should only happen when we are stalled on an older load/store etc
            assert(!(x_stall && !m_stall));
`endif
        end else if (x_unaligned_addr) begin
            x_amo_phase <= 3'h4;
        end else if (x_amo_phase == 3'h1 && !bus_dph_exokay_d) begin
            // Load reserve fail indicates the memory region does not support
            // exclusives, so we will never succeed at store. Exception.
            x_amo_phase <= 3'h4;
        end else if ((x_amo_phase == 3'h1 || x_amo_phase == 3'h3) && bus_dph_err_d) begin
            // Bus fault. Exception.
            x_amo_phase <= 3'h4;
        end else if (x_amo_phase == 3'h3) begin
            // Either we're done, or the write failed. Either way, back to the start.
            x_amo_phase <= 3'h0;
        end else begin
            // Default progression: read addr -> read data -> write addr -> write data
            x_amo_phase <= x_amo_phase + 3'h1;
        end
    end
end
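// Rough phase sequence for e.g. amoadd.w with no faults or stalls:
//
//   phase 0: read address phase  (exclusive read issued on the bus)
//   phase 1: read data phase     (old memory value captured into mw_result)
//   phase 2: write address phase (ALU computes old value op rs2)
//   phase 3: write data phase    (modified value written back)
//   phase 4: error               (bus fault, misalignment, or HEXOKAY low)
//
// Stage X stays stalled for the whole sequence and bubbles pass into M.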
`ifdef FORMAL
always @ (posedge clk) if (rst_n) begin
    // Other states should be unreachable
    assert(x_amo_phase <= 3'h4);
    // First state should be 0 -- don't want anything carried from one AMO to the next.
    if (x_stall_on_amo && !$past(x_stall_on_amo))
        assert(x_amo_phase == 3'h0);
    // Should be in resting state between AMOs
    if (!d_memop_is_amo)
        assert(x_amo_phase == 3'h0);
    // Error phase should have no stage 2 blockers, so it can pass to stage 3 to
    // raise exception entry. It's ok to block behind a younger instruction, but..
    if (x_amo_phase == 3'h4)
        assert(!x_stall || m_stall);
    // ..the only way to reach AMO error phase without stage 3 clearing out should
    // be an unaligned AMO address, which goes straight to error phase.
    if (x_amo_phase == 3'h4 && m_stall)
        assert(x_unaligned_addr);
    // Error phase is either due to a bus response, or a misaligned address.
    // Neither of these are write-address-phase.
    if (x_amo_phase == 3'h4)
        assert($past(x_amo_phase) != 3'h2);
    // Make sure M is unstalled for passing store data through in phase 2
    if (x_amo_phase == 3'h2)
        assert(!m_stall);
end
`endif

reg mw_local_exclusive_reserved;

wire x_memop_vld = d_memop != MEMOP_NONE && !(
    |EXTENSION_A && d_memop == MEMOP_SC_W && !mw_local_exclusive_reserved ||
    |EXTENSION_A && d_memop_is_amo && x_amo_phase != 3'h0 && x_amo_phase != 3'h2
);

wire x_memop_write =
    d_memop == MEMOP_SW || d_memop == MEMOP_SH || d_memop == MEMOP_SB ||
    |EXTENSION_A && d_memop == MEMOP_SC_W ||
    |EXTENSION_A && d_memop_is_amo && x_amo_phase == 3'h2;

wire x_unaligned_addr = d_memop != MEMOP_NONE && (
    bus_hsize_d == HSIZE_WORD && |bus_haddr_d[1:0] ||
    bus_hsize_d == HSIZE_HWORD && bus_haddr_d[0]
);

// Always query the global monitor, except for store-conditional suppressed by
// local monitor.
assign bus_aph_excl_d = |EXTENSION_A && (
    d_memop == MEMOP_LR_W ||
    d_memop == MEMOP_SC_W ||
    d_memop_is_amo
);

// This adder is used for both branch targets and load/store addresses.
// Supporting all branch types already requires rs1 + I-fmt, and pc + B-fmt.
// B-fmt is almost identical to S-fmt, so adding rs1 + S-fmt is almost free.
wire [W_ADDR-1:0] x_addr_sum = (d_addr_is_regoffs ? x_rs1_bypass : d_pc) + d_addr_offs;
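// For illustration, the operand selection above gives:
//   loads/stores/JALR : rs1 + immediate   (d_addr_is_regoffs = 1)
//   branches/JAL      : pc  + immediate   (d_addr_is_regoffs = 0)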
always @ (*) begin
    // Need to be careful not to use anything hready-sourced to gate htrans!
    bus_haddr_d  = x_addr_sum;
    bus_hwrite_d = x_memop_write;
    case (d_memop)
        MEMOP_LW:  bus_hsize_d = HSIZE_WORD;
        MEMOP_SW:  bus_hsize_d = HSIZE_WORD;
        MEMOP_LH:  bus_hsize_d = HSIZE_HWORD;
        MEMOP_LHU: bus_hsize_d = HSIZE_HWORD;
        MEMOP_SH:  bus_hsize_d = HSIZE_HWORD;
        MEMOP_LB:  bus_hsize_d = HSIZE_BYTE;
        MEMOP_LBU: bus_hsize_d = HSIZE_BYTE;
        MEMOP_SB:  bus_hsize_d = HSIZE_BYTE;
        default:   bus_hsize_d = HSIZE_WORD;
    endcase
    bus_aph_req_d = x_memop_vld && !(
        x_stall_on_raw ||
        x_stall_on_exclusive_overlap ||
        x_unaligned_addr ||
        m_trap_enter_soon ||
        (xm_wfi && !m_wfi_stall_clear) // FIXME will cause a timing issue, better to stall til *after* clear
    );
end

// Multiply/divide

wire [W_DATA-1:0] x_muldiv_result;
wire [W_DATA-1:0] m_fast_mul_result;

generate
if (EXTENSION_M) begin: has_muldiv
    wire              x_muldiv_op_vld;
    wire              x_muldiv_op_rdy;
    wire              x_muldiv_result_vld;
    wire [W_DATA-1:0] x_muldiv_result_h;
    wire [W_DATA-1:0] x_muldiv_result_l;

    reg x_muldiv_posted;
    always @ (posedge clk or negedge rst_n)
        if (!rst_n)
            x_muldiv_posted <= 1'b0;
        else
            x_muldiv_posted <= (x_muldiv_posted || (x_muldiv_op_vld && x_muldiv_op_rdy)) && x_stall;

    wire x_muldiv_kill = m_trap_enter_soon;

    wire x_use_fast_mul =
        d_aluop == ALUOP_MULDIV && (
            MUL_FAST  && d_mulop == M_OP_MUL    ||
            MULH_FAST && d_mulop == M_OP_MULH   ||
            MULH_FAST && d_mulop == M_OP_MULHU  ||
            MULH_FAST && d_mulop == M_OP_MULHSU
        );

    assign x_muldiv_op_vld = (d_aluop == ALUOP_MULDIV && !x_use_fast_mul)
        && !(x_muldiv_posted || x_stall_on_raw || x_muldiv_kill);

    hazard3_muldiv_seq #(
    `include "hazard3_config_inst.vh"
    ) muldiv (
        .clk        (clk),
        .rst_n      (rst_n),
        .op         (d_mulop),
        .op_vld     (x_muldiv_op_vld),
        .op_rdy     (x_muldiv_op_rdy),
        .op_kill    (x_muldiv_kill),
        .op_a       (x_rs1_bypass),
        .op_b       (x_rs2_bypass),

        .result_h   (x_muldiv_result_h),
        .result_l   (x_muldiv_result_l),
        .result_vld (x_muldiv_result_vld)
    );

    wire x_muldiv_result_is_high =
        d_mulop == M_OP_MULH   ||
        d_mulop == M_OP_MULHSU ||
        d_mulop == M_OP_MULHU  ||
        d_mulop == M_OP_REM    ||
        d_mulop == M_OP_REMU;
    assign x_muldiv_result = x_muldiv_result_is_high ? x_muldiv_result_h : x_muldiv_result_l;
    assign x_stall_muldiv = x_muldiv_op_vld || !x_muldiv_result_vld;

    if (MUL_FAST) begin: has_fast_mul
        wire x_issue_fast_mul = x_use_fast_mul && |d_rd && !x_stall;
        hazard3_mul_fast #(
        `include "hazard3_config_inst.vh"
        ) mul_fast (
            .clk        (clk),
            .rst_n      (rst_n),

            .op_vld     (x_issue_fast_mul),
            .op         (d_mulop),
            .op_a       (x_rs1_bypass),
            .op_b       (x_rs2_bypass),

            .result     (m_fast_mul_result),
            .result_vld (m_fast_mul_result_vld)
        );
    end else begin: no_fast_mul
        assign m_fast_mul_result = {W_DATA{1'b0}};
        assign m_fast_mul_result_vld = 1'b0;
    end

`ifdef FORMAL
    always @ (posedge clk) if (d_aluop != ALUOP_MULDIV) assert(!x_stall_muldiv);
`endif

end else begin: no_muldiv
    assign x_muldiv_result = {W_DATA{1'b0}};
    assign m_fast_mul_result = {W_DATA{1'b0}};
    assign m_fast_mul_result_vld = 1'b0;
    assign x_stall_muldiv = 1'b0;
end
endgenerate
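// Summary of the split above: when MUL_FAST/MULH_FAST are set, the selected
// multiply ops are issued to hazard3_mul_fast and their result is consumed in
// stage M (m_fast_mul_result_vld). All other M-extension ops go through the
// sequential hazard3_muldiv_seq, with stage X held via x_stall_muldiv until
// result_vld.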
// ----------------------------------------------------------------------------
// CSRs and Trap Handling

wire [W_DATA-1:0] x_csr_wdata = d_csr_w_imm ?
    {{W_DATA-5{1'b0}}, d_rs1} : x_rs1_bypass;

wire [W_DATA-1:0] x_csr_rdata;
wire              x_csr_illegal_access;

// "Previous" refers to next-most-recent instruction to be in D/X, i.e. the
// most recent instruction to reach stage M (which may or may not still be in M).
reg prev_instr_was_32_bit;

always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        xm_delay_irq_entry <= 1'b0;
        prev_instr_was_32_bit <= 1'b0;
    end else begin
        // Must hold off IRQ if we are in the second cycle of an address phase or
        // later, since at that point the load/store can't be revoked. The IRQ is
        // taken once this load/store moves to the next stage: if another load/store
        // is chasing down the pipeline then this is immediately suppressed by the
        // IRQ entry, before its address phase can begin.
        // Also hold off on AMOs, unless the AMO is transitioning to an address
        // phase or completing. ("completing" excludes transitions to error phase.)
        xm_delay_irq_entry <= bus_aph_req_d && !bus_aph_ready_d ||
            d_memop_is_amo && !(
                x_amo_phase == 3'h3 && bus_dph_ready_d && !bus_dph_err_d ||
                // Read reservation failure also generates an error
                x_amo_phase == 3'h1 && bus_dph_ready_d && !bus_dph_err_d && bus_dph_exokay_d
            );

        if (!x_stall)
            prev_instr_was_32_bit <= df_cir_use == 2'd2;
    end
end

wire [W_ADDR-1:0] m_exception_return_addr;

wire [W_EXCEPT-1:0] x_except =
    x_csr_illegal_access                                 ? EXCEPT_INSTR_ILLEGAL :
    |EXTENSION_A && x_unaligned_addr && d_memop_is_amo   ? EXCEPT_STORE_ALIGN   :
    |EXTENSION_A && x_amo_phase == 3'h4 && x_unaligned_addr ? EXCEPT_STORE_ALIGN :
    |EXTENSION_A && x_amo_phase == 3'h4                  ? EXCEPT_STORE_FAULT   :
    x_unaligned_addr && x_memop_write                    ? EXCEPT_STORE_ALIGN   :
    x_unaligned_addr && !x_memop_write                   ? EXCEPT_LOAD_ALIGN    : d_except;

// If an instruction causes an exceptional condition we do not consider it to
// have retired, except for the cases below (ebreak, mret, ecall), which still
// count as retired.
wire x_except_counts_as_retire = x_except == EXCEPT_EBREAK ||
    x_except == EXCEPT_MRET || x_except == EXCEPT_ECALL;
wire x_instr_ret = |df_cir_use && (x_except == EXCEPT_NONE || x_except_counts_as_retire);

wire m_dphase_in_flight = xm_memop != MEMOP_NONE && xm_memop != MEMOP_AMO;

hazard3_csr #(
    .XLEN (W_DATA),
`include "hazard3_config_inst.vh"
) inst_hazard3_csr (
    .clk                        (clk),
    .rst_n                      (rst_n),

    // Debugger signalling
    .debug_mode                 (debug_mode),
    .dbg_req_halt               (dbg_req_halt),
    .dbg_req_halt_on_reset      (dbg_req_halt_on_reset),
    .dbg_req_resume             (dbg_req_resume),

    .dbg_instr_caught_exception (dbg_instr_caught_exception),
    .dbg_instr_caught_ebreak    (dbg_instr_caught_ebreak),

    .dbg_data0_rdata            (dbg_data0_rdata),
    .dbg_data0_wdata            (dbg_data0_wdata),
    .dbg_data0_wen              (dbg_data0_wen),

    // CSR access port
    // *en_soon are early access strobes which are not a function of bus stall.
    // Can generate access faults (hence traps), but do not actually perform access.
    .addr                       (fd_cir[31:20]), // Always I-type immediate
    .wdata                      (x_csr_wdata),
    .wen_soon                   (d_csr_wen && !m_trap_enter_soon),
    .wen                        (d_csr_wen && !m_trap_enter_soon && !x_stall),
    .wtype                      (d_csr_wtype),
    .rdata                      (x_csr_rdata),
    .ren_soon                   (d_csr_ren && !m_trap_enter_soon),
    .ren                        (d_csr_ren && !m_trap_enter_soon && !x_stall),
    .illegal                    (x_csr_illegal_access),

    // Trap signalling
    .trap_addr                  (m_trap_addr),
    .trap_is_irq                (m_trap_is_irq),
    .trap_enter_soon            (m_trap_enter_soon),
    .trap_enter_vld             (m_trap_enter_vld),
    .trap_enter_rdy             (m_trap_enter_rdy),
    .loadstore_dphase_pending   (m_dphase_in_flight),
    .mepc_in                    (m_exception_return_addr),
    .wfi_stall_clear            (m_wfi_stall_clear),

    // IRQ and exception requests
    .delay_irq_entry            (xm_delay_irq_entry),
    .irq                        (irq),
    .irq_software               (soft_irq),
    .irq_timer                  (timer_irq),
    .except                     (xm_except),

    // Other CSR-specific signalling
    .instr_ret                  (|x_instr_ret)
);
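// Illustrative note on the pipe register below: when X stalls (or a trap is
// about to be taken) while M is free to advance, M is handed a bubble --
// xm_rd, xm_memop, xm_except and xm_wfi are all cleared -- so the stalled
// instruction in X has no architectural effect until it actually issues.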
// Pipe register

always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        xm_memop <= MEMOP_NONE;
        xm_except <= EXCEPT_NONE;
        xm_wfi <= 1'b0;
        {xm_rs1, xm_rs2, xm_rd} <= {3 * W_REGADDR{1'b0}};
    end else begin
        if (!m_stall) begin
            {xm_rs1, xm_rs2, xm_rd} <= {d_rs1, d_rs2, d_rd};
            // If the transfer is unaligned, make sure it is completely NOP'd on the bus
            xm_memop <= x_unaligned_addr ? MEMOP_NONE : d_memop;
            xm_except <= x_except;
            xm_wfi <= d_wfi;
            if (x_stall || m_trap_enter_soon) begin
                // Insert bubble
                xm_rd <= {W_REGADDR{1'b0}};
                xm_memop <= MEMOP_NONE;
                xm_except <= EXCEPT_NONE;
                xm_wfi <= 1'b0;
            end
        end else if (bus_dph_err_d) begin
            // First phase of 2-phase AHBL error response. Pass the exception along on
            // this cycle, and on the next cycle the trap entry will be asserted,
            // suppressing any load/store that may currently be in stage X.
`ifdef FORMAL
            assert(xm_memop != MEMOP_NONE);
`endif
            xm_except <=
                |EXTENSION_A && xm_memop == MEMOP_LR_W ? EXCEPT_LOAD_FAULT :
                xm_memop <= MEMOP_LBU                  ? EXCEPT_LOAD_FAULT : EXCEPT_STORE_FAULT;
            xm_wfi <= 1'b0;
        end
    end
end

`ifdef FORMAL
always @ (posedge clk) if (rst_n) begin
    // D bus errors must always squash younger load/stores
    if ($past(bus_dph_err_d && !bus_dph_ready_d))
        assert(!bus_aph_req_d);
end
`endif

// Datapath flops
always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        xm_result <= {W_DATA{1'b0}};
        xm_addr_align <= 2'b00;
    end else if (!m_stall && !(|EXTENSION_A && x_amo_phase == 3'h3 && !bus_dph_ready_d)) begin
        // AMOs need special attention (of course):
        // - Steer captured read phase data in mw_result back through xm_result at end of AMO
        // - Make sure xm_result (store data) doesn't transition during stalled write dphase
        xm_result <=
            d_csr_ren                               ? x_csr_rdata     :
            |EXTENSION_A && x_amo_phase == 3'h3     ? mw_result       :
            |EXTENSION_M && d_aluop == ALUOP_MULDIV ? x_muldiv_result : x_alu_result;
        xm_addr_align <= x_addr_sum[1:0];
    end
end

// Branch handling

// For JALR, the LSB of the result must be cleared by hardware
wire [W_ADDR-1:0] x_jump_target = x_addr_sum & ~32'h1;

// Be careful not to take branches whose comparisons depend on a load result
assign x_jump_req = !x_stall_on_raw && (
    d_branchcond == BCOND_ALWAYS ||
    d_branchcond == BCOND_ZERO && !x_alu_cmp ||
    d_branchcond == BCOND_NZERO && x_alu_cmp
);

// ----------------------------------------------------------------------------
// Pipe Stage M

reg [W_DATA-1:0] m_rdata_pick_sext;
reg [W_DATA-1:0] m_wdata;
reg [W_DATA-1:0] m_result;

assign f_jump_req = x_jump_req || m_trap_enter_vld;
assign f_jump_target = m_trap_enter_vld ? m_trap_addr : x_jump_target;
assign x_jump_not_except = !m_trap_enter_vld;

// EXCEPT_NONE clause is needed in the following sequence:
// - Cycle 0: hresp asserted, hready low. We set the exception to squash
//   behind us. Bus stall high.
// - Cycle 1: hready high. For whatever reason, the frontend can't accept the
//   trap address this cycle.
// - Cycle 2: Our dataphase has ended, so bus_dph_ready_d doesn't pulse again.
//   m_bus_stall stuck high.
wire m_bus_stall = m_dphase_in_flight && !bus_dph_ready_d && xm_except == EXCEPT_NONE && !(
    |EXTENSION_A && xm_memop == MEMOP_SC_W && !mw_local_exclusive_reserved
);

assign m_stall = m_bus_stall ||
    (m_trap_enter_vld && !m_trap_enter_rdy && !m_trap_is_irq) ||
    (xm_wfi && !m_wfi_stall_clear);

// Exception is taken against the instruction currently in M, so walk the PC
// back. IRQ is taken "in between" the instruction in M and the instruction
// in X, so set return to X program counter. Note that, if taking an
// exception, we know that the previous instruction to be in X (now in M)
// was *not* a taken branch, which is why we can just walk back the PC.
assign m_exception_return_addr = d_pc - (
    m_trap_is_irq         ? 32'h0 :
    prev_instr_was_32_bit ? 32'h4 : 32'h2
);
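// Worked example: if the excepting instruction now in M was a 32-bit
// instruction, d_pc (the PC of the instruction in X) is its PC + 4, so
// mepc = d_pc - 4 points back at the faulting instruction; for a 16-bit
// instruction, mepc = d_pc - 2. For IRQs, mepc = d_pc, i.e. the next
// instruction to execute after mret.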
// Load/store data handling

always @ (*) begin
    // Local forwarding of store data
    if (|mw_rd && xm_rs2 == mw_rd && !REDUCED_BYPASS) begin
        m_wdata = mw_result;
    end else begin
        m_wdata = xm_result;
    end
    // Replicate store data to ensure appropriate byte lane is driven
    case (xm_memop)
        MEMOP_SH: bus_wdata_d = {2{m_wdata[15:0]}};
        MEMOP_SB: bus_wdata_d = {4{m_wdata[7:0]}};
        default:  bus_wdata_d = m_wdata;
    endcase

    casez ({xm_memop, xm_addr_align[1:0]})
        {MEMOP_LH  , 2'b0z}: m_rdata_pick_sext = {{16{bus_rdata_d[15]}}, bus_rdata_d[15: 0]};
        {MEMOP_LH  , 2'b1z}: m_rdata_pick_sext = {{16{bus_rdata_d[31]}}, bus_rdata_d[31:16]};
        {MEMOP_LHU , 2'b0z}: m_rdata_pick_sext = {{16{1'b0           }}, bus_rdata_d[15: 0]};
        {MEMOP_LHU , 2'b1z}: m_rdata_pick_sext = {{16{1'b0           }}, bus_rdata_d[31:16]};
        {MEMOP_LB  , 2'b00}: m_rdata_pick_sext = {{24{bus_rdata_d[ 7]}}, bus_rdata_d[ 7: 0]};
        {MEMOP_LB  , 2'b01}: m_rdata_pick_sext = {{24{bus_rdata_d[15]}}, bus_rdata_d[15: 8]};
        {MEMOP_LB  , 2'b10}: m_rdata_pick_sext = {{24{bus_rdata_d[23]}}, bus_rdata_d[23:16]};
        {MEMOP_LB  , 2'b11}: m_rdata_pick_sext = {{24{bus_rdata_d[31]}}, bus_rdata_d[31:24]};
        {MEMOP_LBU , 2'b00}: m_rdata_pick_sext = {{24{1'b0           }}, bus_rdata_d[ 7: 0]};
        {MEMOP_LBU , 2'b01}: m_rdata_pick_sext = {{24{1'b0           }}, bus_rdata_d[15: 8]};
        {MEMOP_LBU , 2'b10}: m_rdata_pick_sext = {{24{1'b0           }}, bus_rdata_d[23:16]};
        {MEMOP_LBU , 2'b11}: m_rdata_pick_sext = {{24{1'b0           }}, bus_rdata_d[31:24]};
        {MEMOP_LW  , 2'bzz}: m_rdata_pick_sext = bus_rdata_d;
        {MEMOP_LR_W, 2'bzz}: m_rdata_pick_sext = bus_rdata_d;
        default:             m_rdata_pick_sext = 32'hxxxx_xxxx;
    endcase

    if (|EXTENSION_A && x_amo_phase == 3'h1) begin
        // Capture AMO read data into mw_result for feeding back through the ALU.
        m_result = bus_rdata_d;
    end else if (|EXTENSION_A && xm_memop == MEMOP_SC_W) begin
        // sc.w may fail due to negative response from either local or global
        // monitor. Write 0 to rd on success, nonzero on failure.
        m_result = {31'h0, !(mw_local_exclusive_reserved && bus_dph_exokay_d)};
    end else if (xm_memop != MEMOP_NONE && xm_memop != MEMOP_AMO) begin
        m_result = m_rdata_pick_sext;
    end else if (MUL_FAST && m_fast_mul_result_vld) begin
        m_result = m_fast_mul_result;
    end else begin
        m_result = xm_result;
    end
end

// Local monitor update.
// - Set on a load-reserved with good response from global monitor
// - Cleared by any store-conditional or AMO
// - Not affected by trap entry (permitted by RISC-V spec)
always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        mw_local_exclusive_reserved <= 1'b0;
    end else if (|EXTENSION_A && (!m_stall || bus_dph_err_d)) begin
        if (d_memop_is_amo) begin
            mw_local_exclusive_reserved <= 1'b0;
        end else if (xm_memop == MEMOP_SC_W && (bus_dph_ready_d || bus_dph_err_d)) begin
            mw_local_exclusive_reserved <= 1'b0;
        end else if (xm_memop == MEMOP_LR_W && bus_dph_ready_d) begin
            // In theory, the bus should never report HEXOKAY when HRESP is asserted.
            // Still might happen (e.g. if HEXOKAY is tied high), so mask HEXOKAY with
            // HRESP to be sure a failed lr.w clears the monitor.
            mw_local_exclusive_reserved <= bus_dph_exokay_d && !bus_dph_err_d;
        end
    end
end
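// Illustrative lr.w/sc.w sequence with the local monitor above:
//
//   lr.w x1, (x2)      // reservation set if the bus responds with HEXOKAY
//   ...                // (no intervening sc.w or AMO)
//   sc.w x3, x4, (x2)  // issued to the bus only while the local reservation
//                      // is held; succeeds (rd = 0) only if the global
//                      // monitor also responds with HEXOKAY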
// Note that exception entry prevents writeback, because the exception entry
// replaces the instruction in M. Interrupt entry does not prevent writeback,
// because the interrupt is notionally inserted in between the instruction in
// M and the instruction in X.
wire m_reg_wen_if_nonzero = !m_bus_stall && xm_except == EXCEPT_NONE;
wire m_reg_wen = |xm_rd && m_reg_wen_if_nonzero;

//synthesis translate_off
always @ (posedge clk) begin
    if (rst_n) begin
        if (m_reg_wen && (^m_result === 1'bX)) begin
            $display("Writing X to register file!");
            $finish;
        end
    end
end
//synthesis translate_on

`ifdef FORMAL
// We borrow mw_result during an AMO to capture rdata and feed back through
// the ALU, since it already has the right paths. Make sure this is safe.
// (Whatever instruction is in M ahead of AMO should have passed through by
// the time AMO has reached read dphase)
always @ (posedge clk) if (rst_n) begin
    if (x_amo_phase == 3'h1)
        assert(m_reg_wen_if_nonzero);
    if (x_amo_phase == 3'h1)
        assert(~|xm_rd);
end
`endif

always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        mw_result <= {W_DATA{1'b0}};
    end else if (m_reg_wen_if_nonzero && !(|EXTENSION_A && x_amo_phase[1])) begin
        // (don't trash the captured AMO read phase data during stage 2/3 of AMO -- we need it!)
        mw_result <= m_result;
    end
end

always @ (posedge clk or negedge rst_n) begin
    if (!rst_n) begin
        mw_rd <= {W_REGADDR{1'b0}};
    end else begin
        //synthesis translate_off
        if (!m_stall && ^bus_wdata_d === 1'bX) begin
            $display("Writing Xs to memory!");
            $finish;
        end
        //synthesis translate_on
        if (m_reg_wen_if_nonzero)
            mw_rd <= xm_rd;
    end
end

hazard3_regfile_1w2r #(
    .FAKE_DUALPORT(0),
`ifdef SIM
    .RESET_REGS(1),
`elsif FORMAL
    .RESET_REGS(1),
`elsif FPGA
    .RESET_REGS(0),
`else
    .RESET_REGS(1),
`endif
    .N_REGS(32),
    .W_DATA(W_DATA)
) inst_regfile_1w2r (
    .clk    (clk),
    .rst_n  (rst_n),
    // On downstream stall, we feed D's addresses back into regfile
    // so that output does not change.
    .raddr1 (x_stall && !d_starved ? d_rs1 : f_rs1),
    .rdata1 (x_rdata1),
    .raddr2 (x_stall && !d_starved ? d_rs2 : f_rs2),
    .rdata2 (x_rdata2),

    .waddr  (xm_rd),
    .wdata  (m_result),
    .wen    (m_reg_wen)
);

`ifdef RISCV_FORMAL
`include "hazard3_rvfi_monitor.vh"
`endif

`ifdef HAZARD3_FORMAL_REGRESSION
// Each formal regression provides its own file with the below name:
`include "hazard3_formal_regression.vh"
`endif

endmodule

`default_nettype wire