Format sv and v code.
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@ -28,419 +28,430 @@
module el2_dec
module el2_dec
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic free_clk, // Clock always. Through two clock headers. For flops without second clock header built in.
input logic free_clk, // Clock always. Through two clock headers. For flops without second clock header built in.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle
input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle
output logic dec_extint_stall, // Stall on external interrupt
output logic dec_extint_stall, // Stall on external interrupt
output logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked
output logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked
output logic dec_pause_state_cg, // to top for active state clock gating
output logic dec_pause_state_cg, // to top for active state clock gating
output logic dec_tlu_core_empty,
output logic dec_tlu_core_empty,
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic [31:1] rst_vec, // reset vector, from core pins
input logic [31:1] rst_vec, // reset vector, from core pins
input logic nmi_int, // NMI pin
input logic [31:1] nmi_vec, // NMI vector, from pins
input logic i_cpu_halt_req, // Asynchronous Halt request to CPU
input logic nmi_int, // NMI pin
input logic i_cpu_run_req, // Asynchronous Restart request to CPU
input logic [31:1] nmi_vec, // NMI vector, from pins
output logic o_cpu_halt_status, // Halt status of core (pmu/fw)
input logic i_cpu_halt_req, // Asynchronous Halt request to CPU
output logic o_cpu_halt_ack, // Halt request ack
input logic i_cpu_run_req, // Asynchronous Restart request to CPU
output logic o_cpu_run_ack, // Run request ack
output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request
input logic [31:4] core_id, // CORE ID
output logic o_cpu_halt_status, // Halt status of core (pmu/fw)
output logic o_cpu_halt_ack, // Halt request ack
output logic o_cpu_run_ack, // Run request ack
output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sending a halt or run request
// external MPC halt/run interface
input logic [31:4] core_id, // CORE ID
input logic mpc_debug_halt_req, // Async halt request
input logic mpc_debug_run_req, // Async run request
input logic mpc_reset_run_req, // Run/halt after reset
output logic mpc_debug_halt_ack, // Halt ack
output logic mpc_debug_run_ack, // Run ack
output logic debug_brkpt_status, // debug breakpoint
input logic exu_pmu_i0_br_misp, // slot 0 branch misp
// external MPC halt/run interface
input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken
input logic mpc_debug_halt_req, // Async halt request
input logic exu_pmu_i0_pc4, // slot 0 4 byte branch
input logic mpc_debug_run_req, // Async run request
input logic mpc_reset_run_req, // Run/halt after reset
output logic mpc_debug_halt_ack, // Halt ack
output logic mpc_debug_run_ack, // Run ack
output logic debug_brkpt_status, // debug breakpoint
input logic exu_pmu_i0_br_misp, // slot 0 branch misp
input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken
input logic exu_pmu_i0_pc4, // slot 0 4 byte branch
input logic lsu_nonblock_load_valid_m, // valid nonblock load at m
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag
input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag
input logic lsu_nonblock_load_data_valid, // valid nonblock load data back
input logic lsu_nonblock_load_data_error, // nonblock load bus error
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic lsu_pmu_bus_trxn, // D side bus transaction
input logic lsu_nonblock_load_valid_m, // valid nonblock load at m
input logic lsu_pmu_bus_misaligned, // D side bus misaligned
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag
input logic lsu_pmu_bus_error, // D side bus error
input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r
input logic lsu_pmu_bus_busy, // D side bus busy
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag
input logic lsu_pmu_misaligned_m, // D side load or store misaligned
input logic lsu_nonblock_load_data_valid, // valid nonblock load data back
input logic lsu_pmu_load_external_m, // D side bus load
input logic lsu_nonblock_load_data_error, // nonblock load bus error
input logic lsu_pmu_store_external_m, // D side bus store
input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag
input logic dma_pmu_dccm_read, // DMA DCCM read
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic dma_pmu_dccm_write, // DMA DCCM write
input logic dma_pmu_any_read, // DMA read
input logic dma_pmu_any_write, // DMA write
input logic [31:1] lsu_fir_addr, // Fast int address
input logic lsu_pmu_bus_trxn, // D side bus transaction
input logic [1:0] lsu_fir_error, // Fast int lookup error
input logic lsu_pmu_bus_misaligned, // D side bus misaligned
input logic lsu_pmu_bus_error, // D side bus error
input logic lsu_pmu_bus_busy, // D side bus busy
input logic lsu_pmu_misaligned_m, // D side load or store misaligned
input logic lsu_pmu_load_external_m, // D side bus load
input logic lsu_pmu_store_external_m, // D side bus store
input logic dma_pmu_dccm_read, // DMA DCCM read
input logic dma_pmu_dccm_write, // DMA DCCM write
input logic dma_pmu_any_read, // DMA read
input logic dma_pmu_any_write, // DMA write
input logic ifu_pmu_instr_aligned, // aligned instructions
input logic [31:1] lsu_fir_addr, // Fast int address
input logic ifu_pmu_fetch_stall, // fetch unit stalled
input logic [ 1:0] lsu_fir_error, // Fast int lookup error
input logic ifu_pmu_ic_miss, // icache miss
input logic ifu_pmu_ic_hit, // icache hit
input logic ifu_pmu_bus_error, // Instruction side bus error
input logic ifu_pmu_bus_busy, // Instruction side bus busy
input logic ifu_pmu_bus_trxn, // Instruction side bus transaction
input logic ifu_ic_error_start, // IC single bit error
input logic ifu_pmu_instr_aligned, // aligned instructions
input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error
input logic ifu_pmu_fetch_stall, // fetch unit stalled
input logic ifu_pmu_ic_miss, // icache miss
input logic ifu_pmu_ic_hit, // icache hit
input logic ifu_pmu_bus_error, // Instruction side bus error
input logic ifu_pmu_bus_busy, // Instruction side bus busy
input logic ifu_pmu_bus_trxn, // Instruction side bus transaction
input logic [3:0] lsu_trigger_match_m,
input logic ifu_ic_error_start, // IC single bit error
input logic dbg_cmd_valid, // debugger abstract command valid
input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error
input logic dbg_cmd_write, // command is a write
input logic [1:0] dbg_cmd_type, // command type
input logic [31:0] dbg_cmd_addr, // command address
input logic [1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i
input logic [ 3:0] lsu_trigger_match_m,
input logic dbg_cmd_valid, // debugger abstract command valid
input logic dbg_cmd_write, // command is a write
input logic [ 1:0] dbg_cmd_type, // command type
input logic [31:0] dbg_cmd_addr, // command address
input logic [ 1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i
input logic ifu_i0_icaf, // icache access fault
input logic [1:0] ifu_i0_icaf_type, // icache access fault type
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_icaf, // icache access fault
input logic ifu_i0_dbecc, // icache/iccm double-bit error
input logic [1:0] ifu_i0_icaf_type, // icache access fault type
input logic lsu_idle_any, // lsu idle for halting
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_dbecc, // icache/iccm double-bit error
input el2_br_pkt_t i0_brp, // branch packet
input logic lsu_idle_any, // lsu idle for halting
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associative BTB index
input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet
input el2_br_pkt_t i0_brp, // branch packet
input logic lsu_single_ecc_error_incr, // LSU inc SB error counter
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [ pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [ pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [ $clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associative BTB index
input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error
input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet
input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error
input logic lsu_single_ecc_error_incr, // LSU inc SB error counter
input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address
input logic [31:0] exu_div_result, // final div result
input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error
input logic exu_div_wren, // Divide write enable to GPR
input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error
input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address
input logic [31:0] exu_csr_rs1_x, // rs1 for csr instruction
input logic [31:0] exu_div_result, // final div result
input logic exu_div_wren, // Divide write enable to GPR
input logic [31:0] lsu_result_m, // load result
input logic [31:0] exu_csr_rs1_x, // rs1 for csr instruction
input logic [31:0] lsu_result_corr_r, // load result - corrected load data
input logic lsu_load_stall_any, // This is for blocking loads
input logic [31:0] lsu_result_m, // load result
input logic lsu_store_stall_any, // This is for blocking stores
input logic [31:0] lsu_result_corr_r, // load result - corrected load data
input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event
input logic dma_iccm_stall_any, // iccm stalled, pmu event
input logic iccm_dma_sb_error, // ICCM DMA single bit error
input logic lsu_load_stall_any, // This is for blocking loads
input logic lsu_store_stall_any, // This is for blocking stores
input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event
input logic dma_iccm_stall_any, // iccm stalled, pmu event
input logic exu_flush_final, // slot0 flush
input logic iccm_dma_sb_error, // ICCM DMA single bit error
input logic [31:1] exu_npc_r, // next PC
input logic exu_flush_final, // slot0 flush
input logic [31:0] exu_i0_result_x, // alu result x
input logic [31:1] exu_npc_r, // next PC
input logic [31:0] exu_i0_result_x, // alu result x
input logic ifu_i0_valid, // fetch valids to instruction buffer
input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer
input logic [31:1] ifu_i0_pc, // pc's for instruction buffer
input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst
input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's
input logic mexintpend, // External interrupt pending
input logic ifu_i0_valid, // fetch valids to instruction buffer
input logic timer_int, // Timer interrupt pending (from pin)
input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer
input logic soft_int, // Software interrupt pending (from pin)
input logic [31:1] ifu_i0_pc, // pc's for instruction buffer
input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst
input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's
input logic [7:0] pic_claimid, // PIC claimid
input logic mexintpend, // External interrupt pending
input logic [3:0] pic_pl, // PIC priv level
input logic timer_int, // Timer interrupt pending (from pin)
input logic mhwakeup, // High priority wakeup
input logic soft_int, // Software interrupt pending (from pin)
output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level
input logic [7:0] pic_claimid, // PIC claimid
output logic [3:0] dec_tlu_meipt, // to PIC
input logic [3:0] pic_pl, // PIC priv level
input logic mhwakeup, // High priority wakeup
input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data
output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level
input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid
output logic [3:0] dec_tlu_meipt, // to PIC
output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics
input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data
input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid
output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics
// Debug start
input logic dbg_halt_req, // DM requests a halt
input logic dbg_resume_req, // DM requests a resume
input logic ifu_miss_state_idle, // I-side miss buffer empty
output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command
// Debug start
output logic dec_tlu_debug_mode, // Core is in debug mode
input logic dbg_halt_req, // DM requests a halt
output logic dec_tlu_resume_ack, // Resume acknowledge
input logic dbg_resume_req, // DM requests a resume
output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush
input logic ifu_miss_state_idle, // I-side miss buffer empty
output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC
output logic dec_tlu_flush_leak_one_r, // single step
output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc
output logic [31:2] dec_tlu_meihap, // Fast ext int base
output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode
output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command
output logic dec_tlu_debug_mode, // Core is in debug mode
output logic dec_tlu_resume_ack, // Resume acknowledge
output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush
output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC
output logic dec_tlu_flush_leak_one_r, // single step
output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc
output logic [31:2] dec_tlu_meihap, // Fast ext int base
output logic [31:0] dec_dbg_rddata, // debug command read data
output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode
output logic dec_dbg_cmd_done, // abstract command is done
output logic [31:0] dec_dbg_rddata, // debug command read data
output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address)
output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks
output logic dec_dbg_cmd_done, // abstract command is done
output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address)
output logic dec_tlu_force_halt, // halt has been forced
output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks
// Debug end
// branch info from pipe0 for errors or counter updates
input logic [1:0] exu_i0_br_hist_r, // history
input logic exu_i0_br_error_r, // error
input logic exu_i0_br_start_error_r, // start error
input logic exu_i0_br_valid_r, // valid
input logic exu_i0_br_mp_r, // mispredict
input logic exu_i0_br_middle_r, // middle of bank
// branch info from pipe1 for errors or counter updates
output logic dec_tlu_force_halt, // halt has been forced
// Debug end
// branch info from pipe0 for errors or counter updates
input logic [1:0] exu_i0_br_hist_r, // history
input logic exu_i0_br_error_r, // error
input logic exu_i0_br_start_error_r, // start error
input logic exu_i0_br_valid_r, // valid
input logic exu_i0_br_mp_r, // mispredict
input logic exu_i0_br_middle_r, // middle of bank
input logic exu_i0_br_way_r, // way hit or repl
// branch info from pipe1 for errors or counter updates
output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
input logic exu_i0_br_way_r, // way hit or repl
output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data
output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data
output logic [31:0] dec_i0_immed_d, // immediate data
output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
output logic [12:1] dec_i0_br_immed_d, // br immediate data
output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data
output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data
output el2_alu_pkt_t i0_ap, // alu packet
output logic [31:0] dec_i0_immed_d, // immediate data
output logic [12:1] dec_i0_br_immed_d, // br immediate data
output logic dec_i0_alu_decode_d, // schedule on D-stage alu
output el2_alu_pkt_t i0_ap, // alu packet
output logic dec_i0_branch_d, // Branch in D-stage
output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's
output logic dec_i0_alu_decode_d, // schedule on D-stage alu
output logic dec_i0_branch_d, // Branch in D-stage
output logic [31:1] dec_i0_pc_d, // pc's at decode
output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's
output logic [3:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable
output logic [3:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable
output logic [31:0] dec_i0_result_r, // Result R-stage
output logic [31:1] dec_i0_pc_d, // pc's at decode
output logic [ 3:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable
output logic [ 3:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable
output el2_lsu_pkt_t lsu_p, // lsu packet
output logic [31:0] dec_i0_result_r, // Result R-stage
output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
output el2_mul_pkt_t mul_p, // mul packet
output el2_div_pkt_t div_p, // div packet
output logic dec_div_cancel, // cancel divide operation
output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
output el2_lsu_pkt_t lsu_p, // lsu packet
output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
output el2_mul_pkt_t mul_p, // mul packet
output el2_div_pkt_t div_p, // div packet
output logic dec_div_cancel, // cancel divide operation
output logic dec_csr_ren_d, // CSR read enable
output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
output logic [31:0] dec_csr_rddata_d, // CSR read data
output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int
output logic dec_csr_ren_d, // CSR read enable
output logic dec_tlu_flush_lower_wb,
output logic [31:0] dec_csr_rddata_d, // CSR read data
output logic [31:1] dec_tlu_flush_path_r, // tlu flush target
output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache
output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage
output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int
output logic dec_tlu_flush_lower_wb,
output logic [31:1] dec_tlu_flush_path_r, // tlu flush target
output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache
output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet
output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage
output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc
output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet
output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc
output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc
output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc
output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus
output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc
output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc
output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc
output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative BTB error index
output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus
output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
output logic dec_lsu_valid_raw_d,
output logic [$clog2(
)-1:0] dec_fa_error_index, // Fully associative BTB error index
output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic dec_lsu_valid_raw_d,
output logic [1:0] dec_data_en, // clock-gate control logic
output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic [1:0] dec_ctl_en,
input logic [15:0] ifu_i0_cinst, // 16b compressed instruction
output logic [1:0] dec_data_en, // clock-gate control logic
output logic [1:0] dec_ctl_en,
output el2_trace_pkt_t trace_rv_trace_pkt, // trace packet
input logic [15:0] ifu_i0_cinst, // 16b compressed instruction
// feature disable from mfdc
output el2_trace_pkt_t trace_rv_trace_pkt, // trace packet
output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding
output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address
output logic dec_tlu_core_ecc_disable, // disable core ECC
output logic dec_tlu_bpred_disable, // disable branch prediction
output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing
output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16]
// clock gating overrides from mcgc
// feature disable from mfdc
output logic dec_tlu_misc_clk_override, // override misc clock domain gating
output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding
output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating
output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address
output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating
output logic dec_tlu_core_ecc_disable, // disable core ECC
output logic dec_tlu_bus_clk_override, // override bus clock domain gating
output logic dec_tlu_bpred_disable, // disable branch prediction
output logic dec_tlu_pic_clk_override, // override PIC clock domain gating
output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing
output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating
output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16]
output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating
output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating
output logic dec_tlu_i0_commit_cmt, // committed i0 instruction
// clock gating overrides from mcgc
input logic scan_mode // Flop scan mode control
output logic dec_tlu_misc_clk_override, // override misc clock domain gating
output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating
output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating
output logic dec_tlu_bus_clk_override, // override bus clock domain gating
output logic dec_tlu_pic_clk_override, // override PIC clock domain gating
output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating
output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating
output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating
output logic dec_tlu_i0_commit_cmt, // committed i0 instruction
input logic scan_mode // Flop scan mode control
logic dec_tlu_dec_clk_override; // to and from dec blocks
logic clk_override;
logic dec_ib0_valid_d;
logic dec_tlu_dec_clk_override; // to and from dec blocks
logic clk_override;
logic dec_pmu_instr_decoded;
logic dec_ib0_valid_d;
logic dec_pmu_decode_stall;
logic dec_pmu_presync_stall;
logic dec_pmu_postsync_stall;
logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R.
logic dec_pmu_instr_decoded;
logic dec_pmu_decode_stall;
logic dec_pmu_presync_stall;
logic dec_pmu_postsync_stall;
logic [4:0] dec_i0_rs1_d;
logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R.
logic [4:0] dec_i0_rs2_d;
logic [31:0] dec_i0_instr_d;
logic [4:0] dec_i0_rs1_d;
logic [4:0] dec_i0_rs2_d;
logic dec_tlu_trace_disable;
logic [31:0] dec_i0_instr_d;
logic dec_tlu_pipelining_disable;
logic dec_tlu_trace_disable;
logic dec_tlu_pipelining_disable;
logic [4:0] dec_i0_waddr_r;
logic dec_i0_wen_r;
logic [31:0] dec_i0_wdata_r;
logic dec_csr_wen_r; // csr write enable at wb
logic [11:0] dec_csr_wraddr_r; // write address for csr
logic [31:0] dec_csr_wrdata_r; // csr write data at wb
logic [11:0] dec_csr_rdaddr_d; // read address for csr
logic [4:0] dec_i0_waddr_r;
logic dec_csr_legal_d; // csr indicates legal operation
logic dec_i0_wen_r;
logic [31:0] dec_i0_wdata_r;
logic dec_csr_wen_r; // csr write enable at wb
logic [11:0] dec_csr_wraddr_r; // write address for csr
logic [31:0] dec_csr_wrdata_r; // csr write data at wb
logic dec_csr_wen_unq_d; // valid csr with write - for csr legal
logic [11:0] dec_csr_rdaddr_d; // read address for csr
logic dec_csr_any_unq_d; // valid csr - for csr legal
logic dec_csr_legal_d; // csr indicates legal operation
logic dec_csr_stall_int_ff; // csr is mie/mstatus
el2_trap_pkt_t dec_tlu_packet_r;
logic dec_csr_wen_unq_d; // valid csr with write - for csr legal
logic dec_csr_any_unq_d; // valid csr - for csr legal
logic dec_csr_stall_int_ff; // csr is mie/mstatus
logic dec_i0_pc4_d;
el2_trap_pkt_t dec_tlu_packet_r;
logic dec_tlu_presync_d;
logic dec_tlu_postsync_d;
logic dec_tlu_debug_stall;
logic [31:0] dec_illegal_inst;
logic dec_i0_pc4_d;
logic dec_tlu_presync_d;
logic dec_tlu_postsync_d;
logic dec_tlu_debug_stall;
logic dec_i0_icaf_d;
logic [31:0] dec_illegal_inst;
logic dec_i0_dbecc_d;
logic dec_i0_icaf_d;
logic dec_i0_icaf_second_d;
logic [3:0] dec_i0_trigger_match_d;
logic dec_debug_fence_d;
logic dec_nonblock_load_wen;
logic [4:0] dec_nonblock_load_waddr;
logic dec_tlu_flush_pause_r;
el2_br_pkt_t dec_i0_brp;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index;
logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr;
logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag;
logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index; // Fully associative BTB index
logic [31:1] dec_tlu_i0_pc_r;
logic dec_i0_dbecc_d;
logic dec_tlu_i0_kill_writeb_wb;
logic dec_i0_icaf_second_d;
logic dec_tlu_i0_valid_r;
logic [3:0] dec_i0_trigger_match_d;
logic dec_debug_fence_d;
logic dec_nonblock_load_wen;
logic [4:0] dec_nonblock_load_waddr;
logic dec_tlu_flush_pause_r;
el2_br_pkt_t dec_i0_brp;
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index;
logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr;
logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag;
logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index; // Fully associative BTB index
logic dec_pause_state;
logic [31:1] dec_tlu_i0_pc_r;
logic dec_tlu_i0_kill_writeb_wb;
logic dec_tlu_i0_valid_r;
logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type
logic dec_pause_state;
logic dec_tlu_flush_extint; // Fast ext int started
logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type
logic [31:0] dec_i0_inst_wb;
logic dec_tlu_flush_extint; // Fast ext int started
logic [31:1] dec_i0_pc_wb;
logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1;
logic [4:0] dec_tlu_exc_cause_wb1;
logic [31:0] dec_tlu_mtval_wb1;
logic dec_tlu_i0_exc_valid_wb1;
logic [4:0] div_waddr_wb;
logic [31:0] dec_i0_inst_wb;
logic dec_div_active;
logic [31:1] dec_i0_pc_wb;
logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1;
logic [ 4:0] dec_tlu_exc_cause_wb1;
logic [31:0] dec_tlu_mtval_wb1;
logic dec_tlu_i0_exc_valid_wb1;
logic dec_debug_valid_d;
logic [ 4:0] div_waddr_wb;
logic dec_div_active;
assign clk_override = dec_tlu_dec_clk_override;
logic dec_debug_valid_d;
assign clk_override = dec_tlu_dec_clk_override;
assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0];
assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0];
el2_dec_ib_ctl #(.pt(pt)) instbuff (.*);
el2_dec_ib_ctl #(.pt(pt)) instbuff (.*);
el2_dec_decode_ctl #(.pt(pt)) decode (.*);
el2_dec_decode_ctl #(.pt(pt)) decode (.*);
el2_dec_tlu_ctl #(.pt(pt)) tlu (.*);
el2_dec_tlu_ctl #(.pt(pt)) tlu (.*);
el2_dec_gpr_ctl #(.pt(pt)) arf (.*,
// inputs
.wen0(dec_i0_wen_r), .waddr0(dec_i0_waddr_r[4:0]), .wd0(dec_i0_wdata_r[31:0]),
el2_dec_gpr_ctl #(
.wen1(dec_nonblock_load_wen), .waddr1(dec_nonblock_load_waddr[4:0]), .wd1(lsu_nonblock_load_data[31:0]),
.wen2(exu_div_wren), .waddr2(div_waddr_wb), .wd2(exu_div_result[31:0]),
) arf (
// inputs
// outputs
.rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0])
// outputs
// Trigger
el2_dec_trigger #(.pt(pt)) dec_trigger (.*);
// Trigger
el2_dec_trigger #(.pt(pt)) dec_trigger (.*);
// trace
assign trace_rv_trace_pkt.trace_rv_i_insn_ip = dec_i0_inst_wb[31:0];
assign trace_rv_trace_pkt.trace_rv_i_address_ip = { dec_i0_pc_wb[31:1], 1'b0};
assign trace_rv_trace_pkt.trace_rv_i_valid_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
// trace
assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_insn_ip = dec_i0_inst_wb[31:0];
assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports
assign trace_rv_trace_pkt.trace_rv_i_address_ip = {dec_i0_pc_wb[31:1], 1'b0};
assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports
assign trace_rv_trace_pkt.trace_rv_i_valid_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_exc_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports
assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1;
assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports
// end trace
// end trace
endmodule // el2_dec
endmodule // el2_dec
File diff suppressed because it is too large
Load Diff
@ -14,69 +14,74 @@
// limitations under the License.
// limitations under the License.
module el2_dec_gpr_ctl
module el2_dec_gpr_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
) (
input logic [4:0] raddr0, // logical read addresses
input logic [4:0] raddr0, // logical read addresses
input logic [4:0] raddr1,
input logic [4:0] raddr1,
input logic wen0, // write enable
input logic wen0, // write enable
input logic [4:0] waddr0, // write address
input logic [ 4:0] waddr0, // write address
input logic [31:0] wd0, // write data
input logic [31:0] wd0, // write data
input logic wen1, // write enable
input logic wen1, // write enable
input logic [4:0] waddr1, // write address
input logic [ 4:0] waddr1, // write address
input logic [31:0] wd1, // write data
input logic [31:0] wd1, // write data
input logic wen2, // write enable
input logic wen2, // write enable
input logic [4:0] waddr2, // write address
input logic [ 4:0] waddr2, // write address
input logic [31:0] wd2, // write data
input logic [31:0] wd2, // write data
input logic clk,
input logic clk,
input logic rst_l,
input logic rst_l,
output logic [31:0] rd0, // read data
output logic [31:0] rd0, // read data
output logic [31:0] rd1,
output logic [31:0] rd1,
input logic scan_mode
input logic scan_mode
logic [31:1] [31:0] gpr_out; // 31 x 32 bit GPRs
logic [31:1][31:0] gpr_out; // 31 x 32 bit GPRs
logic [31:1] [31:0] gpr_in;
logic [31:1][31:0] gpr_in;
logic [31:1] w0v,w1v,w2v;
logic [31:1] w0v, w1v, w2v;
logic [31:1] gpr_wr_en;
logic [31:1] gpr_wr_en;
// GPR Write Enables
// GPR Write Enables
assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]);
assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]);
for ( genvar j=1; j<32; j++ ) begin : gpr
for (genvar j = 1; j < 32; j++) begin : gpr
rvdffe #(32) gprff (.*, .en(gpr_wr_en[j]), .din(gpr_in[j][31:0]), .dout(gpr_out[j][31:0]));
rvdffe #(32) gprff (
end : gpr
.en (gpr_wr_en[j]),
.din (gpr_in[j][31:0]),
end : gpr
// the read out
// the read out
always_comb begin
always_comb begin
rd0[31:0] = 32'b0;
rd0[31:0] = 32'b0;
rd1[31:0] = 32'b0;
rd1[31:0] = 32'b0;
w0v[31:1] = 31'b0;
w0v[31:1] = 31'b0;
w1v[31:1] = 31'b0;
w1v[31:1] = 31'b0;
w2v[31:1] = 31'b0;
w2v[31:1] = 31'b0;
gpr_in[31:1] = '0;
gpr_in[31:1] = '0;
// GPR Read logic
// GPR Read logic
for (int j=1; j<32; j++ ) begin
for (int j = 1; j < 32; j++) begin
rd0[31:0] |= ({32{(raddr0[4:0]== 5'(j))}} & gpr_out[j][31:0]);
rd0[31:0] |= ({32{(raddr0[4:0] == 5'(j))}} & gpr_out[j][31:0]);
rd1[31:0] |= ({32{(raddr1[4:0]== 5'(j))}} & gpr_out[j][31:0]);
rd1[31:0] |= ({32{(raddr1[4:0] == 5'(j))}} & gpr_out[j][31:0]);
// GPR Write logic
// GPR Write logic
for (int j=1; j<32; j++ ) begin
for (int j = 1; j < 32; j++) begin
w0v[j] = wen0 & (waddr0[4:0]== 5'(j) );
w0v[j] = wen0 & (waddr0[4:0] == 5'(j));
w1v[j] = wen1 & (waddr1[4:0]== 5'(j) );
w1v[j] = wen1 & (waddr1[4:0] == 5'(j));
w2v[j] = wen2 & (waddr2[4:0]== 5'(j) );
w2v[j] = wen2 & (waddr2[4:0] == 5'(j));
gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) |
gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) |
({32{w1v[j]}} & wd1[31:0]) |
({32{w1v[j]}} & wd1[31:0]) |
({32{w2v[j]}} & wd2[31:0]);
({32{w2v[j]}} & wd2[31:0]);
end // always_comb begin
end // always_comb begin
@ -14,151 +14,151 @@
// limitations under the License.
// limitations under the License.
module el2_dec_ib_ctl
module el2_dec_ib_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic dbg_cmd_valid, // valid dbg cmd
input logic dbg_cmd_valid, // valid dbg cmd
input logic dbg_cmd_write, // dbg cmd is write
input logic dbg_cmd_write, // dbg cmd is write
input logic [1:0] dbg_cmd_type, // dbg type
input logic [ 1:0] dbg_cmd_type, // dbg type
input logic [31:0] dbg_cmd_addr, // expand to 31:0
input logic [31:0] dbg_cmd_addr, // expand to 31:0
input el2_br_pkt_t i0_brp, // i0 branch packet from aligner
input el2_br_pkt_t i0_brp, // i0 branch packet from aligner
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index
input logic ifu_i0_pc4, // i0 is 4B inst else 2B
input logic ifu_i0_pc4, // i0 is 4B inst else 2B
input logic ifu_i0_valid, // i0 valid from ifu
input logic ifu_i0_valid, // i0 valid from ifu
input logic ifu_i0_icaf, // i0 instruction access fault
input logic ifu_i0_icaf, // i0 instruction access fault
input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type
input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst
input logic ifu_i0_dbecc, // i0 double-bit error
input logic ifu_i0_dbecc, // i0 double-bit error
input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner
input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner
input logic [31:1] ifu_i0_pc, // i0 pc from the aligner
input logic [31:1] ifu_i0_pc, // i0 pc from the aligner
output logic dec_ib0_valid_d, // ib0 valid
output logic dec_ib0_valid_d, // ib0 valid
output logic dec_debug_valid_d, // Debug read or write at D-stage
output logic dec_debug_valid_d, // Debug read or write at D-stage
output logic [31:0] dec_i0_instr_d, // i0 inst at decode
output logic [31:0] dec_i0_instr_d, // i0 inst at decode
output logic [31:1] dec_i0_pc_d, // i0 pc at decode
output logic [31:1] dec_i0_pc_d, // i0 pc at decode
output logic dec_i0_pc4_d, // i0 is 4B inst else 2B
output logic dec_i0_pc4_d, // i0 is 4B inst else 2B
output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode
output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index
output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR
output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR
output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag
output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag
output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index
output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index
output logic dec_i0_icaf_d, // i0 instruction access fault at decode
output logic dec_i0_icaf_d, // i0 instruction access fault at decode
output logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst
output logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst
output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type
output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type
output logic dec_i0_dbecc_d, // i0 double-bit error at decode
output logic dec_i0_dbecc_d, // i0 double-bit error at decode
output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted
output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted
output logic dec_debug_fence_d // debug fence inst
output logic dec_debug_fence_d // debug fence inst
logic debug_valid;
logic debug_valid;
logic [4:0] dreg;
logic [ 4:0] dreg;
logic [11:0] dcsr;
logic [11:0] dcsr;
logic [31:0] ib0, ib0_debug_in;
logic [31:0] ib0, ib0_debug_in;
logic debug_read;
logic debug_read;
logic debug_write;
logic debug_write;
logic debug_read_gpr;
logic debug_read_gpr;
logic debug_write_gpr;
logic debug_write_gpr;
logic debug_read_csr;
logic debug_read_csr;
logic debug_write_csr;
logic debug_write_csr;
logic [34:0] ifu_i0_pcdata, pc0;
logic [34:0] ifu_i0_pcdata, pc0;
assign ifu_i0_pcdata[34:0] = { ifu_i0_icaf_second, ifu_i0_dbecc, ifu_i0_icaf,
assign ifu_i0_pcdata[34:0] = {
ifu_i0_pc[31:1], ifu_i0_pc4 };
ifu_i0_icaf_second, ifu_i0_dbecc, ifu_i0_icaf, ifu_i0_pc[31:1], ifu_i0_pc4
assign pc0[34:0] = ifu_i0_pcdata[34:0];
assign pc0[34:0] = ifu_i0_pcdata[34:0];
assign dec_i0_icaf_second_d = pc0[34]; // icaf's can only decode as i0
assign dec_i0_icaf_second_d = pc0[34]; // icaf's can only decode as i0
assign dec_i0_dbecc_d = pc0[33];
assign dec_i0_dbecc_d = pc0[33];
assign dec_i0_icaf_d = pc0[32];
assign dec_i0_icaf_d = pc0[32];
assign dec_i0_pc_d[31:1] = pc0[31:1];
assign dec_i0_pc_d[31:1] = pc0[31:1];
assign dec_i0_pc4_d = pc0[0];
assign dec_i0_pc4_d = pc0[0];
assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0];
assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0];
// GPR accesses
// GPR accesses
// put reg to read on rs1
// put reg to read on rs1
// read -> or %x0, %reg,%x0 {000000000000,reg[4:0],110000000110011}
// read -> or %x0, %reg,%x0 {000000000000,reg[4:0],110000000110011}
// put write date on rs1
// put write date on rs1
// write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011}
// write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011}
// CSR accesses
// CSR accesses
// csr is of form rd, csr, rs1
// csr is of form rd, csr, rs1
// read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011}
// read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011}
// put write data on rs1
// put write data on rs1
// write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011}
// write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011}
// abstract memory command not done here
// abstract memory command not done here
assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2);
assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2);
assign debug_read = debug_valid & ~dbg_cmd_write;
assign debug_read = debug_valid & ~dbg_cmd_write;
assign debug_write = debug_valid & dbg_cmd_write;
assign debug_write = debug_valid & dbg_cmd_write;
assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0]==2'h0);
assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0] == 2'h0);
assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0]==2'h0);
assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0] == 2'h0);
assign debug_read_csr = debug_read & (dbg_cmd_type[1:0]==2'h1);
assign debug_read_csr = debug_read & (dbg_cmd_type[1:0] == 2'h1);
assign debug_write_csr = debug_write & (dbg_cmd_type[1:0]==2'h1);
assign debug_write_csr = debug_write & (dbg_cmd_type[1:0] == 2'h1);
assign dreg[4:0] = dbg_cmd_addr[4:0];
assign dreg[4:0] = dbg_cmd_addr[4:0];
assign dcsr[11:0] = dbg_cmd_addr[11:0];
assign dcsr[11:0] = dbg_cmd_addr[11:0];
assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & {12'b000000000000,dreg[4:0],15'b110000000110011}) |
assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & {12'b000000000000,dreg[4:0],15'b110000000110011}) |
({32{debug_write_gpr}} & {20'b00000000000000000110,dreg[4:0],7'b0110011}) |
({32{debug_write_gpr}} & {20'b00000000000000000110,dreg[4:0],7'b0110011}) |
({32{debug_read_csr}} & {dcsr[11:0],20'b00000010000001110011}) |
({32{debug_read_csr}} & {dcsr[11:0],20'b00000010000001110011}) |
({32{debug_write_csr}} & {dcsr[11:0],20'b00000001000001110011});
({32{debug_write_csr}} & {dcsr[11:0],20'b00000001000001110011});
// machine is in halted state, pipe empty, write will always happen next cycle
// machine is in halted state, pipe empty, write will always happen next cycle
assign dec_debug_wdata_rs1_d = debug_write_gpr | debug_write_csr;
assign dec_debug_wdata_rs1_d = debug_write_gpr | debug_write_csr;
// special fence csr for use only in debug mode
// special fence csr for use only in debug mode
assign dec_debug_fence_d = debug_write_csr & (dcsr[11:0] == 12'h7c4);
assign dec_debug_fence_d = debug_write_csr & (dcsr[11:0] == 12'h7c4);
assign ib0[31:0] = (debug_valid) ? ib0_debug_in[31:0] : ifu_i0_instr[31:0];
assign ib0[31:0] = (debug_valid) ? ib0_debug_in[31:0] : ifu_i0_instr[31:0];
assign dec_ib0_valid_d = ifu_i0_valid | debug_valid;
assign dec_ib0_valid_d = ifu_i0_valid | debug_valid;
assign dec_debug_valid_d = debug_valid;
assign dec_debug_valid_d = debug_valid;
assign dec_i0_instr_d[31:0] = ib0[31:0];
assign dec_i0_instr_d[31:0] = ib0[31:0];
assign dec_i0_brp = i0_brp;
assign dec_i0_brp = i0_brp;
assign dec_i0_bp_index = ifu_i0_bp_index;
assign dec_i0_bp_index = ifu_i0_bp_index;
assign dec_i0_bp_fghr = ifu_i0_bp_fghr;
assign dec_i0_bp_fghr = ifu_i0_bp_fghr;
assign dec_i0_bp_btag = ifu_i0_bp_btag;
assign dec_i0_bp_btag = ifu_i0_bp_btag;
assign dec_i0_bp_fa_index = ifu_i0_fa_index;
assign dec_i0_bp_fa_index = ifu_i0_fa_index;
File diff suppressed because it is too large
Load Diff
@ -23,27 +23,32 @@
module el2_dec_trigger
module el2_dec_trigger
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 'match'-1 do mask, 0: full match
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 'match'-1 do mask, 0: full match
input logic [31:1] dec_i0_pc_d, // i0 pc
input logic [31:1] dec_i0_pc_d, // i0 pc
output logic [3:0] dec_i0_trigger_match_d // Trigger match
output logic [3:0] dec_i0_trigger_match_d // Trigger match
logic [3:0][31:0] dec_i0_match_data;
logic [3:0][31:0] dec_i0_match_data;
logic [3:0] dec_i0_trigger_data_match;
logic [3:0] dec_i0_trigger_data_match;
for (genvar i=0; i<4; i++) begin
for (genvar i = 0; i < 4; i++) begin
assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match
assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match
rvmaskandmatch trigger_i0_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i0_trigger_data_match[i]));
rvmaskandmatch trigger_i0_match (
.mask (trigger_pkt_any[i].tdata2[31:0]),
.data (dec_i0_match_data[i][31:0]),
.match (dec_i0_trigger_data_match[i])
assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i];
assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i];
endmodule // el2_dec_trigger
endmodule // el2_dec_trigger
@ -23,42 +23,41 @@
module dmi_jtag_to_core_sync (
module dmi_jtag_to_core_sync (
// JTAG signals
// JTAG signals
input rd_en, // 1 bit Read Enable from JTAG
input rd_en, // 1 bit Read Enable from JTAG
input wr_en, // 1 bit Write enable from JTAG
input wr_en, // 1 bit Write enable from JTAG
// Processor Signals
// Processor Signals
input rst_n, // Core reset
input rst_n, // Core reset
input clk, // Core clock
input clk, // Core clock
output reg_en, // 1 bit Write interface bit to Processor
output reg_en, // 1 bit Write interface bit to Processor
output reg_wr_en // 1 bit Write enable to Processor
output reg_wr_en // 1 bit Write enable to Processor
wire c_rd_en;
wire c_wr_en;
reg [2:0] rden, wren;
// Outputs
wire c_rd_en;
assign reg_en = c_wr_en | c_rd_en;
wire c_wr_en;
assign reg_wr_en = c_wr_en;
reg [2:0] rden, wren;
// synchronizers
// Outputs
always @ ( posedge clk or negedge rst_n) begin
assign reg_en = c_wr_en | c_rd_en;
if(!rst_n) begin
assign reg_wr_en = c_wr_en;
rden <= '0;
wren <= '0;
// synchronizers
always @(posedge clk or negedge rst_n) begin
if (!rst_n) begin
rden <= '0;
wren <= '0;
end else begin
rden <= {rden[1:0], rd_en};
wren <= {wren[1:0], wr_en};
else begin
rden <= {rden[1:0], rd_en};
wren <= {wren[1:0], wr_en};
assign c_rd_en = rden[1] & ~rden[2];
assign c_wr_en = wren[1] & ~wren[2];
assign c_rd_en = rden[1] & ~rden[2];
assign c_wr_en = wren[1] & ~wren[2];
@ -21,70 +21,70 @@
module dmi_wrapper(
module dmi_wrapper (
// JTAG signals
// JTAG signals
input trst_n, // JTAG reset
input trst_n, // JTAG reset
input tck, // JTAG clock
input tck, // JTAG clock
input tms, // Test mode select
input tms, // Test mode select
input tdi, // Test Data Input
input tdi, // Test Data Input
output tdo, // Test Data Output
output tdo, // Test Data Output
output tdoEnable, // Test Data Output enable
output tdoEnable, // Test Data Output enable
// Processor Signals
// Processor Signals
input core_rst_n, // Core reset
input core_rst_n, // Core reset
input core_clk, // Core clock
input core_clk, // Core clock
input [31:1] jtag_id, // JTAG ID
input [31:1] jtag_id, // JTAG ID
input [31:0] rd_data, // 32 bit Read data from Processor
input [31:0] rd_data, // 32 bit Read data from Processor
output [31:0] reg_wr_data, // 32 bit Write data to Processor
output [31:0] reg_wr_data, // 32 bit Write data to Processor
output [6:0] reg_wr_addr, // 7 bit reg address to Processor
output [6:0] reg_wr_addr, // 7 bit reg address to Processor
output reg_en, // 1 bit Read enable to Processor
output reg_en, // 1 bit Read enable to Processor
output reg_wr_en, // 1 bit Write enable to Processor
output reg_wr_en, // 1 bit Write enable to Processor
output dmi_hard_reset
output dmi_hard_reset
//Wire Declaration
//Wire Declaration
wire rd_en;
wire rd_en;
wire wr_en;
wire wr_en;
wire dmireset;
wire dmireset;
//jtag_tap instantiation
//jtag_tap instantiation
rvjtag_tap i_jtag_tap(
rvjtag_tap i_jtag_tap (
.trst(trst_n), // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset
.trst(trst_n), // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset
.tck(tck), // dedicated JTAG TCK pad signal
.tck(tck), // dedicated JTAG TCK pad signal
.tms(tms), // dedicated JTAG TMS pad signal
.tms(tms), // dedicated JTAG TMS pad signal
.tdi(tdi), // dedicated JTAG TDI pad signal
.tdi(tdi), // dedicated JTAG TDI pad signal
.tdo(tdo), // dedicated JTAG TDO pad signal
.tdo(tdo), // dedicated JTAG TDO pad signal
.tdoEnable(tdoEnable), // enable for TDO pad
.tdoEnable(tdoEnable), // enable for TDO pad
.wr_data(reg_wr_data), // 32 bit Write data
.wr_data(reg_wr_data), // 32 bit Write data
.wr_addr(reg_wr_addr), // 7 bit Write address
.wr_addr(reg_wr_addr), // 7 bit Write address
.rd_en(rd_en), // 1 bit read enable
.rd_en(rd_en), // 1 bit read enable
.wr_en(wr_en), // 1 bit Write enable
.wr_en(wr_en), // 1 bit Write enable
.rd_data(rd_data), // 32 bit Read data
.rd_data(rd_data), // 32 bit Read data
.idle(3'h0), // no need to wait to sample data
.idle(3'h0), // no need to wait to sample data
.dmi_stat(2'b0), // no need to wait or error possible
.dmi_stat(2'b0), // no need to wait or error possible
.version(4'h1), // debug spec 0.13 compliant
.version(4'h1), // debug spec 0.13 compliant
// dmi_jtag_to_core_sync instantiation
// dmi_jtag_to_core_sync instantiation
dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync(
dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync (
.wr_en(wr_en), // 1 bit Write enable
.wr_en(wr_en), // 1 bit Write enable
.rd_en(rd_en), // 1 bit Read enable
.rd_en(rd_en), // 1 bit Read enable
.rst_n (core_rst_n),
.clk (core_clk),
.reg_en(reg_en), // 1 bit Write interface bit
.reg_en (reg_en), // 1 bit Write interface bit
.reg_wr_en(reg_wr_en) // 1 bit Write enable
.reg_wr_en(reg_wr_en) // 1 bit Write enable
@ -14,209 +14,202 @@
// limitations under the License
// limitations under the License
module rvjtag_tap #(
module rvjtag_tap #(
parameter AWIDTH = 7
parameter AWIDTH = 7
) (
input trst,
input trst,
input tck,
input tck,
input tms,
input tms,
input tdi,
input tdi,
output reg tdo,
output reg tdo,
output tdoEnable,
output tdoEnable,
output [31:0] wr_data,
output [ 31:0] wr_data,
output [AWIDTH-1:0] wr_addr,
output [AWIDTH-1:0] wr_addr,
output wr_en,
output wr_en,
output rd_en,
output rd_en,
input [31:0] rd_data,
input [31:0] rd_data,
input [1:0] rd_status,
input [ 1:0] rd_status,
output reg dmi_reset,
output reg dmi_reset,
output reg dmi_hard_reset,
output reg dmi_hard_reset,
input [2:0] idle,
input [ 2:0] idle,
input [1:0] dmi_stat,
input [ 1:0] dmi_stat,
-- revisionCode : 4'h0;
-- revisionCode : 4'h0;
-- manufacturersIdCode : 11'h45;
-- manufacturersIdCode : 11'h45;
-- deviceIdCode : 16'h0001;
-- deviceIdCode : 16'h0001;
-- order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
-- order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB]
input [31:1] jtag_id,
input [31:1] jtag_id,
input [3:0] version
input [ 3:0] version
localparam USER_DR_LENGTH = AWIDTH + 34;
localparam USER_DR_LENGTH = AWIDTH + 34;
reg [USER_DR_LENGTH-1:0] sr, nsr, dr;
reg [USER_DR_LENGTH-1:0] sr, nsr, dr;
// Tap controller
// Tap controller
logic[3:0] state, nstate;
logic [3:0] state, nstate;
logic [4:0] ir;
logic [4:0] ir;
wire jtag_reset;
wire jtag_reset;
wire shift_dr;
wire shift_dr;
wire pause_dr;
wire pause_dr;
wire update_dr;
wire update_dr;
wire capture_dr;
wire capture_dr;
wire shift_ir;
wire shift_ir;
wire pause_ir ;
wire pause_ir;
wire update_ir ;
wire update_ir;
wire capture_ir;
wire capture_ir;
wire[1:0] dr_en;
wire [1:0] dr_en;
wire devid_sel;
wire devid_sel;
wire [5:0] abits;
wire [5:0] abits;
assign abits = AWIDTH[5:0];
assign abits = AWIDTH[5:0];
localparam TEST_LOGIC_RESET_STATE = 0;
localparam TEST_LOGIC_RESET_STATE = 0;
localparam RUN_TEST_IDLE_STATE = 1;
localparam RUN_TEST_IDLE_STATE = 1;
localparam SELECT_DR_SCAN_STATE = 2;
localparam SELECT_DR_SCAN_STATE = 2;
localparam CAPTURE_DR_STATE = 3;
localparam CAPTURE_DR_STATE = 3;
localparam SHIFT_DR_STATE = 4;
localparam SHIFT_DR_STATE = 4;
localparam EXIT1_DR_STATE = 5;
localparam EXIT1_DR_STATE = 5;
localparam PAUSE_DR_STATE = 6;
localparam PAUSE_DR_STATE = 6;
localparam EXIT2_DR_STATE = 7;
localparam EXIT2_DR_STATE = 7;
localparam UPDATE_DR_STATE = 8;
localparam UPDATE_DR_STATE = 8;
localparam SELECT_IR_SCAN_STATE = 9;
localparam SELECT_IR_SCAN_STATE = 9;
localparam CAPTURE_IR_STATE = 10;
localparam CAPTURE_IR_STATE = 10;
localparam SHIFT_IR_STATE = 11;
localparam SHIFT_IR_STATE = 11;
localparam EXIT1_IR_STATE = 12;
localparam EXIT1_IR_STATE = 12;
localparam PAUSE_IR_STATE = 13;
localparam PAUSE_IR_STATE = 13;
localparam EXIT2_IR_STATE = 14;
localparam EXIT2_IR_STATE = 14;
localparam UPDATE_IR_STATE = 15;
localparam UPDATE_IR_STATE = 15;
always_comb begin
always_comb begin
nstate = state;
nstate = state;
case (state)
default: nstate = TEST_LOGIC_RESET_STATE;
default: nstate = TEST_LOGIC_RESET_STATE;
always @ (posedge tck or negedge trst) begin
always @(posedge tck or negedge trst) begin
if(!trst) state <= TEST_LOGIC_RESET_STATE;
if (!trst) state <= TEST_LOGIC_RESET_STATE;
else state <= nstate;
else state <= nstate;
assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
assign jtag_reset = state == TEST_LOGIC_RESET_STATE;
assign shift_dr = state == SHIFT_DR_STATE;
assign shift_dr = state == SHIFT_DR_STATE;
assign pause_dr = state == PAUSE_DR_STATE;
assign pause_dr = state == PAUSE_DR_STATE;
assign update_dr = state == UPDATE_DR_STATE;
assign update_dr = state == UPDATE_DR_STATE;
assign capture_dr = state == CAPTURE_DR_STATE;
assign capture_dr = state == CAPTURE_DR_STATE;
assign shift_ir = state == SHIFT_IR_STATE;
assign shift_ir = state == SHIFT_IR_STATE;
assign pause_ir = state == PAUSE_IR_STATE;
assign pause_ir = state == PAUSE_IR_STATE;
assign update_ir = state == UPDATE_IR_STATE;
assign update_ir = state == UPDATE_IR_STATE;
assign capture_ir = state == CAPTURE_IR_STATE;
assign capture_ir = state == CAPTURE_IR_STATE;
assign tdoEnable = shift_dr | shift_ir;
assign tdoEnable = shift_dr | shift_ir;
// IR register
// IR register
always @ (negedge tck or negedge trst) begin
always @(negedge tck or negedge trst) begin
if (!trst) ir <= 5'b1;
if (!trst) ir <= 5'b1;
else begin
else begin
if (jtag_reset) ir <= 5'b1;
if (jtag_reset) ir <= 5'b1;
else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f :sr[4:0];
else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f : sr[4:0];
assign devid_sel = ir == 5'b00001;
assign dr_en[0] = ir == 5'b10000;
assign dr_en[1] = ir == 5'b10001;
// Shift register
always @ (posedge tck or negedge trst) begin
sr <= '0;
else begin
sr <= nsr;
// SR next value
always_comb begin
assign devid_sel = ir == 5'b00001;
assign dr_en[0] = ir == 5'b10000;
assign dr_en[1] = ir == 5'b10001;
// Shift register
always @(posedge tck or negedge trst) begin
if (!trst) begin
sr <= '0;
end else begin
sr <= nsr;
// SR next value
always_comb begin
nsr = sr;
nsr = sr;
case (1)
shift_dr: begin
shift_dr: begin
case (1)
dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]};
dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]};
devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]};
devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]};
default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass
capture_dr: begin
capture_dr: begin
nsr[0] = 1'b0;
nsr[0] = 1'b0;
case (1)
dr_en[0]: nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version};
dr_en[0]: nsr = {{USER_DR_LENGTH - 15{1'b0}}, idle, dmi_stat, abits, version};
dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status};
dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status};
devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1};
devid_sel: nsr = {{USER_DR_LENGTH - 32{1'b0}}, jtag_id, 1'b1};
shift_ir: nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]};
shift_ir: nsr = {{USER_DR_LENGTH - 5{1'b0}}, tdi, sr[4:1]};
capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1};
capture_ir: nsr = {{USER_DR_LENGTH - 1{1'b0}}, 1'b1};
// TDO retiming
// TDO retiming
always @ (negedge tck ) tdo <= sr[0];
always @(negedge tck) tdo <= sr[0];
// DMI CS register
// DMI CS register
always @ (posedge tck or negedge trst) begin
always @(posedge tck or negedge trst) begin
if(!trst) begin
if (!trst) begin
dmi_hard_reset <= 1'b0;
dmi_hard_reset <= 1'b0;
dmi_reset <= 1'b0;
dmi_reset <= 1'b0;
end else if (update_dr & dr_en[0]) begin
else if (update_dr & dr_en[0]) begin
dmi_hard_reset <= sr[17];
dmi_hard_reset <= sr[17];
dmi_reset <= sr[16];
dmi_reset <= sr[16];
end else begin
dmi_hard_reset <= 1'b0;
dmi_reset <= 1'b0;
// DR register
always @(posedge tck or negedge trst) begin
if (!trst) dr <= '0;
else begin
else begin
dmi_hard_reset <= 1'b0;
if (update_dr & dr_en[1]) dr <= sr;
dmi_reset <= 1'b0;
else dr <= {dr[USER_DR_LENGTH-1:2], 2'b0};
// DR register
assign {wr_addr, wr_data, wr_en, rd_en} = dr;
always @ (posedge tck or negedge trst) begin
dr <= '0;
else begin
if (update_dr & dr_en[1])
dr <= sr;
dr <= {dr[USER_DR_LENGTH-1:2],2'b0};
assign {wr_addr, wr_data, wr_en, rd_en} = dr;
File diff suppressed because it is too large
Load Diff
@ -16,124 +16,132 @@
module el2_mem
module el2_mem
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk,
input logic clk,
input logic rst_l,
input logic rst_l,
input logic dccm_clk_override,
input logic dccm_clk_override,
input logic icm_clk_override,
input logic icm_clk_override,
input logic dec_tlu_core_ecc_disable,
input logic dec_tlu_core_ecc_disable,
//DCCM ports
//DCCM ports
input logic dccm_wren,
input logic dccm_wren,
input logic dccm_rden,
input logic dccm_rden,
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo,
input logic [ pt.DCCM_BITS-1:0] dccm_wr_addr_lo,
input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi,
input logic [ pt.DCCM_BITS-1:0] dccm_wr_addr_hi,
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo,
input logic [ pt.DCCM_BITS-1:0] dccm_rd_addr_lo,
input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi,
input logic [ pt.DCCM_BITS-1:0] dccm_rd_addr_hi,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi,
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi,
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi,
//`ifdef pt.DCCM_ENABLE
//`ifdef pt.DCCM_ENABLE
input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt,
input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt,
//ICCM ports
//ICCM ports
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt,
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt,
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr,
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr,
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_correction_state, // ICCM is doing a single bit error correct cycle
input logic iccm_correction_state, // ICCM is doing a single bit error correct cycle
input logic iccm_wren,
input logic iccm_wren,
input logic iccm_rden,
input logic iccm_rden,
input logic [2:0] iccm_wr_size,
input logic [2:0] iccm_wr_size,
input logic [77:0] iccm_wr_data,
input logic [77:0] iccm_wr_data,
output logic [63:0] iccm_rd_data,
output logic [63:0] iccm_rd_data,
output logic [77:0] iccm_rd_data_ecc,
output logic [77:0] iccm_rd_data_ecc,
// Icache and Itag Ports
// Icache and Itag Ports
input logic [31:1] ic_rw_addr,
input logic [31:1] ic_rw_addr,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en,
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en,
input logic ic_rd_en,
input logic ic_rd_en,
input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
input logic ic_sel_premux_data, // Premux data sel
input logic ic_sel_premux_data, // Premux data sel
input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,
input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt,
input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt,
input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt,
input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
input logic [70:0] ic_debug_wr_data, // Debug wr cache.
input logic [70:0] ic_debug_wr_data, // Debug wr cache.
output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache.
input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache.
input logic ic_debug_rd_en, // Icache debug rd
input logic ic_debug_rd_en, // Icache debug rd
input logic ic_debug_wr_en, // Icache debug wr
input logic ic_debug_wr_en, // Icache debug wr
input logic ic_debug_tag_array, // Debug tag array
input logic ic_debug_tag_array, // Debug tag array
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
output logic [25:0] ictag_debug_rd_data,// Debug icache tag.
output logic [25:0] ictag_debug_rd_data, // Debug icache tag.
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank
output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit,
output logic [ pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit,
output logic ic_tag_perr, // Icache Tag parity error
output logic ic_tag_perr, // Icache Tag parity error
input logic scan_mode
input logic scan_mode
logic active_clk;
logic active_clk;
rvoclkhdr active_cg ( .en(1'b1), .l1clk(active_clk), .* );
rvoclkhdr active_cg (
// DCCM Instantiation
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
el2_lsu_dccm_mem #(.pt(pt)) dccm (
end else begin: Gen_dccm_disable
assign dccm_rd_data_lo = '0;
assign dccm_rd_data_hi = '0;
if ( pt.ICACHE_ENABLE ) begin: icache
el2_ifu_ic_mem #(.pt(pt)) icm (
else begin
// DCCM Instantiation
assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0;
if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable
assign ic_tag_perr = '0 ;
el2_lsu_dccm_mem #(
assign ic_rd_data = '0 ;
assign ictag_debug_rd_data = '0 ;
) dccm (
end // else: !if( pt.ICACHE_ENABLE )
end else begin : Gen_dccm_disable
assign dccm_rd_data_lo = '0;
assign dccm_rd_data_hi = '0;
if (pt.ICACHE_ENABLE) begin : icache
el2_ifu_ic_mem #(
) icm (
end else begin
assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0;
assign ic_tag_perr = '0 ;
assign ic_rd_data = '0 ;
assign ictag_debug_rd_data = '0 ;
end // else: !if( pt.ICACHE_ENABLE )
if (pt.ICCM_ENABLE) begin : iccm
if (pt.ICCM_ENABLE) begin : iccm
el2_ifu_iccm_mem #(.pt(pt)) iccm (.*,
el2_ifu_iccm_mem #(
) iccm (
else begin
assign iccm_rd_data = '0 ;
assign iccm_rd_data_ecc = '0 ;
end else begin
assign iccm_rd_data = '0 ;
assign iccm_rd_data_ecc = '0 ;
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -15,355 +15,437 @@
module el2_exu
module el2_exu
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Top level clock
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic rst_l, // Reset
input logic scan_mode, // Scan control
input logic scan_mode, // Scan control
input logic [1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse
input logic [ 1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse
input logic [1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse
input logic [ 1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse
input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1
input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1
input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes}
input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes}
input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1
input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1
input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet
input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet
input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index
input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag
input logic [31:0] lsu_result_m, // Load result M-stage
input logic [31:0] lsu_result_m, // Load result M-stage
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic [31:0] lsu_nonblock_load_data, // nonblock load data
input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data
input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data
input logic [31:0] gpr_i0_rs1_d, // DEC data gpr
input logic [31:0] gpr_i0_rs1_d, // DEC data gpr
input logic [31:0] gpr_i0_rs2_d, // DEC data gpr
input logic [31:0] gpr_i0_rs2_d, // DEC data gpr
input logic [31:0] dec_i0_immed_d, // DEC data immediate
input logic [31:0] dec_i0_immed_d, // DEC data immediate
input logic [31:0] dec_i0_result_r, // DEC result in R-stage
input logic [31:0] dec_i0_result_r, // DEC result in R-stage
input logic [12:1] dec_i0_br_immed_d, // Branch immediate
input logic [12:1] dec_i0_br_immed_d, // Branch immediate
input logic dec_i0_alu_decode_d, // Valid to X-stage ALU
input logic dec_i0_alu_decode_d, // Valid to X-stage ALU
input logic dec_i0_branch_d, // Branch in D-stage
input logic dec_i0_branch_d, // Branch in D-stage
input logic dec_i0_select_pc_d, // PC select to RS1
input logic dec_i0_select_pc_d, // PC select to RS1
input logic [31:1] dec_i0_pc_d, // Instruction PC
input logic [31:1] dec_i0_pc_d, // Instruction PC
input logic [3:0] dec_i0_rs1_bypass_en_d, // DEC RS1 bypass select, one bit per source: [0] dec_i0_result_r, [1] lsu_result_m, [2] exu_i0_result_x, [3] lsu_nonblock_load_data
input logic [3:0] dec_i0_rs1_bypass_en_d, // DEC RS1 bypass select, one bit per source: [0] dec_i0_result_r, [1] lsu_result_m, [2] exu_i0_result_x, [3] lsu_nonblock_load_data
input logic [3:0] dec_i0_rs2_bypass_en_d, // DEC RS2 bypass select, one bit per source: [0] dec_i0_result_r, [1] lsu_result_m, [2] exu_i0_result_x, [3] lsu_nonblock_load_data
input logic [3:0] dec_i0_rs2_bypass_en_d, // DEC RS2 bypass select, one bit per source: [0] dec_i0_result_r, [1] lsu_result_m, [2] exu_i0_result_x, [3] lsu_nonblock_load_data
input logic dec_csr_ren_d, // CSR read select
input logic dec_csr_ren_d, // CSR read select
input logic [31:0] dec_csr_rddata_d, // CSR read data
input logic [31:0] dec_csr_rddata_d, // CSR read data
input logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
input logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands
input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass}
input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass}
input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem}
input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem}
input logic dec_div_cancel, // Cancel the divide operation
input logic dec_div_cancel, // Cancel the divide operation
input logic [31:1] pred_correct_npc_x, // DEC NPC for correctly predicted branch
input logic [31:1] pred_correct_npc_x, // DEC NPC for correctly predicted branch
input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs
input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs
input logic [31:1] dec_tlu_flush_path_r, // Redirect target
input logic [31:1] dec_tlu_flush_path_r, // Redirect target
input logic dec_extint_stall, // External stall mux select
input logic dec_extint_stall, // External stall mux select
input logic [31:2] dec_tlu_meihap, // External stall mux data
input logic [31:2] dec_tlu_meihap, // External stall mux data
output logic [31:0] exu_lsu_rs1_d, // LSU operand
output logic [31:0] exu_lsu_rs1_d, // LSU operand
output logic [31:0] exu_lsu_rs2_d, // LSU operand
output logic [31:0] exu_lsu_rs2_d, // LSU operand
output logic exu_flush_final, // Pipe is being flushed this cycle
output logic exu_flush_final, // Pipe is being flushed this cycle
output logic [31:1] exu_flush_path_final, // Target for the oldest flush source
output logic [31:1] exu_flush_path_final, // Target for the oldest flush source
output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC
output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC
output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC
output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC
output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction
output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction
output logic [31:1] exu_npc_r, // Divide NPC
output logic [31:1] exu_npc_r, // Divide NPC
output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history
output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history
output logic exu_i0_br_error_r, // to DEC I0 branch error
output logic exu_i0_br_error_r, // to DEC I0 branch error
output logic exu_i0_br_start_error_r, // to DEC I0 branch start error
output logic exu_i0_br_start_error_r, // to DEC I0 branch start error
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index
output logic exu_i0_br_valid_r, // to DEC I0 branch valid
output logic exu_i0_br_valid_r, // to DEC I0 branch valid
output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict
output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict
output logic exu_i0_br_middle_r, // to DEC I0 branch middle
output logic exu_i0_br_middle_r, // to DEC I0 branch middle
output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr
output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr
output logic exu_i0_br_way_r, // to DEC I0 branch way
output logic exu_i0_br_way_r, // to DEC I0 branch way
output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet
output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict
output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict
output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken
output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken
output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC
output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC
output logic [31:0] exu_div_result, // Divide result
output logic [31:0] exu_div_result, // Divide result
output logic exu_div_wren // Divide write enable to GPR
output logic exu_div_wren // Divide write enable to GPR
logic [31:0] i0_rs1_bypass_data_d;
logic [31:0] i0_rs2_bypass_data_d;
logic i0_rs1_bypass_en_d;
logic i0_rs2_bypass_en_d;
logic [31:0] i0_rs1_d, i0_rs2_d;
logic [31:0] muldiv_rs1_d;
logic [31:1] pred_correct_npc_r;
logic i0_pred_correct_upper_r;
logic [31:1] i0_flush_path_upper_r;
logic x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2;
logic x_ctl_en, r_ctl_en;
logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d;
logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x;
logic i0_taken_d;
logic i0_taken_x;
logic i0_valid_d;
logic i0_valid_x;
logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr;
el2_predict_pkt_t final_predict_mp;
el2_predict_pkt_t i0_predict_newp_d;
logic flush_in_d;
logic [ 31:0] alu_result_x;
logic mul_valid_x;
logic [ 31:0] mul_result_x;
el2_predict_pkt_t i0_pp_r;
logic i0_flush_upper_d;
logic [ 31:1] i0_flush_path_d;
el2_predict_pkt_t i0_predict_p_d;
logic i0_pred_correct_upper_d;
logic i0_flush_upper_x;
logic [ 31:1] i0_flush_path_x;
el2_predict_pkt_t i0_predict_p_x;
logic i0_pred_correct_upper_x;
logic i0_branch_x;
logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp;
rvdffpcie #(31) i_flush_path_x_ff (
.clk (clk),
.en (x_data_en),
.din (i0_flush_path_d[31:1]),
rvdffe #(32) i_csr_rs1_x_ff (
.clk (clk),
.en (x_data_en_q1),
.din (i0_rs1_d[31:0]),
rvdffppe #($bits(
)) i_predictpacket_x_ff (
.clk (clk),
.en (x_data_en),
.din (i0_predict_p_d),
rvdffe #(PREDPIPESIZE) i_predpipe_x_ff (
.clk (clk),
.en (x_data_en_q2),
.din (predpipe_d),
rvdffe #(PREDPIPESIZE) i_predpipe_r_ff (
.clk (clk),
.en (r_data_en_q2),
.din (predpipe_x),
rvdffe #(4 + pt.BHT_GHR_SIZE) i_x_ff (
rvdffppe #($bits(
) + 1) i_r_ff0 (
.clk (clk),
.en (r_ctl_en),
.din ({i0_pred_correct_upper_x, i0_predict_p_x}),
.dout({i0_pred_correct_upper_r, i0_pp_r})
rvdffpcie #(31) i_flush_r_ff (
.clk (clk),
.en (r_data_en),
.din (i0_flush_path_x[31:1]),
rvdffpcie #(31) i_npc_r_ff (
.clk (clk),
.en (r_data_en),
.din (pred_correct_npc_x[31:1]),
rvdffie #(pt.BHT_GHR_SIZE + 2, 1) i_misc_ff (
.clk (clk),
.din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}),
.dout({ghr_d[pt.BHT_GHR_SIZE-1:0], mul_valid_x, i0_branch_x})
logic [31:0] i0_rs1_bypass_data_d;
logic [31:0] i0_rs2_bypass_data_d;
logic i0_rs1_bypass_en_d;
logic i0_rs2_bypass_en_d;
logic [31:0] i0_rs1_d, i0_rs2_d;
logic [31:0] muldiv_rs1_d;
logic [31:1] pred_correct_npc_r;
logic i0_pred_correct_upper_r;
logic [31:1] i0_flush_path_upper_r;
logic x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2;
logic x_ctl_en, r_ctl_en;
logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d;
assign predpipe_d[PREDPIPESIZE-1:0] = {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d};
logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x;
logic i0_taken_d;
logic i0_taken_x;
logic i0_valid_d;
logic i0_valid_x;
logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr;
el2_predict_pkt_t final_predict_mp;
el2_predict_pkt_t i0_predict_newp_d;
logic flush_in_d;
logic [31:0] alu_result_x;
logic mul_valid_x;
logic [31:0] mul_result_x;
el2_predict_pkt_t i0_pp_r;
logic i0_flush_upper_d;
logic [31:1] i0_flush_path_d;
el2_predict_pkt_t i0_predict_p_d;
logic i0_pred_correct_upper_d;
logic i0_flush_upper_x;
logic [31:1] i0_flush_path_x;
el2_predict_pkt_t i0_predict_p_x;
logic i0_pred_correct_upper_x;
logic i0_branch_x;
logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp;
assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3];
assign i0_rs2_bypass_en_d = dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3];
assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
// Stage registers carrying branch/flush state from D to X and from X to R.
// The rvdff* cells are declared outside this file.
// NOTE(review): presumably enable-gated flops (din captured into dout when en is set) - confirm against the library.

// Branch flush target produced at D, held for X (enable: x_data_en).
rvdffpcie #(31) i_flush_path_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_flush_path_d[31:1] ), .dout( i0_flush_path_x[31:1] ) );
// RS1 operand exported for CSR instructions; x_data_en_q1 includes dec_csr_ren_d, so it only updates on CSR reads.
rvdffe #(32) i_csr_rs1_x_ff (.*, .clk(clk), .en ( x_data_en_q1 ), .din ( i0_rs1_d[31:0] ), .dout( exu_csr_rs1_x[31:0] ) );
// Predict packet returned by the ALU (i0_predict_p_d), held for X.
rvdffppe #($bits(el2_predict_pkt_t)) i_predictpacket_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_predict_p_d ), .dout( i0_predict_p_x ) );
// {fghr, index, btag} bundle, D -> X -> R. Both enables carry a branch qualifier
// (dec_i0_branch_d for X, i0_branch_x for R), so the bundle only moves for branches.
rvdffe #(PREDPIPESIZE) i_predpipe_x_ff (.*, .clk(clk), .en ( x_data_en_q2 ), .din ( predpipe_d ), .dout( predpipe_x ) );
rvdffe #(PREDPIPESIZE) i_predpipe_r_ff (.*, .clk(clk), .en ( r_data_en_q2 ), .din ( predpipe_x ), .dout( predpipe_r ) );
// X-stage control bits (valid, taken, upper flush, prediction-correct) plus the X-stage GHR state (ghr_x_ns -> ghr_x).
rvdffe #(4+pt.BHT_GHR_SIZE) i_x_ff (.*, .clk(clk), .en ( x_ctl_en ), .din ({i0_valid_d,i0_taken_d,i0_flush_upper_d,i0_pred_correct_upper_d,ghr_x_ns[pt.BHT_GHR_SIZE-1:0]} ),
.dout({i0_valid_x,i0_taken_x,i0_flush_upper_x,i0_pred_correct_upper_x,ghr_x[pt.BHT_GHR_SIZE-1:0]} ) );
// Prediction-correct flag and predict packet, X -> R.
rvdffppe #($bits(el2_predict_pkt_t)+1) i_r_ff0 (.*, .clk(clk), .en ( r_ctl_en ), .din ({i0_pred_correct_upper_x, i0_predict_p_x}),
.dout({i0_pred_correct_upper_r, i0_pp_r }) );
// The two candidate sources for exu_npc_r, X -> R: the branch flush path and the correctly-predicted NPC from DEC.
rvdffpcie #(31) i_flush_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( i0_flush_path_x[31:1] ), .dout( i0_flush_path_upper_r[31:1]) );
rvdffpcie #(31) i_npc_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( pred_correct_npc_x[31:1] ), .dout( pred_correct_npc_r[31:1] ) );
// No enable port is connected on this instance: D-stage GHR state, multiply-valid and branch flag (D -> X).
rvdffie #(pt.BHT_GHR_SIZE+2,1) i_misc_ff (.*, .clk(clk), .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}),
.dout({ghr_d[pt.BHT_GHR_SIZE-1:0] , mul_valid_x, i0_branch_x}) );
// Bundle layout, MSB to LSB: fetch GHR, BTB index, BTB tag. The slices taken from predpipe_r / final_predpipe_mp rely on this order.
assign predpipe_d[PREDPIPESIZE-1:0]
= {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d};
// An operand is bypassed when any of its four select bits from DEC is set.
assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3];
assign i0_rs2_bypass_en_d = dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3];
// RS1 bypass data: AND-OR mux over the four forwarding sources
//   [0] dec_i0_result_r, [1] lsu_result_m, [2] exu_i0_result_x, [3] lsu_nonblock_load_data.
// If more than one select bit were set, the sources would be OR-ed together.
// NOTE(review): presumably DEC drives the select one-hot - confirm.
assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs1_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
({32{dec_i0_rs1_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
// RS2 bypass data: same four sources, selected by dec_i0_rs2_bypass_en_d.
assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[1]}} & lsu_result_m[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) |
({32{dec_i0_rs2_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
({32{dec_i0_rs2_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]);
// ALU operand A. Bypass data wins whenever a bypass is active; otherwise the terms below are OR-ed:
//   - the instruction PC with bit 0 forced to 0 when dec_i0_select_pc_d is set,
//   - the debug write data when dec_debug_wdata_rs1_d is set,
//   - the GPR read value when RS1 is enabled and the debug select is clear.
// The PC term is not gated against the other two non-bypass terms.
// NOTE(review): presumably DEC keeps these selects mutually exclusive - confirm.
assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) |
assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) |
({32{~i0_rs1_bypass_en_d & dec_i0_select_pc_d }} & {dec_i0_pc_d[31:1],1'b0} ) | // for jal's
({32{~i0_rs1_bypass_en_d & dec_i0_select_pc_d }} & {dec_i0_pc_d[31:1],1'b0} ) | // for jal's
({32{~i0_rs1_bypass_en_d & dec_debug_wdata_rs1_d }} & dbg_cmd_wrdata[31:0] ) |
({32{~i0_rs1_bypass_en_d & dec_debug_wdata_rs1_d }} & dbg_cmd_wrdata[31:0] ) |
({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] );
({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] );
// ALU operand B. With no bypass active, the GPR value (when RS2 is enabled) and the immediate are OR-ed together;
// the immediate term has no qualifier of its own.
// NOTE(review): presumably DEC drives dec_i0_immed_d to zero for register-register ops - confirm.
assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) |
assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) |
({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) |
({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) |
({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]);
({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]);
// LSU operand 1. Every term is qualified by dec_qual_lsu_d, so the bus is all-zero when that qualifier is low.
// While dec_extint_stall is set, the GPR and bypass terms are blocked and the word-aligned
// dec_tlu_meihap value ({meihap[31:2], 2'b0}) is driven instead.
assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0] ) |
assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0] ) |
({32{ i0_rs1_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs1_bypass_data_d[31:0]) |
({32{ i0_rs1_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs1_bypass_data_d[31:0]) |
({32{ dec_extint_stall & dec_qual_lsu_d}} & {dec_tlu_meihap[31:2],2'b0});
({32{ dec_extint_stall & dec_qual_lsu_d}} & {dec_tlu_meihap[31:2],2'b0});
// LSU operand 2. Same qualification as operand 1, but with no external-interrupt term: it is zero during dec_extint_stall.
assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0] ) |
assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0] ) |
({32{ i0_rs2_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs2_bypass_data_d[31:0]);
({32{ i0_rs2_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs2_bypass_data_d[31:0]);
// Multiply/divide operand 1: GPR or bypass data only. Unlike i0_rs1_d, there is no PC or debug-data source.
assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) |
assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) |
({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]);
({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]);
// Per-stage enables split out of the DEC vectors: bit [1] drives the X-stage registers, bit [0] the R-stage registers
// (the port comments describe both vectors as {x,r}).
assign x_data_en = dec_data_en[1];
assign x_data_en = dec_data_en[1];
// Qualified X data enable: only when the D-stage instruction reads a CSR (used by i_csr_rs1_x_ff).
assign x_data_en_q1 = dec_data_en[1] & dec_csr_ren_d;
assign x_data_en_q1 = dec_data_en[1] & dec_csr_ren_d;
// Qualified X data enable: only when the D-stage instruction is a branch (used by i_predpipe_x_ff).
assign x_data_en_q2 = dec_data_en[1] & dec_i0_branch_d;
assign x_data_en_q2 = dec_data_en[1] & dec_i0_branch_d;
assign r_data_en = dec_data_en[0];
assign r_data_en = dec_data_en[0];
// Qualified R data enable: only when the X-stage instruction is a branch (used by i_predpipe_r_ff).
assign r_data_en_q2 = dec_data_en[0] & i0_branch_x;
assign r_data_en_q2 = dec_data_en[0] & i0_branch_x;
assign x_ctl_en = dec_ctl_en[1];
assign x_ctl_en = dec_ctl_en[1];
assign r_ctl_en = dec_ctl_en[0];
assign r_ctl_en = dec_ctl_en[0];
el2_exu_alu_ctl #(.pt(pt)) i_alu (.*,
el2_exu_alu_ctl #(
.enable ( x_data_en ), // I
.pp_in ( i0_predict_newp_d ), // I
) i_alu (
.valid_in ( dec_i0_alu_decode_d ), // I
.flush_upper_x ( i0_flush_upper_x ), // I
.enable (x_data_en), // I
.flush_lower_r ( dec_tlu_flush_lower_r ), // I
.pp_in (i0_predict_newp_d), // I
.a_in ( i0_rs1_d[31:0] ), // I
.valid_in (dec_i0_alu_decode_d), // I
.b_in ( i0_rs2_d[31:0] ), // I
.flush_upper_x (i0_flush_upper_x), // I
.pc_in ( dec_i0_pc_d[31:1] ), // I
.flush_lower_r (dec_tlu_flush_lower_r), // I
.brimm_in ( dec_i0_br_immed_d[12:1] ), // I
.a_in (i0_rs1_d[31:0]), // I
.ap ( i0_ap ), // I
.b_in (i0_rs2_d[31:0]), // I
.csr_ren_in ( dec_csr_ren_d ), // I
.pc_in (dec_i0_pc_d[31:1]), // I
.csr_rddata_in ( dec_csr_rddata_d[31:0] ), // I
.brimm_in (dec_i0_br_immed_d[12:1]), // I
.result_ff ( alu_result_x[31:0] ), // O
.ap (i0_ap), // I
.flush_upper_out ( i0_flush_upper_d ), // O
.csr_ren_in (dec_csr_ren_d), // I
.flush_final_out ( exu_flush_final ), // O
.csr_rddata_in (dec_csr_rddata_d[31:0]), // I
.flush_path_out ( i0_flush_path_d[31:1] ), // O
.result_ff (alu_result_x[31:0]), // O
.predict_p_out ( i0_predict_p_d ), // O
.flush_upper_out (i0_flush_upper_d), // O
.pred_correct_out ( i0_pred_correct_upper_d ), // O
.flush_final_out (exu_flush_final), // O
.pc_ff ( exu_i0_pc_x[31:1] )); // O
.flush_path_out (i0_flush_path_d[31:1]), // O
.predict_p_out (i0_predict_p_d), // O
.pred_correct_out(i0_pred_correct_upper_d), // O
.pc_ff (exu_i0_pc_x[31:1])
); // O
el2_exu_mul_ctl #(.pt(pt)) i_mul (.*,
el2_exu_mul_ctl #(
.mul_p ( mul_p & {$bits(el2_mul_pkt_t){mul_p.valid}} ), // I
.rs1_in ( muldiv_rs1_d[31:0] & {32{mul_p.valid}} ), // I
) i_mul (
.rs2_in ( i0_rs2_d[31:0] & {32{mul_p.valid}} ), // I
.result_x ( mul_result_x[31:0] )); // O
.mul_p (mul_p & {$bits(el2_mul_pkt_t) {mul_p.valid}}), // I
.rs1_in (muldiv_rs1_d[31:0] & {32{mul_p.valid}}), // I
.rs2_in (i0_rs2_d[31:0] & {32{mul_p.valid}}), // I
); // O
el2_exu_div_ctl #(.pt(pt)) i_div (.*,
el2_exu_div_ctl #(
.cancel ( dec_div_cancel ), // I
.dp ( div_p ), // I
) i_div (
.dividend ( muldiv_rs1_d[31:0] ), // I
.divisor ( i0_rs2_d[31:0] ), // I
.cancel (dec_div_cancel), // I
.finish_dly ( exu_div_wren ), // O
.dp (div_p), // I
.out ( exu_div_result[31:0] )); // O
.dividend (muldiv_rs1_d[31:0]), // I
.divisor (i0_rs2_d[31:0]), // I
.finish_dly(exu_div_wren), // O
.out (exu_div_result[31:0])
); // O
// X-stage result to DEC: the multiplier output when mul_valid_x is set, otherwise the ALU output.
// mul_valid_x is mul_p.valid registered by i_misc_ff.
assign exu_i0_result_x[31:0] = (mul_valid_x) ? mul_result_x[31:0] : alu_result_x[31:0];
assign exu_i0_result_x[31:0] = (mul_valid_x) ? mul_result_x[31:0] : alu_result_x[31:0];
// Predict packet handed to the ALU: DEC's packet with boffset overwritten by PC bit 1.
always_comb begin
always_comb begin
i0_predict_newp_d = dec_i0_predict_p_d;
i0_predict_newp_d = dec_i0_predict_p_d;
i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst
i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst
// Performance-monitor taps, taken directly from the R-stage predict packet.
assign exu_pmu_i0_br_misp = i0_pp_r.misp;
assign exu_pmu_i0_br_misp = i0_pp_r.misp;
assign exu_pmu_i0_br_ataken = i0_pp_r.ataken;
assign exu_pmu_i0_br_ataken = i0_pp_r.ataken;
assign exu_pmu_i0_pc4 = i0_pp_r.pc4;
assign exu_pmu_i0_pc4 = i0_pp_r.pc4;
// D-stage branch valid/taken, used for GHR maintenance and registered into X by i_x_ff.
// i0_predict_p_d is the packet returned by the ALU (predict_p_out), not DEC's input packet.
// Only the valid bit is gated by the lower-pipe flush.
assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r;
assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r;
assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d);
assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d);
// Branch-prediction bookkeeping. It is generated only when the BTB is configured in;
// the else branch ties every output and GHR next-state to zero.
if(pt.BTB_ENABLE==1) begin
if (pt.BTB_ENABLE == 1) begin
// maintain GHR at D
// maintain GHR at D
// Next-state priority:
//   lower-pipe flush      -> reload from the X-stage copy (ghr_x)
//   valid D-stage branch  -> shift left, new outcome (i0_taken_d) enters at bit 0
//   otherwise             -> hold
assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0]
assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0]
= ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) |
= ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) |
({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} & ghr_d[pt.BHT_GHR_SIZE-1:0] ) |
({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} & ghr_d[pt.BHT_GHR_SIZE-1:0] ) |
({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r }} & ghr_x[pt.BHT_GHR_SIZE-1:0] );
({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r }} & ghr_x[pt.BHT_GHR_SIZE-1:0] );
// maintain GHR at X
// maintain GHR at X
// Shift in the X-stage outcome for a valid X-stage branch, otherwise hold. There is no flush term here.
assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0]
assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0]
= ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) |
= ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) |
({pt.BHT_GHR_SIZE{~i0_valid_x}} & ghr_x[pt.BHT_GHR_SIZE-1:0] ) ;
({pt.BHT_GHR_SIZE{~i0_valid_x}} & ghr_x[pt.BHT_GHR_SIZE-1:0] ) ;
// R-stage branch status to DEC, unpacked from the R-stage predict packet (i0_pp_r).
assign exu_i0_br_valid_r = i0_pp_r.valid;
assign exu_i0_br_valid_r = i0_pp_r.valid;
assign exu_i0_br_mp_r = i0_pp_r.misp;
assign exu_i0_br_mp_r = i0_pp_r.misp;
assign exu_i0_br_way_r = i0_pp_r.way;
assign exu_i0_br_way_r = i0_pp_r.way;
// The history counter is forced to zero when the packet is not valid.
assign exu_i0_br_hist_r[1:0] = {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0];
assign exu_i0_br_hist_r[1:0] = {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0];
assign exu_i0_br_error_r = i0_pp_r.br_error;
assign exu_i0_br_error_r = i0_pp_r.br_error;
// XOR of the pc4 flag and the start offset bit.
assign exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset;
assign exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset;
assign exu_i0_br_start_error_r = i0_pp_r.br_start_error;
assign exu_i0_br_start_error_r = i0_pp_r.br_start_error;
// Upper part of the R-stage {fghr, index, btag} bundle; the btag field in the low bits is dropped.
assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0],
assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0],
exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}= predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE];
exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}= predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE];
// Mispredict information is presented only in the cycle where the X-stage upper flush is set; otherwise it is all zeros.
assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0;
assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0;
assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0;
assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0;
// GHR to resume from: the D-stage copy for an upper flush with no lower flush, otherwise the X-stage copy.
assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0];
assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0];
// Field-by-field copy into the mispredict packet; only the fields listed here are driven in this branch.
assign exu_mp_pkt.valid = final_predict_mp.valid;
assign exu_mp_pkt.valid = final_predict_mp.valid;
assign exu_mp_pkt.way = final_predict_mp.way;
assign exu_mp_pkt.way = final_predict_mp.way;
assign exu_mp_pkt.misp = final_predict_mp.misp;
assign exu_mp_pkt.misp = final_predict_mp.misp;
assign exu_mp_pkt.pcall = final_predict_mp.pcall;
assign exu_mp_pkt.pcall = final_predict_mp.pcall;
assign exu_mp_pkt.pja = final_predict_mp.pja;
assign exu_mp_pkt.pja = final_predict_mp.pja;
assign exu_mp_pkt.pret = final_predict_mp.pret;
assign exu_mp_pkt.pret = final_predict_mp.pret;
assign exu_mp_pkt.ataken = final_predict_mp.ataken;
assign exu_mp_pkt.ataken = final_predict_mp.ataken;
assign exu_mp_pkt.boffset = final_predict_mp.boffset;
assign exu_mp_pkt.boffset = final_predict_mp.boffset;
assign exu_mp_pkt.pc4 = final_predict_mp.pc4;
assign exu_mp_pkt.pc4 = final_predict_mp.pc4;
assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0];
assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0];
assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0];
assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0];
// Note the naming: exu_mp_fghr carries the execute-side GHR chosen above,
// while exu_mp_eghr (below) carries the fghr field piped from fetch.
assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0];
assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0];
// Low part of the X-stage bundle: {index, btag}.
assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],
assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],
exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]} = final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0];
exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]} = final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0];
// Top BHT_GHR_SIZE bits of the bundle: the lower bound is the index width (ADDR_HI-ADDR_LO+1) plus the btag width.
assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write
assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write
end // if (pt.BTB_ENABLE==1)
end // if (pt.BTB_ENABLE==1)
// BTB disabled: the GHR next-state and all branch-prediction outputs are tied to zero.
else begin
else begin
assign ghr_d_ns = '0;
assign ghr_d_ns = '0;
assign ghr_x_ns = '0;
assign ghr_x_ns = '0;
assign exu_mp_pkt = '0;
assign exu_mp_pkt = '0;
assign exu_mp_eghr = '0;
assign exu_mp_eghr = '0;
assign exu_mp_fghr = '0;
assign exu_mp_fghr = '0;
assign exu_mp_index = '0;
assign exu_mp_index = '0;
assign exu_mp_btag = '0;
assign exu_mp_btag = '0;
assign exu_i0_br_hist_r = '0;
assign exu_i0_br_hist_r = '0;
assign exu_i0_br_error_r = '0;
assign exu_i0_br_error_r = '0;
assign exu_i0_br_start_error_r = '0;
assign exu_i0_br_start_error_r = '0;
assign exu_i0_br_index_r = '0;
assign exu_i0_br_index_r = '0;
assign exu_i0_br_valid_r = '0;
assign exu_i0_br_valid_r = '0;
assign exu_i0_br_mp_r = '0;
assign exu_i0_br_mp_r = '0;
assign exu_i0_br_middle_r = '0;
assign exu_i0_br_middle_r = '0;
assign exu_i0_br_fghr_r = '0;
assign exu_i0_br_fghr_r = '0;
assign exu_i0_br_way_r = '0;
assign exu_i0_br_way_r = '0;
end // else: !if(pt.BTB_ENABLE==1)
end // else: !if(pt.BTB_ENABLE==1)
// Final redirect target. A lower-pipe flush (dec_tlu_flush_lower_r) takes priority and selects the TLU path;
// otherwise a D-stage upper flush selects the ALU's branch flush path. With neither asserted the bus is zero.
assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r }} & dec_tlu_flush_path_r[31:1] ) |
assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r }} & dec_tlu_flush_path_r[31:1] ) |
( {31{~dec_tlu_flush_lower_r & i0_flush_upper_d}} & i0_flush_path_d[31:1] );
( {31{~dec_tlu_flush_lower_r & i0_flush_upper_d}} & i0_flush_path_d[31:1] );
// R-stage next PC: the correctly-predicted NPC when the prediction was correct, otherwise the registered branch flush path.
assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? pred_correct_npc_r[31:1] : i0_flush_path_upper_r[31:1];
assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? pred_correct_npc_r[31:1] : i0_flush_path_upper_r[31:1];
endmodule // el2_exu
endmodule // el2_exu
@ -15,255 +15,247 @@
module el2_exu_alu_ctl
module el2_exu_alu_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Top level clock
input logic clk, // Top level clock
input logic rst_l, // Reset
input logic rst_l, // Reset
input logic scan_mode, // Scan control
input logic scan_mode, // Scan control
input logic flush_upper_x, // Branch flush from previous cycle
input logic flush_upper_x, // Branch flush from previous cycle
input logic flush_lower_r, // Master flush of entire pipeline
input logic flush_lower_r, // Master flush of entire pipeline
input logic enable, // Clock enable
input logic enable, // Clock enable
input logic valid_in, // Valid
input logic valid_in, // Valid
input el2_alu_pkt_t ap, // predecodes
input el2_alu_pkt_t ap, // predecodes
input logic csr_ren_in, // CSR select
input logic csr_ren_in, // CSR select
input logic [31:0] csr_rddata_in, // CSR data
input logic [31:0] csr_rddata_in, // CSR data
input logic signed [31:0] a_in, // A operand
input logic signed [31:0] a_in, // A operand
input logic [31:0] b_in, // B operand
input logic [31:0] b_in, // B operand
input logic [31:1] pc_in, // for pc=pc+2,4 calculations
input logic [31:1] pc_in, // for pc=pc+2,4 calculations
input el2_predict_pkt_t pp_in, // Predicted branch structure
input el2_predict_pkt_t pp_in, // Predicted branch structure
input logic [12:1] brimm_in, // Branch offset
input logic [12:1] brimm_in, // Branch offset
output logic [31:0] result_ff, // final result
output logic [31:0] result_ff, // final result
output logic flush_upper_out, // Branch flush
output logic flush_upper_out, // Branch flush
output logic flush_final_out, // Branch flush or flush entire pipeline
output logic flush_final_out, // Branch flush or flush entire pipeline
output logic [31:1] flush_path_out, // Branch flush PC
output logic [31:1] flush_path_out, // Branch flush PC
output logic [31:1] pc_ff, // flopped PC
output logic [31:1] pc_ff, // flopped PC
output logic pred_correct_out, // NPC control
output logic pred_correct_out, // NPC control
output el2_predict_pkt_t predict_p_out // Predicted branch structure
output el2_predict_pkt_t predict_p_out // Predicted branch structure
logic [31:0] zba_a_in;
logic [31:0] aout;
logic cout, ov, neg;
logic [31:0] lout;
logic [31:0] sout;
logic sel_shift;
logic sel_adder;
logic slt_one;
logic actual_taken;
logic [31:1] pcout;
logic cond_mispredict;
logic target_mispredict;
logic eq, ne, lt, ge;
logic any_jal;
logic [ 1:0] newhist;
logic sel_pc;
logic [31:0] csr_write_data;
logic [31:0] result;
// *** Start - BitManip ***
// Zbb
logic ap_clz;
logic ap_ctz;
logic ap_cpop;
logic ap_sext_b;
logic ap_sext_h;
logic ap_min;
logic ap_max;
logic ap_rol;
logic ap_ror;
logic ap_rev8;
logic ap_orc_b;
logic ap_zbb;
// Zbs
logic ap_bset;
logic ap_bclr;
logic ap_binv;
logic ap_bext;
// Zbp
logic ap_pack;
logic ap_packu;
logic ap_packh;
// Zba
logic ap_sh1add;
logic ap_sh2add;
logic ap_sh3add;
logic ap_zba;
if (pt.BITMANIP_ZBB == 1) begin
assign ap_clz = ap.clz;
assign ap_ctz = ap.ctz;
assign ap_cpop = ap.cpop;
assign ap_sext_b = ap.sext_b;
assign ap_sext_h = ap.sext_h;
assign ap_min = ap.min;
assign ap_max = ap.max;
end else begin
assign ap_clz = 1'b0;
assign ap_ctz = 1'b0;
assign ap_cpop = 1'b0;
assign ap_sext_b = 1'b0;
assign ap_sext_h = 1'b0;
assign ap_min = 1'b0;
assign ap_max = 1'b0;
if ((pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1)) begin
assign ap_rol = ap.rol;
assign ap_ror = ap.ror;
assign ap_rev8 = ap.grev & (b_in[4:0] == 5'b11000);
assign ap_orc_b = ap.gorc & (b_in[4:0] == 5'b00111);
assign ap_zbb = ap.zbb;
end else begin
assign ap_rol = 1'b0;
assign ap_ror = 1'b0;
assign ap_rev8 = 1'b0;
assign ap_orc_b = 1'b0;
assign ap_zbb = 1'b0;
if (pt.BITMANIP_ZBS == 1) begin
assign ap_bset = ap.bset;
assign ap_bclr = ap.bclr;
assign ap_binv = ap.binv;
assign ap_bext = ap.bext;
end else begin
assign ap_bset = 1'b0;
assign ap_bclr = 1'b0;
assign ap_binv = 1'b0;
assign ap_bext = 1'b0;
if (pt.BITMANIP_ZBP == 1) begin
assign ap_packu = ap.packu;
end else begin
assign ap_packu = 1'b0;
if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) | (pt.BITMANIP_ZBE == 1) | (pt.BITMANIP_ZBF == 1) )
assign ap_pack = ap.pack;
assign ap_packh = ap.packh;
end else begin
assign ap_pack = 1'b0;
assign ap_packh = 1'b0;
if (pt.BITMANIP_ZBA == 1) begin
assign ap_sh1add = ap.sh1add;
assign ap_sh2add = ap.sh2add;
assign ap_sh3add = ap.sh3add;
assign ap_zba = ap.zba;
end else begin
assign ap_sh1add = 1'b0;
assign ap_sh2add = 1'b0;
assign ap_sh3add = 1'b0;
assign ap_zba = 1'b0;
// *** End - BitManip ***
rvdffpcie #(31) i_pc_ff (
.clk (clk),
.en (enable),
.din (pc_in[31:1]),
); // any PC is run through here - doesn't have to be alu
rvdffe #(32) i_result_ff (
.clk (clk),
.en (enable & valid_in),
.din (result[31:0]),
logic [31:0] zba_a_in;
logic [31:0] aout;
// immediates are just muxed into rs2
logic cout,ov,neg;
logic [31:0] lout;
// add => add=1;
logic [31:0] sout;
// sub => add=1; sub=1;
logic sel_shift;
logic sel_adder;
// and => lctl=3
logic slt_one;
// or => lctl=2
logic actual_taken;
// xor => lctl=1
logic [31:1] pcout;
logic cond_mispredict;
// sll => sctl=3
logic target_mispredict;
// srl => sctl=2
logic eq, ne, lt, ge;
// sra => sctl=1
logic any_jal;
logic [1:0] newhist;
// slt => slt
logic sel_pc;
logic [31:0] csr_write_data;
// lui => lctl=2; or x0, imm20 previously << 12
logic [31:0] result;
// auipc => add; add pc, imm20 previously << 12
// beq => bctl=4; add; add x0, pc, sext(offset[12:1])
// bne => bctl=3; add; add x0, pc, sext(offset[12:1])
// blt => bctl=2; add; add x0, pc, sext(offset[12:1])
// bge => bctl=1; add; add x0, pc, sext(offset[12:1])
// jal => rs1=pc {pc[31:1],1'b0}, rs2=sext(offset[20:1]); rd=pc+[2,4]
// jalr => rs1=rs1, rs2=sext(offset[20:1]); rd=pc+[2,4]
assign zba_a_in[31:0] = ( {32{ ap_sh1add}} & {a_in[30:0],1'b0} ) |
// *** Start - BitManip ***
// Zbb
logic ap_clz;
logic ap_ctz;
logic ap_cpop;
logic ap_sext_b;
logic ap_sext_h;
logic ap_min;
logic ap_max;
logic ap_rol;
logic ap_ror;
logic ap_rev8;
logic ap_orc_b;
logic ap_zbb;
// Zbs
logic ap_bset;
logic ap_bclr;
logic ap_binv;
logic ap_bext;
// Zbp
logic ap_pack;
logic ap_packu;
logic ap_packh;
// Zba
logic ap_sh1add;
logic ap_sh2add;
logic ap_sh3add;
logic ap_zba;
if (pt.BITMANIP_ZBB == 1)
assign ap_clz = ap.clz;
assign ap_ctz = ap.ctz;
assign ap_cpop = ap.cpop;
assign ap_sext_b = ap.sext_b;
assign ap_sext_h = ap.sext_h;
assign ap_min = ap.min;
assign ap_max = ap.max;
assign ap_clz = 1'b0;
assign ap_ctz = 1'b0;
assign ap_cpop = 1'b0;
assign ap_sext_b = 1'b0;
assign ap_sext_h = 1'b0;
assign ap_min = 1'b0;
assign ap_max = 1'b0;
if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) )
assign ap_rol = ap.rol;
assign ap_ror = ap.ror;
assign ap_rev8 = ap.grev & (b_in[4:0] == 5'b11000);
assign ap_orc_b = ap.gorc & (b_in[4:0] == 5'b00111);
assign ap_zbb = ap.zbb;
assign ap_rol = 1'b0;
assign ap_ror = 1'b0;
assign ap_rev8 = 1'b0;
assign ap_orc_b = 1'b0;
assign ap_zbb = 1'b0;
if (pt.BITMANIP_ZBS == 1)
assign ap_bset = ap.bset;
assign ap_bclr = ap.bclr;
assign ap_binv = ap.binv;
assign ap_bext = ap.bext;
assign ap_bset = 1'b0;
assign ap_bclr = 1'b0;
assign ap_binv = 1'b0;
assign ap_bext = 1'b0;
if (pt.BITMANIP_ZBP == 1)
assign ap_packu = ap.packu;
assign ap_packu = 1'b0;
if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) | (pt.BITMANIP_ZBE == 1) | (pt.BITMANIP_ZBF == 1) )
assign ap_pack = ap.pack;
assign ap_packh = ap.packh;
assign ap_pack = 1'b0;
assign ap_packh = 1'b0;
if (pt.BITMANIP_ZBA == 1)
assign ap_sh1add = ap.sh1add;
assign ap_sh2add = ap.sh2add;
assign ap_sh3add = ap.sh3add;
assign ap_zba = ap.zba;
assign ap_sh1add = 1'b0;
assign ap_sh2add = 1'b0;
assign ap_sh3add = 1'b0;
assign ap_zba = 1'b0;
// *** End - BitManip ***
rvdffpcie #(31) i_pc_ff (.*, .clk(clk), .en(enable), .din(pc_in[31:1]), .dout(pc_ff[31:1])); // any PC is run through here - doesn't have to be alu
rvdffe #(32) i_result_ff (.*, .clk(clk), .en(enable & valid_in), .din(result[31:0]), .dout(result_ff[31:0]));
// immediates are just muxed into rs2
// add => add=1;
// sub => add=1; sub=1;
// and => lctl=3
// or => lctl=2
// xor => lctl=1
// sll => sctl=3
// srl => sctl=2
// sra => sctl=1
// slt => slt
// lui => lctl=2; or x0, imm20 previously << 12
// auipc => add; add pc, imm20 previously << 12
// beq => bctl=4; add; add x0, pc, sext(offset[12:1])
// bne => bctl=3; add; add x0, pc, sext(offset[12:1])
// blt => bctl=2; add; add x0, pc, sext(offset[12:1])
// bge => bctl=1; add; add x0, pc, sext(offset[12:1])
// jal => rs1=pc {pc[31:1],1'b0}, rs2=sext(offset[20:1]); rd=pc+[2,4]
// jalr => rs1=rs1, rs2=sext(offset[20:1]); rd=pc+[2,4]
assign zba_a_in[31:0] = ( {32{ ap_sh1add}} & {a_in[30:0],1'b0} ) |
( {32{ ap_sh2add}} & {a_in[29:0],2'b0} ) |
( {32{ ap_sh2add}} & {a_in[29:0],2'b0} ) |
( {32{ ap_sh3add}} & {a_in[28:0],3'b0} ) |
( {32{ ap_sh3add}} & {a_in[28:0],3'b0} ) |
( {32{~ap_zba }} & a_in[31:0] );
( {32{~ap_zba }} & a_in[31:0] );
// Adder and comparison flags.
// NOTE(review): every statement below appears twice (original and reformatted
// spelling) and multi-line statements are interleaved -- this looks like a
// rendered diff; confirm against the real source file.
// bm is the second adder operand: b_in inverted when ap.sub is set, b_in otherwise.
logic [31:0] bm;
logic [31:0] bm;
assign bm[31:0] = ( ap.sub ) ? ~b_in[31:0] : b_in[31:0];
assign bm[31:0] = (ap.sub) ? ~b_in[31:0] : b_in[31:0];
// 33-bit sum of zba_a_in, bm and ap.sub (as the +1 of a two's-complement
// subtract); bit 32 of the sum is captured in cout.
assign {cout, aout[31:0]} = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
assign {cout, aout[31:0]} = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub};
// ov: a_in and bm have the same sign bit but the sum's sign bit differs.
assign ov = (~a_in[31] & ~bm[31] & aout[31]) |
assign ov = (~a_in[31] & ~bm[31] & aout[31]) | (a_in[31] & bm[31] & ~aout[31]);
( a_in[31] & bm[31] & ~aout[31] );
// lt: signed compare uses neg ^ ov; unsigned compare (ap.unsign) uses ~cout.
assign lt = (~ap.unsign & (neg ^ ov)) |
assign lt = (~ap.unsign & (neg ^ ov)) | (ap.unsign & ~cout);
( ap.unsign & ~cout);
// eq compares the raw operands directly; ne, ge are the complements of eq, lt.
assign eq = (a_in[31:0] == b_in[31:0]);
assign eq = (a_in[31:0] == b_in[31:0]);
assign ne = ~eq;
assign ne = ~eq;
// neg is the sign bit of the adder result.
assign neg = aout[31];
assign neg = aout[31];
assign ge = ~lt;
assign ge = ~lt;
assign lout[31:0] = ( {32{csr_ren_in }} & csr_rddata_in[31:0] ) |
assign lout[31:0] = ( {32{csr_ren_in }} & csr_rddata_in[31:0] ) |
( {32{ & ~ap_zbb}} & a_in[31:0] & b_in[31:0] ) |
( {32{ & ~ap_zbb}} & a_in[31:0] & b_in[31:0] ) |
( {32{ap.lor & ~ap_zbb}} & (a_in[31:0] | b_in[31:0]) ) |
( {32{ap.lor & ~ap_zbb}} & (a_in[31:0] | b_in[31:0]) ) |
( {32{ap.lxor & ~ap_zbb}} & (a_in[31:0] ^ b_in[31:0]) ) |
( {32{ap.lxor & ~ap_zbb}} & (a_in[31:0] ^ b_in[31:0]) ) |
@ -274,16 +266,16 @@ import el2_pkg::*;
// * * * * * * * * * * * * * * * * * * BitManip : ROL,ROR * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : ROL,ROR * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : ZBEXT * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : ZBEXT * * * * * * * * * * * * * * * * * *
logic [5:0] shift_amount;
logic [ 5:0] shift_amount;
logic [31:0] shift_mask;
logic [31:0] shift_mask;
logic [62:0] shift_extend;
logic [62:0] shift_extend;
logic [62:0] shift_long;
logic [62:0] shift_long;
assign shift_amount[5:0] = ( { 6{ap.sll}} & (6'd32 - {1'b0,b_in[4:0]}) ) | // [5] unused
assign shift_amount[5:0] = ( { 6{ap.sll}} & (6'd32 - {1'b0,b_in[4:0]}) ) | // [5] unused
( { 6{}} & {1'b0,b_in[4:0]} ) |
( { 6{}} & {1'b0,b_in[4:0]} ) |
( { 6{ap.sra}} & {1'b0,b_in[4:0]} ) |
( { 6{ap.sra}} & {1'b0,b_in[4:0]} ) |
( { 6{ap_rol}} & (6'd32 - {1'b0,b_in[4:0]}) ) |
( { 6{ap_rol}} & (6'd32 - {1'b0,b_in[4:0]}) ) |
@ -291,174 +283,199 @@ import el2_pkg::*;
( { 6{ap_bext}} & {1'b0,b_in[4:0]} );
( { 6{ap_bext}} & {1'b0,b_in[4:0]} );
// Shifter datapath: one right shift of a 63-bit extended operand, then a mask.
// NOTE(review): statements are duplicated/interleaved (old vs. reformatted
// spelling) -- looks like a rendered diff; confirm against the real source file.
// shift_mask: all ones shifted left by b_in[4:0] when ap.sll is set (shift
// count is 0 otherwise, leaving the mask all ones).
assign shift_mask[31:0] = ( 32'hffffffff << ({5{ap.sll}} & b_in[4:0]) );
assign shift_mask[31:0] = (32'hffffffff << ({5{ap.sll}} & b_in[4:0]));
// shift_extend: low 32 bits are a_in; the upper 31 bits are the replicated
// sign bit for ap.sra, a_in[30:0] for ap.sll / ap_rol / ap_ror, and zero when
// none of those selects is set.
assign shift_extend[31:0] = a_in[31:0];
assign shift_extend[31:0] = a_in[31:0];
assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) |
assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) |
( {31{ap.sll}} & a_in[30:0] ) |
( {31{ap.sll}} & a_in[30:0] ) |
( {31{ap_rol}} & a_in[30:0] ) |
( {31{ap_rol}} & a_in[30:0] ) |
( {31{ap_ror}} & a_in[30:0] );
( {31{ap_ror}} & a_in[30:0] );
// Right shift by the low 5 bits of shift_amount; only bits [31:0] are consumed below.
assign shift_long[62:0] = ( shift_extend[62:0] >> shift_amount[4:0] ); // 62-32 unused
assign shift_long[62:0] = (shift_extend[62:0] >> shift_amount[4:0]); // 62-32 unused
// sout: shifted value with shift_mask applied.
assign sout[31:0] = shift_long[31:0] & shift_mask[31:0];
assign sout[31:0] = shift_long[31:0] & shift_mask[31:0];
// * * * * * * * * * * * * * * * * * * BitManip : CLZ,CTZ * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : CLZ,CTZ * * * * * * * * * * * * * * * * * *
logic bitmanip_clz_ctz_sel;
logic bitmanip_clz_ctz_sel;
logic [31:0] bitmanip_a_reverse_ff;
logic [31:0] bitmanip_a_reverse_ff;
logic [31:0] bitmanip_lzd_in;
logic [31:0] bitmanip_lzd_in;
logic [5:0] bitmanip_dw_lzd_enc;
logic [ 5:0] bitmanip_dw_lzd_enc;
logic [5:0] bitmanip_clz_ctz_result;
logic [ 5:0] bitmanip_clz_ctz_result;
assign bitmanip_clz_ctz_sel = ap_clz | ap_ctz;
assign bitmanip_clz_ctz_sel = ap_clz | ap_ctz;
assign bitmanip_a_reverse_ff[31:0] = {a_in[0], a_in[1], a_in[2], a_in[3], a_in[4], a_in[5], a_in[6], a_in[7],
assign bitmanip_a_reverse_ff[31:0] = {
a_in[8], a_in[9], a_in[10], a_in[11], a_in[12], a_in[13], a_in[14], a_in[15],
a_in[16], a_in[17], a_in[18], a_in[19], a_in[20], a_in[21], a_in[22], a_in[23],
a_in[24], a_in[25], a_in[26], a_in[27], a_in[28], a_in[29], a_in[30], a_in[31]};
assign bitmanip_lzd_in[31:0] = ( {32{ap_clz}} & a_in[31:0] ) |
assign bitmanip_lzd_in[31:0] = ( {32{ap_clz}} & a_in[31:0] ) |
( {32{ap_ctz}} & bitmanip_a_reverse_ff[31:0]);
( {32{ap_ctz}} & bitmanip_a_reverse_ff[31:0]);
// Leading-zero counter over bitmanip_lzd_in.
// bitmanip_lzd_os is a working copy that is shifted left one bit per iteration;
// bitmanip_dw_lzd_enc counts the iterations in which bit 31 was 0.
// NOTE(review): lines are duplicated (old vs. reformatted spelling) and the
// closing `end` lines are not visible here -- looks like a rendered diff.
logic [31:0] bitmanip_lzd_os;
logic [31:0] bitmanip_lzd_os;
// The for loop below declares its own `int i`, separate from this integer.
integer i;
integer i;
// found is set once a 1 reaches bit 31 and terminates the loop via its condition.
logic found;
logic found;
always_comb begin
bitmanip_lzd_os[31:0] = bitmanip_lzd_in[31:0];
bitmanip_lzd_os[31:0] = bitmanip_lzd_in[31:0];
bitmanip_dw_lzd_enc[5:0] = 6'b0;
bitmanip_dw_lzd_enc[5:0]= 6'b0;
found = 1'b0;
found = 1'b0;
// At most 32 iterations; an all-zero input leaves the count at 32 (bit 5 set).
for (int i=0; i<32 && found==0; i++) begin
for (int i = 0; i < 32 && found == 0; i++) begin
if (bitmanip_lzd_os[31] == 1'b0) begin
if (bitmanip_lzd_os[31] == 1'b0) begin
bitmanip_dw_lzd_enc[5:0]= bitmanip_dw_lzd_enc[5:0] + 6'b00_0001;
bitmanip_dw_lzd_enc[5:0] = bitmanip_dw_lzd_enc[5:0] + 6'b00_0001;
bitmanip_lzd_os[31:0] = bitmanip_lzd_os[31:0] << 1;
bitmanip_lzd_os[31:0] = bitmanip_lzd_os[31:0] << 1;
end else found = 1'b1;
// Result is gated by bitmanip_clz_ctz_sel; when bit 5 of the count is set the
// low five bits are forced to zero.
assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )};
assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )};
// * * * * * * * * * * * * * * * * * * BitManip : CPOP * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : CPOP * * * * * * * * * * * * * * * * * *
// Population count of a_in: bitmanip_cpop accumulates one zero-extended bit of
// a_in per loop iteration, giving a 6-bit count of the set bits.
// NOTE(review): lines are duplicated (old vs. reformatted spelling) -- looks
// like a rendered diff; confirm against the real source file.
logic [5:0] bitmanip_cpop;
logic [5:0] bitmanip_cpop;
logic [5:0] bitmanip_cpop_result;
logic [5:0] bitmanip_cpop_result;
// Loop index for the accumulation below.
integer bitmanip_cpop_i;
integer bitmanip_cpop_i;
always_comb begin
bitmanip_cpop[5:0] = 6'b0;
bitmanip_cpop[5:0] = 6'b0;
for (bitmanip_cpop_i=0; bitmanip_cpop_i<32; bitmanip_cpop_i++)
for (bitmanip_cpop_i = 0; bitmanip_cpop_i < 32; bitmanip_cpop_i++) begin
bitmanip_cpop[5:0] = bitmanip_cpop[5:0] + {5'b0, a_in[bitmanip_cpop_i]};
bitmanip_cpop[5:0] = bitmanip_cpop[5:0] + {5'b0,a_in[bitmanip_cpop_i]};
end // FOR bitmanip_cpop_i
end // FOR bitmanip_cpop_i
// The count is forced to zero unless ap_cpop is set.
assign bitmanip_cpop_result[5:0] = {6{ap_cpop}} & bitmanip_cpop[5:0];
assign bitmanip_cpop_result[5:0] = {6{ap_cpop}} & bitmanip_cpop[5:0];
// * * * * * * * * * * * * * * * * * * BitManip : SEXT_B,SEXT_H * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : SEXT_B,SEXT_H * * * * * * * * * * * * * * * * *
// Sign extension result: ap_sext_b replicates a_in[7] into bits [31:8] above
// a_in[7:0]; ap_sext_h replicates a_in[15] into bits [31:16] above a_in[15:0].
// The result is zero when neither select is set.
logic [31:0] bitmanip_sext_result;
logic [31:0] bitmanip_sext_result;
assign bitmanip_sext_result[31:0] = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0] } ) |
assign bitmanip_sext_result[31:0] = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0] } ) |
( {32{ap_sext_h}} & { {16{a_in[15]}},a_in[15:0] } );
( {32{ap_sext_h}} & { {16{a_in[15]}},a_in[15:0] } );
// * * * * * * * * * * * * * * * * * * BitManip : MIN,MAX,MINU,MAXU * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : MIN,MAX,MINU,MAXU * * * * * * * * * * * * * * *
// MIN/MAX result selection, driven by the ge flag.
// bitmanip_minmax_sel is set for either ap_min or ap_max and gates the result.
logic bitmanip_minmax_sel;
logic bitmanip_minmax_sel;
logic [31:0] bitmanip_minmax_result;
logic [31:0] bitmanip_minmax_result;
assign bitmanip_minmax_sel = ap_min | ap_max;
assign bitmanip_minmax_sel = ap_min | ap_max;
// bitmanip_minmax_sel_a chooses a_in over b_in: with ap_min set it equals ~ge,
// with ap_min clear it equals ge.
logic bitmanip_minmax_sel_a;
logic bitmanip_minmax_sel_a;
assign bitmanip_minmax_sel_a = ge ^ ap_min;
assign bitmanip_minmax_sel_a = ge ^ ap_min;
// Result is a_in when sel_a is set, b_in otherwise; zero when neither op is active.
assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel & bitmanip_minmax_sel_a}} & a_in[31:0]) |
assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel & bitmanip_minmax_sel_a}} & a_in[31:0]) |
({32{bitmanip_minmax_sel & ~bitmanip_minmax_sel_a}} & b_in[31:0]);
({32{bitmanip_minmax_sel & ~bitmanip_minmax_sel_a}} & b_in[31:0]);
// * * * * * * * * * * * * * * * * * * BitManip : PACK, PACKU, PACKH * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : PACK, PACKU, PACKH * * * * * * * * * * * * * * *
// PACK family results; each is zero unless its own select (ap_pack, ap_packu,
// ap_packh) is set.
logic [31:0] bitmanip_pack_result;
logic [31:0] bitmanip_pack_result;
logic [31:0] bitmanip_packu_result;
logic [31:0] bitmanip_packu_result;
logic [31:0] bitmanip_packh_result;
logic [31:0] bitmanip_packh_result;
// pack: low halfword of b_in above low halfword of a_in.
assign bitmanip_pack_result[31:0] = {32{ap_pack}} & {b_in[15:0], a_in[15:0]};
assign bitmanip_pack_result[31:0] = {32{ap_pack}} & {b_in[15:0], a_in[15:0]};
// packu: high halfword of b_in above high halfword of a_in.
assign bitmanip_packu_result[31:0] = {32{ap_packu}} & {b_in[31:16],a_in[31:16]};
assign bitmanip_packu_result[31:0] = {32{ap_packu}} & {b_in[31:16], a_in[31:16]};
// packh: low byte of b_in above low byte of a_in, upper 16 bits zero.
assign bitmanip_packh_result[31:0] = {32{ap_packh}} & {16'b0,b_in[7:0],a_in[7:0]};
assign bitmanip_packh_result[31:0] = {32{ap_packh}} & {16'b0, b_in[7:0], a_in[7:0]};
// * * * * * * * * * * * * * * * * * * BitManip : REV, ORC_B * * * * * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : REV, ORC_B * * * * * * * * * * * * * * * * * *
logic [31:0] bitmanip_rev8_result;
logic [31:0] bitmanip_rev8_result;
logic [31:0] bitmanip_orc_b_result;
logic [31:0] bitmanip_orc_b_result;
assign bitmanip_rev8_result[31:0] = {32{ap_rev8}} & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]};
assign bitmanip_rev8_result[31:0] = {32{ap_rev8}} & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]};
// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
// uint32_t gorc32(uint32_t rs1, uint32_t rs2)
// {
// {
// uint32_t x = rs1;
// uint32_t x = rs1;
// int shamt = rs2 & 31; ORC.B ORC16
// int shamt = rs2 & 31; ORC.B ORC16
// if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); 1 0
// if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); 1 0
// if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); 1 0
// if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); 1 0
// if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); 1 0
// if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); 1 0
// if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); 0 0
// if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); 0 0
// if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); 0 1
// if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); 0 1
// return x;
// return x;
// }
// }
// BEFORE 31 , 30 , 29 , 28 , 27 , 26, 25, 24
// BEFORE 31 , 30 , 29 , 28 , 27 , 26, 25, 24
// shamt[0] b = a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24
// shamt[0] b = a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24
// shamt[1] c = b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24
// shamt[1] c = b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24
// shamt[2] d = c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24
// shamt[2] d = c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24
// Expand d31 = c31 | c27;
// Expand d31 = c31 | c27;
// = b31 | b29 | b27 | b25;
// = b31 | b29 | b27 | b25;
// = a31|a30 | a29|a28 | a27|a26 | a25|a24
// = a31|a30 | a29|a28 | a27|a26 | a25|a24
assign bitmanip_orc_b_result[31:0] = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} };
assign bitmanip_orc_b_result[31:0] = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} };
// * * * * * * * * * * * * * * * * * * BitManip : ZBSET, ZBCLR, ZBINV * * * * * * * * * * * * * *
// * * * * * * * * * * * * * * * * * * BitManip : ZBSET, ZBCLR, ZBINV * * * * * * * * * * * * * *
// Single-bit set / clear / invert.
// bitmanip_sb_1hot has exactly one bit set, at the position given by b_in[4:0].
logic [31:0] bitmanip_sb_1hot;
logic [31:0] bitmanip_sb_1hot;
logic [31:0] bitmanip_sb_data;
logic [31:0] bitmanip_sb_data;
assign bitmanip_sb_1hot[31:0] = ( 32'h00000001 << b_in[4:0] );
assign bitmanip_sb_1hot[31:0] = (32'h00000001 << b_in[4:0]);
// ap_bset ORs the selected bit into a_in, ap_bclr clears it, ap_binv toggles it;
// the result is zero when none of the three selects is set.
assign bitmanip_sb_data[31:0] = ( {32{ap_bset}} & ( a_in[31:0] | bitmanip_sb_1hot[31:0]) ) |
assign bitmanip_sb_data[31:0] = ( {32{ap_bset}} & ( a_in[31:0] | bitmanip_sb_1hot[31:0]) ) |
( {32{ap_bclr}} & ( a_in[31:0] & ~bitmanip_sb_1hot[31:0]) ) |
( {32{ap_bclr}} & ( a_in[31:0] & ~bitmanip_sb_1hot[31:0]) ) |
( {32{ap_binv}} & ( a_in[31:0] ^ bitmanip_sb_1hot[31:0]) );
( {32{ap_binv}} & ( a_in[31:0] ^ bitmanip_sb_1hot[31:0]) );
@ -467,16 +484,16 @@ import el2_pkg::*;
assign sel_shift = ap.sll | | ap.sra | ap_rol | ap_ror;
assign sel_shift = ap.sll | | ap.sra | ap_rol | ap_ror;
assign sel_adder = (ap.add | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max;
assign sel_adder = (ap.add | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max;
assign sel_pc = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret;
assign sel_pc = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret;
assign csr_write_data[31:0]= (ap.csr_imm) ? b_in[31:0] : a_in[31:0];
assign csr_write_data[31:0] = (ap.csr_imm) ? b_in[31:0] : a_in[31:0];
assign slt_one = ap.slt & lt;
assign slt_one = ap.slt & lt;
assign result[31:0] = lout[31:0] |
assign result[31:0] = lout[31:0] |
({32{sel_shift}} & sout[31:0] ) |
({32{sel_shift}} & sout[31:0] ) |
({32{sel_adder}} & aout[31:0] ) |
({32{sel_adder}} & aout[31:0] ) |
({32{sel_pc}} & {pcout[31:1],1'b0} ) |
({32{sel_pc}} & {pcout[31:1],1'b0} ) |
@ -496,81 +513,74 @@ import el2_pkg::*;
// *** branch handling ***
// *** branch handling ***
assign any_jal = ap.jal |
assign any_jal = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret;
pp_in.pcall |
pp_in.pja |
assign actual_taken = (ap.beq & eq) |
assign actual_taken = (ap.beq & eq) | (ap.bne & ne) | (ap.blt & lt) | (ap.bge & ge) | any_jal;
(ap.bne & ne) |
(ap.blt & lt) |
(ap.bge & ge) |
// for a conditional br pcout[] will be the opposite of the branch prediction
// for a conditional br pcout[] will be the opposite of the branch prediction
// for jal or pcall, it will be the link address pc+2 or pc+4
// for jal or pcall, it will be the link address pc+2 or pc+4
rvbradder ibradder (
rvbradder ibradder (
.pc ( pc_in[31:1] ),
.pc (pc_in[31:1]),
.offset ( brimm_in[12:1] ),
.dout ( pcout[31:1] ));
.dout (pcout[31:1])
// pred_correct is for the npc logic
// pred_correct is for the npc logic
// pred_correct indicates not to use the flush_path
// pred_correct indicates not to use the flush_path
// for any_jal pred_correct==0
// for any_jal pred_correct==0
assign pred_correct_out = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) |
assign pred_correct_out = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) |
(valid_in & ap.predict_t & actual_taken & ~any_jal);
(valid_in & ap.predict_t & actual_taken & ~any_jal);
// for any_jal adder output is the flush path
// for any_jal adder output is the flush path
assign flush_path_out[31:1]= (any_jal) ? aout[31:1] : pcout[31:1];
assign flush_path_out[31:1] = (any_jal) ? aout[31:1] : pcout[31:1];
// pcall and pret are included here
// pcall and pret are included here
assign cond_mispredict = (ap.predict_t & ~actual_taken) |
assign cond_mispredict = (ap.predict_t & ~actual_taken) | (ap.predict_nt & actual_taken);
(ap.predict_nt & actual_taken);
// target mispredicts on ret's
// target mispredicts on ret's
assign target_mispredict = pp_in.pret & (pp_in.prett[31:1] != aout[31:1]);
assign target_mispredict = pp_in.pret & (pp_in.prett[31:1] != aout[31:1]);
// Both flush outputs share the same trigger: ap.jal, a conditional mispredict or
// a target mispredict, qualified by valid_in and not already flushed by
// flush_upper_x. flush_upper_out is additionally suppressed by flush_lower_r,
// whereas flush_final_out is forced high by flush_lower_r.
assign flush_upper_out = (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x & ~flush_lower_r;
assign flush_upper_out = (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x & ~flush_lower_r;
assign flush_final_out = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) | flush_lower_r;
assign flush_final_out = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) | flush_lower_r;
// .i 3
// .i 3
// .o 2
// .o 2
// .ilb hist[1] hist[0] taken
// .ilb hist[1] hist[0] taken
// .ob newhist[1] newhist[0]
// .ob newhist[1] newhist[0]
// .type fd
// .type fd
// 00 0 01
// 00 0 01
// 01 0 01
// 01 0 01
// 10 0 00
// 10 0 00
// 11 0 10
// 11 0 10
// 00 1 10
// 00 1 10
// 01 1 00
// 01 1 00
// 10 1 11
// 10 1 11
// 11 1 11
// 11 1 11
// Next value of the 2-bit history from the current pp_in.hist and actual_taken.
// These two equations reproduce the eight rows of the hist/taken -> newhist
// truth table given in the comment immediately preceding them.
assign newhist[1] = ( pp_in.hist[1] & pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken);
assign newhist[1] = (pp_in.hist[1] & pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken);
assign newhist[0] = (~pp_in.hist[1] & ~actual_taken) | ( pp_in.hist[1] & actual_taken);
assign newhist[0] = (~pp_in.hist[1] & ~actual_taken) | (pp_in.hist[1] & actual_taken);
// Build the outgoing prediction packet: start from a copy of pp_in, then
// override the fields this block computes.
// NOTE(review): lines are duplicated (old vs. reformatted spelling) and the
// closing `end` is not visible here -- looks like a rendered diff.
always_comb begin
always_comb begin
predict_p_out = pp_in;
predict_p_out = pp_in;
// misp: conditional or target mispredict, masked while either flush_upper_x or
// flush_lower_r is asserted.
predict_p_out.misp = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict);
predict_p_out.misp = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict);
// ataken: the resolved branch direction.
predict_p_out.ataken = actual_taken;
predict_p_out.ataken = actual_taken;
// hist: replaced with the updated history bits.
predict_p_out.hist[1] = newhist[1];
predict_p_out.hist[1] = newhist[1];
predict_p_out.hist[0] = newhist[0];
predict_p_out.hist[0] = newhist[0];
endmodule // el2_exu_alu_ctl
endmodule // el2_exu_alu_ctl
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -20,352 +20,411 @@
module el2_ifu
module el2_ifu
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic dec_i0_decode_d, // Valid instruction at D and not blocked
input logic dec_i0_decode_d, // Valid instruction at D and not blocked
input logic exu_flush_final, // flush, includes upper and lower
input logic exu_flush_final, // flush, includes upper and lower
input logic dec_tlu_i0_commit_cmt , // committed i0
input logic dec_tlu_i0_commit_cmt, // committed i0
input logic dec_tlu_flush_err_wb , // flush due to parity error.
input logic dec_tlu_flush_err_wb, // flush due to parity error.
input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final
input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final
input logic [31:1] exu_flush_path_final, // flush fetch address
input logic [31:1] exu_flush_path_final, // flush fetch address
input logic [31:0] dec_tlu_mrac_ff ,// Side_effect , cacheable for each region
input logic [31:0] dec_tlu_mrac_ff, // Side_effect , cacheable for each region
input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final
input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final
input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches
input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches
input logic dec_tlu_bpred_disable, // disable all branch prediction
input logic dec_tlu_bpred_disable, // disable all branch prediction
input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging
input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging
input logic dec_tlu_force_halt, // force halt
input logic dec_tlu_force_halt, // force halt
//-------------------------- IFU AXI signals--------------------------
//-------------------------- IFU AXI signals--------------------------
// AXI Write Channels
// AXI Write Channels
output logic ifu_axi_awvalid,
output logic ifu_axi_awvalid,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid,
output logic [31:0] ifu_axi_awaddr,
output logic [ 31:0] ifu_axi_awaddr,
output logic [3:0] ifu_axi_awregion,
output logic [ 3:0] ifu_axi_awregion,
output logic [7:0] ifu_axi_awlen,
output logic [ 7:0] ifu_axi_awlen,
output logic [2:0] ifu_axi_awsize,
output logic [ 2:0] ifu_axi_awsize,
output logic [1:0] ifu_axi_awburst,
output logic [ 1:0] ifu_axi_awburst,
output logic ifu_axi_awlock,
output logic ifu_axi_awlock,
output logic [3:0] ifu_axi_awcache,
output logic [ 3:0] ifu_axi_awcache,
output logic [2:0] ifu_axi_awprot,
output logic [ 2:0] ifu_axi_awprot,
output logic [3:0] ifu_axi_awqos,
output logic [ 3:0] ifu_axi_awqos,
output logic ifu_axi_wvalid,
output logic ifu_axi_wvalid,
output logic [63:0] ifu_axi_wdata,
output logic [63:0] ifu_axi_wdata,
output logic [7:0] ifu_axi_wstrb,
output logic [ 7:0] ifu_axi_wstrb,
output logic ifu_axi_wlast,
output logic ifu_axi_wlast,
output logic ifu_axi_bready,
output logic ifu_axi_bready,
// AXI Read Channels
// AXI Read Channels
output logic ifu_axi_arvalid,
output logic ifu_axi_arvalid,
input logic ifu_axi_arready,
input logic ifu_axi_arready,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid,
output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid,
output logic [31:0] ifu_axi_araddr,
output logic [ 31:0] ifu_axi_araddr,
output logic [3:0] ifu_axi_arregion,
output logic [ 3:0] ifu_axi_arregion,
output logic [7:0] ifu_axi_arlen,
output logic [ 7:0] ifu_axi_arlen,
output logic [2:0] ifu_axi_arsize,
output logic [ 2:0] ifu_axi_arsize,
output logic [1:0] ifu_axi_arburst,
output logic [ 1:0] ifu_axi_arburst,
output logic ifu_axi_arlock,
output logic ifu_axi_arlock,
output logic [3:0] ifu_axi_arcache,
output logic [ 3:0] ifu_axi_arcache,
output logic [2:0] ifu_axi_arprot,
output logic [ 2:0] ifu_axi_arprot,
output logic [3:0] ifu_axi_arqos,
output logic [ 3:0] ifu_axi_arqos,
input logic ifu_axi_rvalid,
input logic ifu_axi_rvalid,
output logic ifu_axi_rready,
output logic ifu_axi_rready,
input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid,
input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid,
input logic [63:0] ifu_axi_rdata,
input logic [ 63:0] ifu_axi_rdata,
input logic [1:0] ifu_axi_rresp,
input logic [ 1:0] ifu_axi_rresp,
input logic ifu_bus_clk_en,
input logic ifu_bus_clk_en,
input logic dma_iccm_req,
input logic dma_iccm_req,
input logic [31:0] dma_mem_addr,
input logic [31:0] dma_mem_addr,
input logic [2:0] dma_mem_sz,
input logic [ 2:0] dma_mem_sz,
input logic dma_mem_write,
input logic dma_mem_write,
input logic [63:0] dma_mem_wdata,
input logic [63:0] dma_mem_wdata,
input logic [2:0] dma_mem_tag, // DMA Buffer entry number
input logic [ 2:0] dma_mem_tag, // DMA Buffer entry number
input logic dma_iccm_stall_any,
input logic dma_iccm_stall_any,
output logic iccm_dma_ecc_error,
output logic iccm_dma_ecc_error,
output logic iccm_dma_rvalid,
output logic iccm_dma_rvalid,
output logic [63:0] iccm_dma_rdata,
output logic [63:0] iccm_dma_rdata,
output logic [2:0] iccm_dma_rtag, // Tag of the DMA req
output logic [ 2:0] iccm_dma_rtag, // Tag of the DMA req
output logic iccm_ready,
output logic iccm_ready,
output logic ifu_pmu_instr_aligned,
output logic ifu_pmu_instr_aligned,
output logic ifu_pmu_fetch_stall,
output logic ifu_pmu_fetch_stall,
output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag
output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag
// I$ & ITAG Ports
// I$ & ITAG Ports
output logic [31:1] ic_rw_addr, // Read/Write address to the Icache.
output logic [31:1] ic_rw_addr, // Read/Write address to the Icache.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache.
output logic ic_rd_en, // Icache read enable.
output logic ic_rd_en, // Icache read enable.
output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC
input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC
input logic [25:0] ictag_debug_rd_data,// Debug icache tag.
input logic [25:0] ictag_debug_rd_data, // Debug icache tag.
output logic [70:0] ic_debug_wr_data, // Debug wr cache.
output logic [70:0] ic_debug_wr_data, // Debug wr cache.
output logic [70:0] ifu_ic_debug_rd_data,
output logic [70:0] ifu_ic_debug_rd_data,
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, //
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, //
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr,
output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache.
output logic ic_sel_premux_data, // Select the premux data.
output logic ic_sel_premux_data, // Select the premux data.
output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write address to the Icache.
output logic [ pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write address to the Icache.
output logic ic_debug_rd_en, // Icache debug rd
output logic ic_debug_rd_en, // Icache debug rd
output logic ic_debug_wr_en, // Icache debug wr
output logic ic_debug_wr_en, // Icache debug wr
output logic ic_debug_tag_array, // Debug tag array
output logic ic_debug_tag_array, // Debug tag array
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr.
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage
output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage
input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage
input logic ic_tag_perr, // Icache Tag parity error
input logic ic_tag_perr, // Icache Tag parity error
// ICCM ports
// ICCM ports
output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address.
output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address.
output logic iccm_wren, // ICCM write enable (through the DMA)
output logic iccm_wren, // ICCM write enable (through the DMA)
output logic iccm_rden, // ICCM read enable.
output logic iccm_rden, // ICCM read enable.
output logic [77:0] iccm_wr_data, // ICCM write data.
output logic [ 77:0] iccm_wr_data, // ICCM write data.
output logic [2:0] iccm_wr_size, // ICCM write location within DW.
output logic [ 2:0] iccm_wr_size, // ICCM write location within DW.
input logic [63:0] iccm_rd_data, // Data read from ICCM.
input logic [63:0] iccm_rd_data, // Data read from ICCM.
input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM.
input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM.
output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error.
output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error.
// Perf counter sigs
// Perf counter sigs
output logic ifu_pmu_ic_miss, // ic miss
output logic ifu_pmu_ic_miss, // ic miss
output logic ifu_pmu_ic_hit, // ic hit
output logic ifu_pmu_ic_hit, // ic hit
output logic ifu_pmu_bus_error, // iside bus error
output logic ifu_pmu_bus_error, // iside bus error
output logic ifu_pmu_bus_busy, // iside bus busy
output logic ifu_pmu_bus_busy, // iside bus busy
output logic ifu_pmu_bus_trxn, // iside bus transactions
output logic ifu_pmu_bus_trxn, // iside bus transactions
output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode
output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode
output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type
output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode
output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode
output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst
output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst
output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error
output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error
output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access
output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access
output logic[31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode
output logic [31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode
output logic[31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode
output logic [31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode
output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode
output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode
output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle.
output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle.
output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode
output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index
output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR
output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag
output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associative BTB index
output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associative BTB index
input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr
input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr
input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
input logic [ pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt
input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt
input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative BTB error index
input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative BTB error index
input dec_tlu_flush_lower_wb,
input dec_tlu_flush_lower_wb,
output logic [15:0] ifu_i0_cinst,
output logic [15:0] ifu_i0_cinst,
/// Icache debug
/// Icache debug
input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt ,
input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt,
output logic ifu_ic_debug_rd_data_valid,
output logic ifu_ic_debug_rd_data_valid,
output logic iccm_buf_correct_ecc,
output logic iccm_buf_correct_ecc,
output logic iccm_correction_state,
output logic iccm_correction_state,
input logic scan_mode
input logic scan_mode
localparam TAGWIDTH = 2 ;
localparam TAGWIDTH = 2;
localparam IDWIDTH = 2 ;
localparam IDWIDTH = 2;
logic ifu_fb_consume1, ifu_fb_consume2;
logic ifu_fb_consume1, ifu_fb_consume2;
logic [31:1] ifc_fetch_addr_f;
logic [31:1] ifc_fetch_addr_f;
logic [31:1] ifc_fetch_addr_bf;
logic [31:1] ifc_fetch_addr_bf;
logic [1:0] ifu_fetch_val; // valids on a 2B boundary, left justified (NOTE(review): comment previously cited bit [7], which is outside this [1:0] vector - confirm which bit implies a valid fetch)
logic [ 1:0] ifu_fetch_val; // valids on a 2B boundary, left justified (NOTE(review): comment previously cited bit [7], which is outside this [1:0] vector - confirm which bit implies a valid fetch)
logic [31:1] ifu_fetch_pc; // starting pc of fetch
logic [31:1] ifu_fetch_pc; // starting pc of fetch
logic iccm_rd_ecc_single_err, ic_error_start;
logic iccm_rd_ecc_single_err, ic_error_start;
assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err;
assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err;
assign ifu_ic_error_start = ic_error_start;
assign ifu_ic_error_start = ic_error_start;
logic ic_write_stall;
logic ic_write_stall;
logic ic_dma_active;
logic ic_dma_active;
logic ifc_dma_access_ok;
logic ifc_dma_access_ok;
logic [1:0] ic_access_fault_f;
logic [1:0] ic_access_fault_f;
logic [1:0] ic_access_fault_type_f;
logic [1:0] ic_access_fault_type_f;
logic ifu_ic_mb_empty;
logic ifu_ic_mb_empty;
logic ic_hit_f;
logic ic_hit_f;
logic [1:0] ifu_bp_way_f; // way indication; right justified
logic [1:0] ifu_bp_way_f; // way indication; right justified
logic ifu_bp_hit_taken_f; // kill next fetch; taken target found
logic ifu_bp_hit_taken_f; // kill next fetch; taken target found
logic [31:1] ifu_bp_btb_target_f; // predicted target PC
logic [31:1] ifu_bp_btb_target_f; // predicted target PC
logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified
logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified
logic [1:0] ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified
logic [1:0] ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified
logic [1:0] ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified
logic [1:0] ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified
logic [11:0] ifu_bp_poffset_f; // predicted target
logic [11:0] ifu_bp_poffset_f; // predicted target
logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified
logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified
logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified
logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified
logic [1:0] ifu_bp_valid_f; // branch valid, right justified
logic [1:0] ifu_bp_valid_f; // branch valid, right justified
logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f;
logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f;
logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f;
logic [1:0][$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f;
// fetch control
// fetch control
el2_ifu_ifc_ctl #(.pt(pt)) ifc (.*
el2_ifu_ifc_ctl #(.pt(pt)) ifc (.*);
// branch predictor
// branch predictor
if (pt.BTB_ENABLE==1) begin : bpred
if (pt.BTB_ENABLE == 1) begin : bpred
el2_ifu_bp_ctl #(.pt(pt)) bp (.*);
el2_ifu_bp_ctl #(.pt(pt)) bp (.*);
end else begin : bpred
else begin : bpred
assign ifu_bp_hit_taken_f = '0;
assign ifu_bp_hit_taken_f = '0;
// verif wires
// verif wires
logic btb_wr_en_way0, btb_wr_en_way1, dec_tlu_error_wb;
logic btb_wr_en_way0, btb_wr_en_way1,dec_tlu_error_wb;
logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data;
logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data;
assign btb_wr_en_way0 = '0;
assign btb_wr_en_way0 = '0;
assign btb_wr_en_way1 = '0;
assign btb_wr_en_way1 = '0;
assign btb_wr_data = '0;
assign btb_wr_data = '0;
assign dec_tlu_error_wb = '0;
assign dec_tlu_error_wb ='0;
assign ifu_bp_inst_mask_f = 1'b1;
assign ifu_bp_inst_mask_f = 1'b1;
logic [1:0] ic_fetch_val_f;
logic [1:0] ic_fetch_val_f;
logic [31:0] ic_data_f;
logic [31:0] ic_data_f;
logic [31:0] ifu_fetch_data_f;
logic [31:0] ifu_fetch_data_f;
logic ifc_fetch_req_f;
logic ifc_fetch_req_f;
logic ifc_fetch_req_f_raw;
logic ifc_fetch_req_f_raw;
logic [1:0] iccm_rd_ecc_double_err; // This fetch has an iccm double error.
logic [1:0] iccm_rd_ecc_double_err; // This fetch has an iccm double error.
logic ifu_async_error_start;
logic ifu_async_error_start;
assign ifu_fetch_data_f[31:0] = ic_data_f[31:0];
assign ifu_fetch_data_f[31:0] = ic_data_f[31:0];
assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0];
assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0];
assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1];
assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1];
logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage
logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage
logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage
logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage
logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage
logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage
logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus.
logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus.
logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM.
logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM.
// aligner
// aligner
el2_ifu_aln_ctl #(.pt(pt)) aln (
el2_ifu_aln_ctl #(.pt(pt)) aln (.*);
// icache
// icache
el2_ifu_mem_ctl #(.pt(pt)) mem_ctl
el2_ifu_mem_ctl #(
) mem_ctl (
// Performance debug info
`ifdef DUMP_BTB_ON
// Flattened copies of the exu_mp_pkt fields and of exu_mp_index, declared only for the DUMP_BTB_ON debug dump
logic exu_mp_valid; // conditional branch mispredict
logic exu_mp_way; // way field of the mispredict packet
logic exu_mp_ataken; // direction is actual taken
logic exu_mp_boffset; // branch offset
logic exu_mp_pc4; // branch is a 4B inst
logic exu_mp_call; // branch is a call inst
logic exu_mp_ret; // branch is a ret inst
logic exu_mp_ja; // branch is a jump always
logic [ 1:0] exu_mp_hist; // new history
logic [ 11:0] exu_mp_tgt; // target offset
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict
assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken
assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
assign exu_mp_way = exu_mp_pkt.way; // way field of the mispredict packet
assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0]; // target offset
assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f;
`define DEC top.rvtop.swerv.dec
`define EXU top.rvtop.swerv.exu
el2_btb_addr_hash f2hash (
.pc (ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]),
logic [31:0] mppc_ns, mppc;
logic exu_flush_final_d1;
assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d;
assign mppc_ns[0] = 1'b0;
rvdff #(33) junk_ff (
.clk (active_clk),
.din ({mppc_ns[31:0], exu_flush_final}),
.dout({mppc[31:0], exu_flush_final_d1})
logic tmp_bnk;
assign tmp_bnk = bpred.bp.btb_sel_f[1];
always @(negedge clk) begin
if (`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin
$display("BTB_CONFIG: %d", pt.BTB_SIZE);
$display("BHT_CONFIG: %d gshare: 1", pt.BHT_SIZE);
$display("BHT_CONFIG: %d gshare: 0", pt.BHT_SIZE);
$display("RS_CONFIG: %d", pt.RET_STACK_SIZE);
if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken))
"%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h",
`DEC.tlu.mcyclel[31:0] + 32'ha,
exu_flush_path_final[31:1], 1'b0
// NOTE(review): ifu_bp_valid_f is declared [1:0], so iterations with i >= 2 index outside the vector - confirm the intended loop bound
for (int i = 0; i < 8; i++) begin
if (ifu_bp_valid_f[i] & ifc_fetch_req_f)
"%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h",
`DEC.tlu.mcyclel[31:0] + 32'ha,
ifu_bp_btb_target_f[31:1], 1'b0
if (dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error))
"%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h",
`DEC.tlu.mcyclel[31:0] + 32'ha,
if (dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)
"%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h",
`DEC.tlu.mcyclel[31:0] + 32'ha,
exu_flush_path_final[31:1], 1'b0
end // always @ (negedge clk)
function [1:0] encode4_2;
input [3:0] in;
// Performance debug info
encode4_2[1] = in[3] | in[2];
encode4_2[0] = in[3] | in[1];
`ifdef DUMP_BTB_ON
// Flattened copies of the exu_mp_pkt fields and of exu_mp_index, declared only for the DUMP_BTB_ON debug dump
logic exu_mp_valid; // conditional branch mispredict
logic exu_mp_way; // way field of the mispredict packet
logic exu_mp_ataken; // direction is actual taken
logic exu_mp_boffset; // branch offset
logic exu_mp_pc4; // branch is a 4B inst
logic exu_mp_call; // branch is a call inst
logic exu_mp_ret; // branch is a ret inst
logic exu_mp_ja; // branch is a jump always
logic [1:0] exu_mp_hist; // new history
logic [11:0] exu_mp_tgt; // target offset
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict
assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken
assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
assign exu_mp_way = exu_mp_pkt.way; // way field of the mispredict packet
assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset
assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f;
`define DEC top.rvtop.swerv.dec
`define EXU top.rvtop.swerv.exu
el2_btb_addr_hash f2hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
logic [31:0] mppc_ns, mppc;
logic exu_flush_final_d1;
assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d;
assign mppc_ns[0] = 1'b0;
rvdff #(33) junk_ff (.*, .clk(active_clk), .din({mppc_ns[31:0], exu_flush_final}), .dout({mppc[31:0], exu_flush_final_d1}));
logic tmp_bnk;
assign tmp_bnk = bpred.bp.btb_sel_f[1];
always @(negedge clk) begin
if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin
$display("BTB_CONFIG: %d",pt.BTB_SIZE);
$display("BHT_CONFIG: %d gshare: 1",pt.BHT_SIZE);
$display("BHT_CONFIG: %d gshare: 0",pt.BHT_SIZE);
$display("RS_CONFIG: %d", pt.RET_STACK_SIZE);
if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken))
$display("%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], 1'b0, exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], exu_mp_valid, bpred.bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way);
// NOTE(review): ifu_bp_valid_f is declared [1:0], so iterations with i >= 2 index outside the vector - confirm the intended loop bound
for(int i = 0; i < 8; i++) begin
if(ifu_bp_valid_f[i] & ifc_fetch_req_f)
$display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],bpred.bp.btb_sel_f[1], bpred.bp.btb_rd_call_f, bpred.bp.btb_rd_ret_f, ifu_bp_hist1_f[tmp_bnk], ifu_bp_hist0_f[tmp_bnk], bpred.bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f[31:1], 1'b0}, bpred.bp.fghr[pt.BHT_GHR_SIZE-1:0], bpred.bp.bht_rd_addr_f, ifu_bp_way_f[tmp_bnk]);
if(dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error))
$display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bpred.bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO],{dec_tlu_br0_r_pkt.middle}, dec_tlu_br0_r_pkt.hist, dec_tlu_br0_r_pkt.way);
if(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)
$display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],1'b0, dec_tlu_br0_r_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_r_pkt.way);
end // always @ (negedge clk)
function [1:0] encode4_2;
input [3:0] in;
encode4_2[1] = in[3] | in[2];
encode4_2[0] = in[3] | in[1];
endmodule // el2_ifu
endmodule // el2_ifu
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -18,78 +18,77 @@
// purpose of this file is to convert 16b RISCV compressed instruction into 32b equivalent
// purpose of this file is to convert 16b RISCV compressed instruction into 32b equivalent
module el2_ifu_compress_ctl
module el2_ifu_compress_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic [15:0] din, // 16-bit compressed instruction
input logic [15:0] din, // 16-bit compressed instruction
output logic [31:0] dout // 32-bit uncompressed instruction
output logic [31:0] dout // 32-bit uncompressed instruction
logic legal;
logic legal;
logic [15:0] i;
logic [15:0] i;
logic [31:0] o,l1,l2,l3;
logic [31:0] o, l1, l2, l3;
assign i[15:0] = din[15:0];
assign i[15:0] = din[15:0];
logic [4:0] rs2d,rdd,rdpd,rs2pd;
logic [4:0] rs2d, rdd, rdpd, rs2pd;
logic rdrd;
logic rdrd;
logic rdrs1;
logic rdrs1;
logic rs2rs2;
logic rs2rs2;
logic rdprd;
logic rdprd;
logic rdprs1;
logic rdprs1;
logic rs2prs2;
logic rs2prs2;
logic rs2prd;
logic rs2prd;
logic uimm9_2;
logic uimm9_2;
logic ulwimm6_2;
logic ulwimm6_2;
logic ulwspimm7_2;
logic ulwspimm7_2;
logic rdeq2;
logic rdeq2;
logic rdeq1;
logic rdeq1;
logic rs1eq2;
logic rs1eq2;
logic sbroffset8_1;
logic sbroffset8_1;
logic simm9_4;
logic simm9_4;
logic simm5_0;
logic simm5_0;
logic sjaloffset11_1;
logic sjaloffset11_1;
logic sluimm17_12;
logic sluimm17_12;
logic uimm5_0;
logic uimm5_0;
logic uswimm6_2;
logic uswimm6_2;
logic uswspimm7_2;
logic uswspimm7_2;
// form the opcodes
// form the opcodes
// formats
// formats
// c.add rd 11:7 rs2 6:2
// c.add rd 11:7 rs2 6:2
// c.and rdp 9:7 rs2p 4:2
// c.and rdp 9:7 rs2p 4:2
// add rs2 24:20 rs1 19:15 rd 11:7
// add rs2 24:20 rs1 19:15 rd 11:7
// Full 5-bit register specifiers, taken directly from instruction bits 6:2 and 11:7
assign rs2d[4:0] = i[6:2];
assign rs2d[4:0] = i[6:2];
assign rdd[4:0] = i[11:7];
assign rdd[4:0] = i[11:7];
// 3-bit register specifiers prefixed with 2'b01, i.e. register numbers 8 through 15
assign rdpd[4:0] = {2'b01, i[9:7]};
assign rdpd[4:0] = {2'b01, i[9:7]};
assign rs2pd[4:0] = {2'b01, i[4:2]};
assign rs2pd[4:0] = {2'b01, i[4:2]};
// merge in rd, rs1, rs2
// merge in rd, rs1, rs2
// rd
// rd
assign l1[6:0] = o[6:0];
assign l1[6:0] = o[6:0];
assign l1[11:7] = o[11:7] |
assign l1[11:7] = o[11:7] |
({5{rdrd}} & rdd[4:0]) |
({5{rdrd}} & rdd[4:0]) |
({5{rdprd}} & rdpd[4:0]) |
({5{rdprd}} & rdpd[4:0]) |
({5{rs2prd}} & rs2pd[4:0]) |
({5{rs2prd}} & rs2pd[4:0]) |
@ -97,56 +96,54 @@ import el2_pkg::*;
({5{rdeq2}} & 5'd2);
({5{rdeq2}} & 5'd2);
// rs1
// rs1
assign l1[14:12] = o[14:12];
assign l1[14:12] = o[14:12];
assign l1[19:15] = o[19:15] |
assign l1[19:15] = o[19:15] |
({5{rdrs1}} & rdd[4:0]) |
({5{rdrs1}} & rdd[4:0]) |
({5{rdprs1}} & rdpd[4:0]) |
({5{rdprs1}} & rdpd[4:0]) |
({5{rs1eq2}} & 5'd2);
({5{rs1eq2}} & 5'd2);
// rs2
// rs2
assign l1[24:20] = o[24:20] |
assign l1[24:20] = o[24:20] | ({5{rs2rs2}} & rs2d[4:0]) | ({5{rs2prs2}} & rs2pd[4:0]);
({5{rs2rs2}} & rs2d[4:0]) |
({5{rs2prs2}} & rs2pd[4:0]);
assign l1[31:25] = o[31:25];
assign l1[31:25] = o[31:25];
logic [5:0] simm5d;
logic [ 5:0] simm5d;
logic [9:2] uimm9d;
logic [ 9:2] uimm9d;
logic [9:4] simm9d;
logic [ 9:4] simm9d;
logic [6:2] ulwimm6d;
logic [ 6:2] ulwimm6d;
logic [7:2] ulwspimm7d;
logic [ 7:2] ulwspimm7d;
logic [5:0] uimm5d;
logic [ 5:0] uimm5d;
logic [20:1] sjald;
logic [ 20:1] sjald;
logic [31:12] sluimmd;
logic [31:12] sluimmd;
// merge in immediates + jal offset
// merge in immediates + jal offset
// Gather the immediate bits that each compressed format scatters across the 16-bit instruction.
// Each vector's declared range names the immediate bit positions it supplies
// (e.g. uimm9d[9:2] is later padded with two low zero bits when merged into l2).
assign simm5d[5:0] = { i[12], i[6:2] };
assign simm5d[5:0] = {i[12], i[6:2]};
assign uimm9d[9:2] = { i[10:7], i[12:11], i[5], i[6] };
assign uimm9d[9:2] = {i[10:7], i[12:11], i[5], i[6]};
assign simm9d[9:4] = { i[12], i[4:3], i[5], i[2], i[6] };
assign simm9d[9:4] = {i[12], i[4:3], i[5], i[2], i[6]};
assign ulwimm6d[6:2] = { i[5], i[12:10], i[6] };
assign ulwimm6d[6:2] = {i[5], i[12:10], i[6]};
assign ulwspimm7d[7:2] = { i[3:2], i[12], i[6:4] };
assign ulwspimm7d[7:2] = {i[3:2], i[12], i[6:4]};
assign uimm5d[5:0] = { i[12], i[6:2] };
assign uimm5d[5:0] = {i[12], i[6:2]};
assign sjald[11:1] = { i[12], i[8], i[10:9], i[6], i[7], i[2], i[11], i[5:4], i[3] };
assign sjald[11:1] = {i[12], i[8], i[10:9], i[6], i[7], i[2], i[11], i[5:4], i[3]};
// Upper jump-offset bits are nine copies of i[12]
assign sjald[20:12] = {9{i[12]}};
assign sjald[20:12] = {9{i[12]}};
// Upper 15 bits are copies of i[12]; the low 5 bits come from i[6:2]
assign sluimmd[31:12] = { {15{i[12]}}, i[6:2] };
assign sluimmd[31:12] = {{15{i[12]}}, i[6:2]};
assign l2[31:20] = ( l1[31:20] ) |
assign l2[31:20] = ( l1[31:20] ) |
( {12{simm5_0}} & {{7{simm5d[5]}},simm5d[4:0]} ) |
( {12{simm5_0}} & {{7{simm5d[5]}},simm5d[4:0]} ) |
( {12{uimm9_2}} & {2'b0,uimm9d[9:2],2'b0} ) |
( {12{uimm9_2}} & {2'b0,uimm9d[9:2],2'b0} ) |
( {12{simm9_4}} & {{3{simm9d[9]}},simm9d[8:4],4'b0} ) |
( {12{simm9_4}} & {{3{simm9d[9]}},simm9d[8:4],4'b0} ) |
@ -158,197 +155,197 @@ import el2_pkg::*;
assign l2[19:12] = ( l1[19:12] ) |
assign l2[19:12] = ( l1[19:12] ) |
( {8{sjaloffset11_1}} & sjald[19:12] ) |
( {8{sjaloffset11_1}} & sjald[19:12] ) |
( {8{sluimm17_12}} & sluimmd[19:12] );
( {8{sluimm17_12}} & sluimmd[19:12] );
assign l2[11:0] = l1[11:0];
assign l2[11:0] = l1[11:0];
// merge in branch offset and store immediates
// merge in branch offset and store immediates
logic [8:1] sbr8d;
logic [8:1] sbr8d;
logic [6:2] uswimm6d;
logic [6:2] uswimm6d;
logic [7:2] uswspimm7d;
logic [7:2] uswspimm7d;
assign sbr8d[8:1] = { i[12], i[6], i[5], i[2], i[11], i[10], i[4], i[3] };
assign sbr8d[8:1] = {i[12], i[6], i[5], i[2], i[11], i[10], i[4], i[3]};
assign uswimm6d[6:2] = { i[5], i[12:10], i[6] };
assign uswimm6d[6:2] = {i[5], i[12:10], i[6]};
assign uswspimm7d[7:2] = { i[8:7], i[12:9] };
assign uswspimm7d[7:2] = {i[8:7], i[12:9]};
assign l3[31:25] = ( l2[31:25] ) |
assign l3[31:25] = ( l2[31:25] ) |
( {7{sbroffset8_1}} & { {4{sbr8d[8]}},sbr8d[7:5] } ) |
( {7{sbroffset8_1}} & { {4{sbr8d[8]}},sbr8d[7:5] } ) |
( {7{uswimm6_2}} & { 5'b0, uswimm6d[6:5] } ) |
( {7{uswimm6_2}} & { 5'b0, uswimm6d[6:5] } ) |
( {7{uswspimm7_2}} & { 4'b0, uswspimm7d[7:5] } );
( {7{uswspimm7_2}} & { 4'b0, uswspimm7d[7:5] } );
assign l3[24:12] = l2[24:12];
assign l3[24:12] = l2[24:12];
assign l3[11:7] = ( l2[11:7] ) |
assign l3[11:7] = ( l2[11:7] ) |
( {5{sbroffset8_1}} & { sbr8d[4:1], sbr8d[8] } ) |
( {5{sbroffset8_1}} & { sbr8d[4:1], sbr8d[8] } ) |
( {5{uswimm6_2}} & { uswimm6d[4:2], 2'b0 } ) |
( {5{uswimm6_2}} & { uswimm6d[4:2], 2'b0 } ) |
( {5{uswspimm7_2}} & { uswspimm7d[4:2], 2'b0 } );
( {5{uswspimm7_2}} & { uswspimm7d[4:2], 2'b0 } );
assign l3[6:0] = l2[6:0];
assign l3[6:0] = l2[6:0];
// Final expanded instruction; forced to all zeros when the 16-bit encoding is not legal
assign dout[31:0] = l3[31:0] & {32{legal}};
assign dout[31:0] = l3[31:0] & {32{legal}};
// file "cdecode" is human readable file that has all of the compressed instruction decodes defined and is part of git repo
// file "cdecode" is human readable file that has all of the compressed instruction decodes defined and is part of git repo
// modify this file as needed
// modify this file as needed
// to generate all the equations below from "cdecode" except legal equation:
// to generate all the equations below from "cdecode" except legal equation:
// 1) coredecode -in cdecode > cdecode.e
// 1) coredecode -in cdecode > cdecode.e
// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations
// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations
// to generate the legal (16b compressed instruction is legal) equation below:
// to generate the legal (16b compressed instruction is legal) equation below:
// 1) coredecode -in cdecode -legal > clegal.e
// 1) coredecode -in cdecode -legal > clegal.e
// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation
// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation
// espresso decodes
// espresso decodes
assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | (
assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | (
!i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9]
!i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9]
&i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14]
&i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14]
&i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15]
&i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15]
assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14]
assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14]
&i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7]
&i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7]
&i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4]
&i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4]
&i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | (
&i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | (
!i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]);
!i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]);
assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | (
assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | (
i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]);
i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]);
assign rdprd = (i[15]&!i[14]&!i[13]&i[0]);
assign rdprd = (i[15] & !i[14] & !i[13] & i[0]);
assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]);
assign rdprs1 = (i[15] & !i[13] & i[0]) | (i[15] & i[14] & i[0]) | (i[14] & !i[1] & !i[0]);
assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]);
assign rs2prs2 = (i[15] & !i[14] & !i[13] & i[11] & i[10] & i[0]) | (i[15] & !i[1] & !i[0]);
assign rs2prd = (!i[15]&!i[1]&!i[0]);
assign rs2prd = (!i[15] & !i[1] & !i[0]);
assign uimm9_2 = (!i[14]&!i[1]&!i[0]);
assign uimm9_2 = (!i[14] & !i[1] & !i[0]);
assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]);
assign ulwimm6_2 = (!i[15] & i[14] & !i[1] & !i[0]);
assign ulwspimm7_2 = (!i[15]&i[14]&i[1]);
assign ulwspimm7_2 = (!i[15] & i[14] & i[1]);
assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
assign rdeq2 = (!i[15] & i[14] & i[13] & !i[11] & !i[10] & !i[9] & i[8] & !i[7]);
assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
&!i[2]&i[1]) | (!i[15]&!i[14]&i[13]);
&!i[2]&i[1]) | (!i[15]&!i[14]&i[13]);
assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14]
assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14]
&i[1]) | (!i[14]&!i[1]&!i[0]);
&i[1]) | (!i[14]&!i[1]&!i[0]);
assign sbroffset8_1 = (i[15]&i[14]&i[0]);
assign sbroffset8_1 = (i[15] & i[14] & i[0]);
assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]);
assign simm9_4 = (!i[15] & i[14] & i[13] & !i[11] & !i[10] & !i[9] & i[8] & !i[7]);
assign simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]);
assign simm5_0 = (!i[14] & !i[13] & i[11] & !i[10] & i[0]) | (!i[15] & !i[13] & i[0]);
assign sjaloffset11_1 = (!i[14]&i[13]);
assign sjaloffset11_1 = (!i[14] & i[13]);
assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | (
assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | (
!i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14]
!i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14]
assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]);
assign uimm5_0 = (i[15] & !i[14] & !i[13] & !i[11] & i[0]) | (!i[15] & !i[14] & i[1]);
assign uswimm6_2 = (i[15]&!i[1]&!i[0]);
assign uswimm6_2 = (i[15] & !i[1] & !i[0]);
assign uswspimm7_2 = (i[15]&i[14]&i[1]);
assign uswspimm7_2 = (i[15] & i[14] & i[1]);
assign o[31] = 1'b0;
assign o[31] = 1'b0;
assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14]
assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14]
assign o[29] = 1'b0;
assign o[29] = 1'b0;
assign o[28] = 1'b0;
assign o[28] = 1'b0;
assign o[27] = 1'b0;
assign o[27] = 1'b0;
assign o[26] = 1'b0;
assign o[26] = 1'b0;
assign o[25] = 1'b0;
assign o[25] = 1'b0;
assign o[24] = 1'b0;
assign o[24] = 1'b0;
assign o[23] = 1'b0;
assign o[23] = 1'b0;
assign o[22] = 1'b0;
assign o[22] = 1'b0;
assign o[21] = 1'b0;
assign o[21] = 1'b0;
assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]
assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4]
assign o[19] = 1'b0;
assign o[19] = 1'b0;
assign o[18] = 1'b0;
assign o[18] = 1'b0;
assign o[17] = 1'b0;
assign o[17] = 1'b0;
assign o[16] = 1'b0;
assign o[16] = 1'b0;
assign o[15] = 1'b0;
assign o[15] = 1'b0;
assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]
assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]
&i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5]
&i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5]
assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13]
assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13]
&i[11]&i[6]&i[0]) | (i[14]&!i[0]);
&i[11]&i[6]&i[0]) | (i[14]&!i[0]);
assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11]
assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11]
&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | (
&i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | (
assign o[11] = 1'b0;
assign o[11] = 1'b0;
assign o[10] = 1'b0;
assign o[10] = 1'b0;
assign o[9] = 1'b0;
assign o[9] = 1'b0;
assign o[8] = 1'b0;
assign o[8] = 1'b0;
assign o[7] = 1'b0;
assign o[7] = 1'b0;
assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | (
assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | (
assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | (
assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | (
i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | (
i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | (
assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14]
assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14]
&!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5]
&!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5]
&!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3]
&!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3]
&!i[0]) | (!i[14]&i[2]&!i[0]);
&!i[0]) | (!i[14]&i[2]&!i[0]);
assign o[3] = (!i[14]&i[13]);
assign o[3] = (!i[14] & i[13]);
assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3]
@ -356,13 +353,13 @@ assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]
&!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13]
&!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13]
&i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]);
&i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]);
// 32b instruction has lower two bits 2'b11
// 32b instruction has lower two bits 2'b11
assign o[1] = 1'b1;
assign o[1] = 1'b1;
assign o[0] = 1'b1;
assign o[0] = 1'b1;
assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | (
assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | (
!i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | (
!i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | (
!i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | (
!i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | (
i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12]
i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12]
File diff suppressed because it is too large
Load Diff
@ -20,450 +20,489 @@
module el2_ifu_iccm_mem
module el2_ifu_iccm_mem
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic iccm_wren, // ICCM write enable
input logic iccm_wren, // ICCM write enable
input logic iccm_rden, // ICCM read enable
input logic iccm_rden, // ICCM read enable
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address
input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle
input logic iccm_correction_state, // ICCM under a correction - This is needed to guard replacements when hit
input logic iccm_correction_state, // ICCM under a correction - This is needed to guard replacements when hit
input logic [2:0] iccm_wr_size, // ICCM write size
input logic [2:0] iccm_wr_size, // ICCM write size
input logic [77:0] iccm_wr_data, // ICCM write data
input logic [77:0] iccm_wr_data, // ICCM write data
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, // External packet
input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, // External packet
output logic [63:0] iccm_rd_data, // ICCM read data
output logic [63:0] iccm_rd_data, // ICCM read data
output logic [77:0] iccm_rd_data_ecc, // ICCM read ecc
output logic [77:0] iccm_rd_data_ecc, // ICCM read ecc
input logic scan_mode // Scan mode control
input logic scan_mode // Scan mode control
logic [pt.ICCM_NUM_BANKS-1:0] wren_bank;
logic [pt.ICCM_NUM_BANKS-1:0] wren_bank;
logic [pt.ICCM_NUM_BANKS-1:0] rden_bank;
logic [pt.ICCM_NUM_BANKS-1:0] rden_bank;
logic [pt.ICCM_NUM_BANKS-1:0] iccm_clken;
logic [pt.ICCM_NUM_BANKS-1:0] iccm_clken;
logic [pt.ICCM_NUM_BANKS-1:0] [pt.ICCM_BITS-1:pt.ICCM_BANK_INDEX_LO] addr_bank;
logic [pt.ICCM_NUM_BANKS-1:0][pt.ICCM_BITS-1:pt.ICCM_BANK_INDEX_LO] addr_bank;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_dout, iccm_bank_dout_fn;
logic [pt.ICCM_NUM_BANKS-1:0][38:0] iccm_bank_dout, iccm_bank_dout_fn;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_wr_data;
logic [pt.ICCM_NUM_BANKS-1:0][ 38:0] iccm_bank_wr_data;
logic [pt.ICCM_BITS-1:1] addr_bank_inc;
logic [ pt.ICCM_BITS-1:1] addr_bank_inc;
logic [pt.ICCM_BANK_HI : 2] iccm_rd_addr_hi_q;
logic [ pt.ICCM_BANK_HI : 2] iccm_rd_addr_hi_q;
logic [pt.ICCM_BANK_HI : 1] iccm_rd_addr_lo_q;
logic [ pt.ICCM_BANK_HI : 1] iccm_rd_addr_lo_q;
logic [63:0] iccm_rd_data_pre;
logic [ 63:0] iccm_rd_data_pre;
logic [63:0] iccm_data;
logic [ 63:0] iccm_data;
logic [1:0] addr_incr;
logic [ 1:0] addr_incr;
logic [pt.ICCM_NUM_BANKS-1:0] [38:0] iccm_bank_wr_data_vec;
logic [pt.ICCM_NUM_BANKS-1:0][ 38:0] iccm_bank_wr_data_vec;
// logic to handle hard persistent faults
// logic to handle hard persistent faults
logic [1:0] [pt.ICCM_BITS-1:2] redundant_address;
logic [ 1:0][pt.ICCM_BITS-1:2] redundant_address;
logic [1:0] [38:0] redundant_data;
logic [ 1:0][ 38:0] redundant_data;
logic [1:0] redundant_valid;
logic [ 1:0] redundant_valid;
logic [pt.ICCM_NUM_BANKS-1:0] sel_red1, sel_red0, sel_red1_q, sel_red0_q;
logic [pt.ICCM_NUM_BANKS-1:0] sel_red1, sel_red0, sel_red1_q, sel_red0_q;
logic [38:0] redundant_data0_in, redundant_data1_in;
logic [38:0] redundant_data0_in, redundant_data1_in;
logic redundant_lru, redundant_lru_in, redundant_lru_en;
logic redundant_lru, redundant_lru_in, redundant_lru_en;
logic redundant_data0_en;
logic redundant_data0_en;
logic redundant_data1_en;
logic redundant_data1_en;
logic r0_addr_en, r1_addr_en;
logic r0_addr_en, r1_addr_en;
// Testing persistent flip
// Testing persistent flip
// logic [3:0] not_iccm_bank_dout;
// logic [3:0] not_iccm_bank_dout;
// logic [15:3] ecc_insert_flip_in, ecc_insert_flip;
// logic [15:3] ecc_insert_flip_in, ecc_insert_flip;
// logic flip_en, flip_match, flip_match_q;
// logic flip_en, flip_match, flip_match_q;
// assign flip_in = (iccm_rw_addr[3:2] != 2'b00); // dont flip when bank0 - this is to make some progress in DMA streaming cases
// assign flip_in = (iccm_rw_addr[3:2] != 2'b00); // dont flip when bank0 - this is to make some progress in DMA streaming cases
// assign flip_en = iccm_rden;
// assign flip_en = iccm_rden;
// rvdffs #(1) flipmatch (.*,
// rvdffs #(1) flipmatch (.*,
// .clk(clk),
// .clk(clk),
// .din(flip_in),
// .din(flip_in),
// .en(flip_en),
// .en(flip_en),
// .dout(flip_match_q));
// .dout(flip_match_q));
// end of testing flip
// end of testing flip
assign addr_incr[1:0] = (iccm_wr_size[1:0] == 2'b11) ? 2'b10: 2'b01;
assign addr_incr[1:0] = (iccm_wr_size[1:0] == 2'b11) ? 2'b10 : 2'b01;
assign addr_bank_inc[pt.ICCM_BITS-1 : 1] = iccm_rw_addr[pt.ICCM_BITS-1 : 1] + addr_incr[1:0];
assign addr_bank_inc[pt.ICCM_BITS-1 : 1] = iccm_rw_addr[pt.ICCM_BITS-1 : 1] + addr_incr[1:0];
for (genvar i=0; i<pt.ICCM_NUM_BANKS/2; i++) begin: mem_bank_data
for (genvar i = 0; i < pt.ICCM_NUM_BANKS / 2; i++) begin : mem_bank_data
assign iccm_bank_wr_data_vec[(2*i)] = iccm_wr_data[38:0];
assign iccm_bank_wr_data_vec[(2*i)] = iccm_wr_data[38:0];
assign iccm_bank_wr_data_vec[(2*i)+1] = iccm_wr_data[77:39];
assign iccm_bank_wr_data_vec[(2*i)+1] = iccm_wr_data[77:39];
for (genvar i=0; i<pt.ICCM_NUM_BANKS; i++) begin: mem_bank
for (genvar i = 0; i < pt.ICCM_NUM_BANKS; i++) begin : mem_bank
assign wren_bank[i] = iccm_wren & ((iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign wren_bank[i] = iccm_wren & ((iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign iccm_bank_wr_data[i] = iccm_bank_wr_data_vec[i];
assign iccm_bank_wr_data[i] = iccm_bank_wr_data_vec[i];
assign rden_bank[i] = iccm_rden & ( (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign rden_bank[i] = iccm_rden & ( (iccm_rw_addr[pt.ICCM_BANK_HI:2] == i) | (addr_bank_inc[pt.ICCM_BANK_HI:2] == i));
assign iccm_clken[i] = wren_bank[i] | rden_bank[i] | clk_override;
assign iccm_clken[i] = wren_bank[i] | rden_bank[i] | clk_override;
assign addr_bank[i][pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] = wren_bank[i] ? iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
assign addr_bank[i][pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] = wren_bank[i] ? iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
((addr_bank_inc[pt.ICCM_BANK_HI:2] == i) ?
((addr_bank_inc[pt.ICCM_BANK_HI:2] == i) ?
addr_bank_inc[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
addr_bank_inc[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO] :
iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO]);
iccm_rw_addr[pt.ICCM_BITS-1 : pt.ICCM_BANK_INDEX_LO]);
el2_ram #(.depth(1<<pt.ICCM_INDEX_BITS), .width(39)) iccm_bank (
el2_ram #(
// Primary ports
.depth(1 << pt.ICCM_INDEX_BITS),
) iccm_bank (
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
if (pt.ICCM_INDEX_BITS == 6 ) begin : iccm
if (pt.ICCM_INDEX_BITS == 6) begin : iccm
ram_64x39 iccm_bank (
ram_64x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 7 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 7 ) begin : iccm
ram_128x39 iccm_bank (
ram_128x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 8 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 8 ) begin : iccm
ram_256x39 iccm_bank (
ram_256x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 9 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 9 ) begin : iccm
ram_512x39 iccm_bank (
ram_512x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 10 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 10 ) begin : iccm
ram_1024x39 iccm_bank (
ram_1024x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 11 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 11 ) begin : iccm
ram_2048x39 iccm_bank (
ram_2048x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 12 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 12 ) begin : iccm
ram_4096x39 iccm_bank (
ram_4096x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 13 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 13 ) begin : iccm
ram_8192x39 iccm_bank (
ram_8192x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else if (pt.ICCM_INDEX_BITS == 14 ) begin : iccm
else if (pt.ICCM_INDEX_BITS == 14 ) begin : iccm
ram_16384x39 iccm_bank (
ram_16384x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
else begin : iccm
else begin : iccm
ram_32768x39 iccm_bank (
ram_32768x39 iccm_bank (
// Primary ports
// Primary ports
.ROP ( ),
// These are used by SoC
// These are used by SoC
.SD(iccm_ext_in_pkt[i].SD) ,
end // block: iccm
end // block: iccm
// Match the redundant rows.
// Bank i is overridden by redundant entry N when that entry is valid and either the
// access address (iccm_rw_addr) or the incremented address (addr_bank_inc) equals the
// stored word address and maps to bank i.
// The bank index uses [pt.ICCM_BANK_HI:2], the same slice wren_bank/rden_bank and the
// read-data mux use, so the match is identical for 4 banks and stays correct for other
// values of ICCM_NUM_BANKS (the previous hard-coded [3:2] only fit the 4-bank case).
assign sel_red1[i] = redundant_valid[1] &
    (((iccm_rw_addr[pt.ICCM_BITS-1:2]  == redundant_address[1][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[pt.ICCM_BANK_HI:2]  == i)) |
     ((addr_bank_inc[pt.ICCM_BITS-1:2] == redundant_address[1][pt.ICCM_BITS-1:2]) & (addr_bank_inc[pt.ICCM_BANK_HI:2] == i)));

assign sel_red0[i] = redundant_valid[0] &
    (((iccm_rw_addr[pt.ICCM_BITS-1:2]  == redundant_address[0][pt.ICCM_BITS-1:2]) & (iccm_rw_addr[pt.ICCM_BANK_HI:2]  == i)) |
     ((addr_bank_inc[pt.ICCM_BITS-1:2] == redundant_address[0][pt.ICCM_BITS-1:2]) & (addr_bank_inc[pt.ICCM_BANK_HI:2] == i)));
rvdff #(1) selred0 (.*,
rvdff #(1) selred0 (
.clk (active_clk),
.din (sel_red0[i]),
rvdff #(1) selred1 (.*,
rvdff #(1) selred1 (
.clk (active_clk),
.din (sel_red1[i]),
// muxing out the memory data with the redundant data if the address matches
// muxing out the memory data with the redundant data if the address matches
assign iccm_bank_dout_fn[i][38:0] = ({39{sel_red1_q[i]}} & redundant_data[1][38:0]) |
assign iccm_bank_dout_fn[i][38:0] = ({39{sel_red1_q[i]}} & redundant_data[1][38:0]) |
({39{sel_red0_q[i]}} & redundant_data[0][38:0]) |
({39{sel_red0_q[i]}} & redundant_data[0][38:0]) |
({39{~sel_red0_q[i] & ~sel_red1_q[i]}} & iccm_bank_dout[i][38:0]);
({39{~sel_red0_q[i] & ~sel_red1_q[i]}} & iccm_bank_dout[i][38:0]);
end : mem_bank
end : mem_bank
// This section does the redundancy for tolerating single bit errors
// This section does the redundancy for tolerating single bit errors
// 2x 39 bit data values with address[hi:2] and a valid bit is needed to CAM and sub out the reads/writes to the particular locations
// 2x 39 bit data values with address[hi:2] and a valid bit is needed to CAM and sub out the reads/writes to the particular locations
// Also a LRU flop is kept to decide which of the redundant element to replace.
// Also a LRU flop is kept to decide which of the redundant element to replace.
assign r0_addr_en = ~redundant_lru & iccm_buf_correct_ecc;
assign r0_addr_en = ~redundant_lru & iccm_buf_correct_ecc;
assign r1_addr_en = redundant_lru & iccm_buf_correct_ecc;
assign r1_addr_en = redundant_lru & iccm_buf_correct_ecc;
assign redundant_lru_en = iccm_buf_correct_ecc | (((|sel_red0[pt.ICCM_NUM_BANKS-1:0]) | (|sel_red1[pt.ICCM_NUM_BANKS-1:0])) & iccm_rden & iccm_correction_state);
assign redundant_lru_en = iccm_buf_correct_ecc | (((|sel_red0[pt.ICCM_NUM_BANKS-1:0]) | (|sel_red1[pt.ICCM_NUM_BANKS-1:0])) & iccm_rden & iccm_correction_state);
assign redundant_lru_in = iccm_buf_correct_ecc ? ~redundant_lru : (|sel_red0[pt.ICCM_NUM_BANKS-1:0]) ? 1'b1 : 1'b0;
assign redundant_lru_in = iccm_buf_correct_ecc ? ~redundant_lru : (|sel_red0[pt.ICCM_NUM_BANKS-1:0]) ? 1'b1 : 1'b0;
rvdffs #() red_lru (.*, // LRU flop for the redundant replacements
rvdffs #() red_lru (
.*, // LRU flop for the redundant replacements
.clk (active_clk),
.en (redundant_lru_en),
.din (redundant_lru_in),
rvdffs #(pt.ICCM_BITS-2) r0_address (.*, // Redundant Row 0 address
rvdffs #(pt.ICCM_BITS - 2) r0_address (
.*, // Redundant Row 0 address
.clk (active_clk),
.en (r0_addr_en),
.din (iccm_rw_addr[pt.ICCM_BITS-1:2]),
// Redundant entry 1 address register: .din is the word address iccm_rw_addr[pt.ICCM_BITS-1:2],
// enabled by r1_addr_en (redundant_lru & iccm_buf_correct_ecc).
rvdffs #(pt.ICCM_BITS-2) r1_address (.*, // Redundant Row 1 address
rvdffs #(pt.ICCM_BITS - 2) r1_address (
.*, // Redundant Row 1 address
.clk (active_clk),
.en (r1_addr_en),
.din (iccm_rw_addr[pt.ICCM_BITS-1:2]),
rvdffs #(1) r0_valid (.*,
rvdffs #(1) r0_valid (
.clk(active_clk), // Redundant Row 0 Valid
.clk (active_clk), // Redundant Row 0 Valid
.en (r0_addr_en),
.din (1'b1),
rvdffs #(1) r1_valid (.*, // Redundant Row 1 Valid
rvdffs #(1) r1_valid (
.*, // Redundant Row 1 Valid
.clk (active_clk),
.en (r1_addr_en),
.din (1'b1),
// We will have to update the Redundant copies in addition to the memory on subsequent writes to this memory location.
// We will have to update the Redundant copies in addition to the memory on subsequent writes to this memory location.
// The data gets updated on : 1) correction cycle, 2) Future writes - this could be W writes from DMA ( match up till addr[2]) or DW writes ( match till address[3])
// The data gets updated on : 1) correction cycle, 2) Future writes - this could be W writes from DMA ( match up till addr[2]) or DW writes ( match till address[3])
// The data to pick also depends on the current address[2], size and the addr[2] stored in the address field of the redundant flop. Correction cycle is always W write and the data is splat on both legs, so choosing lower Word
// The data to pick also depends on the current address[2], size and the addr[2] stored in the address field of the redundant flop. Correction cycle is always W write and the data is splat on both legs, so choosing lower Word
assign redundant_data0_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[0][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[0][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[0] & iccm_wren) |
assign redundant_data0_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[0][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[0][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[0] & iccm_wren) |
(~redundant_lru & iccm_buf_correct_ecc);
(~redundant_lru & iccm_buf_correct_ecc);
assign redundant_data0_in[38:0] = (((iccm_rw_addr[2] == redundant_address[0][2]) & iccm_rw_addr[2]) | (redundant_address[0][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
assign redundant_data0_in[38:0] = (((iccm_rw_addr[2] == redundant_address[0][2]) & iccm_rw_addr[2]) | (redundant_address[0][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
// Redundant entry 0 data register: loads redundant_data0_in[38:0] when redundant_data0_en is set.
rvdffs #(39) r0_data (.*, // Redundant Row 0 data
rvdffs #(39) r0_data (
.*, // Redundant Row 0 data
.clk (active_clk),
.en (redundant_data0_en),
.din (redundant_data0_in[38:0]),
assign redundant_data1_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[1][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[1][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[1] & iccm_wren) |
assign redundant_data1_en = ((iccm_rw_addr[pt.ICCM_BITS-1:3] == redundant_address[1][pt.ICCM_BITS-1:3]) & ((iccm_rw_addr[2] == redundant_address[1][2]) | (iccm_wr_size[1:0] == 2'b11)) & redundant_valid[1] & iccm_wren) |
(redundant_lru & iccm_buf_correct_ecc);
(redundant_lru & iccm_buf_correct_ecc);
assign redundant_data1_in[38:0] = (((iccm_rw_addr[2] == redundant_address[1][2]) & iccm_rw_addr[2]) | (redundant_address[1][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
assign redundant_data1_in[38:0] = (((iccm_rw_addr[2] == redundant_address[1][2]) & iccm_rw_addr[2]) | (redundant_address[1][2] & (iccm_wr_size[1:0] == 2'b11))) ? iccm_wr_data[77:39] : iccm_wr_data[38:0];
rvdffs #(39) r1_data (.*, // Redundant Row 1 data
rvdffs #(39) r1_data (
.*, // Redundant Row 1 data
.clk (active_clk),
.en (redundant_data1_en),
.din (redundant_data1_in[38:0]),
// Register the bank-select bits of the access address (low word) and of the
// incremented address (high word). The registered copies index iccm_bank_dout_fn
// when the read data is assembled below.
// Bit 0 of the address is always 0, so the low-address register holds [pt.ICCM_BANK_HI:1].
rvdffs #(pt.ICCM_BANK_HI) rd_addr_lo_ff (
    .*,
    .clk (active_clk),
    .din (iccm_rw_addr[pt.ICCM_BANK_HI:1]),
    .dout(iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:1]),
    .en  (1'b1)
);

rvdffs #(pt.ICCM_BANK_BITS) rd_addr_hi_ff (
    .*,
    .clk (active_clk),
    .din (addr_bank_inc[pt.ICCM_BANK_HI:2]),
    .dout(iccm_rd_addr_hi_q[pt.ICCM_BANK_HI:2]),
    .en  (1'b1)
);
// Assemble the 64-bit read data from two banks: the bank selected by the registered
// incremented address supplies the upper word, the bank selected by the registered
// access address supplies the lower word (data bits [31:0] of each 39-bit entry).
assign iccm_rd_data_pre[63:0] = {
    iccm_bank_dout_fn[iccm_rd_addr_hi_q][31:0],
    iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][31:0]
};

// Shift right by 16 bits when address bit 1 was set (halfword-aligned access);
// the vacated upper bits are zero.
assign iccm_data[63:0] = 64'({16'b0, (iccm_rd_data_pre[63:0] >> (16 * iccm_rd_addr_lo_q[1]))});
assign iccm_rd_data[63:0] = {iccm_data[63:0]};

// Full 39-bit entries of the same two banks, unshifted, for the ECC path.
assign iccm_rd_data_ecc[77:0] = {
    iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0],
    iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0]
};
endmodule // el2_ifu_iccm_mem
endmodule // el2_ifu_iccm_mem
@ -21,118 +21,119 @@
module el2_ifu_ifc_ctl
module el2_ifu_ifc_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in.
input logic rst_l, // reset enable, from core pin
input logic rst_l, // reset enable, from core pin
input logic scan_mode, // scan
input logic scan_mode, // scan
input logic ic_hit_f, // Icache hit
input logic ic_hit_f, // Icache hit
input logic ifu_ic_mb_empty, // Miss buffer empty
input logic ifu_ic_mb_empty, // Miss buffer empty
input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer
input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer
input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers
input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers
input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush
input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush
input logic exu_flush_final, // Flush
input logic exu_flush_final, // Flush
input logic [31:1] exu_flush_path_final, // Flush path
input logic [31:1] exu_flush_path_final, // Flush path
input logic ifu_bp_hit_taken_f, // btb hit, select the target path
input logic ifu_bp_hit_taken_f, // btb hit, select the target path
input logic [31:1] ifu_bp_btb_target_f, // predicted target PC
input logic [31:1] ifu_bp_btb_target_f, // predicted target PC
input logic ic_dma_active, // IC DMA active, stop fetching
input logic ic_dma_active, // IC DMA active, stop fetching
input logic ic_write_stall, // IC is writing, stop fetching
input logic ic_write_stall, // IC is writing, stop fetching
input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access
input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access
input logic [31:0] dec_tlu_mrac_ff , // side_effect and cacheable for each region
input logic [31:0] dec_tlu_mrac_ff, // side_effect and cacheable for each region
output logic [31:1] ifc_fetch_addr_f, // fetch addr F
output logic [31:1] ifc_fetch_addr_f, // fetch addr F
output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF
output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF
output logic ifc_fetch_req_f, // fetch request valid F
output logic ifc_fetch_req_f, // fetch request valid F
output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall
output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall
output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage
output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage
output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage
output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage
output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage
output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage
output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus.
output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus.
output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM.
output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM.
output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed
output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed
logic [31:1] fetch_addr_bf;
logic [31:1] fetch_addr_bf;
logic [31:1] fetch_addr_next;
logic [31:1] fetch_addr_next;
logic [3:0] fb_write_f, fb_write_ns;
logic [3:0] fb_write_f, fb_write_ns;
logic fb_full_f_ns, fb_full_f;
logic fb_full_f_ns, fb_full_f;
logic fb_right, fb_right2, fb_left, wfm, idle;
logic fb_right, fb_right2, fb_left, wfm, idle;
logic sel_last_addr_bf, sel_next_addr_bf;
logic sel_last_addr_bf, sel_next_addr_bf;
logic miss_f, miss_a;
logic miss_f, miss_a;
logic flush_fb, dma_iccm_stall_any_f;
logic flush_fb, dma_iccm_stall_any_f;
logic mb_empty_mod, goto_idle, leave_idle;
logic mb_empty_mod, goto_idle, leave_idle;
logic fetch_bf_en;
logic fetch_bf_en;
logic line_wrap;
logic line_wrap;
logic fetch_addr_next_1;
logic fetch_addr_next_1;
// FSM assignment
// FSM assignment
// Fetch-control FSM state encoding. The values match the truth table used to
// derive next_state (fetch 01, stall 10, wfm 11, idle 00).
typedef enum logic [1:0] {
  IDLE  = 2'b00,
  FETCH = 2'b01,
  STALL = 2'b10,
  WFM   = 2'b11
} state_t;

state_t state;       // current FSM state
state_t next_state;  // FSM state to be registered next
logic dma_stall;
logic dma_stall;
assign dma_stall = ic_dma_active | dma_iccm_stall_any_f;
assign dma_stall = ic_dma_active | dma_iccm_stall_any_f;
// Fetch address mux
// Fetch address mux
// - flush
// - flush
// - Miss *or* flush during WFM (icache miss buffer is blocking)
// - Miss *or* flush during WFM (icache miss buffer is blocking)
// - Sequential
// - Sequential
if(pt.BTB_ENABLE==1) begin
if (pt.BTB_ENABLE == 1) begin
logic sel_btb_addr_bf;
logic sel_btb_addr_bf;
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f;
assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f;
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f;
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f;
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]})| // BTB target
({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]}) | // BTB target
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
end // if (pt.BTB_ENABLE=1)
end // if (pt.BTB_ENABLE=1)
else begin
else begin
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f);
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f;
assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f;
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path
// Sequential fetch address: advance the word address ifc_fetch_addr_f[31:2] by one.
// The increment is a 30-bit constant so the concatenation is exactly 31 bits wide,
// matching fetch_addr_next[31:1]; the carry out of bit 31 is dropped, as before.
assign fetch_addr_next[31:1] = {(ifc_fetch_addr_f[31:2] + 30'b1), fetch_addr_next_1};

// line_wrap is set when the increment toggles address bit pt.ICACHE_TAG_INDEX_LO.
assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]);

// On a line wrap bit 1 of the next address is cleared; otherwise it keeps the
// current value of bit 1.
assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1];
assign ifc_fetch_req_bf_raw = ~idle;
assign ifc_fetch_req_bf_raw = ~idle;
assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw &
assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw &
~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) &
~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) &
~dma_stall &
~dma_stall &
@ -140,107 +141,125 @@ end
assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f;
assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f;
assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final;
assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final;
assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a;
assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a;
// Halt flushes and takes us to IDLE
// Halt flushes and takes us to IDLE
assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb;
assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb;
// If we're in IDLE, and we get a flush, goto FETCH
// If we're in IDLE, and we get a flush, goto FETCH
assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle;
assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle;
//.i 7
//.i 7
//.o 2
//.o 2
//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle leave_idle
//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle leave_idle
//.ob next_state[1] next_state[0]
//.ob next_state[1] next_state[0]
//.type fr
//.type fr
//# fetch 01, stall 10, wfm 11, idle 00
//# fetch 01, stall 10, wfm 11, idle 00
//-- 1---- 01
//-- 1---- 01
//-- 0--1- 00
//-- 0--1- 00
//00 0--00 00
//00 0--00 00
//00 0--01 01
//00 0--01 01
//01 01-0- 11
//01 01-0- 11
//01 00-0- 01
//01 00-0- 01
//11 0-10- 01
//11 0-10- 01
//11 0-00- 11
//11 0-00- 11
assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) |
assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) |
(state[1] & ~mb_empty_mod & ~goto_idle);
(state[1] & ~mb_empty_mod & ~goto_idle);
assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle);
assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle);
assign flush_fb = exu_flush_final;
assign flush_fb = exu_flush_final;
// model fb write logic to mass balance the fetch buffers
// model fb write logic to mass balance the fetch buffers
assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch
assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch
(ifu_fb_consume2 & ifc_fetch_req_f); // Consumed 2 and new fetch
(ifu_fb_consume2 & ifc_fetch_req_f); // Consumed 2 and new fetch
assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch
assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch
assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f;
assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f;
// CBH
// CBH
assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) |
assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) |
({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) |
({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) |
({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) |
({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) |
({4{~flush_fb & fb_left }} & {fb_write_f[2:0], 1'b0}) |
({4{~flush_fb & fb_left }} & {fb_write_f[2:0], 1'b0}) |
({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}} & fb_write_f[3:0]));
({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}} & fb_write_f[3:0]));
assign fb_full_f_ns = fb_write_ns[3];
assign fb_full_f_ns = fb_write_ns[3];
assign idle = state == IDLE ;
assign idle = state == IDLE;
assign wfm = state == WFM ;
assign wfm = state == WFM;
rvdffie #(10) fbwrite_ff (.*, .clk(free_l2clk),
rvdffie #(10) fbwrite_ff (
.din( {dma_iccm_stall_any, miss_f, ifc_fetch_req_bf, next_state[1:0], fb_full_f_ns, fb_write_ns[3:0]}),
.dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]}));
.dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]})
assign ifu_pmu_fetch_stall = wfm |
assign ifu_pmu_fetch_stall = wfm |
(ifc_fetch_req_bf_raw &
(ifc_fetch_req_bf_raw &
( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) |
( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) |
assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1];
assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1];
rvdffpcie #(31) faddrf1_ff (.*, .en(fetch_bf_en), .din(fetch_addr_bf[31:1]), .dout(ifc_fetch_addr_f[31:1]));
rvdffpcie #(31) faddrf1_ff (
.en (fetch_bf_en),
.din (fetch_addr_bf[31:1]),
if (pt.ICCM_ENABLE) begin
if (pt.ICCM_ENABLE) begin
logic iccm_acc_in_region_bf;
logic iccm_acc_in_region_bf;
logic iccm_acc_in_range_bf;
logic iccm_acc_in_range_bf;
rvrangecheck #( .CCM_SADR (pt.ICCM_SADR),
rvrangecheck #(
.CCM_SIZE (pt.ICCM_SIZE) ) iccm_rangecheck (
.addr ({ifc_fetch_addr_bf[31:1],1'b0}) ,
.in_range (iccm_acc_in_range_bf) ,
) iccm_rangecheck (
.addr ({ifc_fetch_addr_bf[31:1], 1'b0}),
.in_range (iccm_acc_in_range_bf),
assign ifc_iccm_access_bf = iccm_acc_in_range_bf ;
assign ifc_iccm_access_bf = iccm_acc_in_range_bf;
assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf |
assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf |
(fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) |
(fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) |
(wfm & ~ifc_fetch_req_bf) |
(wfm & ~ifc_fetch_req_bf) |
idle ) & ~exu_flush_final) |
idle ) & ~exu_flush_final) |
assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf ;
assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf;
end else begin
else begin
assign ifc_iccm_access_bf = 1'b0;
assign ifc_iccm_access_bf = 1'b0 ;
assign ifc_dma_access_ok = 1'b0;
assign ifc_dma_access_ok = 1'b0 ;
assign ifc_region_acc_fault_bf = 1'b0;
assign ifc_region_acc_fault_bf = 1'b0 ;
assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{ifc_fetch_addr_bf[31:28] , 1'b0 }] ; // bit 0 of each region description is the cacheable bit
assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{
ifc_fetch_addr_bf[31:28], 1'b0
}]; // bit 0 of each region description is the cacheable bit
endmodule // el2_ifu_ifc_ctl
endmodule // el2_ifu_ifc_ctl
File diff suppressed because it is too large
Load Diff
@ -17,73 +17,78 @@
// Testbench: feeds 16-bit entries of `compressed` into an el2_ifu_compress_ctl instance
// and raises `error` whenever its 32-bit output differs from the matching `expected` entry.
// NOTE(review): this excerpt looks like a rendered before/after diff - each statement
// appears twice and some lines (e.g. closing `end`s) are not shown; confirm against the real file.
module el2_ifu_tb_memread;
module el2_ifu_tb_memread;
logic [15:0] compressed [0:128000]; // vector of compressed instructions
logic [15:0] compressed [0:128000]; // vector of compressed instructions
logic [31:0] expected [0:128000]; // vector of corresponding expected instruction
logic [31:0] expected [0:128000]; // vector of corresponding expected instruction
logic rst_l;
logic rst_l;
logic clk;
logic clk;
// Number of rising clock edges seen; also used to index the stimulus arrays.
int clk_count;
int clk_count;
// Stimulus / reference values registered on the rising edge, and the DUT output.
logic [31:0] expected_val;
logic [31:0] expected_val;
logic [15:0] compressed_din;
logic [15:0] compressed_din;
logic [31:0] actual;
logic [31:0] actual;
logic error;
logic error;
integer i;
integer i;
initial begin
initial begin
clk = 0;
rst_l = 0;
// initialize the reads and populate the instruction arrays
// initialize the reads and populate the instruction arrays
// "left64k" fills `compressed`, "right64k" fills `expected` (hex text files).
$readmemh ("left64k", compressed );
$readmemh("left64k", compressed);
$readmemh ("right64k", expected );
$readmemh("right64k", expected);
$dumpfile ("top.vcd");
// Free-running clock: toggles every 50 time units.
always #50 clk =~clk;
always #50 clk = ~clk;
// Rising edge: bump the counter (blocking, so the tests below see the new value),
// drive reset, and present the next stimulus/reference pair.
always @(posedge clk) begin
always @(posedge clk) begin
clk_count = clk_count +1;
clk_count = clk_count + 1;
// rst_l is driven low while clk_count is 1..3 and high otherwise.
if (clk_count>=1 & clk_count<=3) rst_l <= 1'b0;
if (clk_count >= 1 & clk_count <= 3) rst_l <= 1'b0;
else rst_l <= 1'b1;
else rst_l <= 1'b1;
// Once past the reset window, entry (clk_count-3) of each array is registered.
if (clk_count > 3) begin
if (clk_count > 3) begin
compressed_din[15:0] <= compressed[clk_count-3]; //
compressed_din[15:0] <= compressed[clk_count-3]; //
expected_val[31:0] <= expected[clk_count-3];
expected_val[31:0] <= expected[clk_count-3];
// NOTE(review): the body of this clk_count == 65000 branch is not visible here.
if (clk_count == 65000) begin
if (clk_count == 65000) begin
end // always @ (posedge clk)
end // always @ (posedge clk)
// Falling edge: report any mismatch seen after the reset window.
always @(negedge clk) begin
always @(negedge clk) begin
if (clk_count > 3 & error) begin
if (clk_count > 3 & error) begin
$display("clock: %d compressed %h error actual %h expected %h",clk_count,compressed_din,actual,expected_val);
$display("clock: %d compressed %h error actual %h expected %h", clk_count, compressed_din,
actual, expected_val);
// Device under test: din/dout are connected by name; the old-side line also uses `.*`.
el2_ifu_compress_ctl align (.*,.din(compressed_din[15:0]),.dout(actual[31:0]));
el2_ifu_compress_ctl align (
.din (compressed_din[15:0]),
// `error` is asserted whenever the DUT output and the expected word differ.
assign error = actual[31:0] != expected_val[31:0];
assign error = actual[31:0] != expected_val[31:0];
endmodule // el2_ifu_tb_memread
endmodule // el2_ifu_tb_memread
@ -3,405 +3,405 @@
//`define EL2_DEF_SV
//`define EL2_DEF_SV
package el2_pkg;
package el2_pkg;
// el2_trace_pkt_t: packed struct of trace_rv_i_* fields - valid flag, 32-bit insn,
// 32-bit address, exception flag, 5-bit ecause, interrupt flag and 32-bit tval
// (104 bits counting each field once).
// NOTE(review): presumably the payload of the instruction-trace port - confirm against its users.
typedef struct packed {
typedef struct packed {
logic trace_rv_i_valid_ip;
logic trace_rv_i_valid_ip;
logic [31:0] trace_rv_i_insn_ip;
logic [31:0] trace_rv_i_insn_ip;
logic [31:0] trace_rv_i_address_ip;
logic [31:0] trace_rv_i_address_ip;
logic trace_rv_i_exception_ip;
logic trace_rv_i_exception_ip;
logic [4:0] trace_rv_i_ecause_ip;
logic [4:0] trace_rv_i_ecause_ip;
logic trace_rv_i_interrupt_ip;
logic trace_rv_i_interrupt_ip;
logic [31:0] trace_rv_i_tval_ip;
logic [31:0] trace_rv_i_tval_ip;
} el2_trace_pkt_t;
} el2_trace_pkt_t;
// el2_inst_pkt_t: 4-bit enumeration with 16 labels, one for every encoding from
// 4'b0000 (NULL) to 4'b1111 (BITMANIPU). It is the type of the pmu_i0_itype field of
// el2_trap_pkt_t in this package, where it is commented as the PMU instruction type.
typedef enum logic [3:0] {
typedef enum logic [3:0] {
NULL = 4'b0000,
NULL = 4'b0000,
MUL = 4'b0001,
MUL = 4'b0001,
LOAD = 4'b0010,
LOAD = 4'b0010,
STORE = 4'b0011,
STORE = 4'b0011,
ALU = 4'b0100,
ALU = 4'b0100,
CSRREAD = 4'b0101,
CSRREAD = 4'b0101,
CSRWRITE = 4'b0110,
CSRWRITE = 4'b0110,
CSRRW = 4'b0111,
CSRRW = 4'b0111,
EBREAK = 4'b1000,
EBREAK = 4'b1000,
ECALL = 4'b1001,
ECALL = 4'b1001,
FENCE = 4'b1010,
FENCE = 4'b1010,
FENCEI = 4'b1011,
FENCEI = 4'b1011,
MRET = 4'b1100,
MRET = 4'b1100,
CONDBR = 4'b1101,
CONDBR = 4'b1101,
JAL = 4'b1110,
JAL = 4'b1110,
BITMANIPU = 4'b1111
BITMANIPU = 4'b1111
} el2_inst_pkt_t;
} el2_inst_pkt_t;
// el2_load_cam_pkt_t: packed struct holding valid and wb flags, a 3-bit tag and a
// 5-bit rd (10 bits counting each field once).
// NOTE(review): name suggests one entry of a load CAM; rd presumably a register index - confirm.
typedef struct packed {
typedef struct packed {
logic valid;
logic valid;
logic wb;
logic wb;
logic [2:0] tag;
logic [2:0] tag;
logic [4:0] rd;
logic [4:0] rd;
} el2_load_cam_pkt_t;
} el2_load_cam_pkt_t;
// el2_rets_pkt_t: packed struct of three single-bit flags - pc0_call, pc0_ret, pc0_pc4.
// NOTE(review): presumably call/return classification for the instruction at pc0 - confirm.
typedef struct packed {
typedef struct packed {
logic pc0_call;
logic pc0_call;
logic pc0_ret;
logic pc0_ret;
logic pc0_pc4;
logic pc0_pc4;
} el2_rets_pkt_t;
} el2_rets_pkt_t;
// el2_br_pkt_t: packed branch packet - valid, 12-bit toffset, 2-bit hist, br_error,
// br_start_error, bank, prett[31:1] (predicted return target), way and ret
// (51 bits counting each field once).
// NOTE(review): prett omits bit 0, presumably because targets are halfword aligned - confirm.
typedef struct packed {
typedef struct packed {
logic valid;
logic valid;
logic [11:0] toffset;
logic [11:0] toffset;
logic [1:0] hist;
logic [1:0] hist;
logic br_error;
logic br_error;
logic br_start_error;
logic br_start_error;
logic bank;
logic bank;
logic [31:1] prett; // predicted ret target
logic [31:1] prett; // predicted ret target
logic way;
logic way;
logic ret;
logic ret;
} el2_br_pkt_t;
} el2_br_pkt_t;
// el2_br_tlu_pkt_t: reduced branch packet - valid, 2-bit hist, br_error,
// br_start_error, way and middle (7 bits counting each field once).
// NOTE(review): name suggests this is the branch information forwarded to the TLU - confirm.
typedef struct packed {
typedef struct packed {
logic valid;
logic valid;
logic [1:0] hist;
logic [1:0] hist;
logic br_error;
logic br_error;
logic br_start_error;
logic br_start_error;
logic way;
logic way;
logic middle;
logic middle;
} el2_br_tlu_pkt_t;
} el2_br_tlu_pkt_t;
// el2_predict_pkt_t: packed prediction packet - thirteen single-bit flags
// (misp, ataken, boffset, pc4, valid, br_error, br_start_error, pcall, pja, way, pret, ...),
// a 2-bit hist, a 12-bit toffset and prett[31:1] (56 bits counting each field once).
// prett is declared last, directly after pret, which the existing comment says is
// used to clock the prett field for power.
typedef struct packed {
typedef struct packed {
logic misp;
logic misp;
logic ataken;
logic ataken;
logic boffset;
logic boffset;
logic pc4;
logic pc4;
logic [1:0] hist;
logic [1:0] hist;
logic [11:0] toffset;
logic [11:0] toffset;
logic valid;
logic valid;
logic br_error;
logic br_error;
logic br_start_error;
logic br_start_error;
logic pcall;
logic pcall;
logic pja;
logic pja;
logic way;
logic way;
logic pret;
logic pret;
// for power use the pret bit to clock the prett field
// for power use the pret bit to clock the prett field
logic [31:1] prett;
logic [31:1] prett;
} el2_predict_pkt_t;
} el2_predict_pkt_t;
// el2_trap_pkt_t: packed struct of ten fields (17 bits counting each field once).
// The author groups them as "unlikely to change" (icaf .. pmu_divide) and
// "likely to change" (legal, pmu_lsu_misaligned, pmu_i0_itype); the last field uses the
// el2_inst_pkt_t enum declared in this package.
typedef struct packed {
typedef struct packed {
// unlikely to change
// unlikely to change
logic icaf;
logic icaf;
logic icaf_second;
logic icaf_second;
logic [1:0] icaf_type;
logic [1:0] icaf_type;
logic fence_i;
logic fence_i;
logic [3:0] i0trigger;
logic [3:0] i0trigger;
logic pmu_i0_br_unpred; // pmu
logic pmu_i0_br_unpred; // pmu
logic pmu_divide;
logic pmu_divide;
// likely to change
// likely to change
logic legal;
logic legal;
logic pmu_lsu_misaligned;
logic pmu_lsu_misaligned;
el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type
el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type
} el2_trap_pkt_t;
} el2_trap_pkt_t;
// el2_dest_pkt_t: packed struct of nine fields (24 bits counting each field once):
// i0div, csrwen, csrwonly, 12-bit csrwaddr ("unlikely to change" group) and
// 5-bit i0rd, i0load, i0store, i0v, i0valid ("likely to change" group).
// NOTE(review): presumably destination/CSR-write bookkeeping for instruction i0 - confirm.
typedef struct packed {
typedef struct packed {
// unlikely to change
// unlikely to change
logic i0div;
logic i0div;
logic csrwen;
logic csrwen;
logic csrwonly;
logic csrwonly;
logic [11:0] csrwaddr;
logic [11:0] csrwaddr;
// likely to change
// likely to change
logic [4:0] i0rd;
logic [4:0] i0rd;
logic i0load;
logic i0load;
logic i0store;
logic i0store;
logic i0v;
logic i0v;
logic i0valid;
logic i0valid;
} el2_dest_pkt_t;
} el2_dest_pkt_t;
// el2_class_pkt_t: packed struct of three single-bit flags - mul, load, alu.
// NOTE(review): presumably an instruction-class indicator; whether the bits are
// mutually exclusive cannot be told from this declaration.
typedef struct packed {
typedef struct packed {
logic mul;
logic mul;
logic load;
logic load;
logic alu;
logic alu;
} el2_class_pkt_t;
} el2_class_pkt_t;
// el2_reg_pkt_t: packed struct of three 5-bit fields - rs1, rs2, rd
// (15 bits counting each field once).
// NOTE(review): presumably the source/destination register indices of an instruction - confirm.
typedef struct packed {
typedef struct packed {
logic [4:0] rs1;
logic [4:0] rs1;
logic [4:0] rs2;
logic [4:0] rs2;
logic [4:0] rd;
logic [4:0] rd;
} el2_reg_pkt_t;
} el2_reg_pkt_t;
// el2_alu_pkt_t: packed struct made only of single-bit flags (42 counting each field once),
// declared in the order clz .. csr_imm. Because the struct is packed, the declaration
// order fixes the bit positions, so fields must not be reordered.
// NOTE(review): presumably one flag per ALU operation/qualifier selected by decode - confirm.
typedef struct packed {
typedef struct packed {
logic clz;
logic clz;
logic ctz;
logic ctz;
logic cpop;
logic cpop;
logic sext_b;
logic sext_b;
logic sext_h;
logic sext_h;
logic min;
logic min;
logic max;
logic max;
logic pack;
logic pack;
logic packu;
logic packu;
logic packh;
logic packh;
logic rol;
logic rol;
logic ror;
logic ror;
logic grev;
logic grev;
logic gorc;
logic gorc;
logic zbb;
logic zbb;
logic bset;
logic bset;
logic bclr;
logic bclr;
logic binv;
logic binv;
logic bext;
logic bext;
logic sh1add;
logic sh1add;
logic sh2add;
logic sh2add;
logic sh3add;
logic sh3add;
logic zba;
logic zba;
logic land;
logic land;
logic lor;
logic lor;
logic lxor;
logic lxor;
logic sll;
logic sll;
logic srl;
logic srl;
logic sra;
logic sra;
logic beq;
logic beq;
logic bne;
logic bne;
logic blt;
logic blt;
logic bge;
logic bge;
logic add;
logic add;
logic sub;
logic sub;
logic slt;
logic slt;
logic unsign;
logic unsign;
logic jal;
logic jal;
logic predict_t;
logic predict_t;
logic predict_nt;
logic predict_nt;
logic csr_write;
logic csr_write;
logic csr_imm;
logic csr_imm;
} el2_alu_pkt_t;
} el2_alu_pkt_t;
// el2_lsu_pkt_t: packed struct of fourteen single-bit flags (counting each field once):
// fast_int, stack, size selectors (by/half/word/dword), load, store, unsign, dma,
// three bypass flags and valid.
// Verilator's SYMRSVDWORD lint warning is switched off around the `stack` field only
// and re-enabled immediately afterwards.
typedef struct packed {
typedef struct packed {
logic fast_int;
logic fast_int;
/* verilator lint_off SYMRSVDWORD */
/* verilator lint_off SYMRSVDWORD */
logic stack;
logic stack;
/* verilator lint_on SYMRSVDWORD */
/* verilator lint_on SYMRSVDWORD */
logic by;
logic by;
logic half;
logic half;
logic word;
logic word;
logic dword; // for dma
logic dword; // for dma
logic load;
logic load;
logic store;
logic store;
logic unsign;
logic unsign;
logic dma; // dma pkt
logic dma; // dma pkt
logic store_data_bypass_d;
logic store_data_bypass_d;
logic load_ldst_bypass_d;
logic load_ldst_bypass_d;
logic store_data_bypass_m;
logic store_data_bypass_m;
logic valid;
logic valid;
} el2_lsu_pkt_t;
} el2_lsu_pkt_t;
// el2_lsu_error_pkt_t: packed struct describing an LSU error - inst_type (0 load, 1 store),
// exc_type (0 misaligned, 1 access fault), 4-bit mscause, 32-bit addr, single_ecc_error
// and exc_valid (40 bits counting each field once). The dma_valid field is commented
// out and therefore not part of the struct.
typedef struct packed {
typedef struct packed {
logic inst_type; //0: Load, 1: Store
logic inst_type; //0: Load, 1: Store
//logic dma_valid;
//logic dma_valid;
logic exc_type; //0: MisAligned, 1: Access Fault
logic exc_type; //0: MisAligned, 1: Access Fault
logic [3:0] mscause;
logic [3:0] mscause;
logic [31:0] addr;
logic [31:0] addr;
logic single_ecc_error;
logic single_ecc_error;
logic exc_valid;
logic exc_valid;
} el2_lsu_error_pkt_t;
} el2_lsu_error_pkt_t;
// el2_dec_pkt_t: packed struct made only of single-bit flags (96 counting each field once),
// declared in the order clz .. legal. Because the struct is packed, the declaration
// order fixes the bit positions, so fields must not be reordered.
// NOTE(review): presumably the full decode result for one instruction (operation,
// operand-source, CSR, sync and legality flags) - confirm against the decoder.
typedef struct packed {
typedef struct packed {
logic clz;
logic clz;
logic ctz;
logic ctz;
logic cpop;
logic cpop;
logic sext_b;
logic sext_b;
logic sext_h;
logic sext_h;
logic min;
logic min;
logic max;
logic max;
logic pack;
logic pack;
logic packu;
logic packu;
logic packh;
logic packh;
logic rol;
logic rol;
logic ror;
logic ror;
logic grev;
logic grev;
logic gorc;
logic gorc;
logic zbb;
logic zbb;
logic bset;
logic bset;
logic bclr;
logic bclr;
logic binv;
logic binv;
logic bext;
logic bext;
logic zbs;
logic zbs;
logic bcompress;
logic bcompress;
logic bdecompress;
logic bdecompress;
logic zbe;
logic zbe;
logic clmul;
logic clmul;
logic clmulh;
logic clmulh;
logic clmulr;
logic clmulr;
logic zbc;
logic zbc;
logic shfl;
logic shfl;
logic unshfl;
logic unshfl;
logic xperm_n;
logic xperm_n;
logic xperm_b;
logic xperm_b;
logic xperm_h;
logic xperm_h;
logic zbp;
logic zbp;
logic crc32_b;
logic crc32_b;
logic crc32_h;
logic crc32_h;
logic crc32_w;
logic crc32_w;
logic crc32c_b;
logic crc32c_b;
logic crc32c_h;
logic crc32c_h;
logic crc32c_w;
logic crc32c_w;
logic zbr;
logic zbr;
logic bfp;
logic bfp;
logic zbf;
logic zbf;
logic sh1add;
logic sh1add;
logic sh2add;
logic sh2add;
logic sh3add;
logic sh3add;
logic zba;
logic zba;
logic alu;
logic alu;
logic rs1;
logic rs1;
logic rs2;
logic rs2;
logic imm12;
logic imm12;
logic rd;
logic rd;
logic shimm5;
logic shimm5;
logic imm20;
logic imm20;
logic pc;
logic pc;
logic load;
logic load;
logic store;
logic store;
logic lsu;
logic lsu;
logic add;
logic add;
logic sub;
logic sub;
logic land;
logic land;
logic lor;
logic lor;
logic lxor;
logic lxor;
logic sll;
logic sll;
logic sra;
logic sra;
logic srl;
logic srl;
logic slt;
logic slt;
logic unsign;
logic unsign;
logic condbr;
logic condbr;
logic beq;
logic beq;
logic bne;
logic bne;
logic bge;
logic bge;
logic blt;
logic blt;
logic jal;
logic jal;
logic by;
logic by;
logic half;
logic half;
logic word;
logic word;
logic csr_read;
logic csr_read;
logic csr_clr;
logic csr_clr;
logic csr_set;
logic csr_set;
logic csr_write;
logic csr_write;
logic csr_imm;
logic csr_imm;
logic presync;
logic presync;
logic postsync;
logic postsync;
logic ebreak;
logic ebreak;
logic ecall;
logic ecall;
logic mret;
logic mret;
logic mul;
logic mul;
logic rs1_sign;
logic rs1_sign;
logic rs2_sign;
logic rs2_sign;
logic low;
logic low;
logic div;
logic div;
logic rem;
logic rem;
logic fence;
logic fence;
logic fence_i;
logic fence_i;
logic pm_alu;
logic pm_alu;
logic legal;
logic legal;
} el2_dec_pkt_t;
} el2_dec_pkt_t;
// el2_mul_pkt_t: packed struct made only of single-bit flags (23 counting each field once):
// valid, rs1_sign, rs2_sign, low, followed by operation flags bcompress .. xperm_h.
// NOTE(review): presumably the control packet handed to the multiplier / bit-manipulation
// unit - confirm against its consumer.
typedef struct packed {
typedef struct packed {
logic valid;
logic valid;
logic rs1_sign;
logic rs1_sign;
logic rs2_sign;
logic rs2_sign;
logic low;
logic low;
logic bcompress;
logic bcompress;
logic bdecompress;
logic bdecompress;
logic clmul;
logic clmul;
logic clmulh;
logic clmulh;
logic clmulr;
logic clmulr;
logic grev;
logic grev;
logic gorc;
logic gorc;
logic shfl;
logic shfl;
logic unshfl;
logic unshfl;
logic crc32_b;
logic crc32_b;
logic crc32_h;
logic crc32_h;
logic crc32_w;
logic crc32_w;
logic crc32c_b;
logic crc32c_b;
logic crc32c_h;
logic crc32c_h;
logic crc32c_w;
logic crc32c_w;
logic bfp;
logic bfp;
logic xperm_n;
logic xperm_n;
logic xperm_b;
logic xperm_b;
logic xperm_h;
logic xperm_h;
} el2_mul_pkt_t;
} el2_mul_pkt_t;
// el2_div_pkt_t: packed struct of three single-bit flags - valid, unsign, rem.
// NOTE(review): presumably the divider control packet (unsigned / remainder select) - confirm.
typedef struct packed {
typedef struct packed {
logic valid;
logic valid;
logic unsign;
logic unsign;
logic rem;
logic rem;
} el2_div_pkt_t;
} el2_div_pkt_t;
// el2_ccm_ext_in_pkt_t: packed struct of TEST1, RME, 4-bit RM, LS, DS, SD, TEST_RNM,
// BC1 and BC2 (12 bits counting each field once). The same field list is repeated in
// el2_dccm_ext_in_pkt_t, el2_ic_data_ext_in_pkt_t and el2_ic_tag_ext_in_pkt_t.
// NOTE(review): presumably external memory-macro control pins - confirm with the memory wrapper.
typedef struct packed {
typedef struct packed {
logic TEST1;
logic TEST1;
logic RME;
logic RME;
logic [3:0] RM;
logic [3:0] RM;
logic LS;
logic LS;
logic DS;
logic DS;
logic SD;
logic SD;
logic TEST_RNM;
logic TEST_RNM;
logic BC1;
logic BC1;
logic BC2;
logic BC2;
} el2_ccm_ext_in_pkt_t;
} el2_ccm_ext_in_pkt_t;
// el2_dccm_ext_in_pkt_t: packed struct of TEST1, RME, 4-bit RM, LS, DS, SD, TEST_RNM,
// BC1 and BC2 (12 bits counting each field once); field-for-field identical to
// el2_ccm_ext_in_pkt_t but declared as a distinct type.
// NOTE(review): presumably external control pins for the DCCM memories - confirm.
typedef struct packed {
typedef struct packed {
logic TEST1;
logic TEST1;
logic RME;
logic RME;
logic [3:0] RM;
logic [3:0] RM;
logic LS;
logic LS;
logic DS;
logic DS;
logic SD;
logic SD;
logic TEST_RNM;
logic TEST_RNM;
logic BC1;
logic BC1;
logic BC2;
logic BC2;
} el2_dccm_ext_in_pkt_t;
} el2_dccm_ext_in_pkt_t;
// el2_ic_data_ext_in_pkt_t: packed struct of TEST1, RME, 4-bit RM, LS, DS, SD, TEST_RNM,
// BC1 and BC2 (12 bits counting each field once); field-for-field identical to
// el2_ccm_ext_in_pkt_t but declared as a distinct type.
// NOTE(review): presumably external control pins for the I-cache data arrays - confirm.
typedef struct packed {
typedef struct packed {
logic TEST1;
logic TEST1;
logic RME;
logic RME;
logic [3:0] RM;
logic [3:0] RM;
logic LS;
logic LS;
logic DS;
logic DS;
logic SD;
logic SD;
logic TEST_RNM;
logic TEST_RNM;
logic BC1;
logic BC1;
logic BC2;
logic BC2;
} el2_ic_data_ext_in_pkt_t;
} el2_ic_data_ext_in_pkt_t;
// el2_ic_tag_ext_in_pkt_t: packed struct of TEST1, RME, 4-bit RM, LS, DS, SD, TEST_RNM,
// BC1 and BC2 (12 bits counting each field once); field-for-field identical to
// el2_ccm_ext_in_pkt_t but declared as a distinct type.
// NOTE(review): presumably external control pins for the I-cache tag arrays - confirm.
typedef struct packed {
typedef struct packed {
logic TEST1;
logic TEST1;
logic RME;
logic RME;
logic [3:0] RM;
logic [3:0] RM;
logic LS;
logic LS;
logic DS;
logic DS;
logic SD;
logic SD;
logic TEST_RNM;
logic TEST_RNM;
logic BC1;
logic BC1;
logic BC2;
logic BC2;
} el2_ic_tag_ext_in_pkt_t;
} el2_ic_tag_ext_in_pkt_t;
// el2_trigger_pkt_t: packed struct of six single-bit flags (select, match, store, load,
// execute, m) followed by a 32-bit tdata2 (38 bits counting each field once).
// NOTE(review): field names resemble the RISC-V debug trigger mcontrol/tdata2 registers;
// confirm the exact mapping against the trigger CSR logic.
typedef struct packed {
typedef struct packed {
logic select;
logic select;
logic match;
logic match;
logic store;
logic store;
logic load;
logic load;
logic execute;
logic execute;
logic m;
logic m;
logic [31:0] tdata2;
logic [31:0] tdata2;
} el2_trigger_pkt_t;
} el2_trigger_pkt_t;
// el2_cache_debug_pkt_t: packed struct carrying a 71-bit icache_wrdata, a 17-bit
// icache_dicawics and two single-bit strobes, icache_rd_valid and icache_wr_valid
// (90 bits counting each field once). The inline comments give the authors' field layout.
// NOTE(review): presumably the debug read/write request into the I-cache arrays - confirm.
typedef struct packed {
typedef struct packed {
logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]}
logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]}
logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3
logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3
logic icache_rd_valid;
logic icache_rd_valid;
logic icache_wr_valid;
logic icache_wr_valid;
} el2_cache_debug_pkt_t;
} el2_cache_debug_pkt_t;
endpackage // el2_pkg
endpackage // el2_pkg
@ -21,254 +21,412 @@
module ahb_to_axi4
module ahb_to_axi4
import el2_pkg::*;
import el2_pkg::*;
TAG = 1,
TAG = 1,
`include "el2_param.vh"
`include "el2_param.vh"
// ,TAG = 1)
// ,TAG = 1)
input clk,
input clk,
input rst_l,
input rst_l,
input scan_mode,
input scan_mode,
input bus_clk_en,
input bus_clk_en,
input clk_override,
input clk_override,
// AXI signals
// AXI signals
// AXI Write Channels
// AXI Write Channels
output logic axi_awvalid,
output logic axi_awvalid,
input logic axi_awready,
input logic axi_awready,
output logic [TAG-1:0] axi_awid,
output logic [TAG-1:0] axi_awid,
output logic [31:0] axi_awaddr,
output logic [ 31:0] axi_awaddr,
output logic [2:0] axi_awsize,
output logic [ 2:0] axi_awsize,
output logic [2:0] axi_awprot,
output logic [ 2:0] axi_awprot,
output logic [7:0] axi_awlen,
output logic [ 7:0] axi_awlen,
output logic [1:0] axi_awburst,
output logic [ 1:0] axi_awburst,
output logic axi_wvalid,
output logic axi_wvalid,
input logic axi_wready,
input logic axi_wready,
output logic [63:0] axi_wdata,
output logic [63:0] axi_wdata,
output logic [7:0] axi_wstrb,
output logic [ 7:0] axi_wstrb,
output logic axi_wlast,
output logic axi_wlast,
input logic axi_bvalid,
input logic axi_bvalid,
output logic axi_bready,
output logic axi_bready,
input logic [1:0] axi_bresp,
input logic [ 1:0] axi_bresp,
input logic [TAG-1:0] axi_bid,
input logic [TAG-1:0] axi_bid,
// AXI Read Channels
// AXI Read Channels
output logic axi_arvalid,
output logic axi_arvalid,
input logic axi_arready,
input logic axi_arready,
output logic [TAG-1:0] axi_arid,
output logic [TAG-1:0] axi_arid,
output logic [31:0] axi_araddr,
output logic [ 31:0] axi_araddr,
output logic [2:0] axi_arsize,
output logic [ 2:0] axi_arsize,
output logic [2:0] axi_arprot,
output logic [ 2:0] axi_arprot,
output logic [7:0] axi_arlen,
output logic [ 7:0] axi_arlen,
output logic [1:0] axi_arburst,
output logic [ 1:0] axi_arburst,
input logic axi_rvalid,
input logic axi_rvalid,
output logic axi_rready,
output logic axi_rready,
input logic [TAG-1:0] axi_rid,
input logic [TAG-1:0] axi_rid,
input logic [63:0] axi_rdata,
input logic [ 63:0] axi_rdata,
input logic [1:0] axi_rresp,
input logic [ 1:0] axi_rresp,
// AHB-Lite signals
// AHB-Lite signals
input logic [31:0] ahb_haddr, // ahb bus address
input logic [31:0] ahb_haddr, // ahb bus address
input logic [2:0] ahb_hburst, // tied to 0
input logic [ 2:0] ahb_hburst, // tied to 0
input logic ahb_hmastlock, // tied to 0
input logic ahb_hmastlock, // tied to 0
input logic [3:0] ahb_hprot, // tied to 4'b0011
input logic [ 3:0] ahb_hprot, // tied to 4'b0011
input logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3)
input logic [ 2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3)
input logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now)
input logic [ 1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now)
input logic ahb_hwrite, // ahb bus write
input logic ahb_hwrite, // ahb bus write
input logic [63:0] ahb_hwdata, // ahb bus write data
input logic [63:0] ahb_hwdata, // ahb bus write data
input logic ahb_hsel, // this slave was selected
input logic ahb_hsel, // this slave was selected
input logic ahb_hreadyin, // previous hready was accepted or not
input logic ahb_hreadyin, // previous hready was accepted or not
output logic [63:0] ahb_hrdata, // ahb bus read data
output logic [63:0] ahb_hrdata, // ahb bus read data
output logic ahb_hreadyout, // slave ready to accept transaction
output logic ahb_hreadyout, // slave ready to accept transaction
output logic ahb_hresp // slave response (high indicates erro)
output logic ahb_hresp // slave response (high indicates erro)
logic [7:0] master_wstrb;
logic [7:0] master_wstrb;
typedef enum logic [1:0] { IDLE = 2'b00, // Nothing in the buffer. No commands yet recieved
typedef enum logic [1:0] {
WR = 2'b01, // Write Command recieved
IDLE = 2'b00, // Nothing in the buffer. No commands yet recieved
RD = 2'b10, // Read Command recieved
WR = 2'b01, // Write Command recieved
PEND = 2'b11 // Waiting on Read Data from core
RD = 2'b10, // Read Command recieved
} state_t;
PEND = 2'b11 // Waiting on Read Data from core
state_t buf_state, buf_nxtstate;
} state_t;
logic buf_state_en;
state_t buf_state, buf_nxtstate;
logic buf_state_en;
// Buffer signals (one entry buffer)
// Buffer signals (one entry buffer)
logic buf_read_error_in, buf_read_error;
logic buf_read_error_in, buf_read_error;
logic [63:0] buf_rdata;
logic [63:0] buf_rdata;
logic ahb_hready;
logic ahb_hready;
logic ahb_hready_q;
logic ahb_hready_q;
logic [1:0] ahb_htrans_in, ahb_htrans_q;
logic [1:0] ahb_htrans_in, ahb_htrans_q;
logic [2:0] ahb_hsize_q;
logic [ 2:0] ahb_hsize_q;
logic ahb_hwrite_q;
logic ahb_hwrite_q;
logic [31:0] ahb_haddr_q;
logic [31:0] ahb_haddr_q;
logic [63:0] ahb_hwdata_q;
logic [63:0] ahb_hwdata_q;
logic ahb_hresp_q;
logic ahb_hresp_q;
//Miscellaneous signals
//Miscellaneous signals
logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic;
logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic;
logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc;
logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc;
// signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus
// signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus
logic buf_rdata_en;
logic buf_rdata_en;
logic ahb_addr_clk_en, buf_rdata_clk_en;
logic ahb_addr_clk_en, buf_rdata_clk_en;
logic bus_clk, ahb_addr_clk, buf_rdata_clk;
logic bus_clk, ahb_addr_clk, buf_rdata_clk;
// Command buffer is the holding station where we convert to AXI and send to core
// Command buffer is the holding station where we convert to AXI and send to core
logic cmdbuf_wr_en, cmdbuf_rst;
logic cmdbuf_wr_en, cmdbuf_rst;
logic cmdbuf_full;
logic cmdbuf_full;
logic cmdbuf_vld, cmdbuf_write;
logic cmdbuf_vld, cmdbuf_write;
logic [1:0] cmdbuf_size;
logic [ 1:0] cmdbuf_size;
logic [7:0] cmdbuf_wstrb;
logic [ 7:0] cmdbuf_wstrb;
logic [31:0] cmdbuf_addr;
logic [31:0] cmdbuf_addr;
logic [63:0] cmdbuf_wdata;
logic [63:0] cmdbuf_wdata;
// FSM to control the bus states and when to block the hready and load the command buffer
// FSM to control the bus states and when to block the hready and load the command buffer
always_comb begin
always_comb begin
buf_nxtstate = IDLE;
buf_nxtstate = IDLE;
buf_state_en = 1'b0;
buf_state_en = 1'b0;
buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back
buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back
buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core
buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core
cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/dat for writes
cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/dat for writes
case (buf_state)
case (buf_state)
IDLE: begin // No commands recieved
IDLE: begin // No commands recieved
buf_nxtstate = ahb_hwrite ? WR : RD;
buf_nxtstate = ahb_hwrite ? WR : RD;
buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid hrtans
buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid hrtans
WR: begin // Write command recieved last cycle
WR: begin // Write command recieved last cycle
buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD;
buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD;
buf_state_en = (~cmdbuf_full | ahb_hresp) ;
buf_state_en = (~cmdbuf_full | ahb_hresp);
cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Dont send command to the buffer in case of an error or when the master is not ready with the data now.
cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Dont send command to the buffer in case of an error or when the master is not ready with the data now.
RD: begin // Read command recieved last cycle.
RD: begin // Read command recieved last cycle.
buf_nxtstate = ahb_hresp ? IDLE :PEND; // If error go to idle, else wait for read data
buf_nxtstate = ahb_hresp ? IDLE : PEND; // If error go to idle, else wait for read data
buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if its an error
buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if its an error
cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error
cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error
PEND: begin // Read Command has been sent. Waiting on Data.
PEND: begin // Read Command has been sent. Waiting on Data.
buf_nxtstate = IDLE; // go back for next command and present data next cycle
buf_nxtstate = IDLE; // go back for next command and present data next cycle
buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back
buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back
buf_rdata_en = buf_state_en; // buffer the read data coming back from core
buf_rdata_en = buf_state_en; // buffer the read data coming back from core
buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC )
buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC )
end // always_comb begin
end // always_comb begin
rvdffs_fpga #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk));
rvdffs_fpga #($bits(
)) state_reg (
assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) |
assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b1}} & (8'b11 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b1}} & (8'b11 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) |
({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111);
({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111);
// AHB signals
// AHB signals
assign ahb_hreadyout = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) :
assign ahb_hreadyout = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) :
((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND) & ~buf_read_error);
((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND) & ~buf_read_error);
assign ahb_hready = ahb_hreadyout & ahb_hreadyin;
assign ahb_hready = ahb_hreadyout & ahb_hreadyin;
assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0];
assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0];
assign ahb_hrdata[63:0] = buf_rdata[63:0];
assign ahb_hrdata[63:0] = buf_rdata[63:0];
assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) &
assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) &
((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) | // request not for ICCM or DCCM
((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) | // request not for ICCM or DCCM
((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size
((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size
((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned
((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned
((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned
((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned
((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned
((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned
buf_read_error | // Read ECC error
buf_read_error | // Read ECC error
(ahb_hresp_q & ~ahb_hready_q);
(ahb_hresp_q & ~ahb_hready_q);
// Buffer signals - needed for the read data and ECC error response
// Buffer signals - needed for the read data and ECC error response
rvdff_fpga #(.WIDTH(64)) buf_rdata_ff (.din(axi_rdata[63:0]), .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .clken(buf_rdata_clk_en), .rawclk(clk), .*);
rvdff_fpga #(
rvdff_fpga #(.WIDTH(1)) buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); // buf_read_error will be high only one cycle
) buf_rdata_ff (
rvdff_fpga #(
) buf_read_error_ff (
); // buf_read_error will be high only one cycle
// All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer.
// All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer.
rvdff_fpga #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(
rvdff_fpga #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(2)) htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
) hresp_ff (
rvdff_fpga #(.WIDTH(3)) hsize_ff (.din(ahb_hsize[2:0]), .dout(ahb_hsize_q[2:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
rvdff_fpga #(.WIDTH(32)) haddr_ff (.din(ahb_haddr[31:0]), .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*);
rvdff_fpga #(
) hready_ff (
rvdff_fpga #(
) htrans_ff (
rvdff_fpga #(
) hsize_ff (
rvdff_fpga #(
) hwrite_ff (
rvdff_fpga #(
) haddr_ff (
// Address check dccm
// Address check dccm
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
rvrangecheck #(
.CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck (
) addr_dccm_rangecheck (
// Address check iccm
// Address check iccm
if (pt.ICCM_ENABLE == 1) begin: GenICCM
if (pt.ICCM_ENABLE == 1) begin : GenICCM
rvrangecheck #(.CCM_SADR(pt.ICCM_SADR),
rvrangecheck #(
.CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck (
) addr_iccm_rangecheck (
end else begin: GenNoICCM
assign ahb_addr_in_iccm = '0;
assign ahb_addr_in_iccm_region_nc = '0;
end else begin : GenNoICCM
assign ahb_addr_in_iccm = '0;
assign ahb_addr_in_iccm_region_nc = '0;
// PIC memory address check
// PIC memory address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
rvrangecheck #(
.CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck (
) addr_pic_rangecheck (
// Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals.
// Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals.
assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write);
assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write);
assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)));
assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)));
rvdffsc_fpga #(.WIDTH(1)) cmdbuf_vldff (.din(1'b1), .dout(cmdbuf_vld), .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffsc_fpga #(
rvdffs_fpga #(.WIDTH(1)) cmdbuf_writeff (.din(ahb_hwrite_q), .dout(cmdbuf_write), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffs_fpga #(.WIDTH(2)) cmdbuf_sizeff (.din(ahb_hsize_q[1:0]), .dout(cmdbuf_size[1:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
) cmdbuf_vldff (
rvdffs_fpga #(.WIDTH(8)) cmdbuf_wstrbff (.din(master_wstrb[7:0]), .dout(cmdbuf_wstrb[7:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*);
rvdffe #(.WIDTH(32)) cmdbuf_addrff (.din(ahb_haddr_q[31:0]), .dout(cmdbuf_addr[31:0]), .en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*);
rvdffe #(.WIDTH(64)) cmdbuf_wdataff (.din(ahb_hwdata[63:0]), .dout(cmdbuf_wdata[63:0]), .en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*);
rvdffs_fpga #(
) cmdbuf_writeff (
rvdffs_fpga #(
) cmdbuf_sizeff (
rvdffs_fpga #(
) cmdbuf_wstrbff (
rvdffe #(
) cmdbuf_addrff (
.din (ahb_haddr_q[31:0]),
.en (cmdbuf_wr_en & bus_clk_en),
.clk (clk),
rvdffe #(
) cmdbuf_wdataff (
.din (ahb_hwdata[63:0]),
.en (cmdbuf_wr_en & bus_clk_en),
.clk (clk),
// AXI Write Command Channel
// AXI Write Command Channel
assign axi_awvalid = cmdbuf_vld & cmdbuf_write;
assign axi_awvalid = cmdbuf_vld & cmdbuf_write;
assign axi_awid[TAG-1:0] = '0;
assign axi_awid[TAG-1:0] = '0;
assign axi_awaddr[31:0] = cmdbuf_addr[31:0];
assign axi_awaddr[31:0] = cmdbuf_addr[31:0];
assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_awprot[2:0] = 3'b0;
assign axi_awprot[2:0] = 3'b0;
assign axi_awlen[7:0] = '0;
assign axi_awlen[7:0] = '0;
assign axi_awburst[1:0] = 2'b01;
assign axi_awburst[1:0] = 2'b01;
// AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data.
// AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data.
assign axi_wvalid = cmdbuf_vld & cmdbuf_write;
assign axi_wvalid = cmdbuf_vld & cmdbuf_write;
assign axi_wdata[63:0] = cmdbuf_wdata[63:0];
assign axi_wdata[63:0] = cmdbuf_wdata[63:0];
assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0];
assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0];
assign axi_wlast = 1'b1;
assign axi_wlast = 1'b1;
// AXI Write Response - Always ready. AHB does not require a write response.
// AXI Write Response - Always ready. AHB does not require a write response.
assign axi_bready = 1'b1;
assign axi_bready = 1'b1;
// AXI Read Channels
// AXI Read Channels
assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write;
assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write;
assign axi_arid[TAG-1:0] = '0;
assign axi_arid[TAG-1:0] = '0;
assign axi_araddr[31:0] = cmdbuf_addr[31:0];
assign axi_araddr[31:0] = cmdbuf_addr[31:0];
assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]};
assign axi_arprot = 3'b0;
assign axi_arprot = 3'b0;
assign axi_arlen[7:0] = '0;
assign axi_arlen[7:0] = '0;
assign axi_arburst[1:0] = 2'b01;
assign axi_arburst[1:0] = 2'b01;
// AXI Read Response Channel - Always ready, as AHB reads are blocking and the buffer is always available for the returning read data.
// AXI Read Response Channel - Always ready, as AHB reads are blocking and the buffer is always available for the returning read data.
assign axi_rready = 1'b1;
assign axi_rready = 1'b1;
// Clock header logic
// Clock header logic
assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]);
assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]);
assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en;
assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en;
rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*);
rvclkhdr bus_cgc (
rvclkhdr ahb_addr_cgc (.en(ahb_addr_clk_en), .l1clk(ahb_addr_clk), .*);
rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en), .l1clk(buf_rdata_clk), .*);
rvclkhdr ahb_addr_cgc (
rvclkhdr buf_rdata_cgc (
endmodule // ahb_to_axi4
endmodule // ahb_to_axi4
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,64 +1,71 @@
// el2_btb_tag_hash: module whose only visible port is the pt.BTB_BTAG_SIZE-bit output `hash`,
// driven by a single continuous assignment whose right-hand side is a concatenation.
// The parameter port list is supplied by including el2_param.vh (presumably where `pt` is declared - confirm).
// NOTE(review): this listing is a before/after rendering of a formatting-only commit, so most lines
// appear twice; the input port, the body of the concatenation and `endmodule` are not visible here.
module el2_btb_tag_hash #(
module el2_btb_tag_hash #(
`include "el2_param.vh"
`include "el2_param.vh"
) (
) (
output logic [pt.BTB_BTAG_SIZE-1:0] hash
output logic [pt.BTB_BTAG_SIZE-1:0] hash
// Hash expression (a concatenation; its operands are not visible in this listing).
assign hash = {
// el2_btb_tag_hash_fold: maps a slice of `pc` onto the pt.BTB_BTAG_SIZE-bit output `hash`.
// The `pc` input covers bits [BTB_ADDR_HI + 2*BTB_BTAG_SIZE : BTB_ADDR_HI + 1], i.e. it is exactly
// two tag widths wide, while `hash` is one tag width wide.
// NOTE(review): presumably the two tag-sized halves of `pc` are combined ("folded") into one tag;
// the operands of the concatenation are not visible here, so confirm against the full source.
// NOTE(review): before/after diff rendering - most lines appear twice and `endmodule` is not shown.
module el2_btb_tag_hash_fold #(
module el2_btb_tag_hash_fold #(
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc,
output logic [pt.BTB_BTAG_SIZE-1:0] hash
output logic [pt.BTB_BTAG_SIZE-1:0] hash
// Hash expression (a concatenation; its operands are not visible in this listing).
assign hash = {(
assign hash = {
// el2_btb_addr_hash: derives the output `hash` (bits [BTB_ADDR_HI:BTB_ADDR_LO]) from `pc`
// (bits [BTB_INDEX3_HI:BTB_INDEX1_LO]).
// A generate-if on pt.BTB_FOLD2_INDEX_HASH selects one of two continuous assignments; both start
// from pc[BTB_INDEX1_HI:BTB_INDEX1_LO] and XOR it with further terms.
// NOTE(review): the remaining XOR operands are cut off in this listing - presumably the other
// BTB_INDEX* slices of `pc`; confirm against the full source.
// NOTE(review): before/after diff rendering - most lines appear twice and `endmodule` is not shown.
module el2_btb_addr_hash #(
module el2_btb_addr_hash #(
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc,
input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc,
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash
output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash
// Variant used when pt.BTB_FOLD2_INDEX_HASH is set.
if(pt.BTB_FOLD2_INDEX_HASH) begin : fold2
if (pt.BTB_FOLD2_INDEX_HASH) begin : fold2
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
// Variant used otherwise.
end else begin
else begin
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^
// el2_btb_ghr_hash: combines `hashin` (bits [BTB_ADDR_HI:BTB_ADDR_LO]) with the pt.BHT_GHR_SIZE-bit
// `ghr` input to produce `hash` (bits [BHT_ADDR_HI:BHT_ADDR_LO]).
// A generate-if on pt.BHT_GHR_HASH_1 chooses between two forms:
//   ghrhash_cfg1: { ghr[BHT_GHR_SIZE-1 : BTB_INDEX1_HI-1], hashin[BTB_INDEX1_HI:2] ^ ghr[BTB_INDEX1_HI-2:0] }
//   ghrhash_cfg2: hashin[BHT_GHR_SIZE+1:2] ^ ghr[BHT_GHR_SIZE-1:0]
// NOTE(review): before/after diff rendering - the pre-format and post-format lines are interleaved
// and some lines (closing braces, `end`, `endmodule`) are not shown.
module el2_btb_ghr_hash #(
module el2_btb_ghr_hash #(
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin,
input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin,
input logic [pt.BHT_GHR_SIZE-1:0] ghr,
input logic [pt.BHT_GHR_SIZE-1:0] ghr,
output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash
output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash
// The hash function is too complex to write in verilog for all cases.
// The hash function is too complex to write in verilog for all cases.
// The config script generates the logic string based on the bp config.
// The config script generates the logic string based on the bp config.
// cfg1: upper ghr bits are passed through; the lower ghr bits are XORed with hashin.
if(pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1
if (pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1], hashin[pt.BTB_INDEX1_HI:2]^ghr[pt.BTB_INDEX1_HI-2:0]};
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = {
else begin : ghrhash_cfg2
hashin[pt.BTB_INDEX1_HI:2] ^ ghr[pt.BTB_INDEX1_HI-2:0]
// cfg2: the whole ghr is XORed with hashin bits starting at bit 2.
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { hashin[pt.BHT_GHR_SIZE+1:2]^ghr[pt.BHT_GHR_SIZE-1:0]};
end else begin : ghrhash_cfg2
assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = {
hashin[pt.BHT_GHR_SIZE+1:2] ^ ghr[pt.BHT_GHR_SIZE-1:0]
@ -83,22 +83,28 @@ assign ROP = ME; \
// parameterizable RAM for verilator sims
// parameterizable RAM for verilator sims
// el2_ram: behavioral RAM model with `depth` words of `width` bits (defaults 4096 x 39).
//   ADR - word address, $clog2(depth) bits wide
//   D   - write data
//   Q   - registered read data
// ME and WE are used below as enable and write-enable; CLK is the clock. Their port declarations
// are not visible in this listing.
// NOTE(review): before/after diff rendering - the pre-format and post-format lines are interleaved
// and the `else / `endif / end / endmodule lines are not shown.
module el2_ram #(depth=4096, width=39) (
module el2_ram #(
input logic [$clog2(depth)-1:0] ADR,
depth = 4096,
input logic [(width-1):0] D,
width = 39
output logic [(width-1):0] Q,
) (
input logic [$clog2(depth)-1:0] ADR,
input logic [(width-1):0] D,
output logic [(width-1):0] Q,
// Storage array: one `width`-bit word per address.
reg [(width-1):0] ram_core [(depth-1):0];
reg [(width-1):0] ram_core[(depth-1):0];
always @(posedge CLK) begin
always @(posedge CLK) begin
// With GTLSIM defined, a write (ME && WE) only updates the array.
`ifdef GTLSIM
`ifdef GTLSIM
if (ME && WE) ram_core[ADR] <= D;
if (ME && WE) ram_core[ADR] <= D;
// This form of the write also drives Q to 'x - presumably the non-GTLSIM branch
// (the `else directive is not visible here; confirm).
if (ME && WE) begin ram_core[ADR] <= D; Q <= 'x; end
if (ME && WE) begin
ram_core[ADR] <= D;
Q <= 'x;
// Read: when enabled and not writing, Q is loaded from the addressed word.
if (ME && ~WE) Q <= ram_core[ADR];
if (ME && ~WE) Q <= ram_core[ADR];
@ -111,7 +117,7 @@ endmodule
// Invocations of the EL2_RAM macro, one per RAM size used with 39-bit words.
// NOTE(review): the two arguments presumably correspond to el2_ram's (depth, width) parameters;
// the macro definition is not visible in this listing - confirm.
// NOTE(review): before/after diff rendering - each invocation appears twice.
`EL2_RAM(4096, 39)
`EL2_RAM(4096, 39)
`EL2_RAM(3072, 39)
`EL2_RAM(3072, 39)
`EL2_RAM(2048, 39)
`EL2_RAM(2048, 39)
`EL2_RAM(1536, 39) // needed for the 48KB DCCM option
`EL2_RAM(1536, 39) // needed for the 48KB DCCM option
`EL2_RAM(1024, 39)
`EL2_RAM(1024, 39)
`EL2_RAM(768, 39)
`EL2_RAM(768, 39)
`EL2_RAM(512, 39)
`EL2_RAM(512, 39)
@ -26,311 +26,312 @@
module el2_lsu
module el2_lsu
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. This is used to flush the old packets only
input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. This is used to flush the old packets only
input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
// chicken signals
// chicken signals
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce
input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc
input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc
input logic [31:0] exu_lsu_rs1_d, // address rs operand
input logic [31:0] exu_lsu_rs1_d, // address rs operand
input logic [31:0] exu_lsu_rs2_d, // store data
input logic [31:0] exu_lsu_rs2_d, // store data
input logic [11:0] dec_lsu_offset_d, // address offset operand
input logic [11:0] dec_lsu_offset_d, // address offset operand
input el2_lsu_pkt_t lsu_p, // lsu control packet
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
output logic lsu_load_stall_any, // This is for blocking loads in the decode
output logic lsu_load_stall_any, // This is for blocking loads in the decode
output logic lsu_store_stall_any, // This is for blocking stores in the decode
output logic lsu_store_stall_any, // This is for blocking stores in the decode
output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage
output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage
output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA
output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA
output logic lsu_active, // Used to turn off top level clk
output logic lsu_active, // Used to turn off top level clk
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
output logic [ 1:0] lsu_fir_error, // Error during fast interrupt lookup
output logic lsu_single_ecc_error_incr, // Increment the ecc counter
output logic lsu_single_ecc_error_incr, // Increment the ecc counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic lsu_imprecise_error_load_any, // bus load imprecise error
output logic lsu_imprecise_error_load_any, // bus load imprecise error
output logic lsu_imprecise_error_store_any, // bus store imprecise error
output logic lsu_imprecise_error_store_any, // bus store imprecise error
output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address
output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address
// Non-blocking loads
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated
output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
output logic lsu_pmu_load_external_m, // PMU : Bus loads
output logic lsu_pmu_load_external_m, // PMU : Bus loads
output logic lsu_pmu_store_external_m, // PMU : Bus stores
output logic lsu_pmu_store_external_m, // PMU : Bus stores
output logic lsu_pmu_misaligned_m, // PMU : misaligned
output logic lsu_pmu_misaligned_m, // PMU : misaligned
output logic lsu_pmu_bus_trxn, // PMU : bus transaction
output logic lsu_pmu_bus_trxn, // PMU : bus transaction
output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus
output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus
output logic lsu_pmu_bus_error, // PMU : bus sending error back
output logic lsu_pmu_bus_error, // PMU : bus sending error back
output logic lsu_pmu_bus_busy, // PMU : bus is not ready
output logic lsu_pmu_bus_busy, // PMU : bus is not ready
// Trigger signals
// Trigger signals
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode
input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode
output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger)
output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger)
// DCCM ports
// DCCM ports
output logic dccm_wren, // DCCM write enable
output logic dccm_wren, // DCCM write enable
output logic dccm_rden, // DCCM read enable
output logic dccm_rden, // DCCM read enable
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read)
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read)
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank
// PIC ports
// PIC ports
output logic picm_wren, // PIC memory write enable
output logic picm_wren, // PIC memory write enable
output logic picm_rden, // PIC memory read enable
output logic picm_rden, // PIC memory read enable
output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward
output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wr_data, // PIC memory write data
output logic [31:0] picm_wr_data, // PIC memory write data
input logic [31:0] picm_rd_data, // PIC memory read/mask data
input logic [31:0] picm_rd_data, // PIC memory read/mask data
// AXI Write Channels
// AXI Write Channels
output logic lsu_axi_awvalid,
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [ 31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [ 3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [ 7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [ 2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic [ 1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [ 3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [ 2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic [ 3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic [ 7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [ 1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
// AXI Read Channels
output logic lsu_axi_arvalid,
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [ 31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [ 3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [ 7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [ 2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic [ 1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [ 3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [ 2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
output logic [ 3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [ 63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic [ 1:0] lsu_axi_rresp,
input logic lsu_axi_rlast,
input logic lsu_axi_rlast,
input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio
input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio
// DMA slave
// DMA slave
input logic dma_dccm_req, // DMA read/write to dccm
input logic dma_dccm_req, // DMA read/write to dccm
input logic [2:0] dma_mem_tag, // DMA request tag
input logic [ 2:0] dma_mem_tag, // DMA request tag
input logic [31:0] dma_mem_addr, // DMA address
input logic [31:0] dma_mem_addr, // DMA address
input logic [2:0] dma_mem_sz, // DMA access size
input logic [ 2:0] dma_mem_sz, // DMA access size
input logic dma_mem_write, // DMA access is a write
input logic dma_mem_write, // DMA access is a write
input logic [63:0] dma_mem_wdata, // DMA write data
input logic [63:0] dma_mem_wdata, // DMA write data
output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read
output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic [2:0] dccm_dma_rtag, // DMA request tag
output logic [ 2:0] dccm_dma_rtag, // DMA request tag
output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read
output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read
output logic dccm_ready, // lsu ready for DMA access
output logic dccm_ready, // lsu ready for DMA access
input logic scan_mode, // scan mode
input logic scan_mode, // scan mode
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l // reset, active low
input logic rst_l // reset, active low
logic lsu_dccm_rden_m;
logic lsu_dccm_rden_m;
logic lsu_dccm_rden_r;
logic lsu_dccm_rden_r;
logic [31:0] store_data_m;
logic [31:0] store_data_m;
logic [31:0] store_data_r;
logic [31:0] store_data_r;
logic [31:0] store_data_hi_r, store_data_lo_r;
logic [31:0] store_data_hi_r, store_data_lo_r;
logic [31:0] store_datafn_hi_r, store_datafn_lo_r;
logic [31:0] store_datafn_hi_r, store_datafn_lo_r;
logic [31:0] sec_data_lo_m, sec_data_hi_m;
logic [31:0] sec_data_lo_m, sec_data_hi_m;
logic [31:0] sec_data_lo_r, sec_data_hi_r;
logic [31:0] sec_data_lo_r, sec_data_hi_r;
logic [31:0] lsu_ld_data_m;
logic [31:0] lsu_ld_data_m;
logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m;
logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m;
logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m;
logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m;
logic lsu_single_ecc_error_m;
logic lsu_single_ecc_error_m;
logic lsu_double_ecc_error_m;
logic lsu_double_ecc_error_m;
logic [31:0] lsu_ld_data_r;
logic [31:0] lsu_ld_data_r;
logic [31:0] lsu_ld_data_corr_r;
logic [31:0] lsu_ld_data_corr_r;
logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r;
logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r;
logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r;
logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r;
logic single_ecc_error_hi_r, single_ecc_error_lo_r;
logic single_ecc_error_hi_r, single_ecc_error_lo_r;
logic lsu_single_ecc_error_r;
logic lsu_single_ecc_error_r;
logic lsu_double_ecc_error_r;
logic lsu_double_ecc_error_r;
logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff;
logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff;
logic [31:0] picm_mask_data_m;
logic [31:0] picm_mask_data_m;
logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r;
logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r;
logic [31:0] end_addr_d, end_addr_m, end_addr_r;
logic [31:0] end_addr_d, end_addr_m, end_addr_r;
el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r;
el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r;
logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r;
logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r;
// Store Buffer signals
// Store Buffer signals
logic store_stbuf_reqvld_r;
logic store_stbuf_reqvld_r;
logic ldst_stbuf_reqvld_r;
logic ldst_stbuf_reqvld_r;
logic lsu_commit_r;
logic lsu_commit_r;
logic lsu_exc_m;
logic lsu_exc_m;
logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r;
logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r;
logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r;
logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r;
logic ldst_dual_d, ldst_dual_m, ldst_dual_r;
logic ldst_dual_d, ldst_dual_m, ldst_dual_r;
logic addr_external_m;
logic addr_external_m;
logic stbuf_reqvld_any;
logic stbuf_reqvld_any;
logic stbuf_reqvld_flushed_any;
logic stbuf_reqvld_flushed_any;
logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any;
logic [ pt.LSU_SB_BITS-1:0] stbuf_addr_any;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any;
logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any;
logic [ pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff;
logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff;
logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff;
logic lsu_cmpen_m;
logic lsu_cmpen_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m;
logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m;
logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m;
logic lsu_stbuf_commit_any;
logic lsu_stbuf_commit_any;
logic lsu_stbuf_empty_any; // This is for blocking loads
logic lsu_stbuf_empty_any; // This is for blocking loads
logic lsu_stbuf_full_any;
logic lsu_stbuf_full_any;
// Bus signals
// Bus signals
logic lsu_busreq_r;
logic lsu_busreq_r;
logic lsu_bus_buffer_pend_any;
logic lsu_bus_buffer_pend_any;
logic lsu_bus_buffer_empty_any;
logic lsu_bus_buffer_empty_any;
logic lsu_bus_buffer_full_any;
logic lsu_bus_buffer_full_any;
logic lsu_busreq_m;
logic lsu_busreq_m;
logic [31:0] bus_read_data_m;
logic [ 31:0] bus_read_data_m;
logic flush_m_up, flush_r;
logic flush_m_up, flush_r;
logic is_sideeffects_m;
logic is_sideeffects_m;
logic [2:0] dma_mem_tag_d, dma_mem_tag_m;
logic [2:0] dma_mem_tag_d, dma_mem_tag_m;
logic ldst_nodma_mtor;
logic ldst_nodma_mtor;
logic dma_dccm_wen, dma_pic_wen;
logic dma_dccm_wen, dma_pic_wen;
logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi;
logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi;
logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi;
logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi;
// Clocks
// Clocks
logic lsu_busm_clken;
logic lsu_busm_clken;
logic lsu_bus_obuf_c1_clken;
logic lsu_bus_obuf_c1_clken;
logic lsu_c1_m_clk, lsu_c1_r_clk;
logic lsu_c1_m_clk, lsu_c1_r_clk;
logic lsu_c2_m_clk, lsu_c2_r_clk;
logic lsu_c2_m_clk, lsu_c2_r_clk;
logic lsu_store_c1_m_clk, lsu_store_c1_r_clk;
logic lsu_store_c1_m_clk, lsu_store_c1_r_clk;
logic lsu_stbuf_c1_clk;
logic lsu_stbuf_c1_clk;
logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk;
logic lsu_busm_clk;
logic lsu_busm_clk;
logic lsu_free_c2_clk;
logic lsu_free_c2_clk;
logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m;
logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m;
logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r;
logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r;
assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]);
assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]);
assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]);
assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]);
el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*);
el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*);
// block stores in decode - for either bus or stbuf reasons
// block stores in decode - for either bus or stbuf reasons
assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff;
assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage
assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage
// Ready to accept dma trxns
// Ready to accept dma trxns
// There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m
// There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m
assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0];
assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0];
assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) &;
assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) &;
assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff);
assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff);
assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword
assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword
assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d;
assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d;
assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
// Generate per cycle flush signals
// Generate per cycle flush signals
assign flush_m_up = dec_tlu_flush_lower_r;
assign flush_m_up = dec_tlu_flush_lower_r;
assign flush_r = dec_tlu_i0_kill_writeb_r;
assign flush_r = dec_tlu_i0_kill_writeb_r;
// lsu idle
// lsu idle
// lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence.
// lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence.
// Indicates non-idle if there is an instruction valid in d-r or read/write buffers are non-empty since they can come with error
// Indicates non-idle if there is an instruction valid in d-r or read/write buffers are non-empty since they can come with error
// Store buffer now has only non-dma dccm stores
// Store buffer now has only non-dma dccm stores
// stbuf_empty not needed since it has only dccm stores
// stbuf_empty not needed since it has only dccm stores
assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) |
assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) |
(lsu_pkt_r.valid & ~lsu_pkt_r.dma)) &
(lsu_pkt_r.valid & ~lsu_pkt_r.dma)) &
assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock
assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock
// Instantiate the store buffer
// Instantiate the store buffer
assign store_stbuf_reqvld_r = lsu_pkt_r.valid & & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | (( | lsu_pkt_r.half) & ~lsu_double_ecc_error_r));
assign store_stbuf_reqvld_r = lsu_pkt_r.valid & & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | (( | lsu_pkt_r.half) & ~lsu_double_ecc_error_r));
// Disable Forwarding for now
// Disable Forwarding for now
assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | & (addr_in_dccm_m | addr_in_pic_m);
assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | & (addr_in_dccm_m | addr_in_pic_m);
// Bus signals
// Bus signals
assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int;
assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int;
// Dual signals
// Dual signals
assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]);
assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]);
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
// PMU signals
// PMU signals
assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0])));
assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0])));
assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m;
assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m;
assign lsu_pmu_store_external_m = lsu_pkt_m.valid & & addr_external_m;
assign lsu_pmu_store_external_m = lsu_pkt_m.valid & & addr_external_m;
el2_lsu_dccm_ctl #(.pt(pt)) dccm_ctl (
el2_lsu_dccm_ctl #(
) dccm_ctl (
@ -339,34 +340,42 @@ import el2_pkg::*;
el2_lsu_stbuf #(.pt(pt)) stbuf (
el2_lsu_stbuf #(
) stbuf (
el2_lsu_ecc #(.pt(pt)) ecc (
el2_lsu_ecc #(
) ecc (
el2_lsu_trigger #(.pt(pt)) trigger (
el2_lsu_trigger #(
) trigger (
// Clk domain
// Clk domain
el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*);
el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*);
// Bus interface
// Bus interface
el2_lsu_bus_intf #(.pt(pt)) bus_intf (
el2_lsu_bus_intf #(
) bus_intf (
.lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}),
.lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}),
.lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}),
@ -375,10 +384,20 @@ import el2_pkg::*;
.store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}),
.store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}),
rvdff #(3) dma_mem_tag_mff (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(3) dma_mem_tag_mff (
rvdff #(2) lsu_raw_fwd_r_ff (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), .clk(lsu_c2_r_clk));
.din (dma_mem_tag_d[2:0]),
.clk (lsu_c1_m_clk)
rvdff #(2) lsu_raw_fwd_r_ff (
.din ({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}),
.dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}),
.clk (lsu_c2_r_clk)
endmodule // el2_lsu
endmodule // el2_lsu
@ -23,111 +23,119 @@
module el2_lsu_addrcheck
module el2_lsu_addrcheck
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic lsu_c2_m_clk, // clock
input logic lsu_c2_m_clk, // clock
input logic rst_l, // reset
input logic rst_l, // reset
input logic [31:0] start_addr_d, // start address for lsu
input logic [31:0] start_addr_d, // start address for lsu
input logic [31:0] end_addr_d, // end address for lsu
input logic [31:0] end_addr_d, // end address for lsu
input el2_lsu_pkt_t lsu_pkt_d, // packet in d
input el2_lsu_pkt_t lsu_pkt_d, // packet in d
input logic [31:0] dec_tlu_mrac_ff, // CSR read
input logic [31:0] dec_tlu_mrac_ff, // CSR read
input logic [3:0] rs1_region_d, // address rs operand [31:28]
input logic [ 3:0] rs1_region_d, // address rs operand [31:28]
input logic [31:0] rs1_d, // address rs operand
input logic [31:0] rs1_d, // address rs operand
output logic is_sideeffects_m, // is side-effects space
output logic is_sideeffects_m, // is side-effects space
output logic addr_in_dccm_d, // address in dccm
output logic addr_in_dccm_d, // address in dccm
output logic addr_in_pic_d, // address in pic
output logic addr_in_pic_d, // address in pic
output logic addr_external_d, // address in external
output logic addr_external_d, // address in external
output logic access_fault_d, // access fault
output logic access_fault_d, // access fault
output logic misaligned_fault_d, // misaligned
output logic misaligned_fault_d, // misaligned
output logic [3:0] exc_mscause_d, // mscause for access/misaligned faults
output logic [3:0] exc_mscause_d, // mscause for access/misaligned faults
output logic fir_dccm_access_error_d, // Fast interrupt dccm access error
output logic fir_dccm_access_error_d, // Fast interrupt dccm access error
output logic fir_nondccm_access_error_d,// Fast interrupt non-dccm access error
output logic fir_nondccm_access_error_d, // Fast interrupt non-dccm access error
input logic scan_mode // Scan mode
input logic scan_mode // Scan mode
logic non_dccm_access_ok;
logic non_dccm_access_ok;
logic is_sideeffects_d, is_aligned_d;
logic is_sideeffects_d, is_aligned_d;
logic start_addr_in_dccm_d, end_addr_in_dccm_d;
logic start_addr_in_dccm_d, end_addr_in_dccm_d;
logic start_addr_in_dccm_region_d, end_addr_in_dccm_region_d;
logic start_addr_in_dccm_region_d, end_addr_in_dccm_region_d;
logic start_addr_in_pic_d, end_addr_in_pic_d;
logic start_addr_in_pic_d, end_addr_in_pic_d;
logic start_addr_in_pic_region_d, end_addr_in_pic_region_d;
logic start_addr_in_pic_region_d, end_addr_in_pic_region_d;
logic [4:0] csr_idx;
logic [4:0] csr_idx;
logic addr_in_iccm;
logic addr_in_iccm;
logic start_addr_dccm_or_pic;
logic start_addr_dccm_or_pic;
logic base_reg_dccm_or_pic;
logic base_reg_dccm_or_pic;
logic unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d;
logic unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d;
logic regcross_misaligned_fault_d, sideeffect_misaligned_fault_d;
logic regcross_misaligned_fault_d, sideeffect_misaligned_fault_d;
logic [3:0] access_fault_mscause_d;
logic [3:0] access_fault_mscause_d;
logic [3:0] misaligned_fault_mscause_d;
logic [3:0] misaligned_fault_mscause_d;
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable
// Start address check
// Start address check
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
rvrangecheck #(
.CCM_SIZE(pt.DCCM_SIZE)) start_addr_dccm_rangecheck (
) start_addr_dccm_rangecheck (
// End address check
// End address check
rvrangecheck #(.CCM_SADR(pt.DCCM_SADR),
rvrangecheck #(
.CCM_SIZE(pt.DCCM_SIZE)) end_addr_dccm_rangecheck (
) end_addr_dccm_rangecheck (
end else begin: Gen_dccm_disable // block: Gen_dccm_enable
assign start_addr_in_dccm_d = '0;
assign start_addr_in_dccm_region_d = '0;
end else begin : Gen_dccm_disable // block: Gen_dccm_enable
assign end_addr_in_dccm_d = '0;
assign start_addr_in_dccm_d = '0;
assign end_addr_in_dccm_region_d = '0;
assign start_addr_in_dccm_region_d = '0;
assign end_addr_in_dccm_d = '0;
assign end_addr_in_dccm_region_d = '0;
if (pt.ICCM_ENABLE == 1) begin : check_iccm
if (pt.ICCM_ENABLE == 1) begin : check_iccm
assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION);
assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION);
end else begin
end else begin
assign addr_in_iccm = 1'b0;
assign addr_in_iccm = 1'b0;
// PIC memory check
// PIC memory check
// Start address check
// Start address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
rvrangecheck #(
.CCM_SIZE(pt.PIC_SIZE)) start_addr_pic_rangecheck (
) start_addr_pic_rangecheck (
// End address check
// End address check
rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR),
rvrangecheck #(
.CCM_SIZE(pt.PIC_SIZE)) end_addr_pic_rangecheck (
) end_addr_pic_rangecheck (
assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d;
assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d;
assign base_reg_dccm_or_pic = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION);
assign base_reg_dccm_or_pic = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION);
assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d);
assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d);
assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d);
assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d);
assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d);
assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d);
assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1};
assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1};
assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & ( | lsu_pkt_d.load); //every region has the 2 LSB indicating ( 1: sideeffects/no_side effects, and 0: cacheable ). Ignored in internal regions
assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & ( | lsu_pkt_d.load); //every region has the 2 LSB indicating ( 1: sideeffects/no_side effects, and 0: cacheable ). Ignored in internal regions
assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) |
assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) |
(lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) |
(lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) |
(((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
(((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) |
(pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
(pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) |
(pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
(pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) |
@ -145,47 +153,54 @@ import el2_pkg::*;
(pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
(pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) |
(pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))));
(pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | pt.DATA_ACCESS_MASK7))));
// Access fault logic
// Access fault logic
// 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region
// 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region
// 1. Uncorrectable (double bit) ECC error
// 1. Uncorrectable (double bit) ECC error
// 3. Address is not in a populated non-dccm region
// 3. Address is not in a populated non-dccm region
// 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
// 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
// 6. Ld/St access to picm are not word aligned or word size
// 6. Ld/St access to picm are not word aligned or word size
assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa
assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size
assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size
if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin
if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
(end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
(end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset
(start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region
(start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region
(start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. DCCM -> PIC cross when DCCM/PIC in same region
(start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. DCCM -> PIC cross when DCCM/PIC in same region
assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
end else begin
end else begin
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset
(start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset
(start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset
(end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset
(end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset
assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region
assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0;
assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0;
// Misaligned happens due to 2 reasons
// Misaligned happens due to 2 reasons
// 0. Region cross
// 0. Region cross
// 1. sideeffects access which are not aligned
// 1. sideeffects access which are not aligned
assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]);
assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]);
assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d);
assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d);
assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma;
assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0;
assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0;
assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0];
assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0];
// Fast interrupt error logic
// Fast interrupt error logic
assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) |
assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) |
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
(end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int;
rvdff #(.WIDTH(1)) is_sideeffects_mff (.din(is_sideeffects_d), .dout(is_sideeffects_m), .clk(lsu_c2_m_clk), .*);
rvdff #(
) is_sideeffects_mff (
.din (is_sideeffects_d),
.clk (lsu_c2_m_clk),
endmodule // el2_lsu_addrcheck
endmodule // el2_lsu_addrcheck
File diff suppressed because it is too large
Load Diff
@ -23,248 +23,267 @@
module el2_lsu_bus_intf
module el2_lsu_bus_intf
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic scan_mode, // scan mode
input logic scan_mode, // scan mode
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals
input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing
input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus
// various clocks needed for the bus reads and writes
// various clocks needed for the bus reads and writes
input logic lsu_bus_obuf_c1_clken, // obuf clock enable
input logic lsu_bus_obuf_c1_clken, // obuf clock enable
input logic lsu_busm_clken, // bus clock enable
input logic lsu_busm_clken, // bus clock enable
input logic lsu_c1_r_clk, // r pipe single pulse clock
input logic lsu_c1_r_clk, // r pipe single pulse clock
input logic lsu_c2_r_clk, // r pipe double pulse clock
input logic lsu_c2_r_clk, // r pipe double pulse clock
input logic lsu_bus_ibuf_c1_clk, // ibuf single pulse clock
input logic lsu_bus_ibuf_c1_clk, // ibuf single pulse clock
input logic lsu_bus_obuf_c1_clk, // obuf single pulse clock
input logic lsu_bus_obuf_c1_clk, // obuf single pulse clock
input logic lsu_bus_buf_c1_clk, // buf single pulse clock
input logic lsu_bus_buf_c1_clk, // buf single pulse clock
input logic lsu_free_c2_clk, // free clock double pulse clock
input logic lsu_free_c2_clk, // free clock double pulse clock
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic lsu_busm_clk, // bus clock
input logic lsu_busm_clk, // bus clock
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic lsu_busreq_m, // bus request is in m
input logic lsu_busreq_m, // bus request is in m
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe
input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe
input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe
input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe
input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe
input logic [31:0] end_addr_m, // lsu address flowing down the pipe
input logic [31:0] end_addr_m, // lsu address flowing down the pipe
input logic [31:0] end_addr_r, // lsu address flowing down the pipe
input logic [31:0] end_addr_r, // lsu address flowing down the pipe
input logic [31:0] store_data_r, // store data flowing down the pipe
input logic [31:0] store_data_r, // store data flowing down the pipe
input logic dec_tlu_force_halt,
input logic dec_tlu_force_halt,
input logic lsu_commit_r, // lsu instruction in r commits
input logic lsu_commit_r, // lsu instruction in r commits
input logic is_sideeffects_m, // lsu attribute is side_effects
input logic is_sideeffects_m, // lsu attribute is side_effects
input logic flush_m_up, // flush
input logic flush_m_up, // flush
input logic flush_r, // flush
input logic flush_r, // flush
input logic ldst_dual_d, ldst_dual_m, ldst_dual_r,
input logic ldst_dual_d,
output logic lsu_busreq_r, // bus request is in r
output logic lsu_busreq_r, // bus request is in r
output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
output logic lsu_bus_buffer_full_any, // write buffer is full
output logic lsu_bus_buffer_full_any, // write buffer is full
output logic lsu_bus_buffer_empty_any, // write buffer is empty
output logic lsu_bus_buffer_empty_any, // write buffer is empty
output logic [31:0] bus_read_data_m, // the bus return data
output logic [31:0] bus_read_data_m, // the bus return data
output logic lsu_imprecise_error_load_any, // imprecise load bus error
output logic lsu_imprecise_error_load_any, // imprecise load bus error
output logic lsu_imprecise_error_store_any, // imprecise store bus error
output logic lsu_imprecise_error_store_any, // imprecise store bus error
output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error
output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error
// Non-blocking loads
// Non-blocking loads
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the entry which needs to be invalidated
output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam
output logic lsu_nonblock_load_data_error,// non block load has an error
output logic lsu_nonblock_load_data_error, // non block load has an error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
output logic [31:0] lsu_nonblock_load_data, // Data of the non block load
// PMU events
// PMU events
output logic lsu_pmu_bus_trxn,
output logic lsu_pmu_bus_trxn,
output logic lsu_pmu_bus_misaligned,
output logic lsu_pmu_bus_misaligned,
output logic lsu_pmu_bus_error,
output logic lsu_pmu_bus_error,
output logic lsu_pmu_bus_busy,
output logic lsu_pmu_bus_busy,
// AXI Write Channels
// AXI Write Channels
output logic lsu_axi_awvalid,
output logic lsu_axi_awvalid,
input logic lsu_axi_awready,
input logic lsu_axi_awready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid,
output logic [31:0] lsu_axi_awaddr,
output logic [ 31:0] lsu_axi_awaddr,
output logic [3:0] lsu_axi_awregion,
output logic [ 3:0] lsu_axi_awregion,
output logic [7:0] lsu_axi_awlen,
output logic [ 7:0] lsu_axi_awlen,
output logic [2:0] lsu_axi_awsize,
output logic [ 2:0] lsu_axi_awsize,
output logic [1:0] lsu_axi_awburst,
output logic [ 1:0] lsu_axi_awburst,
output logic lsu_axi_awlock,
output logic lsu_axi_awlock,
output logic [3:0] lsu_axi_awcache,
output logic [ 3:0] lsu_axi_awcache,
output logic [2:0] lsu_axi_awprot,
output logic [ 2:0] lsu_axi_awprot,
output logic [3:0] lsu_axi_awqos,
output logic [ 3:0] lsu_axi_awqos,
output logic lsu_axi_wvalid,
output logic lsu_axi_wvalid,
input logic lsu_axi_wready,
input logic lsu_axi_wready,
output logic [63:0] lsu_axi_wdata,
output logic [63:0] lsu_axi_wdata,
output logic [7:0] lsu_axi_wstrb,
output logic [ 7:0] lsu_axi_wstrb,
output logic lsu_axi_wlast,
output logic lsu_axi_wlast,
input logic lsu_axi_bvalid,
input logic lsu_axi_bvalid,
output logic lsu_axi_bready,
output logic lsu_axi_bready,
input logic [1:0] lsu_axi_bresp,
input logic [ 1:0] lsu_axi_bresp,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid,
// AXI Read Channels
// AXI Read Channels
output logic lsu_axi_arvalid,
output logic lsu_axi_arvalid,
input logic lsu_axi_arready,
input logic lsu_axi_arready,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid,
output logic [31:0] lsu_axi_araddr,
output logic [ 31:0] lsu_axi_araddr,
output logic [3:0] lsu_axi_arregion,
output logic [ 3:0] lsu_axi_arregion,
output logic [7:0] lsu_axi_arlen,
output logic [ 7:0] lsu_axi_arlen,
output logic [2:0] lsu_axi_arsize,
output logic [ 2:0] lsu_axi_arsize,
output logic [1:0] lsu_axi_arburst,
output logic [ 1:0] lsu_axi_arburst,
output logic lsu_axi_arlock,
output logic lsu_axi_arlock,
output logic [3:0] lsu_axi_arcache,
output logic [ 3:0] lsu_axi_arcache,
output logic [2:0] lsu_axi_arprot,
output logic [ 2:0] lsu_axi_arprot,
output logic [3:0] lsu_axi_arqos,
output logic [ 3:0] lsu_axi_arqos,
input logic lsu_axi_rvalid,
input logic lsu_axi_rvalid,
output logic lsu_axi_rready,
output logic lsu_axi_rready,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid,
input logic [63:0] lsu_axi_rdata,
input logic [ 63:0] lsu_axi_rdata,
input logic [1:0] lsu_axi_rresp,
input logic [ 1:0] lsu_axi_rresp,
input logic lsu_bus_clk_en
input logic lsu_bus_clk_en
logic lsu_bus_clk_en_q;
logic lsu_bus_clk_en_q;
logic [3:0] ldst_byteen_m, ldst_byteen_r;
logic [3:0] ldst_byteen_m, ldst_byteen_r;
logic [7:0] ldst_byteen_ext_m, ldst_byteen_ext_r;
logic [7:0] ldst_byteen_ext_m, ldst_byteen_ext_r;
logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r;
logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r;
logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r;
logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r;
logic is_sideeffects_r;
logic is_sideeffects_r;
logic [63:0] store_data_ext_r;
logic [63:0] store_data_ext_r;
logic [31:0] store_data_hi_r;
logic [31:0] store_data_hi_r;
logic [31:0] store_data_lo_r;
logic [31:0] store_data_lo_r;
logic addr_match_dw_lo_r_m;
logic addr_match_dw_lo_r_m;
logic addr_match_word_lo_r_m;
logic addr_match_word_lo_r_m;
logic no_word_merge_r, no_dword_merge_r;
logic no_word_merge_r, no_dword_merge_r;
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [31:0] ld_fwddata_rpipe_lo;
logic [31:0] ld_fwddata_rpipe_lo;
logic [31:0] ld_fwddata_rpipe_hi;
logic [31:0] ld_fwddata_rpipe_hi;
logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi;
logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi;
logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi;
logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi;
logic [63:0] ld_fwddata_lo, ld_fwddata_hi;
logic [63:0] ld_fwddata_lo, ld_fwddata_hi;
logic [63:0] ld_fwddata_m;
logic [63:0] ld_fwddata_m;
logic ld_full_hit_hi_m, ld_full_hit_lo_m;
logic ld_full_hit_hi_m, ld_full_hit_lo_m;
logic ld_full_hit_m;
logic ld_full_hit_m;
assign ldst_byteen_m[3:0] = ({4{}} & 4'b0001) |
assign ldst_byteen_m[3:0] = ({4{}} & 4'b0001) |
({4{lsu_pkt_m.half}} & 4'b0011) |
({4{lsu_pkt_m.half}} & 4'b0011) |
({4{lsu_pkt_m.word}} & 4'b1111);
({4{lsu_pkt_m.word}} & 4'b1111);
// Read/Write Buffer
// Read/Write Buffer
el2_lsu_bus_buffer #(.pt(pt)) bus_buffer (
el2_lsu_bus_buffer #(.pt(pt)) bus_buffer (.*);
// Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if it cannot be coalesced
// Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if it cannot be coalesced
assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]);
assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]);
assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2]^lsu_addr_m[2]);
assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2] ^ lsu_addr_m[2]);
assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m);
assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m);
assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m);
assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m);
// Create Hi/Lo signals
// Create Hi/Lo signals
assign ldst_byteen_ext_m[7:0] = {4'b0,ldst_byteen_m[3:0]} << lsu_addr_m[1:0];
assign ldst_byteen_ext_m[7:0] = {4'b0, ldst_byteen_m[3:0]} << lsu_addr_m[1:0];
assign ldst_byteen_ext_r[7:0] = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
assign ldst_byteen_ext_r[7:0] = {4'b0, ldst_byteen_r[3:0]} << lsu_addr_r[1:0];
assign store_data_ext_r[63:0] = {32'b0,store_data_r[31:0]} << {lsu_addr_r[1:0],3'b0};
assign store_data_ext_r[63:0] = {32'b0, store_data_r[31:0]} << {lsu_addr_r[1:0], 3'b0};
assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4];
assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4];
assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0];
assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0];
assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4];
assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4];
assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0];
assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0];
assign store_data_hi_r[31:0] = store_data_ext_r[63:32];
assign store_data_hi_r[31:0] = store_data_ext_r[63:32];
assign store_data_lo_r[31:0] = store_data_ext_r[31:0];
assign store_data_lo_r[31:0] = store_data_ext_r[31:0];
assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & & lsu_busreq_m & lsu_busreq_r;
for (genvar i=0; i<4; i++) begin: GenBusBufFwd
for (genvar i = 0; i < 4; i++) begin : GenBusBufFwd
assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i];
assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i];
assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] |
assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] |
assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] |
assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] |
assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
// Final muxing between m/r
// Final muxing between m/r
assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)];
assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)];
assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)];
assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)];
always_comb begin
always_comb begin
ld_full_hit_lo_m = 1'b1;
ld_full_hit_lo_m = 1'b1;
ld_full_hit_hi_m = 1'b1;
ld_full_hit_hi_m = 1'b1;
for (int i=0; i<4; i++) begin
for (int i = 0; i < 4; i++) begin
ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]);
ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]);
ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]);
ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]);
// This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf)
// This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf)
assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m;
assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m;
assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_m[1:0]);
assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8 * lsu_addr_m[1:0]);
assign bus_read_data_m[31:0] = ld_fwddata_m[31:0];
assign bus_read_data_m[31:0] = ld_fwddata_m[31:0];
// Fifo flops
// Fifo flops
rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(active_clk), .*);
rvdff #(
) clken_ff (
.din (lsu_bus_clk_en),
.clk (active_clk),
rvdff #(.WIDTH(1)) is_sideeffects_rff (.din(is_sideeffects_m), .dout(is_sideeffects_r), .clk(lsu_c1_r_clk), .*);
rvdff #(
) is_sideeffects_rff (
.din (is_sideeffects_m),
.clk (lsu_c1_r_clk),
rvdff #(4) lsu_byten_rff (.*, .din(ldst_byteen_m[3:0]), .dout(ldst_byteen_r[3:0]), .clk(lsu_c1_r_clk));
rvdff #(4) lsu_byten_rff (
.din (ldst_byteen_m[3:0]),
.clk (lsu_c1_r_clk)
endmodule // el2_lsu_bus_intf
endmodule // el2_lsu_bus_intf
@ -24,117 +24,180 @@
// el2_lsu_clkdomain: LSU clock-gating block.
// Builds clock enables from the LSU packet valids, DMA / bus-buffer / store-buffer
// activity and clk_override, flops a few of them, and hands them to
// rvoclkhdr / rvclkhdr clock-header instances whose l1clk outputs are the gated
// clocks exported on the output ports.
// NOTE(review): in this view every line appears twice (before / after formatting)
// and several expressions and instantiation port lists are cut short; confirm
// against the complete file before relying on any truncated statement.
module el2_lsu_clkdomain
module el2_lsu_clkdomain
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in.
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt
// Inputs
// Inputs
input logic clk_override, // chicken bit to turn off clock gating
input logic clk_override, // chicken bit to turn off clock gating
input logic dma_dccm_req, // dma is active
input logic dma_dccm_req, // dma is active
input logic ldst_stbuf_reqvld_r, // allocating in to the store queue
input logic ldst_stbuf_reqvld_r, // allocating in to the store queue
input logic stbuf_reqvld_any, // stbuf is draining
input logic stbuf_reqvld_any, // stbuf is draining
input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed
input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed
input logic lsu_busreq_r, // busreq in r
input logic lsu_busreq_r, // busreq in r
input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry
input logic lsu_bus_buffer_empty_any, // external bus buffer is empty
input logic lsu_bus_buffer_empty_any, // external bus buffer is empty
input logic lsu_stbuf_empty_any, // stbuf is empty
input logic lsu_stbuf_empty_any, // stbuf is empty
input logic lsu_bus_clk_en, // bus clock enable
input logic lsu_bus_clk_en, // bus clock enable
input el2_lsu_pkt_t lsu_p, // lsu packet in decode
input el2_lsu_pkt_t lsu_p, // lsu packet in decode
input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d
input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m
input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r
input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r
// Outputs
// Outputs
output logic lsu_bus_obuf_c1_clken, // obuf clock enable
output logic lsu_bus_obuf_c1_clken, // obuf clock enable
output logic lsu_busm_clken, // bus clock enable
output logic lsu_busm_clken, // bus clock enable
output logic lsu_c1_m_clk, // m pipe single pulse clock
output logic lsu_c1_m_clk, // m pipe single pulse clock
output logic lsu_c1_r_clk, // r pipe single pulse clock
output logic lsu_c1_r_clk, // r pipe single pulse clock
output logic lsu_c2_m_clk, // m pipe double pulse clock
output logic lsu_c2_m_clk, // m pipe double pulse clock
output logic lsu_c2_r_clk, // r pipe double pulse clock
output logic lsu_c2_r_clk, // r pipe double pulse clock
output logic lsu_store_c1_m_clk, // store in m
output logic lsu_store_c1_m_clk, // store in m
output logic lsu_store_c1_r_clk, // store in r
output logic lsu_store_c1_r_clk, // store in r
output logic lsu_stbuf_c1_clk, // stbuf clock (gated by lsu_stbuf_c1_clken)
output logic lsu_stbuf_c1_clk, // stbuf clock (gated by lsu_stbuf_c1_clken)
output logic lsu_bus_obuf_c1_clk, // obuf clock (gated by lsu_bus_obuf_c1_clken)
output logic lsu_bus_obuf_c1_clk, // obuf clock (gated by lsu_bus_obuf_c1_clken)
output logic lsu_bus_ibuf_c1_clk, // ibuf clock
output logic lsu_bus_ibuf_c1_clk, // ibuf clock
output logic lsu_bus_buf_c1_clk, // bus buffer clock (gated by lsu_bus_buf_c1_clken)
output logic lsu_bus_buf_c1_clk, // bus buffer clock (gated by lsu_bus_buf_c1_clken)
output logic lsu_busm_clk, // bus clock
output logic lsu_busm_clk, // bus clock
output logic lsu_free_c2_clk, // free double pulse clock
output logic lsu_free_c2_clk, // free double pulse clock
input logic scan_mode // Scan mode
input logic scan_mode // Scan mode
// Internal clock enables; the *_q signals are the flopped (one cycle delayed) copies.
logic lsu_c1_m_clken, lsu_c1_r_clken;
logic lsu_c1_m_clken, lsu_c1_r_clken;
logic lsu_c2_m_clken, lsu_c2_r_clken;
logic lsu_c2_m_clken, lsu_c2_r_clken;
logic lsu_c1_m_clken_q, lsu_c1_r_clken_q;
logic lsu_c1_m_clken_q, lsu_c1_r_clken_q;
logic lsu_store_c1_m_clken, lsu_store_c1_r_clken;
logic lsu_store_c1_m_clken, lsu_store_c1_r_clken;
logic lsu_stbuf_c1_clken;
logic lsu_stbuf_c1_clken;
logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken;
logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken;
logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken;
logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken;
// Clock Enable logic
// Clock Enable logic
// clk_override is ORed into every enable below; the obuf and busm enables are
// additionally qualified by lsu_bus_clk_en, so the override alone does not open them.
// c1 enables: m stage opens on a valid decode packet or a DMA DCCM request,
// r stage opens on a valid m packet or the previous cycle's m enable.
assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override;
assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override;
assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override;
assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override;
// c2 enables: the c1 enable of this cycle OR of the previous cycle (flopped _q copy).
assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override;
assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override;
assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override;
assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override;
// NOTE(review): the right-hand operand of '&' is missing from both store enables
// in this view (presumably a store qualifier from the lsu packet) - confirm against the full file.
assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & | clk_override) ;
assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & | clk_override);
assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & | clk_override) ;
assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & | clk_override);
assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override;
assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override;
assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override;
assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override;
assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override;
assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override;
// Free clock enable: any LSU packet valid in decode/d/m/r, or bus buffer / stbuf not empty.
assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) |
assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) |
~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override;
~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override;
assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override;
assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override;
// Flops
// Flops
// Delayed copies of the c1 enables. The free enable is flopped on active_clk;
// the m/r enables are flopped on the gated lsu_free_c2_clk.
rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(active_clk), .*);
rvdff #(1) lsu_free_c1_clkenff (
.din (lsu_free_c1_clken),
.clk (active_clk),
rvdff #(1) lsu_c1_m_clkenff (.din(lsu_c1_m_clken), .dout(lsu_c1_m_clken_q), .clk(lsu_free_c2_clk), .*);
rvdff #(1) lsu_c1_m_clkenff (
rvdff #(1) lsu_c1_r_clkenff (.din(lsu_c1_r_clken), .dout(lsu_c1_r_clken_q), .clk(lsu_free_c2_clk), .*);
.din (lsu_c1_m_clken),
.clk (lsu_free_c2_clk),
rvdff #(1) lsu_c1_r_clkenff (
.din (lsu_c1_r_clken),
.clk (lsu_free_c2_clk),
// Clock Headers
// Clock Headers
// One clock header per enable: .en takes the enable, .l1clk drives the gated clock
// output; remaining ports are connected implicitly through .* in the one-line form.
rvoclkhdr lsu_c1m_cgc ( .en(lsu_c1_m_clken), .l1clk(lsu_c1_m_clk), .* );
rvoclkhdr lsu_c1m_cgc (
rvoclkhdr lsu_c1r_cgc ( .en(lsu_c1_r_clken), .l1clk(lsu_c1_r_clk), .* );
rvoclkhdr lsu_c1r_cgc (
rvoclkhdr lsu_c2m_cgc ( .en(lsu_c2_m_clken), .l1clk(lsu_c2_m_clk), .* );
rvoclkhdr lsu_c2m_cgc (
rvoclkhdr lsu_c2r_cgc ( .en(lsu_c2_r_clken), .l1clk(lsu_c2_r_clk), .* );
rvoclkhdr lsu_c2r_cgc (
rvoclkhdr lsu_store_c1m_cgc (.en(lsu_store_c1_m_clken), .l1clk(lsu_store_c1_m_clk), .*);
rvoclkhdr lsu_store_c1m_cgc (
rvoclkhdr lsu_store_c1r_cgc (.en(lsu_store_c1_r_clken), .l1clk(lsu_store_c1_r_clk), .*);
rvoclkhdr lsu_store_c1r_cgc (
rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* );
rvoclkhdr lsu_stbuf_c1_cgc (
rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* );
rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* );
rvoclkhdr lsu_bus_ibuf_c1_cgc (
rvoclkhdr lsu_bus_buf_c1_cgc (
// Bus master clock enable: bus buffer not empty or a bus request in r, qualified by lsu_bus_clk_en.
assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en;
// The two bus-side clocks use rvclkhdr rather than rvoclkhdr.
rvclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* );
rvclkhdr lsu_bus_obuf_c1_cgc (
rvclkhdr lsu_busm_cgc (.en(lsu_busm_clken), .l1clk(lsu_busm_clk), .*);
rvclkhdr lsu_busm_cgc (
rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*);
rvoclkhdr lsu_free_cgc (
@ -27,387 +27,506 @@
// //********************************************************************************
// //********************************************************************************
module el2_lsu_dccm_ctl
module el2_lsu_dccm_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic lsu_c2_m_clk, // clocks
input logic lsu_c2_m_clk, // clocks
input logic lsu_c2_r_clk, // clocks
input logic lsu_c2_r_clk, // clocks
input logic lsu_c1_r_clk, // clocks
input logic lsu_c1_r_clk, // clocks
input logic lsu_store_c1_r_clk, // clocks
input logic lsu_store_c1_r_clk, // clocks
input logic lsu_free_c2_clk, // clocks
input logic lsu_free_c2_clk, // clocks
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input el2_lsu_pkt_t lsu_pkt_r,// lsu packets
input el2_lsu_pkt_t lsu_pkt_r, // lsu packets
input el2_lsu_pkt_t lsu_pkt_m,// lsu packets
input el2_lsu_pkt_t lsu_pkt_m, // lsu packets
input el2_lsu_pkt_t lsu_pkt_d,// lsu packets
input el2_lsu_pkt_t lsu_pkt_d, // lsu packets
input logic addr_in_dccm_d, // address maps to dccm
input logic addr_in_dccm_d, // address maps to dccm
input logic addr_in_pic_d, // address maps to pic
input logic addr_in_pic_d, // address maps to pic
input logic addr_in_pic_m, // address maps to pic
input logic addr_in_pic_m, // address maps to pic
input logic addr_in_dccm_m, addr_in_dccm_r, // address in dccm per pipe stage
input logic addr_in_dccm_m,
input logic addr_in_pic_r, // address in pic per pipe stage
addr_in_dccm_r, // address in dccm per pipe stage
input logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r,
input logic addr_in_pic_r, // address in pic per pipe stage
input logic lsu_commit_r, // lsu instruction in r commits
input logic lsu_raw_fwd_lo_r,
input logic ldst_dual_m, ldst_dual_r,// load/store is unaligned at 32 bit boundary per pipe stage
input logic lsu_commit_r, // lsu instruction in r commits
input logic ldst_dual_m,
ldst_dual_r, // load/store is unaligned at 32 bit boundary per pipe stage
// lsu address down the pipe
// lsu address down the pipe
input logic [31:0] lsu_addr_d,
input logic [ 31:0] lsu_addr_d,
input logic [pt.DCCM_BITS-1:0] lsu_addr_m,
input logic [pt.DCCM_BITS-1:0] lsu_addr_m,
input logic [31:0] lsu_addr_r,
input logic [ 31:0] lsu_addr_r,
// lsu address down the pipe - needed to check unaligned
// lsu address down the pipe - needed to check unaligned
input logic [pt.DCCM_BITS-1:0] end_addr_d,
input logic [pt.DCCM_BITS-1:0] end_addr_d,
input logic [pt.DCCM_BITS-1:0] end_addr_m,
input logic [pt.DCCM_BITS-1:0] end_addr_m,
input logic [pt.DCCM_BITS-1:0] end_addr_r,
input logic [pt.DCCM_BITS-1:0] end_addr_r,
input logic stbuf_reqvld_any, // write enable
input logic stbuf_reqvld_any, // write enable
input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned)
input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned)
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf
input logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits
input logic [ pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load
input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r,
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0
input logic lsu_double_ecc_error_r, // lsu has a DED
input logic lsu_double_ecc_error_r, // lsu has a DED
input logic single_ecc_error_hi_r, // sec detected on hi dccm bank
input logic single_ecc_error_hi_r, // sec detected on hi dccm bank
input logic single_ecc_error_lo_r, // sec detected on lower dccm bank
input logic single_ecc_error_lo_r, // sec detected on lower dccm bank
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data
input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // the encoded data with ECC bits
input logic [ pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // the encoded data with ECC bits
input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits
input logic [ pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m,
output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m,
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0
output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0
input logic lsu_double_ecc_error_m, // lsu has a DED
input logic lsu_double_ecc_error_m, // lsu has a DED
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data
input logic [31:0] store_data_m, // Store data M-stage
input logic [31:0] store_data_m, // Store data M-stage
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic dma_pic_wen, // Perform PIC writes
input logic dma_pic_wen, // Perform PIC writes
input logic [2:0] dma_mem_tag_m, // DMA Buffer entry number M-stage
input logic [2:0] dma_mem_tag_m, // DMA Buffer entry number M-stage
input logic [31:0] dma_mem_addr, // DMA request address
input logic [31:0] dma_mem_addr, // DMA request address
input logic [63:0] dma_mem_wdata, // DMA write data
input logic [63:0] dma_mem_wdata, // DMA write data
input logic [31:0] dma_dccm_wdata_lo, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_lo, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shift the dma data to lower bits to make it consistent to lsu stores
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r,
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm
output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm
output logic [31:0] store_data_r, // raw store data to be sent to bus
output logic [31:0] store_data_r, // raw store data to be sent to bus
output logic ld_single_ecc_error_r,
output logic ld_single_ecc_error_r,
output logic ld_single_ecc_error_r_ff,
output logic ld_single_ecc_error_r_ff,
output logic [31:0] picm_mask_data_m, // pic data to stbuf
output logic [31:0] picm_mask_data_m, // pic data to stbuf
output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic
output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic
output logic lsu_dccm_rden_m, // dccm read
output logic lsu_dccm_rden_m, // dccm read
output logic lsu_dccm_rden_r, // dccm read
output logic lsu_dccm_rden_r, // dccm read
output logic dccm_dma_rvalid, // dccm serviving the dma load
output logic dccm_dma_rvalid, // dccm serviving the dma load
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic dccm_dma_ecc_error, // DMA load had ecc error
output logic [2:0] dccm_dma_rtag, // DMA return tag
output logic [ 2:0] dccm_dma_rtag, // DMA return tag
output logic [63:0] dccm_dma_rdata, // dccm data to dma request
output logic [63:0] dccm_dma_rdata, // dccm data to dma request
// DCCM ports
// DCCM ports
output logic dccm_wren, // dccm interface -- write
output logic dccm_wren, // dccm interface -- write
output logic dccm_rden, // dccm interface -- write
output logic dccm_rden, // dccm interface -- write
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr addr for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr addr for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank
output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank
output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm
input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm
// PIC ports
// PIC ports
output logic picm_wren, // write to pic
output logic picm_wren, // write to pic
output logic picm_rden, // read to pick
output logic picm_rden, // read to pick
output logic picm_mken, // write to pic need a mask
output logic picm_mken, // write to pic need a mask
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_rdaddr, // address for pic read access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wraddr, // address for pic write access
output logic [31:0] picm_wr_data, // write data
output logic [31:0] picm_wr_data, // write data
input logic [31:0] picm_rd_data, // read data
input logic [31:0] picm_rd_data, // read data
input logic scan_mode // scan mode
input logic scan_mode // scan mode
// Module-level declarations for the DCCM control logic.
// Each name is declared exactly once; the duplicated copies were residue of an
// interleaved old/new diff and would be illegal redeclarations.
localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH);

// DCCM read / write requests generated in the D stage
logic lsu_dccm_rden_d, lsu_dccm_wren_d;

// Single-bit ECC error tracking for loads: raw (_r), qualified next-state (_ns), flopped (_ff)
logic ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r;
logic ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns;
logic ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff;
logic lsu_double_ecc_error_r_ff;
logic [pt.DCCM_BITS-1:0] ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff;

logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r_in, store_data_hi_r_in;
logic [63:0] picm_rd_data_m;

// Store-buffer write address matches the lo/hi word of the access in M / R
logic dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi;
logic dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo;

logic kill_ecc_corr_lo_r, kill_ecc_corr_hi_r;

// byte_en flowing down
logic [3:0] store_byteen_m, store_byteen_r;
logic [7:0] store_byteen_ext_m, store_byteen_ext_r;
// Load-data return path. Two elaboration-time variants selected by pt.LOAD_TO_USE_PLUS1:
//   == 1 : DCCM read data, ECC bits, store-buffer forwarding data/byte-enables, PIC read data
//          and the DMA tag are flopped into *_r, and the byte merge / DMA response is built
//          from the *_r copies.
//   else : the merge and DMA response use the *_m signals directly; only the corrected,
//          aligned load word is flopped (lsu_ld_data_corr_r).
// Every statement appears once; the flop instances are the complete forms (with .* and .dout).
if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1
  logic [63:0] lsu_rdata_r, lsu_rdata_corr_r;
  logic [63:0] dccm_rdata_r, dccm_rdata_corr_r;
  logic [63:0] stbuf_fwddata_r;
  logic [7:0] stbuf_fwdbyteen_r;
  logic [31:0] stbuf_fwddata_lo_r, stbuf_fwddata_hi_r;
  logic [3:0] stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r;
  logic [31:0] lsu_rdata_lo_r, lsu_rdata_hi_r;
  logic [63:0] picm_rd_data_r;
  logic [63:32] lsu_ld_data_r_nc, lsu_ld_data_corr_r_nc;
  logic [2:0] dma_mem_tag_r;
  logic stbuf_fwddata_en;

  // DMA read response, driven from the R stage
  assign dccm_dma_rvalid = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma;
  assign dccm_dma_ecc_error = lsu_double_ecc_error_r;
  assign dccm_dma_rtag[2:0] = dma_mem_tag_r[2:0];
  // A non-dual access replicates the low word into both halves of the response
  assign dccm_dma_rdata[63:0] = ldst_dual_r ? lsu_rdata_corr_r[63:0] : {2{lsu_rdata_corr_r[31:0]}};

  // Align load data by the byte offset; the upper 32 bits of the shift land in the *_nc nets
  assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]} = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0];
  assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0];

  // Only picm_rd_data_r[31:0] is flopped; the upper half mirrors it
  assign picm_rd_data_r[63:32] = picm_rd_data_r[31:0];
  assign dccm_rdata_r[63:0] = {dccm_rdata_hi_r[31:0], dccm_rdata_lo_r[31:0]};
  assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0], sec_data_lo_r[31:0]};
  assign stbuf_fwddata_r[63:0] = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]};
  assign stbuf_fwdbyteen_r[7:0] = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]};
  // Capture forwarded store data only when at least one byte is actually forwarded
  assign stbuf_fwddata_en = (|stbuf_fwdbyteen_hi_m[3:0]) | (|stbuf_fwdbyteen_lo_m[3:0]) | clk_override;

  // Per-byte merge: store-buffer forwarded byte wins, then PIC read data,
  // otherwise DCCM data gated by addr_in_dccm_r.
  for (genvar i = 0; i < 8; i++) begin : GenDMAData
    assign lsu_rdata_corr_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
        (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_corr_r[(8*i)+7:8*i]));
    assign lsu_rdata_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] :
        (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_r[(8*i)+7:8*i]));
  end

  // M -> R staging flops
  rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff (
      .*,
      .din (dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]),
      .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]),
      .en  ((lsu_dccm_rden_m & ldst_dual_m) | clk_override)
  );
  rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff (
      .*,
      .din (dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]),
      .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]),
      .en  (lsu_dccm_rden_m | clk_override)
  );
  rvdffe #(2 * pt.DCCM_ECC_WIDTH) dccm_data_ecc_r_ff (
      .*,
      .din ({dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]}),
      .dout({dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]}),
      .en  (lsu_dccm_rden_m | clk_override)
  );
  rvdff #(8) stbuf_fwdbyteen_ff (
      .*,
      .din ({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}),
      .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}),
      .clk (lsu_c2_r_clk)
  );
  rvdffe #(64) stbuf_fwddata_ff (
      .*,
      .din ({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}),
      .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}),
      .en  (stbuf_fwddata_en)
  );
  rvdffe #(32) picm_rddata_rff (
      .*,
      .din (picm_rd_data_m[31:0]),
      .dout(picm_rd_data_r[31:0]),
      .en  (addr_in_pic_m | clk_override)
  );
  rvdff #(3) dma_mem_tag_rff (
      .*,
      .din (dma_mem_tag_m[2:0]),
      .dout(dma_mem_tag_r[2:0]),
      .clk (lsu_c1_r_clk)
  );
end else begin : L2U_Plus1_0
  logic [63:0] lsu_rdata_m, lsu_rdata_corr_m;
  logic [63:0] dccm_rdata_m, dccm_rdata_corr_m;
  logic [63:0] stbuf_fwddata_m;
  logic [7:0] stbuf_fwdbyteen_m;
  logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc;
  logic [31:0] lsu_ld_data_corr_m;

  // DMA read response, driven from the M stage
  assign dccm_dma_rvalid = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma;
  assign dccm_dma_ecc_error = lsu_double_ecc_error_m;
  assign dccm_dma_rtag[2:0] = dma_mem_tag_m[2:0];
  // A non-dual access replicates the low word into both halves of the response
  assign dccm_dma_rdata[63:0] = ldst_dual_m ? lsu_rdata_corr_m[63:0] : {2{lsu_rdata_corr_m[31:0]}};

  // Align load data by the byte offset; the upper 32 bits of the shift land in the *_nc nets
  assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0];
  assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0];

  assign dccm_rdata_m[63:0] = {dccm_rdata_hi_m[31:0], dccm_rdata_lo_m[31:0]};
  assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0], sec_data_lo_m[31:0]};
  assign stbuf_fwddata_m[63:0] = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]};
  assign stbuf_fwdbyteen_m[7:0] = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]};

  // Per-byte merge: store-buffer forwarded byte wins, then PIC read data,
  // otherwise DCCM data gated by addr_in_dccm_m.
  for (genvar i = 0; i < 8; i++) begin : GenLoop
    assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
        (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_corr_m[(8*i)+7:8*i]));
    assign lsu_rdata_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] :
        (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_m[(8*i)+7:8*i]));
  end

  // Flop the corrected load word for valid loads that hit PIC or DCCM
  rvdffe #(32) lsu_ld_data_corr_rff (
      .*,
      .din (lsu_ld_data_corr_m[31:0]),
      .dout(lsu_ld_data_corr_r[31:0]),
      .en  ((lsu_pkt_m.valid & lsu_pkt_m.load & (addr_in_pic_m | addr_in_dccm_m)) | clk_override)
  );
end
// Suppress the pending single-bit ECC correction of the lo / hi word in R when a valid DMA
// access in D or M falls in the same DCCM word (address compare on bits [DCCM_BITS-1:2]).
// NOTE(review): the `lsu_pkt_*.store` qualifier is reconstructed. The captured text had a
// bare `& &` at this position (one operand lost); confirm the field name against el2_pkg.
assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
                            (((lsu_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) |
                            (((lsu_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m);
// Single-bit ECC error on a load in R, per word; ignored when the word was forwarded (raw_fwd)
assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r;
assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r;
// A double-bit error takes priority over single-bit reporting
assign ld_single_ecc_error_r = (ld_single_ecc_error_lo_r | ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r;
// Next-state for the correction flops: only for committed or DMA loads, and not when killed
assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r;
assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r;
assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff;
// Store-buffer commit to DCCM. Allowed when a store-buffer request is pending and either
//  - there is no D-stage DCCM read, no D-stage DCCM write and no ECC correction write, or
//  - there is a D-stage read whose start and end addresses both map to a different bank
//    than the store-buffer address.
assign lsu_stbuf_commit_any = stbuf_reqvld_any &
    (~(lsu_dccm_rden_d | lsu_dccm_wren_d | ld_single_ecc_error_r_ff) |
     (lsu_dccm_rden_d & ~((stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == lsu_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]) |
                          (stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == end_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]))));
// No need to read for aligned word/dword stores since ECC will come by new data completely
// NOTE(review): `lsu_pkt_d.store` is reconstructed. The captured text read `( & (~(...)))`
// with the left operand of `&` missing; confirm the field name against el2_pkg.
assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d;

// DMA will read/write in decode stage
assign lsu_dccm_wren_d = dma_dccm_wen;
// DCCM inputs
// Write sources: D-stage DMA write, store-buffer commit, or ECC correction write-back
assign dccm_wren = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff;
assign dccm_rden = lsu_dccm_rden_d & addr_in_dccm_d;

// Write address priority: ECC correction address, then D-stage (DMA) address, then store buffer.
// During correction, a port whose own word has no error reuses the other word's address.
assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) :
                                           lsu_dccm_wren_d ? lsu_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];
assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) :
                                           lsu_dccm_wren_d ? end_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0];

assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0] = lsu_addr_d[pt.DCCM_BITS-1:0];
assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0] = end_addr_d[pt.DCCM_BITS-1:0];
// DCCM write data ({ecc, data}). Priority matches the write address muxes:
// ECC-corrected data, then DMA write data, then store-buffer data.
// NOTE(review): the final (store-buffer) operand of each mux was missing from the captured
// text, leaving the statement unterminated. `stbuf_ecc_any` is an assumed name for the
// store-buffer ECC bits that pair with stbuf_data_any; confirm before merging.
assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
                                                                                                            {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
                                                  (dma_dccm_wen ? {dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0]} :
                                                                  {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} :
                                                                                                            {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) :
                                                  (dma_dccm_wen ? {dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0]} :
                                                                  {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]});
// DCCM outputs
// Byte enables by access size: 1 byte -> 0001, half -> 0011, word -> 1111.
// NOTE(review): the captured text had empty replications `{4{}}` (illegal) for the outer
// qualifier and the 1-byte term. `.store` and `.by` are reconstructed field names; confirm
// them against the el2_lsu_pkt_t definition in el2_pkg.
assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} &
                             (({4{lsu_pkt_m.by}}   & 4'b0001) |
                              ({4{lsu_pkt_m.half}} & 4'b0011) |
                              ({4{lsu_pkt_m.word}} & 4'b1111));

assign store_byteen_r[3:0] = {4{lsu_pkt_r.store}} &
                             (({4{lsu_pkt_r.by}}   & 4'b0001) |
                              ({4{lsu_pkt_r.half}} & 4'b0011) |
                              ({4{lsu_pkt_r.word}} & 4'b1111));

// Shift by the byte offset so the enables cover the lo ([3:0]) and hi ([7:4]) words
assign store_byteen_ext_m[7:0] = {4'b0, store_byteen_m[3:0]} << lsu_addr_m[1:0];  // The packet in m
assign store_byteen_ext_r[7:0] = {4'b0, store_byteen_r[3:0]} << lsu_addr_r[1:0];
// Store-buffer write address hits the same DCCM word as the lo (start) / hi (end) address
// of the access currently in M or R.
assign dccm_wr_bypass_d_m_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;
assign dccm_wr_bypass_d_m_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m;

assign dccm_wr_bypass_d_r_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
assign dccm_wr_bypass_d_r_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r;
// Store-data merge path. Two elaboration-time variants selected by pt.LOAD_TO_USE_PLUS1:
//   == 1 : raw store data is flopped M -> R and merged in R with the (flopped) store-buffer
//          write data or the corrected DCCM data (sec_data_*_r).
//   else : the merge is done in M and the merged lo/hi words are flopped into R.
// Every statement appears once; the flop instances are the complete forms (with .* and .dout).
if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U1_Plus1_1
  logic dccm_wren_Q;
  logic [31:0] dccm_wr_data_Q;
  logic dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q;
  logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r;

  // Place the store word at its byte offset across the lo/hi words
  assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0];

  // Per byte: store data where enabled; otherwise bypassed store-buffer write data, else corrected DCCM data
  for (genvar i = 0; i < 4; i++) begin
    assign store_data_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]);
    assign store_data_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]);

    assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] :
        ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]));
    assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] :
        ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]));
  end

  rvdff #(1) dccm_wren_ff (
      .*,
      .din (lsu_stbuf_commit_any),
      .dout(dccm_wren_Q),
      .clk (lsu_free_c2_clk)
  );  // ECC load errors writing to dccm shouldn't fwd to stores in pipe
  rvdffe #(32) dccm_wrdata_ff (
      .*,
      .din (stbuf_data_any[31:0]),
      .dout(dccm_wr_data_Q[31:0]),
      .en  (lsu_stbuf_commit_any | clk_override),
      .clk (clk)
  );
  rvdff #(1) dccm_wrbyp_dm_loff (
      .*,
      .din (dccm_wr_bypass_d_m_lo),
      .dout(dccm_wr_bypass_d_m_lo_Q),
      .clk (lsu_free_c2_clk)
  );
  rvdff #(1) dccm_wrbyp_dm_hiff (
      .*,
      .din (dccm_wr_bypass_d_m_hi),
      .dout(dccm_wr_bypass_d_m_hi_Q),
      .clk (lsu_free_c2_clk)
  );
  rvdff #(32) store_data_rff (
      .*,
      .din (store_data_m[31:0]),
      .dout(store_data_r[31:0]),
      .clk (lsu_store_c1_r_clk)
  );
end else begin : L2U1_Plus1_0
  logic [31:0] store_data_hi_m, store_data_lo_m;
  logic [63:0] store_data_mask;

  // Place the store word at its byte offset across the lo/hi words
  assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0];

  for (genvar i = 0; i < 4; i++) begin
    // M-stage merge: store data where enabled; otherwise committing store-buffer data, else corrected DCCM data
    assign store_data_hi_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] :
        ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_hi_m[(8*i)+7:(8*i)]);
    assign store_data_lo_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i] ? store_data_lo_m[(8*i)+7:(8*i)] :
        ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_lo_m[(8*i)+7:(8*i)]);

    // R-stage fix-up: bytes not written by this store pick up a store-buffer commit to the same word
    assign store_datafn_lo_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo & ~store_byteen_ext_r[i]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)];
    assign store_datafn_hi_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi & ~store_byteen_ext_r[i+4]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_hi_r[(8*i)+7:(8*i)];
  end

  // Expand the 4 store byte enables into a per-bit mask (only bits [31:0] are driven and used)
  for (genvar i = 0; i < 4; i++) begin
    assign store_data_mask[(8*i)+7:(8*i)] = {8{store_byteen_r[i]}};
  end
  // Re-align the merged words back to a 32-bit store value and keep only the stored bytes
  assign store_data_r[31:0] = 32'({store_data_hi_r[31:0],store_data_lo_r[31:0]} >> 8*lsu_addr_r[1:0]) & store_data_mask[31:0];

  // NOTE(review): `lsu_pkt_m.store` in the enable is reconstructed. The captured text read
  // `(ldst_dual_m & lsu_pkt_m.valid & | clk_override)` with unbalanced parentheses; confirm.
  rvdffe #(pt.DCCM_DATA_WIDTH) store_data_hi_rff (
      .*,
      .din (store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]),
      .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]),
      .en  ((ldst_dual_m & lsu_pkt_m.valid & lsu_pkt_m.store) | clk_override),
      .clk (clk)
  );
  rvdff #(pt.DCCM_DATA_WIDTH) store_data_lo_rff (
      .*,
      .din (store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]),
      .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]),
      .clk (lsu_store_c1_r_clk)
  );
end
// Split each DCCM read word into its data bits [DATA_WIDTH-1:0] and ECC bits [FDATA_WIDTH-1:DATA_WIDTH]
assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0];  // for ld choose dccm_out
assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0];  // for ld this is used for ecc

assign dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH];
// PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits
// NOTE(review): `lsu_pkt_r.store` / `lsu_pkt_d.store` are reconstructed. The captured text had
// a bare `& &` in picm_wren and picm_mken (one operand lost); confirm against el2_pkg.
assign picm_wren = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen;
assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d;
assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d;  // Get the mask for stores

// Addresses are the PIC base OR'ed with the low PIC_BITS of the access address;
// a DMA PIC write takes priority over the R-stage store for write address and data.
assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]};
assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])};
assign picm_wr_data[31:0] = dma_pic_wen ? dma_mem_wdata[31:0] : store_datafn_lo_r[31:0];

assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0];
// The 32-bit PIC read data is replicated into both halves of the 64-bit M-stage bus
assign picm_rd_data_m[63:0] = {picm_rd_data[31:0], picm_rd_data[31:0]};
// DCCM read-enable pipeline and ECC-correction state. When pt.DCCM_ENABLE != 1 all of these
// signals are tied to zero instead of being flopped.
if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable
  // Pipeline the DCCM read enable D -> M -> R
  rvdff #(1) dccm_rden_mff (
      .*,
      .din (lsu_dccm_rden_d),
      .dout(lsu_dccm_rden_m),
      .clk (lsu_c2_m_clk)
  );
  rvdff #(1) dccm_rden_rff (
      .*,
      .din (lsu_dccm_rden_m),
      .dout(lsu_dccm_rden_r),
      .clk (lsu_c2_r_clk)
  );

  // ECC correction flops since dccm write happens next cycle
  // We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M.
  // In that case these (_ff) flops are needed only in plus1 configuration
  rvdff #(1) ld_double_ecc_error_rff (
      .*,
      .din (lsu_double_ecc_error_r),
      .dout(lsu_double_ecc_error_r_ff),
      .clk (lsu_free_c2_clk)
  );
  rvdff #(1) ld_single_ecc_error_hi_rff (
      .*,
      .din (ld_single_ecc_error_hi_r_ns),
      .dout(ld_single_ecc_error_hi_r_ff),
      .clk (lsu_free_c2_clk)
  );
  rvdff #(1) ld_single_ecc_error_lo_rff (
      .*,
      .din (ld_single_ecc_error_lo_r_ns),
      .dout(ld_single_ecc_error_lo_r_ff),
      .clk (lsu_free_c2_clk)
  );
  // Addresses of the words to correct, captured only when a single-bit load error is seen
  rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff (
      .*,
      .din (end_addr_r[pt.DCCM_BITS-1:0]),
      .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]),
      .en  (ld_single_ecc_error_r | clk_override),
      .clk (clk)
  );
  rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff (
      .*,
      .din (lsu_addr_r[pt.DCCM_BITS-1:0]),
      .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]),
      .en  (ld_single_ecc_error_r | clk_override),
      .clk (clk)
  );
end else begin : Gen_dccm_disable
  assign lsu_dccm_rden_m = '0;
  assign lsu_dccm_rden_r = '0;

  assign lsu_double_ecc_error_r_ff = 1'b0;
  assign ld_single_ecc_error_hi_r_ff = 1'b0;
  assign ld_single_ecc_error_lo_r_ff = 1'b0;
  assign ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] = '0;
  assign ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] = '0;
end
@ -27,17 +27,14 @@
// //********************************************************************************
// //********************************************************************************
`define EL2_LOCAL_DCCM_RAM_TEST_PORTS .TEST1(dccm_ext_in_pkt[i].TEST1), \
`define EL2_LOCAL_DCCM_RAM_TEST_PORTS .TEST1(dccm_ext_in_pkt[i].TEST1),\
.RME(dccm_ext_in_pkt[i].RME), \
.RM(dccm_ext_in_pkt[i].RM), \
.LS(dccm_ext_in_pkt[i].LS), \
.DS(dccm_ext_in_pkt[i].DS), \
.SD(dccm_ext_in_pkt[i].SD), \
.TEST_RNM(dccm_ext_in_pkt[i].TEST_RNM), \
.BC1(dccm_ext_in_pkt[i].BC1), \
.BC2(dccm_ext_in_pkt[i].BC2), \
module el2_lsu_dccm_mem
module el2_lsu_dccm_mem
import el2_pkg::*;
import el2_pkg::*;
@ -26,216 +26,269 @@
module el2_lsu_ecc
module el2_lsu_ecc
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic lsu_c2_r_clk, // clock
input logic lsu_c2_r_clk, // clock
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic scan_mode, // scan mode
input logic scan_mode, // scan mode
input el2_lsu_pkt_t lsu_pkt_m, // packet in m
input el2_lsu_pkt_t lsu_pkt_m, // packet in m
input el2_lsu_pkt_t lsu_pkt_r, // packet in r
input el2_lsu_pkt_t lsu_pkt_r, // packet in r
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,
input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,
input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging
input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging
input logic lsu_dccm_rden_r, // dccm rden
input logic lsu_dccm_rden_r, // dccm rden
input logic addr_in_dccm_r, // address in dccm
input logic addr_in_dccm_r, // address in dccm
input logic [pt.DCCM_BITS-1:0] lsu_addr_r, // start address
input logic [ pt.DCCM_BITS-1:0] lsu_addr_r, // start address
input logic [pt.DCCM_BITS-1:0] end_addr_r, // end address
input logic [ pt.DCCM_BITS-1:0] end_addr_r, // end address
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, // data from the dccm + ecc
input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, // data from the dccm + ecc
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data R+1 stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data R+1 stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data R+1 stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data R+1 stage
input logic ld_single_ecc_error_r, // ld has a single ecc error
input logic ld_single_ecc_error_r, // ld has a single ecc error
input logic ld_single_ecc_error_r_ff, // ld has a single ecc error
input logic ld_single_ecc_error_r_ff, // ld has a single ecc error
input logic lsu_dccm_rden_m, // dccm rden
input logic lsu_dccm_rden_m, // dccm rden
input logic addr_in_dccm_m, // address in dccm
input logic addr_in_dccm_m, // address in dccm
input logic [pt.DCCM_BITS-1:0] lsu_addr_m, // start address
input logic [ pt.DCCM_BITS-1:0] lsu_addr_m, // start address
input logic [pt.DCCM_BITS-1:0] end_addr_m, // end address
input logic [ pt.DCCM_BITS-1:0] end_addr_m, // end address
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem
input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem
input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem
input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem
input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data M-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data M-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data M-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data M-stage
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic dma_dccm_wen, // Perform DMA writes only for word/dword
input logic [31:0] dma_dccm_wdata_lo, // Shifted dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_lo, // Shifted dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shifted dma data to lower bits to make it consistent to lsu stores
input logic [31:0] dma_dccm_wdata_hi, // Shifted dma data to lower bits to make it consistent to lsu stores
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata
output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // Encoded data with ECC bits
output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // Encoded data with ECC bits
output logic single_ecc_error_hi_r, // sec detected
output logic single_ecc_error_hi_r, // sec detected
output logic single_ecc_error_lo_r, // sec detected on lower dccm bank
output logic single_ecc_error_lo_r, // sec detected on lower dccm bank
output logic lsu_single_ecc_error_r, // or of the 2
output logic lsu_single_ecc_error_r, // or of the 2
output logic lsu_double_ecc_error_r, // double error detected
output logic lsu_double_ecc_error_r, // double error detected
output logic lsu_single_ecc_error_m, // or of the 2
output logic lsu_single_ecc_error_m, // or of the 2
output logic lsu_double_ecc_error_m // double error detected
output logic lsu_double_ecc_error_m // double error detected
logic is_ldst_r;
logic is_ldst_r;
logic is_ldst_hi_any, is_ldst_lo_any;
logic is_ldst_hi_any, is_ldst_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any;
logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any;
logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any;
logic single_ecc_error_hi_any, single_ecc_error_lo_any;
logic single_ecc_error_hi_any, single_ecc_error_lo_any;
logic double_ecc_error_hi_any, double_ecc_error_lo_any;
logic double_ecc_error_hi_any, double_ecc_error_lo_any;
logic double_ecc_error_hi_m, double_ecc_error_lo_m;
logic double_ecc_error_hi_m, double_ecc_error_lo_m;
logic double_ecc_error_hi_r, double_ecc_error_lo_r;
logic double_ecc_error_hi_r, double_ecc_error_lo_r;
logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc;
logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1
logic ldst_dual_m, ldst_dual_r;
logic ldst_dual_m, ldst_dual_r;
logic is_ldst_m;
logic is_ldst_m;
logic is_ldst_hi_r, is_ldst_lo_r;
logic is_ldst_hi_r, is_ldst_lo_r;
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]);
assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | & addr_in_dccm_r & lsu_dccm_rden_r;
assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | & addr_in_dccm_r & lsu_dccm_rden_r;
assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable;
assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable;
assign is_ldst_hi_r = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_r = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_any = is_ldst_hi_r;
assign is_ldst_hi_any = is_ldst_hi_r;
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0];
assign is_ldst_lo_any = is_ldst_lo_r;
assign is_ldst_lo_any = is_ldst_lo_r;
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign single_ecc_error_hi_r = single_ecc_error_hi_any;
assign single_ecc_error_hi_r = single_ecc_error_hi_any;
assign double_ecc_error_hi_r = double_ecc_error_hi_any;
assign double_ecc_error_hi_r = double_ecc_error_hi_any;
assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign single_ecc_error_lo_r = single_ecc_error_lo_any;
assign single_ecc_error_lo_r = single_ecc_error_lo_any;
assign double_ecc_error_lo_r = double_ecc_error_lo_any;
assign double_ecc_error_lo_r = double_ecc_error_lo_any;
assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r;
assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r;
assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r;
assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r;
end else begin: L2U_Plus1_0
end else begin : L2U_Plus1_0
logic ldst_dual_m;
logic ldst_dual_m;
logic is_ldst_m;
logic is_ldst_m;
logic is_ldst_hi_m, is_ldst_lo_m;
logic is_ldst_hi_m, is_ldst_lo_m;
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]);
assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | & addr_in_dccm_m & lsu_dccm_rden_m;
assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | & addr_in_dccm_m & lsu_dccm_rden_m;
assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable;
assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable;
assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA
assign is_ldst_hi_any = is_ldst_hi_m;
assign is_ldst_hi_any = is_ldst_hi_m;
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0];
assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0];
assign is_ldst_lo_any = is_ldst_lo_m;
assign is_ldst_lo_any = is_ldst_lo_m;
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0];
assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0];
assign double_ecc_error_hi_m = double_ecc_error_hi_any;
assign double_ecc_error_hi_m = double_ecc_error_hi_any;
assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0];
assign double_ecc_error_lo_m = double_ecc_error_lo_any;
assign double_ecc_error_lo_m = double_ecc_error_lo_any;
assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any;
assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any;
assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m;
assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m;
// Flops
// Flops
rvdff #(1) lsu_single_ecc_err_r (.din(lsu_single_ecc_error_m), .dout(lsu_single_ecc_error_r), .clk(lsu_c2_r_clk), .*);
rvdff #(1) lsu_single_ecc_err_r (
rvdff #(1) lsu_double_ecc_err_r (.din(lsu_double_ecc_error_m), .dout(lsu_double_ecc_error_r), .clk(lsu_c2_r_clk), .*);
.din (lsu_single_ecc_error_m),
rvdff #(.WIDTH(1)) ldst_sec_lo_rff (.din(single_ecc_error_lo_any), .dout(single_ecc_error_lo_r), .clk(lsu_c2_r_clk), .*);
rvdff #(.WIDTH(1)) ldst_sec_hi_rff (.din(single_ecc_error_hi_any), .dout(single_ecc_error_hi_r), .clk(lsu_c2_r_clk), .*);
.clk (lsu_c2_r_clk),
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rff (.din(sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rff (.din(sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*);
rvdff #(1) lsu_double_ecc_err_r (
.din (lsu_double_ecc_error_m),
.clk (lsu_c2_r_clk),
rvdff #(
) ldst_sec_lo_rff (
.din (single_ecc_error_lo_any),
.clk (lsu_c2_r_clk),
rvdff #(
) ldst_sec_hi_rff (
.din (single_ecc_error_hi_any),
.clk (lsu_c2_r_clk),
rvdffe #(
) sec_data_hi_rff (
.din (sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]),
.en (lsu_single_ecc_error_m | clk_override),
rvdffe #(
) sec_data_lo_rff (
.din (sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]),
.en (lsu_single_ecc_error_m | clk_override),
// Logic for ECC generation during write
// Logic for ECC generation during write
assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]);
assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]);
assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0);
assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0);
assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0];
// Instantiate ECC blocks
// Instantiate ECC blocks
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable
//Detect/Repair for Hi
//Detect/Repair for Hi
rvecc_decode lsu_ecc_decode_hi (
rvecc_decode lsu_ecc_decode_hi (
// Inputs
// Inputs
.sed_ded (1'b0), // 1 : means only detection
.sed_ded(1'b0), // 1 : means only detection
// Outputs
// Outputs
.ecc_out (ecc_out_hi_nc[6:0]),
//Detect/Repair for Lo
//Detect/Repair for Lo
rvecc_decode lsu_ecc_decode_lo (
rvecc_decode lsu_ecc_decode_lo (
// Inputs
// Inputs
.sed_ded (1'b0), // 1 : means only detection
.sed_ded(1'b0), // 1 : means only detection
.din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] ),
// Outputs
// Outputs
.ecc_out (ecc_out_lo_nc[6:0]),
rvecc_encode lsu_ecc_encode_hi (
rvecc_encode lsu_ecc_encode_hi (
rvecc_encode lsu_ecc_encode_lo (
rvecc_encode lsu_ecc_encode_lo (
end else begin: Gen_dccm_disable // block: Gen_dccm_enable
end else begin : Gen_dccm_disable // block: Gen_dccm_enable
assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0;
assign single_ecc_error_hi_any = '0;
assign single_ecc_error_hi_any = '0;
assign double_ecc_error_hi_any = '0;
assign double_ecc_error_hi_any = '0;
assign single_ecc_error_lo_any = '0;
assign single_ecc_error_lo_any = '0;
assign double_ecc_error_lo_any = '0;
assign double_ecc_error_lo_any = '0;
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rplus1ff (.din(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
rvdffe #(
rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rplus1ff (.din(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*);
) sec_data_hi_rplus1ff (
.din (sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]),
.en (ld_single_ecc_error_r | clk_override),
.clk (clk),
rvdffe #(
) sec_data_lo_rplus1ff (
.din (sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]),
.en (ld_single_ecc_error_r | clk_override),
.clk (clk),
endmodule // el2_lsu_ecc
endmodule // el2_lsu_ecc
@ -26,316 +26,483 @@
module el2_lsu_lsc_ctl
module el2_lsu_lsc_ctl
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic rst_l, // reset, active low
input logic rst_l, // reset, active low
input logic clk_override, // Override non-functional clock gating
input logic clk_override, // Override non-functional clock gating
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK.
// clocks per pipe
// clocks per pipe
input logic lsu_c1_m_clk,
input logic lsu_c1_m_clk,
input logic lsu_c1_r_clk,
input logic lsu_c1_r_clk,
input logic lsu_c2_m_clk,
input logic lsu_c2_m_clk,
input logic lsu_c2_r_clk,
input logic lsu_c2_r_clk,
input logic lsu_store_c1_m_clk,
input logic lsu_store_c1_m_clk,
input logic [31:0] lsu_ld_data_r, // Load data R-stage
input logic [31:0] lsu_ld_data_r, // Load data R-stage
input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage
input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage
input logic lsu_single_ecc_error_r, // ECC single bit error R-stage
input logic lsu_single_ecc_error_r, // ECC single bit error R-stage
input logic lsu_double_ecc_error_r, // ECC double bit error R-stage
input logic lsu_double_ecc_error_r, // ECC double bit error R-stage
input logic [31:0] lsu_ld_data_m, // Load data M-stage
input logic [31:0] lsu_ld_data_m, // Load data M-stage
input logic lsu_single_ecc_error_m, // ECC single bit error M-stage
input logic lsu_single_ecc_error_m, // ECC single bit error M-stage
input logic lsu_double_ecc_error_m, // ECC double bit error M-stage
input logic lsu_double_ecc_error_m, // ECC double bit error M-stage
input logic flush_m_up, // Flush M and D stage
input logic flush_m_up, // Flush M and D stage
input logic flush_r, // Flush R-stage
input logic flush_r, // Flush R-stage
input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage
input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage
input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage
input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage
input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage
input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage
input logic [31:0] exu_lsu_rs1_d, // address
input logic [31:0] exu_lsu_rs1_d, // address
input logic [31:0] exu_lsu_rs2_d, // store data
input logic [31:0] exu_lsu_rs2_d, // store data
input el2_lsu_pkt_t lsu_p, // lsu control packet
input el2_lsu_pkt_t lsu_p, // lsu control packet
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic dec_lsu_valid_raw_d, // Raw valid for address computation
input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses
input logic [31:0] picm_mask_data_m, // PIC data M-stage
input logic [31:0] picm_mask_data_m, // PIC data M-stage
input logic [31:0] bus_read_data_m, // the bus return data
input logic [31:0] bus_read_data_m, // the bus return data
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_m, // lsu load data
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF
// lsu address down the pipe
// lsu address down the pipe
output logic [31:0] lsu_addr_d,
output logic [31:0] lsu_addr_d,
output logic [31:0] lsu_addr_m,
output logic [31:0] lsu_addr_m,
output logic [31:0] lsu_addr_r,
output logic [31:0] lsu_addr_r,
// lsu address down the pipe - needed to check unaligned
// lsu address down the pipe - needed to check unaligned
output logic [31:0] end_addr_d,
output logic [31:0] end_addr_d,
output logic [31:0] end_addr_m,
output logic [31:0] end_addr_m,
output logic [31:0] end_addr_r,
output logic [31:0] end_addr_r,
// store data down the pipe
// store data down the pipe
output logic [31:0] store_data_m,
output logic [31:0] store_data_m,
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control
output logic lsu_exc_m, // Access or misaligned fault
output logic lsu_exc_m, // Access or misaligned fault
output logic is_sideeffects_m, // is sideffects space
output logic is_sideeffects_m, // is sideffects space
output logic lsu_commit_r, // lsu instruction in r commits
output logic lsu_commit_r, // lsu instruction in r commits
output logic lsu_single_ecc_error_incr,// LSU inc SB error counter
output logic lsu_single_ecc_error_incr, // LSU inc SB error counter
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [31:1] lsu_fir_addr, // fast interrupt address
output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup
output logic [ 1:0] lsu_fir_error, // Error during fast interrupt lookup
// address in dccm/pic/external per pipe stage
// address in dccm/pic/external per pipe stage
output logic addr_in_dccm_d,
output logic addr_in_dccm_d,
output logic addr_in_dccm_m,
output logic addr_in_dccm_m,
output logic addr_in_dccm_r,
output logic addr_in_dccm_r,
output logic addr_in_pic_d,
output logic addr_in_pic_d,
output logic addr_in_pic_m,
output logic addr_in_pic_m,
output logic addr_in_pic_r,
output logic addr_in_pic_r,
output logic addr_external_m,
output logic addr_external_m,
// DMA slave
// DMA slave
input logic dma_dccm_req,
input logic dma_dccm_req,
input logic [31:0] dma_mem_addr,
input logic [31:0] dma_mem_addr,
input logic [2:0] dma_mem_sz,
input logic [ 2:0] dma_mem_sz,
input logic dma_mem_write,
input logic dma_mem_write,
input logic [63:0] dma_mem_wdata,
input logic [63:0] dma_mem_wdata,
// Store buffer related signals
// Store buffer related signals
output el2_lsu_pkt_t lsu_pkt_d,
output el2_lsu_pkt_t lsu_pkt_d,
output el2_lsu_pkt_t lsu_pkt_m,
output el2_lsu_pkt_t lsu_pkt_m,
output el2_lsu_pkt_t lsu_pkt_r,
output el2_lsu_pkt_t lsu_pkt_r,
input logic scan_mode // Scan mode
input logic scan_mode // Scan mode
logic [31:3] end_addr_pre_m, end_addr_pre_r;
logic [31:3] end_addr_pre_m, end_addr_pre_r;
logic [31:0] full_addr_d;
logic [31:0] full_addr_d;
logic [31:0] full_end_addr_d;
logic [31:0] full_end_addr_d;
logic [31:0] lsu_rs1_d;
logic [31:0] lsu_rs1_d;
logic [11:0] lsu_offset_d;
logic [11:0] lsu_offset_d;
logic [31:0] rs1_d;
logic [31:0] rs1_d;
logic [11:0] offset_d;
logic [11:0] offset_d;
logic [12:0] end_addr_offset_d;
logic [12:0] end_addr_offset_d;
logic [2:0] addr_offset_d;
logic [ 2:0] addr_offset_d;
logic [63:0] dma_mem_wdata_shifted;
logic [63:0] dma_mem_wdata_shifted;
logic addr_external_d;
logic addr_external_d;
logic addr_external_r;
logic addr_external_r;
logic access_fault_d, misaligned_fault_d;
logic access_fault_d, misaligned_fault_d;
logic access_fault_m, misaligned_fault_m;
logic access_fault_m, misaligned_fault_m;
logic fir_dccm_access_error_d, fir_nondccm_access_error_d;
logic fir_dccm_access_error_d, fir_nondccm_access_error_d;
logic fir_dccm_access_error_m, fir_nondccm_access_error_m;
logic fir_dccm_access_error_m, fir_nondccm_access_error_m;
logic [3:0] exc_mscause_d, exc_mscause_m;
logic [3:0] exc_mscause_d, exc_mscause_m;
logic [31:0] rs1_d_raw;
logic [31:0] rs1_d_raw;
logic [31:0] store_data_d, store_data_pre_m, store_data_m_in;
logic [31:0] store_data_d, store_data_pre_m, store_data_m_in;
logic [31:0] bus_read_data_r;
logic [31:0] bus_read_data_r;
el2_lsu_pkt_t dma_pkt_d;
el2_lsu_pkt_t dma_pkt_d;
el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in;
el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in;
el2_lsu_error_pkt_t lsu_error_pkt_m;
el2_lsu_error_pkt_t lsu_error_pkt_m;
// Premux the rs1/offset for dma
// Premux the rs1/offset for dma
assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
assign rs1_d_raw[31:0] = lsu_rs1_d[31:0];
assign rs1_d_raw[31:0] = lsu_rs1_d[31:0];
assign offset_d[11:0] = lsu_offset_d[11:0];
assign offset_d[11:0] = lsu_offset_d[11:0];
assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];
assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];
// generate the ls address
// generate the ls address
rvlsadder lsadder (.rs1(rs1_d[31:0]),
rvlsadder lsadder (
// Module to generate the memory map of the address
el2_lsu_addrcheck addrcheck (
// Calculate start/end address for load/store
// Module to generate the memory map of the address
assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
el2_lsu_addrcheck addrcheck (
assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
.end_addr_d (full_end_addr_d[31:0]),
assign end_addr_d[31:0] = full_end_addr_d[31:0];
assign lsu_exc_m = access_fault_m | misaligned_fault_m;
// Goes to TLU to increment the ECC error counter
// Calculate start/end address for load/store
assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
assign end_addr_offset_d[12:0] = {offset_d[11], offset_d[11:0]} + {9'b0, addr_offset_d[2:0]};
assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
assign end_addr_d[31:0] = full_end_addr_d[31:0];
assign lsu_exc_m = access_fault_m | misaligned_fault_m;
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
// Goes to TLU to increment the ECC error counter
logic access_fault_r, misaligned_fault_r;
assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
logic [3:0] exc_mscause_r;
logic fir_dccm_access_error_r, fir_nondccm_access_error_r;
// Generate exception packet
if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1
assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
logic access_fault_r, misaligned_fault_r;
assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
logic [3:0] exc_mscause_r;
assign lsu_error_pkt_r.inst_type =;
logic fir_dccm_access_error_r, fir_nondccm_access_error_r;
assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r;
assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
// Generate exception packet
assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
assign lsu_error_pkt_r.inst_type =;
assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r;
assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
rvdff #(1) access_fault_rff (.din(access_fault_m), .dout(access_fault_r), .clk(lsu_c1_r_clk), .*);
assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
rvdff #(1) misaligned_fault_rff (.din(misaligned_fault_m), .dout(misaligned_fault_r), .clk(lsu_c1_r_clk), .*);
rvdff #(4) exc_mscause_rff (.din(exc_mscause_m[3:0]), .dout(exc_mscause_r[3:0]), .clk(lsu_c1_r_clk), .*);
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_m), .dout(fir_dccm_access_error_r), .clk(lsu_c1_r_clk), .*);
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);
end else begin: L2U_Plus1_0
rvdff #(1) access_fault_rff (
logic [1:0] lsu_fir_error_m;
.din (access_fault_m),
.clk (lsu_c1_r_clk),
rvdff #(1) misaligned_fault_rff (
.din (misaligned_fault_m),
.clk (lsu_c1_r_clk),
rvdff #(4) exc_mscause_rff (
.din (exc_mscause_m[3:0]),
.clk (lsu_c1_r_clk),
rvdff #(1) fir_dccm_access_error_mff (
.din (fir_dccm_access_error_m),
.clk (lsu_c1_r_clk),
rvdff #(1) fir_nondccm_access_error_mff (
.din (fir_nondccm_access_error_m),
.clk (lsu_c1_r_clk),
// Generate exception packet
end else begin : L2U_Plus1_0
assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
logic [1:0] lsu_fir_error_m;
assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
assign lsu_error_pkt_m.inst_type =;
assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m;
assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
// Generate exception packet
assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
assign lsu_error_pkt_m.inst_type =;
assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m;
assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
rvdff #(1) lsu_exc_valid_rff (.*, .din(lsu_error_pkt_m.exc_valid), .dout(lsu_error_pkt_r.exc_valid), .clk(lsu_c2_r_clk));
assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
rvdff #(1) lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error), .dout(lsu_error_pkt_r.single_ecc_error), .clk(lsu_c2_r_clk));
rvdffe #($bits(el2_lsu_error_pkt_t)-2) lsu_error_pkt_rff (.*, .din(lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override));
rvdff #(2) lsu_fir_error_rff (.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk));
//Create DMA packet
rvdff #(1) lsu_exc_valid_rff (
always_comb begin
dma_pkt_d = '0;
.din (lsu_error_pkt_m.exc_valid),
dma_pkt_d.valid = dma_dccm_req;
dma_pkt_d.dma = 1'b1;
.clk (lsu_c2_r_clk)
|||||| = dma_mem_write;
dma_pkt_d.load = ~dma_mem_write;
rvdff #(1) lsu_single_ecc_error_rff (
|||||| = (dma_mem_sz[2:0] == 3'b0);
dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1);
.din (lsu_error_pkt_m.single_ecc_error),
dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10);
dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11);
.clk (lsu_c2_r_clk)
rvdffe #($bits(
) - 2) lsu_error_pkt_rff (
.din (lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]),
.en (lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override)
rvdff #(2) lsu_fir_error_rff (
.din (lsu_fir_error_m[1:0]),
.clk (lsu_c2_r_clk)
always_comb begin
//Create DMA packet
lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
always_comb begin
lsu_pkt_m_in = lsu_pkt_d;
dma_pkt_d = '0;
lsu_pkt_r_in = lsu_pkt_m;
dma_pkt_d.valid = dma_dccm_req;
dma_pkt_d.dma = 1'b1;
| = dma_mem_write;
dma_pkt_d.load = ~dma_mem_write;
| = (dma_mem_sz[2:0] == 3'b0);
dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1);
dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10);
dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11);
lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
always_comb begin
lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
lsu_pkt_m_in = lsu_pkt_d;
lsu_pkt_r_in = lsu_pkt_m;
// C2 clock for valid and C1 for other bits of packet
lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma);
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
// C2 clock for valid and C1 for other bits of packet
rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
rvdff #(1) lsu_pkt_vldmff (
.din (lsu_pkt_m_in.valid),
.clk (lsu_c2_m_clk)
rvdff #(1) lsu_pkt_vldrff (
.din (lsu_pkt_r_in.valid),
.clk (lsu_c2_r_clk)
rvdff #($bits(
) - 1) lsu_pkt_mff (
.din (lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]),
.clk (lsu_c1_m_clk)
rvdff #($bits(
) - 1) lsu_pkt_rff (
.din (lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]),
.clk (lsu_c1_r_clk)
if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this is really R stage signal
// this is really R stage signal
assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_r[31:0]);
({32{lsu_pkt_r.word}} & lsu_ld_datafn_r[31:0]);
// this signal is used for gpr update
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
end else begin : L2U1_Plus1_0 // block: L2U1_Plus1_1
logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
// this result must look at prior stores and merge them in
// this result must look at prior stores and merge them in
assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
({32{~lsu_pkt_m.unsign & }} & {{24{ lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
({32{~lsu_pkt_m.unsign & }} & {{24{ lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{ lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{ lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
({32{lsu_pkt_m.word}} & lsu_ld_datafn_m[31:0]);
({32{lsu_pkt_m.word}} & lsu_ld_datafn_m[31:0]);
// this signal is used for gpr update
// this signal is used for gpr update
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]);
// Fast interrupt address
// Fast interrupt address
assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1];
assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1];
// absence load/store all 0's
// absence load/store all 0's
assign lsu_addr_d[31:0] = full_addr_d[31:0];
assign lsu_addr_d[31:0] = full_addr_d[31:0];
// Interrupt as a flush source allows the WB to occur
// Interrupt as a flush source allows the WB to occur
assign lsu_commit_r = lsu_pkt_r.valid & ( | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
assign lsu_commit_r = lsu_pkt_r.valid & ( | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores
assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage
assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage
assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk));
rvdff #(32) sdmff (
.din (store_data_m_in[31:0]),
.clk (lsu_store_c1_m_clk)
rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
rvdff #(32) samff (
rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
.din (lsu_addr_d[31:0]),
.clk (lsu_c1_m_clk)
rvdff #(32) sarff (
.din (lsu_addr_m[31:0]),
.clk (lsu_c1_r_clk)
assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving
assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving
assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving
assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving
rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override));
rvdffe #(29) end_addr_hi_mff (
rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override));
.din (end_addr_d[31:3]),
.en ((lsu_pkt_d.valid & ldst_dual_d) | clk_override)
rvdffe #(29) end_addr_hi_rff (
.din (end_addr_m[31:3]),
.en ((lsu_pkt_m.valid & ldst_dual_m) | clk_override)
rvdff #(3) end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk));
rvdff #(3) end_addr_lo_mff (
rvdff #(3) end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk));
.din (end_addr_d[2:0]),
.clk (lsu_c1_m_clk)
rvdff #(3) end_addr_lo_rff (
.din (end_addr_m[2:0]),
.clk (lsu_c1_r_clk)
rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_dccm_mff (
rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
.din (addr_in_dccm_d),
.clk (lsu_c1_m_clk),
rvdff #(1) addr_in_dccm_rff (
.din (addr_in_dccm_m),
.clk (lsu_c1_r_clk),
rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_in_pic_mff (
rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
.din (addr_in_pic_d),
.clk (lsu_c1_m_clk),
rvdff #(1) addr_in_pic_rff (
.din (addr_in_pic_m),
.clk (lsu_c1_r_clk),
rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) addr_external_mff (
rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
.din (addr_external_d),
.clk (lsu_c1_m_clk),
rvdff #(1) addr_external_rff (
.din (addr_external_m),
.clk (lsu_c1_r_clk),
rvdff #(1) access_fault_mff (.din(access_fault_d), .dout(access_fault_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) access_fault_mff (
rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
.din (access_fault_d),
rvdff #(4) exc_mscause_mff (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);
.clk (lsu_c1_m_clk),
rvdff #(1) misaligned_fault_mff (
.din (misaligned_fault_d),
.clk (lsu_c1_m_clk),
rvdff #(4) exc_mscause_mff (
.din (exc_mscause_d[3:0]),
.clk (lsu_c1_m_clk),
rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_d), .dout(fir_dccm_access_error_m), .clk(lsu_c1_m_clk), .*);
rvdff #(1) fir_dccm_access_error_mff (
rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
.din (fir_dccm_access_error_d),
.clk (lsu_c1_m_clk),
rvdff #(1) fir_nondccm_access_error_mff (
.din (fir_nondccm_access_error_d),
.clk (lsu_c1_m_clk),
rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override));
rvdffe #(32) bus_read_data_r_ff (
.din (bus_read_data_m[31:0]),
.en (addr_external_m | clk_override)
@ -28,313 +28,392 @@
module el2_lsu_stbuf
module el2_lsu_stbuf
import el2_pkg::*;
import el2_pkg::*;
`include "el2_param.vh"
`include "el2_param.vh"
) (
input logic clk, // core clock
input logic clk, // core clock
input logic rst_l, // reset
input logic rst_l, // reset
input logic lsu_stbuf_c1_clk, // stbuf clock
input logic lsu_stbuf_c1_clk, // stbuf clock
input logic lsu_free_c2_clk, // free clk
input logic lsu_free_c2_clk, // free clk
// Store Buffer input
// Store Buffer input
input logic store_stbuf_reqvld_r, // core instruction goes to stbuf
input logic store_stbuf_reqvld_r, // core instruction goes to stbuf
input logic lsu_commit_r, // lsu commits
input logic lsu_commit_r, // lsu commits
input logic dec_lsu_valid_raw_d, // Speculative decode valid
input logic dec_lsu_valid_raw_d, // Speculative decode valid
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, // merged data from the dccm for stores. This is used for fwding
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // merged data from the dccm for stores
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // merged data from the dccm for stores
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // merged data from the dccm for stores
input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // merged data from the dccm for stores
// Store Buffer output
// Store Buffer output
output logic stbuf_reqvld_any, // stbuf is draining
output logic stbuf_reqvld_any, // stbuf is draining
output logic stbuf_reqvld_flushed_any, // Top entry is flushed
output logic stbuf_reqvld_flushed_any, // Top entry is flushed
output logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // address
output logic [ pt.LSU_SB_BITS-1:0] stbuf_addr_any, // address
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data
input logic lsu_stbuf_commit_any, // pop the stbuf as it commits
input logic lsu_stbuf_commit_any, // pop the stbuf as it commits
output logic lsu_stbuf_full_any, // stbuf is full
output logic lsu_stbuf_full_any, // stbuf is full
output logic lsu_stbuf_empty_any, // stbuf is empty
output logic lsu_stbuf_empty_any, // stbuf is empty
output logic ldst_stbuf_reqvld_r, // needed for clocking
output logic ldst_stbuf_reqvld_r, // needed for clocking
input logic [pt.LSU_SB_BITS-1:0] lsu_addr_d, // lsu address D-stage
input logic [pt.LSU_SB_BITS-1:0] lsu_addr_d, // lsu address D-stage
input logic [31:0] lsu_addr_m, // lsu address M-stage
input logic [ 31:0] lsu_addr_m, // lsu address M-stage
input logic [31:0] lsu_addr_r, // lsu address R-stage
input logic [ 31:0] lsu_addr_r, // lsu address R-stage
input logic [pt.LSU_SB_BITS-1:0] end_addr_d, // lsu end address D-stage - needed to check unaligned
input logic [pt.LSU_SB_BITS-1:0] end_addr_d, // lsu end address D-stage - needed to check unaligned
input logic [31:0] end_addr_m, // lsu end address M-stage - needed to check unaligned
input logic [31:0] end_addr_m, // lsu end address M-stage - needed to check unaligned
input logic [31:0] end_addr_r, // lsu end address R-stage - needed to check unaligned
input logic [31:0] end_addr_r, // lsu end address R-stage - needed to check unaligned
input logic ldst_dual_d, ldst_dual_m, ldst_dual_r,
input logic ldst_dual_d,
input logic addr_in_dccm_m, // address is in dccm
input logic addr_in_dccm_r, // address is in dccm
input logic addr_in_dccm_m, // address is in dccm
input logic addr_in_dccm_r, // address is in dccm
// Forwarding signals
// Forwarding signals
input logic lsu_cmpen_m, // needed for forwarding stbuf - load
input logic lsu_cmpen_m, // needed for forwarding stbuf - load
input el2_lsu_pkt_t lsu_pkt_m, // LSU packet M-stage
input el2_lsu_pkt_t lsu_pkt_m, // LSU packet M-stage
input el2_lsu_pkt_t lsu_pkt_r, // LSU packet R-stage
input el2_lsu_pkt_t lsu_pkt_r, // LSU packet R-stage
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf data
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf data
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf data
output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf data
output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf data
input logic scan_mode // Scan mode
input logic scan_mode // Scan mode
// DEPTH      : number of store buffer entries, taken from the pt.LSU_STBUF_DEPTH parameter.
//              It sizes the per-entry arrays (stbuf_vld, stbuf_addr, stbuf_data, ...).
// DEPTH_LOG2 : ceil(log2(DEPTH)); the bit width of the read/write pointers (WrPtr, RdPtr).
localparam DEPTH = pt.LSU_STBUF_DEPTH;
localparam DEPTH = pt.LSU_STBUF_DEPTH;
localparam DEPTH_LOG2 = $clog2(DEPTH);
localparam DEPTH_LOG2 = $clog2(DEPTH);
// These are the fields in the store queue
// These are the fields in the store queue
logic [DEPTH-1:0] stbuf_vld;
logic [ DEPTH-1:0] stbuf_vld;
logic [DEPTH-1:0] stbuf_dma_kill;
logic [ DEPTH-1:0] stbuf_dma_kill;
logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
logic [ DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteen;
logic [ DEPTH-1:0][ BYTE_WIDTH-1:0] stbuf_byteen;
logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_data;
logic [ DEPTH-1:0][ DATA_WIDTH-1:0] stbuf_data;
logic [DEPTH-1:0] sel_lo;
logic [ DEPTH-1:0] sel_lo;
logic [DEPTH-1:0] stbuf_wr_en;
logic [ DEPTH-1:0] stbuf_wr_en;
logic [DEPTH-1:0] stbuf_dma_kill_en;
logic [ DEPTH-1:0] stbuf_dma_kill_en;
logic [DEPTH-1:0] stbuf_reset;
logic [ DEPTH-1:0] stbuf_reset;
logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
logic [ DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_datain;
logic [ DEPTH-1:0][ DATA_WIDTH-1:0] stbuf_datain;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteenin;
logic [ DEPTH-1:0][ BYTE_WIDTH-1:0] stbuf_byteenin;
logic [7:0] store_byteen_ext_r;
logic [ 7:0] store_byteen_ext_r;
logic [BYTE_WIDTH-1:0] store_byteen_hi_r;
logic [BYTE_WIDTH-1:0] store_byteen_hi_r;
logic [BYTE_WIDTH-1:0] store_byteen_lo_r;
logic [BYTE_WIDTH-1:0] store_byteen_lo_r;
logic WrPtrEn, RdPtrEn;
logic WrPtrEn, RdPtrEn;
logic [DEPTH_LOG2-1:0] WrPtr, RdPtr;
logic [DEPTH_LOG2-1:0] WrPtr, RdPtr;
logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr;
logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr;
logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
logic dual_stbuf_write_r;
logic dual_stbuf_write_r;
logic isdccmst_m, isdccmst_r;
logic isdccmst_m, isdccmst_r;
logic [3:0] stbuf_numvld_any, stbuf_specvld_any;
logic [3:0] stbuf_numvld_any, stbuf_specvld_any;
logic [1:0] stbuf_specvld_m, stbuf_specvld_r;
logic [1:0] stbuf_specvld_m, stbuf_specvld_r;
logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
// variables to detect matching from the store queue
// variables to detect matching from the store queue
logic [DEPTH-1:0] stbuf_match_hi, stbuf_match_lo;
logic [DEPTH-1:0] stbuf_match_hi, stbuf_match_lo;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
// logic to detect matching from the pipe - needed for store - load forwarding
// logic to detect matching from the pipe - needed for store - load forwarding
logic [BYTE_WIDTH-1:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic [BYTE_WIDTH-1:0]
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
logic [BYTE_WIDTH-1:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [BYTE_WIDTH-1:0] ld_byte_hit_lo, ld_byte_rhit_lo;
logic [BYTE_WIDTH-1:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [BYTE_WIDTH-1:0] ld_byte_hit_hi, ld_byte_rhit_hi;
logic [BYTE_WIDTH-1:0] ldst_byteen_hi_r;
logic [BYTE_WIDTH-1:0] ldst_byteen_hi_r;
logic [BYTE_WIDTH-1:0] ldst_byteen_lo_r;
logic [BYTE_WIDTH-1:0] ldst_byteen_lo_r;
// byte_en flowing down
// byte_en flowing down
logic [7:0] ldst_byteen_r;
logic [ 7:0] ldst_byteen_r;
logic [7:0] ldst_byteen_ext_r;
logic [ 7:0] ldst_byteen_ext_r;
// fwd data through the pipe
// fwd data through the pipe
logic [31:0] ld_fwddata_rpipe_lo;
logic [ 31:0] ld_fwddata_rpipe_lo;
logic [31:0] ld_fwddata_rpipe_hi;
logic [ 31:0] ld_fwddata_rpipe_hi;
// coalescing signals
// coalescing signals
logic [DEPTH-1:0] store_matchvec_lo_r, store_matchvec_hi_r;
logic [DEPTH-1:0] store_matchvec_lo_r, store_matchvec_hi_r;
logic store_coalesce_lo_r, store_coalesce_hi_r;
logic store_coalesce_lo_r, store_coalesce_hi_r;
// Logic starts here
// Logic starts here
// Create high/low byte enables
// Create high/low byte enables
assign store_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
assign store_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{}};
assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{}};
assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{}};
assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{}};
// Pre-computed pointer increments. Each result is assigned to a DEPTH_LOG2-bit
// slice, so the addition is truncated to the pointer width (any carry out is dropped).
// RdPtrPlus1 : read pointer advanced by one entry
// WrPtrPlus1 : write pointer advanced by one entry
// WrPtrPlus2 : write pointer advanced by two entries
assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
// ecc error on both hi/lo
// ecc error on both hi/lo
// dual_stbuf_write_r  : the R-stage access is flagged dual (ldst_dual_r) and a store
//                       buffer request is valid; it qualifies the hi-address match and
//                       the second-entry allocation below.
//                       NOTE(review): "dual" presumably means the store spans two
//                       store buffer words -- confirm against the LSU address logic.
// ldst_stbuf_reqvld_r : store buffer request that is actually allowed to write, i.e.
//                       the R-stage store either commits (lsu_commit_r) or belongs to a
//                       DMA packet (lsu_pkt_r.dma).
assign dual_stbuf_write_r = ldst_dual_r & store_stbuf_reqvld_r;
assign dual_stbuf_write_r = ldst_dual_r & store_stbuf_reqvld_r;
assign ldst_stbuf_reqvld_r = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
assign ldst_stbuf_reqvld_r = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
// Store Buffer coalescing
// Store Buffer coalescing
for (genvar i=0; i<DEPTH; i++) begin: FindMatchEntry
for (genvar i = 0; i < DEPTH; i++) begin : FindMatchEntry
assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(
assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
end: FindMatchEntry
)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(
)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(
)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(
)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
end : FindMatchEntry
// OR-reduce the per-entry match vectors: a coalesce flag is set when at least one
// store buffer entry matched the R-stage start address (lo) or end address (hi).
assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable
// Allocate new in this entry if :
// Allocate new in this entry if :
// 1. wrptr, single allocate, lo did not coalesce
// 1. wrptr, single allocate, lo did not coalesce
// 2. wrptr, double allocate, lo ^ hi coalesced
// 2. wrptr, double allocate, lo ^ hi coalesced
// 3. wrptr + 1, double allocate, neither lo nor hi coalesced
// 3. wrptr + 1, double allocate, neither lo nor hi coalesced
// Also update if there is a hi or a lo coalesce to this entry
// Also update if there is a hi or a lo coalesce to this entry
// Store Buffer instantiation
// Store Buffer instantiation
for (genvar i=0; i<DEPTH; i++) begin: GenStBuf
for (genvar i = 0; i < DEPTH; i++) begin : GenStBuf
assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
( (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // Allocate : new Lo
( (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // Allocate : new Lo
( (i == WrPtr[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~store_coalesce_hi_r) | // Allocate : only 1 new Write Either
( (i == WrPtr[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~store_coalesce_hi_r) | // Allocate : only 1 new Write Either
( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) | // Allocate2 : 2 new so Write Hi
( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) & dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) | // Allocate2 : 2 new so Write Hi
store_matchvec_lo_r[i] | store_matchvec_hi_r[i]); // Coalesced Write Lo or Hi
store_matchvec_lo_r[i] | store_matchvec_hi_r[i]); // Coalesced Write Lo or Hi
assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
// Mux select for start/end address
// Mux select for start/end address
assign sel_lo[i] = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // lo allocated new entry
assign sel_lo[i] = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) | // lo allocated new entry
store_matchvec_lo_r[i]; // lo coalesced in to this entry
store_matchvec_lo_r[i]; // lo coalesced in to this entry
assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0] = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0] : end_addr_r[pt.LSU_SB_BITS-1:0];
assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0] = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0] : end_addr_r[pt.LSU_SB_BITS-1:0];
assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0]) : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0]) : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
assign stbuf_datain[i][7:0] = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0] : stbuf_data[i][7:0]) :
assign stbuf_datain[i][7:0] = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0] : stbuf_data[i][7:0]) :
((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0] : stbuf_data[i][7:0]);
((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0] : stbuf_data[i][7:0]);
assign stbuf_datain[i][15:8] = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8] : stbuf_data[i][15:8]) :
assign stbuf_datain[i][15:8] = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8] : stbuf_data[i][15:8]) :
((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8] : stbuf_data[i][15:8]);
((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8] : stbuf_data[i][15:8]);
assign stbuf_datain[i][23:16] = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16]) :
assign stbuf_datain[i][23:16] = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16]) :
((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
assign stbuf_datain[i][31:24] = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24]) :
assign stbuf_datain[i][31:24] = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24]) :
((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
rvdffsc #(.WIDTH(1)) stbuf_vldff (.din(1'b1), .dout(stbuf_vld[i]), .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
rvdffsc #(
rvdffsc #(.WIDTH(1)) stbuf_killff (.din(1'b1), .dout(stbuf_dma_kill[i]), .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
rvdffe #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
) stbuf_vldff (
rvdffsc #(.WIDTH(BYTE_WIDTH)) stbuf_byteenff (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]), .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]), .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
rvdffe #(.WIDTH(DATA_WIDTH)) stbuf_dataff (.din(stbuf_datain[i][DATA_WIDTH-1:0]), .dout(stbuf_data[i][DATA_WIDTH-1:0]), .en(stbuf_wr_en[i]), .*);
end else begin: Gen_dccm_disable
assign stbuf_wr_en[DEPTH-1:0] = '0;
assign stbuf_reset[DEPTH-1:0] = '0;
assign stbuf_vld[DEPTH-1:0] = '0;
assign stbuf_dma_kill[DEPTH-1:0] = '0;
rvdffsc #(
assign stbuf_addr[DEPTH-1:0] = '0;
assign stbuf_byteen[DEPTH-1:0] = '0;
) stbuf_killff (
assign stbuf_data[DEPTH-1:0] = '0;
rvdffe #(
) stbuf_addrff (
.din (stbuf_addrin[i][pt.LSU_SB_BITS-1:0]),
.en (stbuf_wr_en[i]),
rvdffsc #(
) stbuf_byteenff (
rvdffe #(
) stbuf_dataff (
.din (stbuf_datain[i][DATA_WIDTH-1:0]),
.en (stbuf_wr_en[i]),
end else begin : Gen_dccm_disable
assign stbuf_wr_en[DEPTH-1:0] = '0;
assign stbuf_reset[DEPTH-1:0] = '0;
assign stbuf_vld[DEPTH-1:0] = '0;
assign stbuf_dma_kill[DEPTH-1:0] = '0;
assign stbuf_addr[DEPTH-1:0] = '0;
assign stbuf_byteen[DEPTH-1:0] = '0;
assign stbuf_data[DEPTH-1:0] = '0;
// Store Buffer drain logic
// The entry at RdPtr is "flushed" when it is valid but has its dma-kill bit set.
assign stbuf_reqvld_flushed_any = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
// Drain request for the entry at RdPtr.
// Don't drain if some kill bit is being set this cycle
assign stbuf_reqvld_any = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]);
// Address and data of the entry at RdPtr
assign stbuf_addr_any[pt.LSU_SB_BITS-1:0] = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
assign stbuf_data_any[DATA_WIDTH-1:0] = stbuf_data[RdPtr][DATA_WIDTH-1:0];
// Update the RdPtr/WrPtr logic
// Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs
assign WrPtrEn = (ldst_stbuf_reqvld_r & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) |  // writing 1 and did not coalesce
                 (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r));  // writing 2 and at least 1 did not coalesce
// Advance by two only when a dual write allocates two new entries (neither half coalesced).
assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
// The read pointer advances by one on a commit or on a flush of the entry at RdPtr.
assign RdPtrEn = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
// Count the valid store buffer entries (sum of stbuf_vld over all DEPTH entries).
always_comb begin
  stbuf_numvld_any[3:0] = '0;
  for (int i = 0; i < DEPTH; i++) begin
    stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};
  end
end
// These go to store buffer to detect full
// NOTE(review): in the text this was recovered from, one operand of isdccmst_m / isdccmst_r
// was missing ("valid & & addr_in_dccm"). lsu_pkt_*.store is assumed - confirm.
assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;

// Speculative entry count per stage: 0 when not a dccm store, 1 for a single, 2 (1 << 1) for a dual.
assign stbuf_specvld_m[1:0] = {1'b0, isdccmst_m} << (isdccmst_m & ldst_dual_m);
assign stbuf_specvld_r[1:0] = {1'b0, isdccmst_r} << (isdccmst_r & ldst_dual_r);
assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] + {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};

// The full threshold is DEPTH for a valid non-dual op at D, otherwise DEPTH-1.
assign lsu_stbuf_full_any = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);

// Load forwarding logic from the store queue
assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];

// Per-entry address match against the M-stage lo/hi addresses, and the OR-reduced
// forwarding byte enables across all entries.
always_comb begin : GenLdFwd
  stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0] = '0;
  stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0] = '0;
  for (int i = 0; i < DEPTH; i++) begin
    stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] ==
                         cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) &
                        stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
    stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] ==
                         cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) &
                        stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;

    // Kill the store buffer entry if there is a dma store since it already updated the dccm
    // NOTE(review): the last operand was missing in the recovered text ("lsu_pkt_m.dma &;").
    // lsu_pkt_m.store is assumed from the comment above - confirm.
    stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;

    for (int j = 0; j < BYTE_WIDTH; j++) begin
      stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
      stbuf_fwdbyteen_hi_pre_m[j] |= stbuf_fwdbyteenvec_hi[i][j];

      stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
      stbuf_fwdbyteen_lo_pre_m[j] |= stbuf_fwdbyteenvec_lo[i][j];
    end
  end
end  // block: GenLdFwd

// Forwarding data: OR of the data of every matching entry.
always_comb begin : GenLdData
  stbuf_fwddata_hi_pre_m[31:0] = '0;
  stbuf_fwddata_lo_pre_m[31:0] = '0;
  for (int i = 0; i < DEPTH; i++) begin
    stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
    stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
  end
end  // block: GenLdData
// Create Hi/Lo signals - needed for the pipe forwarding
// NOTE(review): the replication operand of the first term was empty in the recovered text
// ("{8{}}"). A byte-size qualifier named lsu_pkt_r.by is assumed, by analogy with
// half/word/dword - confirm the field name against el2_lsu_pkt_t.
assign ldst_byteen_r[7:0] = ({8{lsu_pkt_r.by}}    & 8'b0000_0001) |
                            ({8{lsu_pkt_r.half}}  & 8'b0000_0011) |
                            ({8{lsu_pkt_r.word}}  & 8'b0000_1111) |
                            ({8{lsu_pkt_r.dword}} & 8'b1111_1111);

// Shift the size mask by the byte offset; bits [7:4] are the hi part, bits [3:0] the lo part.
assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4];
assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0];

// Address compare of the M-stage lo/hi address against the R-stage lo/hi address.
// Naming: ld_addr_rhit_<R-stage half>_<M-stage half>.
// NOTE(review): one operand was missing in the recovered text ("valid & & ~dma").
// lsu_pkt_r.store is assumed - confirm.
assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
// Per-byte forwarding: combine the R-stage pipe hits with the store buffer hits.
for (genvar i = 0; i < BYTE_WIDTH; i++) begin
  // Byte hit = address hit qualified by the R-stage byte enable of that half.
  assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
  assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
  assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
  assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];

  assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
  assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];

  // Data forwarded from the R-stage pipe for the M-stage lo / hi word.
  assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
                                              ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
  assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
                                              ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);

  assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
  assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];

  // A byte is forwardable if either the pipe or the store buffer provides it.
  assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
  assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];

  // Pipe vs Store Queue priority: the pipe data wins when the pipe hits this byte.
  assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
  // Pipe vs Store Queue priority
  assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
end
// Flops
// Write pointer: loads NxtWrPtr when WrPtrEn is set.
rvdffs #(
    .WIDTH(DEPTH_LOG2)
) WrPtrff (
    .din (NxtWrPtr[DEPTH_LOG2-1:0]),
    .dout(WrPtr[DEPTH_LOG2-1:0]),
    .en  (WrPtrEn),
    .clk (lsu_stbuf_c1_clk),
    .*
);
// Read pointer: loads NxtRdPtr when RdPtrEn is set.
rvdffs #(
    .WIDTH(DEPTH_LOG2)
) RdPtrff (
    .din (NxtRdPtr[DEPTH_LOG2-1:0]),
    .dout(RdPtr[DEPTH_LOG2-1:0]),
    .en  (RdPtrEn),
    .clk (lsu_stbuf_c1_clk),
    .*
);
@ -23,46 +23,51 @@
// el2_lsu_trigger
// Compares the M-stage load/store address or store data against four trigger
// packets and produces one match bit per trigger.
//   trigger_pkt_any     : the four trigger packets (fields used here: m, select, store,
//                         load, match, tdata2)
//   lsu_pkt_m           : M-stage LSU packet (valid, dma, load, half, word are used here)
//   lsu_addr_m          : M-stage address
//   store_data_m        : M-stage store data
//   lsu_trigger_match_m : per-trigger match result
module el2_lsu_trigger
  import el2_pkg::*;
#(
    `include "el2_param.vh"
) (
    input el2_trigger_pkt_t [ 3:0] trigger_pkt_any,  // trigger packet from dec
    input el2_lsu_pkt_t            lsu_pkt_m,        // lsu packet
    input logic             [31:0] lsu_addr_m,       // address
    input logic             [31:0] store_data_m,     // store data

    output logic [3:0] lsu_trigger_match_m  // match result
);

  logic              trigger_enable;
  logic [ 3:0][31:0] lsu_match_data;
  logic [ 3:0]       lsu_trigger_data_match;
  logic [31:0]       store_data_trigger_m;
  logic [31:0]       ldst_addr_trigger_m;

  // Generate the trigger enable (This is for power)
  // It is the OR of the .m bit of all four trigger packets.
  always_comb begin
    trigger_enable = 1'b0;
    for (int i = 0; i < 4; i++) begin
      trigger_enable |= trigger_pkt_any[i].m;
    end
  end

  // Store data masked by access size: bits [31:16] only for word, bits [15:8]
  // for half or word, bits [7:0] always. Forced to zero when no trigger is enabled.
  assign store_data_trigger_m[31:0] = {({16{lsu_pkt_m.word}} & store_data_m[31:16]),
                                       ({8{(lsu_pkt_m.half | lsu_pkt_m.word)}} & store_data_m[15:8]),
                                       store_data_m[7:0]} & {32{trigger_enable}};
  assign ldst_addr_trigger_m[31:0] = lsu_addr_m[31:0] & {32{trigger_enable}};

  for (genvar i = 0; i < 4; i++) begin
    // select == 0 : compare the address; select == 1 and store : compare the store data.
    assign lsu_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select}} & ldst_addr_trigger_m[31:0]) |
                                     ({32{trigger_pkt_any[i].select & trigger_pkt_any[i].store}} & store_data_trigger_m[31:0]);

    rvmaskandmatch trigger_match (
        .mask  (trigger_pkt_any[i].tdata2[31:0]),
        .data  (lsu_match_data[i][31:0]),
        .masken(trigger_pkt_any[i].match),
        .match (lsu_trigger_data_match[i])
    );

    // NOTE(review): two operands were missing in the text this was recovered from
    // ("trigger_pkt_any[i].store & |" and a dangling trailing "&").
    // lsu_pkt_m.store is assumed by symmetry with the load term, and
    // lsu_trigger_data_match[i] because it is otherwise unused - confirm both.
    // Load triggers only fire on address compares (select == 0).
    assign lsu_trigger_match_m[i] = lsu_pkt_m.valid & ~lsu_pkt_m.dma & trigger_enable &
                                    ((trigger_pkt_any[i].store & lsu_pkt_m.store) |
                                     (trigger_pkt_any[i].load & lsu_pkt_m.load & ~trigger_pkt_any[i].select)) &
                                    lsu_trigger_data_match[i];
  end

endmodule  // el2_lsu_trigger
Reference in New Issue