785 lines
44 KiB
Scala
785 lines
44 KiB
Scala
package dec
|
|
import chisel3._
|
|
|
|
import scala.collection._
|
|
import chisel3.util._
|
|
import include._
|
|
import lib._
|
|
import exu._
|
|
import lsu._
|
|
|
|
class dec_decode_ctl extends Module with lib with RequireAsyncReset{
|
|
val io = IO(new Bundle{
|
|
val decode_exu = Flipped(new decode_exu) //connection with exu top
|
|
val dec_alu = Flipped(new dec_alu) //connection with alu
|
|
val dec_div = Flipped(new dec_div) //connection with divider
|
|
val dctl_busbuff = Flipped(new dctl_busbuff()) //connection with bus buffer
|
|
val dctl_dma = new dctl_dma //connection with dma
|
|
val dec_aln = Flipped(new aln_dec) //connection with aligner
|
|
val dbg_dctl = new dbg_dctl() //connection with dbg
|
|
val dec_tlu_flush_extint = Input(Bool())
|
|
val dec_tlu_force_halt = Input(Bool()) // invalidate nonblock load cam on a force halt event
|
|
val dec_i0_inst_wb1 = Output(UInt(32.W)) // 32b instruction at wb+1 for trace encoder
|
|
val dec_i0_pc_wb1 = Output(UInt(31.W)) // 31b pc at wb+1 for trace encoder
|
|
val dec_i0_trigger_match_d = Input(UInt(4.W)) // i0 decode trigger matches
|
|
val dec_tlu_wr_pause_r = Input(Bool()) // pause instruction at r
|
|
val dec_tlu_pipelining_disable = Input(Bool()) // pipeline disable - presync, i0 decode only
|
|
val lsu_trigger_match_m = Input(UInt(4.W)) // lsu trigger matches
|
|
val lsu_pmu_misaligned_m = Input(Bool()) // perf mon: load/store misalign
|
|
val dec_tlu_debug_stall = Input(Bool()) // debug stall decode
|
|
val dec_tlu_flush_leak_one_r = Input(Bool()) // leak1 instruction
|
|
val dec_debug_fence_d = Input(Bool()) // debug fence instruction
|
|
val dec_i0_icaf_d = Input(Bool()) // icache access fault
|
|
val dec_i0_icaf_f1_d = Input(Bool()) // i0 instruction access fault at decode for f1 fetch group
|
|
val dec_i0_icaf_type_d = Input(UInt(2.W)) // i0 instruction access fault type
|
|
val dec_i0_dbecc_d = Input(Bool()) // icache/iccm double-bit error
|
|
val dec_i0_brp = Flipped(Valid(new br_pkt_t)) // branch packet
|
|
val dec_i0_bp_index = Input(UInt(((BTB_ADDR_HI-BTB_ADDR_LO)+1).W)) // i0 branch index
|
|
val dec_i0_bp_fghr = Input(UInt(BHT_GHR_SIZE.W)) // BP FGHR
|
|
val dec_i0_bp_btag = Input(UInt(BTB_BTAG_SIZE.W)) // BP tag
|
|
val dec_i0_pc_d = Input(UInt(31.W)) // pc
|
|
val lsu_idle_any = Input(Bool()) // lsu idle: if fence instr & !lsu_idle then stall decode
|
|
val lsu_load_stall_any = Input(Bool()) // stall any load at decode
|
|
val lsu_store_stall_any = Input(Bool()) // stall any store at decode
|
|
val exu_div_wren = Input(Bool()) // nonblocking divide write enable to GPR.
|
|
val dec_tlu_i0_kill_writeb_wb = Input(Bool()) // I0 is flushed, don't writeback any results to arch state
|
|
val dec_tlu_flush_lower_wb = Input(Bool()) // trap lower flush
|
|
val dec_tlu_i0_kill_writeb_r = Input(Bool()) // I0 is flushed, don't writeback any results to arch state
|
|
val dec_tlu_flush_lower_r = Input(Bool()) // trap lower flush
|
|
val dec_tlu_flush_pause_r = Input(Bool()) // don't clear pause state on initial lower flush
|
|
val dec_tlu_presync_d = Input(Bool()) // CSR read needs to be presync'd
|
|
val dec_tlu_postsync_d = Input(Bool()) // CSR ops that need to be postsync'd
|
|
val dec_i0_pc4_d = Input(Bool()) // inst is 4B inst else 2B
|
|
val dec_csr_rddata_d = Input(UInt(32.W)) // csr read data at wb
|
|
val dec_csr_legal_d = Input(Bool()) // csr indicates legal operation
|
|
val lsu_result_m = Input(UInt(32.W)) // load result
|
|
val lsu_result_corr_r = Input(UInt(32.W)) // load result - corrected data for writing gpr's, not for bypassing
|
|
val exu_flush_final = Input(Bool()) // lower flush or i0 flush at X or D
|
|
val dec_i0_instr_d = Input(UInt(32.W)) // inst at decode
|
|
val dec_ib0_valid_d = Input(Bool()) // inst valid at decode
|
|
val free_clk = Input(Clock())
|
|
val active_clk = Input(Clock()) // clk except for halt / pause
|
|
val clk_override = Input(Bool()) // test stuff
|
|
val dec_i0_rs1_d = Output(UInt(5.W)) // rs1 logical source
|
|
val dec_i0_rs2_d = Output(UInt(5.W))
|
|
val dec_i0_waddr_r = Output(UInt(5.W)) // i0 logical source to write to gpr's
|
|
val dec_i0_wen_r = Output(Bool()) // i0 write enable
|
|
val dec_i0_wdata_r = Output(UInt(32.W)) // i0 write data
|
|
val lsu_p = Valid(new lsu_pkt_t) // load/store packet
|
|
val div_waddr_wb = Output(UInt(5.W)) // DIV write address to GPR
|
|
val dec_lsu_valid_raw_d = Output(Bool())
|
|
val dec_lsu_offset_d = Output(UInt(12.W))
|
|
val dec_csr_wen_unq_d = Output(Bool()) // valid csr with write - for csr legal
|
|
val dec_csr_any_unq_d = Output(Bool()) // valid csr - for csr legal
|
|
val dec_csr_rdaddr_d = Output(UInt(12.W)) // read address for csr
|
|
val dec_csr_wen_r = Output(Bool()) // csr write enable at r
|
|
val dec_csr_wraddr_r = Output(UInt(12.W)) // write address for csr
|
|
val dec_csr_wrdata_r = Output(UInt(32.W)) // csr write data at r
|
|
val dec_csr_stall_int_ff = Output(Bool()) // csr is mie/mstatus
|
|
val dec_tlu_i0_valid_r = Output(Bool()) // i0 valid inst at c
|
|
val dec_tlu_packet_r = Output(new trap_pkt_t) // trap packet
|
|
val dec_tlu_i0_pc_r = Output(UInt(31.W)) // i0 trap pc
|
|
val dec_illegal_inst = Output(UInt(32.W)) // illegal inst
|
|
val dec_pmu_instr_decoded = Output(Bool()) // number of instructions decode this cycle encoded
|
|
val dec_pmu_decode_stall = Output(Bool()) // decode is stalled
|
|
val dec_pmu_presync_stall = Output(Bool()) // decode has presync stall
|
|
val dec_pmu_postsync_stall = Output(Bool()) // decode has postsync stall
|
|
val dec_nonblock_load_wen = Output(Bool()) // write enable for nonblock load
|
|
val dec_nonblock_load_waddr = Output(UInt(5.W)) // logical write addr for nonblock load
|
|
val dec_pause_state = Output(Bool()) // core in pause state
|
|
val dec_pause_state_cg = Output(Bool()) // pause state for clock-gating
|
|
val dec_div_active = Output(Bool()) // non-block divide is active
|
|
val scan_mode = Input(Bool())
|
|
})
|
|
//packets zero initialization
|
|
io.decode_exu.mul_p := 0.U.asTypeOf(io.decode_exu.mul_p)
|
|
// Vals defined
|
|
val leak1_i1_stall_in = WireInit(UInt(1.W), 0.U)
|
|
val leak1_i0_stall_in = WireInit(UInt(1.W), 0.U)
|
|
val i0r = Wire(new reg_pkt_t)
|
|
val d_t = Wire(new trap_pkt_t)
|
|
val x_t = Wire(new trap_pkt_t)
|
|
val x_t_in = Wire(new trap_pkt_t)
|
|
val r_t = Wire(new trap_pkt_t)
|
|
val r_t_in = Wire(new trap_pkt_t)
|
|
val d_d = Wire(Valid(new dest_pkt_t))
|
|
val x_d = Wire(Valid(new dest_pkt_t))
|
|
val r_d = Wire(Valid(new dest_pkt_t))
|
|
val r_d_in = Wire(Valid(new dest_pkt_t))
|
|
val wbd = Wire(Valid(new dest_pkt_t))
|
|
val i0_d_c = Wire(new class_pkt_t)
|
|
val i0_rs1_class_d = Wire(new class_pkt_t)
|
|
val i0_rs2_class_d = Wire(new class_pkt_t)
|
|
val i0_rs1_depth_d = WireInit(UInt(2.W),0.U)
|
|
val i0_rs2_depth_d = WireInit(UInt(2.W),0.U)
|
|
val cam_wen = WireInit(UInt(LSU_NUM_NBLOAD.W), 0.U)
|
|
val cam = Wire(Vec(LSU_NUM_NBLOAD,Valid(new load_cam_pkt_t)))
|
|
val cam_write = WireInit(UInt(1.W), 0.U)
|
|
val cam_inv_reset_val = Wire(Vec(LSU_NUM_NBLOAD,UInt(1.W)))
|
|
val cam_data_reset_val = Wire(Vec(LSU_NUM_NBLOAD,UInt(1.W)))
|
|
val nonblock_load_write = Wire(Vec(LSU_NUM_NBLOAD,UInt(1.W)))
|
|
val cam_raw = Wire(Vec(LSU_NUM_NBLOAD,Valid(new load_cam_pkt_t)))
|
|
val cam_in = Wire(Vec(LSU_NUM_NBLOAD,Valid(new load_cam_pkt_t)))
|
|
val i0_dp = Wire(new dec_pkt_t)
|
|
val i0_dp_raw = Wire(new dec_pkt_t)
|
|
val i0_rs1bypass = WireInit(UInt(3.W), 0.U)
|
|
val i0_rs2bypass = WireInit(UInt(3.W), 0.U)
|
|
val illegal_lockout = WireInit(UInt(1.W), 0.U)
|
|
val postsync_stall = WireInit(UInt(1.W), 0.U)
|
|
val ps_stall_in = WireInit(UInt(1.W), 0.U)
|
|
val i0_pipe_en = WireInit(UInt(4.W), 0.U)
|
|
val i0_load_block_d = WireInit(UInt(1.W), 0.U)
|
|
val load_ldst_bypass_d = WireInit(UInt(1.W), 0.U)
|
|
val store_data_bypass_d = WireInit(UInt(1.W), 0.U)
|
|
val store_data_bypass_m = WireInit(UInt(1.W), 0.U)
|
|
val tlu_wr_pause_r1 = WireInit(UInt(1.W), 0.U)
|
|
val tlu_wr_pause_r2 = WireInit(UInt(1.W), 0.U)
|
|
val leak1_i1_stall = WireInit(UInt(1.W), 0.U)
|
|
val leak1_i0_stall = WireInit(UInt(1.W), 0.U)
|
|
val pause_state = WireInit(Bool(), 0.B)
|
|
val flush_final_r = WireInit(UInt(1.W), 0.U)
|
|
val illegal_lockout_in = WireInit(UInt(1.W), 0.U)
|
|
val lsu_idle = WireInit(Bool(), 0.B)
|
|
val pause_state_in = WireInit(Bool(), 0.B)
|
|
val leak1_mode = WireInit(UInt(1.W), 0.U)
|
|
val i0_pcall = WireInit(UInt(1.W), 0.U)
|
|
val i0_pja = WireInit(UInt(1.W), 0.U)
|
|
val i0_pret = WireInit(UInt(1.W), 0.U)
|
|
val i0_legal_decode_d = WireInit(UInt(1.W), 0.U)
|
|
val i0_pcall_raw = WireInit(UInt(1.W), 0.U)
|
|
val i0_pja_raw = WireInit(UInt(1.W), 0.U)
|
|
val i0_pret_raw = WireInit(UInt(1.W), 0.U)
|
|
val i0_br_offset = WireInit(UInt(12.W), 0.U)
|
|
val i0_csr_write_only_d = WireInit(UInt(1.W), 0.U)
|
|
val i0_jal = WireInit(UInt(1.W), 0.U)
|
|
val i0_wen_r = WireInit(UInt(1.W), 0.U)
|
|
val i0_x_ctl_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_r_ctl_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_wb_ctl_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_x_data_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_r_data_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_wb_data_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_wb1_data_en = WireInit(UInt(1.W), 0.U)
|
|
val i0_nonblock_load_stall = WireInit(UInt(1.W), 0.U)
|
|
val csr_ren_qual_d = WireInit(Bool(), 0.B)
|
|
val lsu_decode_d = WireInit(UInt(1.W), 0.U)
|
|
val mul_decode_d = WireInit(UInt(1.W), 0.U)
|
|
val div_decode_d = WireInit(UInt(1.W), 0.U)
|
|
val write_csr_data = WireInit(UInt(32.W),0.U)
|
|
val i0_result_corr_r = WireInit(UInt(32.W),0.U)
|
|
val presync_stall = WireInit(UInt(1.W), 0.U)
|
|
val i0_nonblock_div_stall = WireInit(UInt(1.W), 0.U)
|
|
val debug_fence = WireInit(Bool(), 0.B)
|
|
val i0_immed_d = WireInit(UInt(32.W), 0.U)
|
|
val i0_result_x = WireInit(UInt(32.W), 0.U)
|
|
val i0_result_r = WireInit(UInt(32.W), 0.U)
|
|
//////////////////////////////////////////////////////////////////////
|
|
// Start - Data gating {{
|
|
// Enable for the shared gated clock used by the small state flops of this module.
// Each term XORs a flop's next-state value with its current value, so the enable is
// high only on cycles where at least one of those flops is about to change.
// The flops covered are the ones registered with withClock(data_gate_clk) in this module:
// tlu_wr_pause_r1/r2, dec_extint_stall, leak1_i1_stall, leak1_i0_stall, pause_state,
// postsync_stall, flush_final_r and illegal_lockout.
val data_gate_en = (io.dec_tlu_wr_pause_r ^ tlu_wr_pause_r1 ) | // replaces free_clk
|
|
(tlu_wr_pause_r1 ^ tlu_wr_pause_r2 ) | // replaces free_clk
|
|
(io.dec_tlu_flush_extint ^ io.decode_exu.dec_extint_stall) |
|
|
(leak1_i1_stall_in ^ leak1_i1_stall ) | // replaces free_clk
|
|
(leak1_i0_stall_in ^ leak1_i0_stall ) | // replaces free_clk
|
|
(pause_state_in ^ pause_state ) | // replaces free_clk
|
|
(ps_stall_in ^ postsync_stall ) | // replaces free_clk
|
|
(io.exu_flush_final ^ flush_final_r ) | // replaces free_clk
|
|
(illegal_lockout_in ^ illegal_lockout ) // replaces active_clk
|
|
|
|
|
|
// Gated clock derived from the module clock, data_gate_en and scan_mode.
// NOTE(review): rvclkhdr comes from lib and is not visible here; presumably a clock-gating header cell - confirm.
val data_gate_clk = rvclkhdr(clock,data_gate_en.asBool(),io.scan_mode)
|
|
// End - Data gating
|
|
|
|
val i0_brp_valid = io.dec_i0_brp.valid & !leak1_mode
|
|
io.decode_exu.dec_i0_predict_p_d.bits.misp := 0.U
|
|
io.decode_exu.dec_i0_predict_p_d.bits.ataken := 0.U
|
|
io.decode_exu.dec_i0_predict_p_d.bits.boffset := 0.U
|
|
io.decode_exu.dec_i0_predict_p_d.bits.pcall := i0_pcall // don't mark as pcall if branch error
|
|
io.decode_exu.dec_i0_predict_p_d.bits.pja := i0_pja
|
|
io.decode_exu.dec_i0_predict_p_d.bits.pret := i0_pret
|
|
io.decode_exu.dec_i0_predict_p_d.bits.prett := io.dec_i0_brp.bits.prett
|
|
io.decode_exu.dec_i0_predict_p_d.bits.pc4 := io.dec_i0_pc4_d
|
|
io.decode_exu.dec_i0_predict_p_d.bits.hist := io.dec_i0_brp.bits.hist
|
|
io.decode_exu.dec_i0_predict_p_d.valid := i0_brp_valid & i0_legal_decode_d
|
|
val i0_notbr_error = i0_brp_valid & !(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw)
|
|
|
|
// no toffset error for a pret
|
|
val i0_br_toffset_error = i0_brp_valid & io.dec_i0_brp.bits.hist(1) & (io.dec_i0_brp.bits.toffset =/= i0_br_offset) & !i0_pret_raw
|
|
val i0_ret_error = i0_brp_valid & io.dec_i0_brp.bits.ret & !i0_pret_raw;
|
|
val i0_br_error = io.dec_i0_brp.bits.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error
|
|
io.decode_exu.dec_i0_predict_p_d.bits.br_error := i0_br_error & i0_legal_decode_d & !leak1_mode
|
|
io.decode_exu.dec_i0_predict_p_d.bits.br_start_error := io.dec_i0_brp.bits.br_start_error & i0_legal_decode_d & !leak1_mode
|
|
io.decode_exu.i0_predict_index_d := io.dec_i0_bp_index
|
|
io.decode_exu.i0_predict_btag_d := io.dec_i0_bp_btag
|
|
val i0_br_error_all = (i0_br_error | io.dec_i0_brp.bits.br_start_error) & !leak1_mode
|
|
io.decode_exu.dec_i0_predict_p_d.bits.toffset := i0_br_offset
|
|
io.decode_exu.i0_predict_fghr_d := io.dec_i0_bp_fghr
|
|
io.decode_exu.dec_i0_predict_p_d.bits.way := io.dec_i0_brp.bits.way
|
|
// end
|
|
|
|
// on br error turn anything into a nop
|
|
// on i0 instruction fetch access fault turn anything into a nop
|
|
// nop => alu rs1 imm12 rd lor
|
|
val i0_icaf_d = io.dec_i0_icaf_d | io.dec_i0_dbecc_d
|
|
val i0_instr_error = i0_icaf_d;
|
|
i0_dp := i0_dp_raw
|
|
when((i0_br_error_all | i0_instr_error).asBool){
|
|
i0_dp := 0.U.asTypeOf(i0_dp)
|
|
i0_dp.alu := 1.B
|
|
i0_dp.rs1 := 1.B
|
|
i0_dp.rs2 := 1.B
|
|
i0_dp.lor := 1.B
|
|
i0_dp.legal := 1.B
|
|
i0_dp.postsync := 1.B
|
|
}
|
|
|
|
val i0 = io.dec_i0_instr_d
|
|
io.decode_exu.dec_i0_select_pc_d := i0_dp.pc
|
|
|
|
// branches that can be predicted
|
|
val i0_predict_br = i0_dp.condbr | i0_pcall | i0_pja | i0_pret;
|
|
val i0_predict_nt = !(io.dec_i0_brp.bits.hist(1) & i0_brp_valid) & i0_predict_br
|
|
val i0_predict_t = (io.dec_i0_brp.bits.hist(1) & i0_brp_valid) & i0_predict_br
|
|
val i0_ap_pc2 = !io.dec_i0_pc4_d
|
|
val i0_ap_pc4 = io.dec_i0_pc4_d
|
|
io.decode_exu.i0_ap.predict_nt := i0_predict_nt
|
|
io.decode_exu.i0_ap.predict_t := i0_predict_t
|
|
|
|
io.decode_exu.i0_ap.add := i0_dp.add
|
|
io.decode_exu.i0_ap.sub := i0_dp.sub
|
|
io.decode_exu.i0_ap.land := i0_dp.land
|
|
io.decode_exu.i0_ap.lor := i0_dp.lor
|
|
io.decode_exu.i0_ap.lxor := i0_dp.lxor
|
|
io.decode_exu.i0_ap.sll := i0_dp.sll
|
|
io.decode_exu.i0_ap.srl := i0_dp.srl
|
|
io.decode_exu.i0_ap.sra := i0_dp.sra
|
|
io.decode_exu.i0_ap.slt := i0_dp.slt
|
|
io.decode_exu.i0_ap.unsign := i0_dp.unsign
|
|
io.decode_exu.i0_ap.beq := i0_dp.beq
|
|
io.decode_exu.i0_ap.bne := i0_dp.bne
|
|
io.decode_exu.i0_ap.blt := i0_dp.blt
|
|
io.decode_exu.i0_ap.bge := i0_dp.bge
|
|
io.decode_exu.i0_ap.csr_write := i0_csr_write_only_d
|
|
io.decode_exu.i0_ap.csr_imm := i0_dp.csr_imm
|
|
io.decode_exu.i0_ap.jal := i0_jal
|
|
|
|
// non block load cam logic
|
|
// val found=Wire(UInt(1.W))
|
|
// Per-entry write enable for the non-blocking load CAM.
// Select term i is true when entries 0..i-1 are all valid and entry i is not valid,
// i.e. entry i is the lowest-indexed free entry. The value chosen for that term is
// cam_write shifted left by i, so a write sets exactly bit i of cam_wen.
cam_wen := Mux1H((0 until LSU_NUM_NBLOAD).map(i=>(0 to i).map(j=> if(i==j) !cam(j).valid else cam(j).valid).reduce(_.asBool&_.asBool).asBool -> (cam_write << i)))
|
|
|
|
cam_write := io.dctl_busbuff.lsu_nonblock_load_valid_m
|
|
val cam_write_tag = io.dctl_busbuff.lsu_nonblock_load_tag_m(LSU_NUM_NBLOAD_WIDTH-1,0)
|
|
|
|
val cam_inv_reset = io.dctl_busbuff.lsu_nonblock_load_inv_r
|
|
val cam_inv_reset_tag = io.dctl_busbuff.lsu_nonblock_load_inv_tag_r
|
|
|
|
val cam_data_reset = io.dctl_busbuff.lsu_nonblock_load_data_valid | io.dctl_busbuff.lsu_nonblock_load_data_error
|
|
val cam_data_reset_tag = io.dctl_busbuff.lsu_nonblock_load_data_tag
|
|
|
|
val nonblock_load_rd = Mux(x_d.bits.i0load.asBool, x_d.bits.i0rd, 0.U(5.W)) // rd data
|
|
val load_data_tag = io.dctl_busbuff.lsu_nonblock_load_data_tag
|
|
// case of multiple loads to same dest ie. x1 ... you have to invalidate the older one
|
|
// don't writeback a nonblock load
|
|
val nonblock_load_valid_m_delay=withClock(io.active_clk){RegEnable(io.dctl_busbuff.lsu_nonblock_load_valid_m,0.U, i0_r_ctl_en.asBool)}
|
|
val i0_load_kill_wen_r = nonblock_load_valid_m_delay & r_d.bits.i0load
|
|
// Per-entry next-state logic for the non-blocking load CAM.
// For each entry: cam_raw(i) is the registered state, cam(i) is that state with the valid bit
// cleared on a matching data return, and cam_in(i) is the value registered for the next cycle.
for(i <- 0 until LSU_NUM_NBLOAD){
|
|
// invalidate request whose tag matches this (still valid) entry
cam_inv_reset_val(i) := cam_inv_reset & (cam_inv_reset_tag === cam(i).bits.tag) & cam(i).valid
|
|
// data valid/error return whose tag matches this entry; qualified with the registered valid bit
cam_data_reset_val(i) := cam_data_reset & (cam_data_reset_tag === cam(i).bits.tag) & cam_raw(i).valid
|
|
// defaults; the when-blocks below override individual fields (last connect wins)
cam_in(i):=0.U.asTypeOf(cam(0))
|
|
cam(i):=cam_raw(i)
|
|
|
|
// a matching data return clears the valid bit seen by the rest of this cycle's logic
when(cam_data_reset_val(i).asBool){
|
|
cam(i).valid := 0.U(1.W)
|
|
}
|
|
// allocation: this entry was picked by cam_wen, so load it with the new tag and destination register
when(cam_wen(i).asBool){
|
|
cam_in(i).valid := 1.U(1.W)
|
|
cam_in(i).bits.wb := 0.U(1.W)
|
|
cam_in(i).bits.tag := cam_write_tag
|
|
cam_in(i).bits.rd := nonblock_load_rd
|
|
// release: explicit invalidate, or an i0 register write to the same rd while this entry's wb bit is set
}.elsewhen(cam_inv_reset_val(i).asBool || (i0_wen_r.asBool && (r_d_in.bits.i0rd === cam(i).bits.rd) && cam(i).bits.wb.asBool)){
|
|
cam_in(i).valid := 0.U
|
|
// otherwise hold the current (possibly data-reset) entry
}.otherwise{
|
|
cam_in(i) := cam(i)
|
|
}
|
|
// set the wb bit when the delayed load-valid is high and the inv tag matches this valid entry;
// written after the chain above, so it overrides the wb value chosen there
when(nonblock_load_valid_m_delay===1.U && (io.dctl_busbuff.lsu_nonblock_load_inv_tag_r === cam(i).bits.tag) && cam(i).valid===1.U){
|
|
cam_in(i).bits.wb := 1.U
|
|
}
|
|
// force debug halt forces cam valids to 0; highest priority
|
|
when(io.dec_tlu_force_halt){
|
|
cam_in(i).valid := 0.U
|
|
}
|
|
|
|
// state register on free_clk; all fields reset to zero
cam_raw(i):=withClock(io.free_clk){RegNext(cam_in(i),0.U.asTypeOf(cam(0)))}
|
|
// returning load data belongs to this entry (tag match on the registered, valid entry)
nonblock_load_write(i) := (load_data_tag === cam_raw(i).bits.tag) & cam_raw(i).valid
|
|
}
|
|
|
|
// Non-blocking load write-back and decode-stall generation.
// Each output below is connected exactly once.

// cancel if any younger inst (including another nonblock) committing this cycle
val nonblock_load_cancel = ((r_d_in.bits.i0rd === io.dec_nonblock_load_waddr) & i0_wen_r)

// write the returning load data only when data is valid, some CAM entry matches its tag,
// and the write is not cancelled
io.dec_nonblock_load_wen := (io.dctl_busbuff.lsu_nonblock_load_data_valid && nonblock_load_write.reduce(_|_).asBool && !nonblock_load_cancel)

// stall when an enabled source register of the decoding instruction equals the rd of the
// load that is signalling lsu_nonblock_load_valid_m this cycle
val i0_nonblock_boundary_stall = ((nonblock_load_rd===i0r.rs1) & io.dctl_busbuff.lsu_nonblock_load_valid_m & io.decode_exu.dec_i0_rs1_en_d)|((nonblock_load_rd===i0r.rs2) & io.dctl_busbuff.lsu_nonblock_load_valid_m & io.decode_exu.dec_i0_rs2_en_d)

// per CAM entry i:
//   _1: the entry's rd, forced to zero unless nonblock_load_write(i) is set
//   _2: rs1 is enabled and equals the rd of a valid entry
//   _3: rs2 is enabled and equals the rd of a valid entry
val cal_temp= for(i <-0 until LSU_NUM_NBLOAD) yield ((Fill(5,nonblock_load_write(i)) & cam(i).bits.rd), io.decode_exu.dec_i0_rs1_en_d & cam(i).valid & (cam(i).bits.rd === i0r.rs1), io.decode_exu.dec_i0_rs2_en_d & cam(i).valid & (cam(i).bits.rd === i0r.rs2))

// OR-reduce each tuple field across all entries
val (waddr, ld_stall_1, ld_stall_2) = (cal_temp.map(_._1).reduce(_|_) , cal_temp.map(_._2).reduce(_|_), cal_temp.map(_._3).reduce(_|_) )

io.dec_nonblock_load_waddr:=waddr

i0_nonblock_load_stall:=ld_stall_1 | ld_stall_2 | i0_nonblock_boundary_stall
|
|
|
|
// end non block load cam logic
|
|
|
|
// pmu start
|
|
|
|
val csr_read = csr_ren_qual_d
|
|
val csr_write = io.dec_csr_wen_unq_d
|
|
val i0_br_unpred = i0_dp.jal & !i0_predict_br
|
|
|
|
// the classes must be mutually exclusive with one another
|
|
import inst_pkt_t._
|
|
d_t.pmu_i0_itype :=Fill(4,i0_legal_decode_d) & MuxCase(NULL ,Array(
|
|
i0_dp.jal -> JAL,
|
|
i0_dp.condbr -> CONDBR,
|
|
i0_dp.mret -> MRET,
|
|
i0_dp.fence_i -> FENCEI,
|
|
i0_dp.fence -> FENCE,
|
|
i0_dp.ecall -> ECALL,
|
|
i0_dp.ebreak -> EBREAK,
|
|
( csr_read & csr_write).asBool -> CSRRW,
|
|
(!csr_read & csr_write).asBool -> CSRWRITE,
|
|
( csr_read & !csr_write).asBool -> CSRREAD,
|
|
i0_dp.pm_alu -> ALU,
|
|
i0_dp.store -> STORE,
|
|
i0_dp.load -> LOAD,
|
|
i0_dp.mul -> MUL))
|
|
// end pmu
|
|
|
|
val i0_dec =Module(new dec_dec_ctl)
|
|
i0_dec.io.ins:= i0
|
|
i0_dp_raw:=i0_dec.io.out
|
|
|
|
lsu_idle:=withClock(io.active_clk){RegNext(io.lsu_idle_any,0.U)}
|
|
|
|
// can't make this clock active_clock
|
|
leak1_i1_stall_in := (io.dec_tlu_flush_leak_one_r | (leak1_i1_stall & !io.dec_tlu_flush_lower_r))
|
|
leak1_i1_stall := withClock(data_gate_clk){RegNext(leak1_i1_stall_in,0.U)}
|
|
leak1_mode := leak1_i1_stall
|
|
leak1_i0_stall_in := ((io.dec_aln.dec_i0_decode_d & leak1_i1_stall) | (leak1_i0_stall & !io.dec_tlu_flush_lower_r))
|
|
leak1_i0_stall := withClock(data_gate_clk){RegNext(leak1_i0_stall_in,0.U)}
|
|
|
|
// 12b jal's can be predicted - these are calls
|
|
|
|
// Classify the decoded jump as predictable call (pcall), jump (pja) or return (pret).
// 20-bit immediate assembled from instruction bits 31, 19:12, 20 and 30:21
val i0_pcall_imm = Cat(i0(31),i0(19,12),i0(20),i0(30,21))
|
|
// true when bits 19:12 of the immediate are all copies of bit 11, i.e. the low 12 bits carry the whole value
val i0_pcall_12b_offset = Mux(i0_pcall_imm(11).asBool, i0_pcall_imm(19,12) === 0xff.U , i0_pcall_imm(19,12) === 0.U(8.W))
|
|
// 12-bit-offset imm20 instruction whose rd is x1 or x5
val i0_pcall_case = i0_pcall_12b_offset & i0_dp_raw.imm20 & (i0r.rd === 1.U(5.W) | i0r.rd === 5.U(5.W))
|
|
// same, but rd is neither x1 nor x5
val i0_pja_case = i0_pcall_12b_offset & i0_dp_raw.imm20 & !(i0r.rd === 1.U(5.W) | i0r.rd === 5.U(5.W))
|
|
// *_raw versions qualify with the raw decode (i0_dp_raw); the others use i0_dp,
// which has been replaced by a nop on branch / instruction errors
i0_pcall_raw := i0_dp_raw.jal & i0_pcall_case // this includes ja
|
|
i0_pcall := i0_dp.jal & i0_pcall_case
|
|
i0_pja_raw := i0_dp_raw.jal & i0_pja_case
|
|
i0_pja := i0_dp.jal & i0_pja_case
|
|
// offset compared against the predicted target offset: low 12 bits of the jump immediate for
// pcall/pja, otherwise the 12 bits built from instruction bits 31, 7, 30:25 and 11:8
i0_br_offset := Mux((i0_pcall_raw | i0_pja_raw).asBool, i0_pcall_imm(11,0) , Cat(i0(31),i0(7),i0(30,25),i0(11,8)))
|
|
// jalr with rd==0, rs1==1 or rs1==5 is a ret
|
|
val i0_pret_case = (i0_dp_raw.jal & i0_dp_raw.imm12 & (i0r.rd === 0.U(5.W)) & (i0r.rs1===1.U(5.W) | i0r.rs1 === 5.U(5.W)))
|
|
i0_pret_raw := i0_dp_raw.jal & i0_pret_case
|
|
i0_pret := i0_dp.jal & i0_pret_case
|
|
// any remaining jal that matched none of the predictable cases above
i0_jal := i0_dp.jal & !i0_pcall_case & !i0_pja_case & !i0_pret_case
|
|
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
io.dec_div.div_p.valid := div_decode_d
|
|
io.dec_div.div_p.bits.unsign := i0_dp.unsign
|
|
io.dec_div.div_p.bits.rem := i0_dp.rem
|
|
|
|
io.decode_exu.mul_p.valid := mul_decode_d
|
|
io.decode_exu.mul_p.bits.rs1_sign := i0_dp.rs1_sign
|
|
io.decode_exu.mul_p.bits.rs2_sign := i0_dp.rs2_sign
|
|
io.decode_exu.mul_p.bits.low := i0_dp.low
|
|
|
|
io.decode_exu.dec_extint_stall := withClock(data_gate_clk){RegNext(io.dec_tlu_flush_extint,0.U)}
|
|
|
|
io.lsu_p := 0.U.asTypeOf(io.lsu_p)
|
|
when (io.decode_exu.dec_extint_stall){
|
|
io.lsu_p.bits.load := 1.U(1.W)
|
|
io.lsu_p.bits.word := 1.U(1.W)
|
|
io.lsu_p.bits.fast_int := 1.U(1.W)
|
|
io.lsu_p.valid := 1.U(1.W)
|
|
}.otherwise {
|
|
io.lsu_p.valid := lsu_decode_d
|
|
io.lsu_p.bits.load := i0_dp.load
|
|
io.lsu_p.bits.store := i0_dp.store
|
|
io.lsu_p.bits.by := i0_dp.by
|
|
io.lsu_p.bits.half := i0_dp.half
|
|
io.lsu_p.bits.word := i0_dp.word
|
|
io.lsu_p.bits.load_ldst_bypass_d := load_ldst_bypass_d
|
|
io.lsu_p.bits.store_data_bypass_d := store_data_bypass_d
|
|
io.lsu_p.bits.store_data_bypass_m := store_data_bypass_m
|
|
io.lsu_p.bits.unsign := i0_dp.unsign
|
|
}
|
|
|
|
//////////////////////////////////////
|
|
io.dec_alu.dec_csr_ren_d := i0_dp.csr_read //H: assigning csr read enable signal decoded from decode_ctl going as input to EXU
|
|
csr_ren_qual_d := i0_dp.csr_read & i0_legal_decode_d.asBool //csr_ren_qual_d assigned as csr_read above
|
|
|
|
val i0_csr_write = i0_dp.csr_write & !io.dec_debug_fence_d
|
|
val csr_clr_d = i0_dp.csr_clr & i0_legal_decode_d.asBool
|
|
val csr_set_d = i0_dp.csr_set & i0_legal_decode_d.asBool
|
|
val csr_write_d = i0_csr_write & i0_legal_decode_d.asBool
|
|
|
|
i0_csr_write_only_d := i0_csr_write & !i0_dp.csr_read
|
|
io.dec_csr_wen_unq_d := (i0_dp.csr_clr | i0_dp.csr_set | i0_csr_write) // for csr legal, can't write read-only csr
|
|
//dec_csr_wen_unq_d assigned as csr_write above
|
|
|
|
io.dec_csr_rdaddr_d := i0(31,20)
|
|
io.dec_csr_wraddr_r := r_d.bits.csrwaddr //r_d is a dest_pkt
|
|
|
|
// make sure csr doesn't write same cycle as dec_tlu_flush_lower_wb
|
|
// also use valid so it's flushable
|
|
io.dec_csr_wen_r := r_d.bits.csrwen & r_d.valid & !io.dec_tlu_i0_kill_writeb_r;
|
|
|
|
// If we are writing MIE or MSTATUS, hold off the external interrupt for a cycle on the write.
|
|
io.dec_csr_stall_int_ff := ((r_d.bits.csrwaddr === "h300".U) | (r_d.bits.csrwaddr === "h304".U)) & r_d.bits.csrwen & r_d.valid & !io.dec_tlu_i0_kill_writeb_wb;
|
|
|
|
val csr_read_x = withClock(io.active_clk){RegNext(csr_ren_qual_d,init=0.B)}
|
|
val csr_clr_x = withClock(io.active_clk){RegNext(csr_clr_d, init=0.B)}
|
|
val csr_set_x = withClock(io.active_clk){RegNext(csr_set_d, init=0.B)}
|
|
val csr_write_x = withClock(io.active_clk){RegNext(csr_write_d, init=0.B)}
|
|
val csr_imm_x = withClock(io.active_clk){RegNext(i0_dp.csr_imm, init=0.U)}
|
|
|
|
// perform the update operation if any
|
|
val csrimm_x = rvdffe(i0(19,15),i0_x_data_en.asBool,clock,io.scan_mode)
|
|
val csr_rddata_x = rvdffe(io.dec_csr_rddata_d,i0_x_data_en.asBool,clock,io.scan_mode)
|
|
|
|
val csr_mask_x = Mux1H(Seq(
|
|
csr_imm_x.asBool -> Cat(repl(27,0.U),csrimm_x(4,0)),
|
|
!csr_imm_x.asBool -> io.decode_exu.exu_csr_rs1_x))
|
|
|
|
val write_csr_data_x = Mux1H(Seq(
|
|
csr_clr_x -> (csr_rddata_x & (~csr_mask_x).asUInt),
|
|
csr_set_x -> (csr_rddata_x | csr_mask_x),
|
|
csr_write_x -> ( csr_mask_x)))
|
|
// pause instruction
|
|
val clear_pause = (io.dec_tlu_flush_lower_r & !io.dec_tlu_flush_pause_r) | (pause_state & (write_csr_data === Cat(Fill(31,0.U),write_csr_data(0)))) // if 0 or 1 then exit pause state - 1 cycle pause
|
|
pause_state_in := (io.dec_tlu_wr_pause_r | pause_state) & !clear_pause
|
|
pause_state := withClock(data_gate_clk){RegNext(pause_state_in, 0.U)}
|
|
io.dec_pause_state := pause_state
|
|
tlu_wr_pause_r1 := withClock(data_gate_clk){RegNext(io.dec_tlu_wr_pause_r, 0.U)}
|
|
tlu_wr_pause_r2 := withClock(data_gate_clk){RegNext(tlu_wr_pause_r1, 0.U)}
|
|
//pause for clock gating
|
|
io.dec_pause_state_cg := (pause_state & (!tlu_wr_pause_r1 && !tlu_wr_pause_r2))
|
|
// end pause
|
|
|
|
val write_csr_data_in = Mux(pause_state,(write_csr_data - 1.U(32.W)),
|
|
Mux(io.dec_tlu_wr_pause_r,io.dec_csr_wrdata_r,write_csr_data_x))
|
|
val csr_data_wen = ((csr_clr_x | csr_set_x | csr_write_x) & csr_read_x) | io.dec_tlu_wr_pause_r | pause_state
|
|
write_csr_data := rvdffe(write_csr_data_in,csr_data_wen,clock,io.scan_mode)
|
|
|
|
// will hold until write-back at which time the CSR will be updated while GPR is possibly written with prior CSR
|
|
val pause_stall = pause_state
|
|
|
|
// for csr write only data is produced by the alu
|
|
io.dec_csr_wrdata_r := Mux(r_d.bits.csrwonly.asBool,i0_result_corr_r,write_csr_data)
|
|
|
|
val prior_csr_write = x_d.bits.csrwonly | r_d.bits.csrwonly | wbd.bits.csrwonly;
|
|
|
|
val debug_fence_i = io.dec_debug_fence_d & io.dbg_dctl.dbg_cmd_wrdata(0)
|
|
val debug_fence_raw = io.dec_debug_fence_d & io.dbg_dctl.dbg_cmd_wrdata(1)
|
|
debug_fence := debug_fence_raw | debug_fence_i
|
|
|
|
// some CSR reads need to be presync'd
|
|
val i0_presync = i0_dp.presync | io.dec_tlu_presync_d | debug_fence_i | debug_fence_raw | io.dec_tlu_pipelining_disable // both fence's presync
|
|
|
|
// some CSR writes need to be postsync'd
|
|
val i0_postsync = i0_dp.postsync | io.dec_tlu_postsync_d | debug_fence_i | (i0_csr_write_only_d & (i0(31,20) === "h7c2".U))
|
|
|
|
val any_csr_d = i0_dp.csr_read | i0_csr_write
|
|
io.dec_csr_any_unq_d := any_csr_d
|
|
val i0_legal = i0_dp.legal & (!any_csr_d | io.dec_csr_legal_d)
|
|
val i0_inst_d = Mux(io.dec_i0_pc4_d,i0,Cat(repl(16,0.U), io.dec_aln.ifu_i0_cinst))
|
|
// illegal inst handling
|
|
|
|
val shift_illegal = io.dec_aln.dec_i0_decode_d & !i0_legal//lm: valid but not legal
|
|
val illegal_inst_en = shift_illegal & !illegal_lockout
|
|
io.dec_illegal_inst := rvdffe(i0_inst_d,illegal_inst_en,clock,io.scan_mode)
|
|
illegal_lockout_in := (shift_illegal | illegal_lockout) & !flush_final_r
|
|
illegal_lockout := withClock(data_gate_clk){RegNext(illegal_lockout_in, 0.U)}
|
|
val i0_div_prior_div_stall = i0_dp.div & io.dec_div_active
|
|
//stalls signals
|
|
val i0_block_raw_d = (i0_dp.csr_read & prior_csr_write) | io.decode_exu.dec_extint_stall | pause_stall |
|
|
leak1_i0_stall | io.dec_tlu_debug_stall | postsync_stall | presync_stall |
|
|
((i0_dp.fence | debug_fence) & !lsu_idle) | i0_nonblock_load_stall |
|
|
i0_load_block_d | i0_nonblock_div_stall | i0_div_prior_div_stall
|
|
|
|
val i0_store_stall_d = i0_dp.store & (io.lsu_store_stall_any | io.dctl_dma.dma_dccm_stall_any)
|
|
val i0_load_stall_d = i0_dp.load & (io.lsu_load_stall_any | io.dctl_dma.dma_dccm_stall_any)
|
|
val i0_block_d = i0_block_raw_d | i0_store_stall_d | i0_load_stall_d
|
|
val i0_exublock_d = i0_block_raw_d
|
|
|
|
//decode valid
|
|
io.dec_aln.dec_i0_decode_d := io.dec_ib0_valid_d & !i0_block_d & !io.dec_tlu_flush_lower_r & !flush_final_r
|
|
val i0_exudecode_d = io.dec_ib0_valid_d & !i0_exublock_d & !io.dec_tlu_flush_lower_r & !flush_final_r
|
|
val i0_exulegal_decode_d = i0_exudecode_d & i0_legal
|
|
|
|
// performance monitor signals
|
|
io.dec_pmu_instr_decoded := io.dec_aln.dec_i0_decode_d
|
|
io.dec_pmu_decode_stall := io.dec_ib0_valid_d & !io.dec_aln.dec_i0_decode_d
|
|
io.dec_pmu_postsync_stall := postsync_stall.asBool
|
|
io.dec_pmu_presync_stall := presync_stall.asBool
|
|
|
|
val prior_inflight_x = x_d.valid
|
|
val prior_inflight_wb = r_d.valid
|
|
val prior_inflight = prior_inflight_x | prior_inflight_wb
|
|
val prior_inflight_eff = Mux(i0_dp.div,prior_inflight_x,prior_inflight)
|
|
|
|
presync_stall := (i0_presync & prior_inflight_eff)
|
|
postsync_stall := withClock(data_gate_clk){RegNext(ps_stall_in, 0.U)}
|
|
// illegals will postsync
|
|
ps_stall_in := (io.dec_aln.dec_i0_decode_d & (i0_postsync | !i0_legal) ) | ( postsync_stall & prior_inflight_x)
|
|
|
|
io.dec_alu.dec_i0_alu_decode_d := i0_exulegal_decode_d & i0_dp.alu
|
|
|
|
lsu_decode_d := i0_legal_decode_d & i0_dp.lsu
|
|
mul_decode_d := i0_exulegal_decode_d & i0_dp.mul
|
|
div_decode_d := i0_exulegal_decode_d & i0_dp.div
|
|
|
|
io.dec_tlu_i0_valid_r := r_d.valid & !io.dec_tlu_flush_lower_wb
|
|
|
|
//traps for TLU (tlu stuff)
|
|
d_t.legal := i0_legal_decode_d
|
|
d_t.icaf := i0_icaf_d & i0_legal_decode_d // dbecc is icaf exception
|
|
d_t.icaf_f1 := io.dec_i0_icaf_f1_d & i0_legal_decode_d // this includes icaf and dbecc
|
|
d_t.icaf_type := io.dec_i0_icaf_type_d
|
|
|
|
d_t.fence_i := (i0_dp.fence_i | debug_fence_i) & i0_legal_decode_d
|
|
|
|
// put pmu info into the trap packet
|
|
d_t.pmu_i0_br_unpred := i0_br_unpred
|
|
d_t.pmu_divide := 0.U(1.W)
|
|
d_t.pmu_lsu_misaligned := 0.U(1.W)
|
|
|
|
d_t.i0trigger := io.dec_i0_trigger_match_d & repl(4,io.dec_aln.dec_i0_decode_d)
|
|
|
|
|
|
x_t := rvdffe(d_t,i0_x_ctl_en.asBool,clock,io.scan_mode)
|
|
|
|
x_t_in := x_t
|
|
x_t_in.i0trigger := x_t.i0trigger & ~(repl(4,io.dec_tlu_flush_lower_wb))
|
|
|
|
// R-stage trap packet register: captures x_t_in, the X-stage packet with its i0trigger
// bits masked by dec_tlu_flush_lower_wb.
// Enabled by i0_r_ctl_en, the R-stage control enable (set while an instruction is in X or R,
// or on clk_override), the same enable used by the other R-stage control register i0_r_c.
r_t := rvdffe(x_t_in,i0_r_ctl_en.asBool,clock,io.scan_mode)
|
|
val lsu_trigger_match_r = RegNext(io.lsu_trigger_match_m, 0.U)
|
|
val lsu_pmu_misaligned_r = RegNext(io.lsu_pmu_misaligned_m, 0.U)
|
|
|
|
r_t_in := r_t
|
|
|
|
r_t_in.i0trigger := (repl(4,(r_d.bits.i0load | r_d.bits.i0store)) & lsu_trigger_match_r) | r_t.i0trigger
|
|
r_t_in.pmu_lsu_misaligned := lsu_pmu_misaligned_r // only valid if a load/store is valid in DC3 stage
|
|
|
|
when (io.dec_tlu_flush_lower_wb.asBool) {r_t_in := 0.U.asTypeOf(r_t_in) }
|
|
|
|
io.dec_tlu_packet_r := r_t_in
|
|
io.dec_tlu_packet_r.pmu_divide := r_d.bits.i0div & r_d.valid
|
|
// end tlu stuff
|
|
|
|
// exu_flush_final delayed by one register stage clocked by data_gate_clk (reset value 0).
flush_final_r := withClock(data_gate_clk){RegNext(io.exu_flush_final, 0.U)}
|
|
|
|
// i0 decodes when the instruction buffer entry is valid, decode is not blocked,
// and neither dec_tlu_flush_lower_r nor the delayed flush_final_r is set.
io.dec_aln.dec_i0_decode_d := io.dec_ib0_valid_d & !i0_block_d & !io.dec_tlu_flush_lower_r & !flush_final_r
|
|
|
|
// Register specifier fields taken straight from the instruction bits.
i0r.rs1 := i0(19,15) //H: assigning the register packet from the instruction bits
|
|
i0r.rs2 := i0(24,20)
|
|
i0r.rd := i0(11,7)
|
|
|
|
// Source/destination enables are dropped when the specifier is register 0.
io.decode_exu.dec_i0_rs1_en_d := i0_dp.rs1 & (i0r.rs1 =/= 0.U(5.W)) // if rs1_en=0 then read will be all 0's
|
|
io.decode_exu.dec_i0_rs2_en_d := i0_dp.rs2 & (i0r.rs2 =/= 0.U(5.W))
|
|
val i0_rd_en_d = i0_dp.rd & (i0r.rd =/= 0.U(5.W))
|
|
io.dec_i0_rs1_d := i0r.rs1//H: assigning packets to output signals leading to gprfile
|
|
io.dec_i0_rs2_d := i0r.rs2
|
|
|
|
// The 20-bit immediate format is shared: with jal it is a jump offset, without jal an upper immediate.
val i0_jalimm20 = i0_dp.imm20 & i0_dp.jal
val i0_uiimm20 = i0_dp.imm20 & !i0_dp.jal

// A CSR read forwards the CSR read data to the exu in place of the decoded immediate.
io.decode_exu.dec_i0_immed_d := Mux(i0_dp.csr_read, io.dec_csr_rddata_d, i0_immed_d)

// One-hot selection of the 32-bit immediate built from the instruction fields.
i0_immed_d := {
  val signExt12 = Cat(repl(20, i0(31)), i0(31, 20)) // jalr
  val shiftAmt5 = Cat(repl(27, 0.U), i0(24, 20))
  val jalOffset = Cat(repl(12, i0(31)), i0(19, 12), i0(20), i0(30, 21), 0.U)
  val upperImm20 = Cat(i0(31, 12), repl(12, 0.U))
  val csrImm5 = Cat(repl(27, 0.U), i0(19, 15)) // for csr's that only write
  val csrImmSel = (i0_csr_write_only_d & i0_dp.csr_imm).asBool

  Mux1H(Seq(
    i0_dp.imm12 -> signExt12,
    i0_dp.shimm5 -> shiftAmt5,
    i0_jalimm20 -> jalOffset,
    i0_uiimm20 -> upperImm20,
    csrImmSel -> csrImm5))
}
|
|
|
|
// Decode strobe qualified by instruction legality.
i0_legal_decode_d := io.dec_aln.dec_i0_decode_d & i0_legal
|
|
|
|
// Class bits (mul / load / alu) of the instruction that legally decodes this cycle.
i0_d_c.mul := i0_dp.mul & i0_legal_decode_d
|
|
i0_d_c.load := i0_dp.load & i0_legal_decode_d
|
|
i0_d_c.alu := i0_dp.alu & i0_legal_decode_d
|
|
|
|
// Class packet staged into x and then r on active_clk; both registers reset to all zeros.
val i0_x_c = withClock(io.active_clk){RegEnable(i0_d_c,0.U.asTypeOf(i0_d_c), i0_x_ctl_en.asBool)}
|
|
val i0_r_c = withClock(io.active_clk){RegEnable(i0_x_c,0.U.asTypeOf(i0_x_c), i0_r_ctl_en.asBool)}
|
|
// Shift chain of decode strobes: the top bit is this cycle's decode, the lower bits are
// the previous cycle's bits 3:1 registered on active_clk.
i0_pipe_en := Cat(io.dec_aln.dec_i0_decode_d,withClock(io.active_clk){RegNext(i0_pipe_en(3,1), init=0.U)})
|
|
|
|
// Control enables OR two adjacent i0_pipe_en bits; data enables use a single bit.
// io.clk_override forces every enable on.
i0_x_ctl_en := (i0_pipe_en(3,2).orR | io.clk_override)
|
|
i0_r_ctl_en := (i0_pipe_en(2,1).orR | io.clk_override)
|
|
i0_wb_ctl_en := (i0_pipe_en(1,0).orR | io.clk_override)
|
|
i0_x_data_en := ( i0_pipe_en(3) | io.clk_override)
|
|
i0_r_data_en := ( i0_pipe_en(2) | io.clk_override)
|
|
i0_wb_data_en := ( i0_pipe_en(1) | io.clk_override)
|
|
i0_wb1_data_en := ( i0_pipe_en(0) | io.clk_override)
|
|
|
|
// {x, r} enables exported to the exu.
io.decode_exu.dec_data_en := Cat(i0_x_data_en, i0_r_data_en)
|
|
io.decode_exu.dec_ctl_en := Cat(i0_x_ctl_en, i0_r_ctl_en)
|
|
|
|
// Destination packet at decode (d_d). It is staged through x_d and r_d into wbd further down.
d_d.bits.i0rd := i0r.rd
|
|
// i0v: this instruction writes a destination register (rd enabled and the decode is legal)
d_d.bits.i0v := i0_rd_en_d & i0_legal_decode_d
|
|
d_d.valid := io.dec_aln.dec_i0_decode_d // has flush_final_r
|
|
|
|
d_d.bits.i0load := i0_dp.load & i0_legal_decode_d
|
|
d_d.bits.i0store := i0_dp.store & i0_legal_decode_d
|
|
d_d.bits.i0div := i0_dp.div & i0_legal_decode_d
|
|
|
|
d_d.bits.csrwen := io.dec_csr_wen_unq_d & i0_legal_decode_d
|
|
// csrwonly is qualified by the raw decode strobe, not by the legal-decode signal used for the other fields
d_d.bits.csrwonly := i0_csr_write_only_d & io.dec_aln.dec_i0_decode_d
|
|
d_d.bits.csrwaddr := i0(31,20)
|
|
|
|
// decode packet registered into the x stage, enabled by i0_x_ctl_en
x_d := rvdffe(d_d, i0_x_ctl_en.asBool,clock,io.scan_mode)
|
|
val x_d_in = Wire(Valid(new dest_pkt_t))
|
|
// Default: pass x_d through; i0v and valid are then cleared on either lower flush (wb or r).
x_d_in := x_d
|
|
x_d_in.bits.i0v := x_d.bits.i0v & !io.dec_tlu_flush_lower_wb & !io.dec_tlu_flush_lower_r
|
|
x_d_in.valid := x_d.valid & !io.dec_tlu_flush_lower_wb & !io.dec_tlu_flush_lower_r
|
|
|
|
// x packet registered into the r stage, enabled by i0_r_ctl_en
r_d := rvdffe(x_d_in,i0_r_ctl_en.asBool,clock,io.scan_mode)
|
|
// Default: pass r_d through; the fields below are then masked by dec_tlu_flush_lower_wb.
r_d_in := r_d
|
|
// same value as the default connection above
r_d_in.bits.i0rd := r_d.bits.i0rd
|
|
|
|
r_d_in.bits.i0v := (r_d.bits.i0v & !io.dec_tlu_flush_lower_wb)
|
|
r_d_in.valid := (r_d.valid & !io.dec_tlu_flush_lower_wb)
|
|
r_d_in.bits.i0load := r_d.bits.i0load & !io.dec_tlu_flush_lower_wb
|
|
r_d_in.bits.i0store := r_d.bits.i0store & !io.dec_tlu_flush_lower_wb
|
|
|
|
// r packet registered into the wb stage, enabled by i0_wb_ctl_en
wbd := rvdffe(r_d_in,i0_wb_ctl_en.asBool,clock,io.scan_mode)
|
|
|
|
// Register write port outputs at r.
io.dec_i0_waddr_r := r_d_in.bits.i0rd
|
|
// write enable before the divide / load-kill masking; also used by the divide cancel logic
i0_wen_r := r_d_in.bits.i0v & !io.dec_tlu_i0_kill_writeb_r
|
|
// divides do not write through this port (masked by i0div here)
io.dec_i0_wen_r := i0_wen_r & !r_d_in.bits.i0div & !i0_load_kill_wen_r // don't write a nonblock load 1st time down the pipe
|
|
io.dec_i0_wdata_r := i0_result_corr_r
|
|
|
|
// x-stage result registered into the r stage, enabled by i0_r_data_en.
val i0_result_r_raw = rvdffe(i0_result_x,i0_r_data_en.asBool,clock,io.scan_mode)
|
|
// LOAD_TO_USE_PLUS1 is a Scala-level condition, so only one of the two branches is elaborated.
// It selects the stage at which lsu_result_m is merged into the result path.
if ( LOAD_TO_USE_PLUS1) {
|
|
i0_result_x := io.decode_exu.exu_i0_result_x
|
|
// load data replaces the registered result when the r-stage instruction is a load with a destination
i0_result_r := Mux((r_d.bits.i0v & r_d.bits.i0load).asBool,io.lsu_result_m, i0_result_r_raw)
|
|
}
|
|
else {
|
|
// load data replaces the exu result when the x-stage instruction is a load with a destination
i0_result_x := Mux((x_d.bits.i0v & x_d.bits.i0load).asBool,io.lsu_result_m,io.decode_exu.exu_i0_result_x)
|
|
i0_result_r := i0_result_r_raw
|
|
}
|
|
|
|
// correct lsu load data - don't use for bypass, do pass down the pipe
|
|
i0_result_corr_r := Mux((r_d.bits.i0v & r_d.bits.i0load).asBool,io.lsu_result_corr_r,i0_result_r_raw)
|
|
// Branch immediate sent to the alu: the branch offset when predicted not-taken and not a jal,
// otherwise a value formed from i0_ap_pc4 / i0_ap_pc2 zero-extended with ten leading zeros.
io.dec_alu.dec_i0_br_immed_d := Mux((io.decode_exu.i0_ap.predict_nt & !i0_dp.jal).asBool,i0_br_offset,Cat(repl(10,0.U),i0_ap_pc4,i0_ap_pc2))
|
|
val last_br_immed_d = WireInit(UInt(12.W),0.U)
|
|
// Selection is the other way round here: the pc4/pc2 value when predicted not-taken, otherwise the branch offset.
last_br_immed_d := Mux((io.decode_exu.i0_ap.predict_nt).asBool,Cat(repl(10,0.U),i0_ap_pc4,i0_ap_pc2),i0_br_offset)
|
|
val last_br_immed_x = WireInit(UInt(12.W),0.U)
|
|
// staged into x with the x data enable
last_br_immed_x := rvdffe(last_br_immed_d,i0_x_data_en.asBool,clock,io.scan_mode)
|
|
|
|
// divide stuff
|
|
|
|
// a valid divide is currently in the x or the r stage
val div_e1_to_r = (x_d.bits.i0div & x_d.valid) | (r_d.bits.i0div & r_d.valid)
|
|
|
|
// Divide is flushed when:
//  - the x-stage divide has destination register 0, or
//  - the x-stage divide sees dec_tlu_flush_lower_r, or
//  - the r-stage divide sees dec_tlu_flush_lower_r together with dec_tlu_i0_kill_writeb_r.
val div_flush = (x_d.bits.i0div & x_d.valid & (x_d.bits.i0rd === 0.U(5.W))) |
|
|
(x_d.bits.i0div & x_d.valid & io.dec_tlu_flush_lower_r ) |
|
|
(r_d.bits.i0div & r_d.valid & io.dec_tlu_flush_lower_r & io.dec_tlu_i0_kill_writeb_r)
|
|
|
|
// cancel if any younger inst committing this cycle to same dest as nonblock divide
|
|
|
|
// The second term only applies once the divide has left x/r (!div_e1_to_r):
// the r-stage instruction writes (i0_wen_r) the register recorded in div_waddr_wb.
val nonblock_div_cancel = (io.dec_div_active & div_flush) |
|
|
(io.dec_div_active & !div_e1_to_r & (r_d.bits.i0rd === io.div_waddr_wb) & i0_wen_r)
|
|
|
|
io.dec_div.dec_div_cancel := nonblock_div_cancel.asBool
|
|
val i0_div_decode_d = i0_legal_decode_d & i0_dp.div
|
|
|
|
// Set on a legal divide decode; held until exu_div_wren or a cancel.
val div_active_in = i0_div_decode_d | (io.dec_div_active & !io.exu_div_wren & !nonblock_div_cancel)
|
|
|
|
// registered on free_clk, reset value 0
io.dec_div_active := withClock(io.free_clk){RegNext(div_active_in, 0.U)}
|
|
|
|
// nonblocking div scheme
|
|
// Stall decode while a divide is active and an enabled source register matches the divide destination.
i0_nonblock_div_stall := (io.decode_exu.dec_i0_rs1_en_d & io.dec_div_active & (io.div_waddr_wb === i0r.rs1)) |
|
|
(io.decode_exu.dec_i0_rs2_en_d & io.dec_div_active & (io.div_waddr_wb === i0r.rs2))
|
|
|
|
// destination register of the divide, captured when the divide decodes (reset value 0)
io.div_waddr_wb := RegEnable(i0r.rd,0.U,i0_div_decode_d.asBool)
|
|
///div end
|
|
|
|
//for tracing instruction
// The instruction word and pc are staged down the pipe to the wb1 outputs declared for the trace encoder.
|
|
val i0_wb_en = i0_wb_data_en
|
|
val i0_wb1_en = i0_wb1_data_en
|
|
|
|
// bits 24:7 of the instruction, captured when a divide decodes
// NOTE(review): div_inst does not appear to be referenced anywhere below this point — confirm whether it is still needed.
val div_inst = rvdffe(i0_inst_d(24,7),i0_div_decode_d.asBool,clock,io.scan_mode)
|
|
// instruction word staged d -> x -> r -> wb -> wb1, each stage with its own data enable
val i0_inst_x = rvdffe(i0_inst_d,i0_x_data_en.asBool,clock,io.scan_mode)
|
|
val i0_inst_r = rvdffe(i0_inst_x,i0_r_data_en.asBool,clock,io.scan_mode)
|
|
val i0_inst_wb_in = i0_inst_r
|
|
val i0_inst_wb = rvdffe(i0_inst_wb_in,i0_wb_en.asBool,clock,io.scan_mode)
|
|
io.dec_i0_inst_wb1 := rvdffe(i0_inst_wb,i0_wb1_en.asBool,clock,io.scan_mode)
|
|
// pc staged r -> wb -> wb1
val i0_pc_wb = rvdffe(io.dec_tlu_i0_pc_r,i0_wb_en.asBool,clock,io.scan_mode)
|
|
|
|
io.dec_i0_pc_wb1 := rvdffe(i0_pc_wb,i0_wb1_en.asBool,clock,io.scan_mode)
|
|
// r-stage pc: the x-stage pc from the alu registered with the r data enable
val dec_i0_pc_r = rvdffe(io.dec_alu.exu_i0_pc_x,i0_r_data_en.asBool,clock,io.scan_mode)
|
|
|
|
io.dec_tlu_i0_pc_r := dec_i0_pc_r
|
|
|
|
//end tracing
|
|
|
|
// x-stage pc plus the staged branch immediate; both operands get a 0 appended as the low bit
// before the add, and bits 31:1 of the sum are sent to the exu.
val temp_pred_correct_npc_x = rvbradder(Cat(io.dec_alu.exu_i0_pc_x,0.U),Cat(last_br_immed_x,0.U))
|
|
io.decode_exu.pred_correct_npc_x := temp_pred_correct_npc_x(31,1)
|
|
|
|
// scheduling logic for primary alu's
//
// A source operand depends on an older instruction when that instruction still sits
// in the x or r stage, writes a destination (i0v) and its rd equals the enabled source.
val i0_rs1_depend_i0_x = (x_d.bits.i0rd === i0r.rs1) & x_d.bits.i0v & io.decode_exu.dec_i0_rs1_en_d
val i0_rs1_depend_i0_r = (r_d.bits.i0rd === i0r.rs1) & r_d.bits.i0v & io.decode_exu.dec_i0_rs1_en_d

val i0_rs2_depend_i0_x = (x_d.bits.i0rd === i0r.rs2) & x_d.bits.i0v & io.decode_exu.dec_i0_rs2_en_d
val i0_rs2_depend_i0_r = (r_d.bits.i0rd === i0r.rs2) & r_d.bits.i0v & io.decode_exu.dec_i0_rs2_en_d

// Producers are searched in priority order i0_x, then i0_r; no match gives an all-zero class and depth 0.
// Depth is 1 for an x-stage producer and 2 for an r-stage producer.
i0_rs1_class_d := MuxCase(0.U.asTypeOf(i0_rs1_class_d), Seq(
  i0_rs1_depend_i0_x.asBool -> i0_x_c,
  i0_rs1_depend_i0_r.asBool -> i0_r_c))
i0_rs1_depth_d := MuxCase(0.U, Seq(
  i0_rs1_depend_i0_x.asBool -> 1.U(2.W),
  i0_rs1_depend_i0_r.asBool -> 2.U(2.W)))

i0_rs2_class_d := MuxCase(0.U.asTypeOf(i0_rs2_class_d), Seq(
  i0_rs2_depend_i0_x.asBool -> i0_x_c,
  i0_rs2_depend_i0_r.asBool -> i0_r_c))
i0_rs2_depth_d := MuxCase(0.U, Seq(
  i0_rs2_depend_i0_x.asBool -> 1.U(2.W),
  i0_rs2_depend_i0_r.asBool -> 2.U(2.W)))
|
|
|
|
// stores will bypass load data in the lsu pipe
//
// Load-use handling for a source operand whose producer is a load still in the pipe.
// The depth value is 1 (bit 0) for a producer in x and 2 (bit 1) for a producer in r.
// LOAD_TO_USE_PLUS1 is a Scala-level condition, so only one branch is elaborated.
if (LOAD_TO_USE_PLUS1) {
  // Block decode when rs1 depends on an x-stage load, or when rs2 of a non-store does.
  // Bit 0 of the rs1 depth is selected explicitly, matching the rs2 term. The previous
  // code ANDed the load flag with the whole 2-bit depth, producing a 2-bit value that
  // only worked through zero-extension and truncation on the connection.
  i0_load_block_d := (i0_rs1_class_d.load & i0_rs1_depth_d(0)) | (i0_rs2_class_d.load & i0_rs2_depth_d(0) & !i0_dp.store)
  // Address operand of a load/store bypassed from an r-stage load.
  load_ldst_bypass_d := (i0_dp.load | i0_dp.store) & i0_rs1_depth_d(1) & i0_rs1_class_d.load
  // Store data bypassed from an r-stage load ...
  store_data_bypass_d := i0_dp.store & (i0_rs2_depth_d(1) & i0_rs2_class_d.load)
  // ... or from an x-stage load.
  store_data_bypass_m := i0_dp.store & (i0_rs2_depth_d(0) & i0_rs2_class_d.load)
}
else {
  // No load-use blocking in this configuration; bypasses come from an x-stage load only.
  i0_load_block_d := 0.B
  load_ldst_bypass_d := (i0_dp.load | i0_dp.store) & i0_rs1_depth_d(0) & i0_rs1_class_d.load
  store_data_bypass_d := i0_dp.store & i0_rs2_depth_d(0) & i0_rs2_class_d.load
  store_data_bypass_m := 0.B
}
|
|
// add nonblock load rs1/rs2 bypass cases
|
|
|
|
// an enabled source register matches the register being written by a returning nonblocking load
val i0_rs1_nonblock_load_bypass_en_d = io.decode_exu.dec_i0_rs1_en_d & io.dec_nonblock_load_wen & (io.dec_nonblock_load_waddr === i0r.rs1)
|
|
|
|
val i0_rs2_nonblock_load_bypass_en_d = io.decode_exu.dec_i0_rs2_en_d & io.dec_nonblock_load_wen & (io.dec_nonblock_load_waddr === i0r.rs2)
|
|
|
|
// bit 2 is priority match, bit 0 lowest priority , i0_x, i0_r
// bit 2: producer at depth bit 0 of class alu or mul
// bit 1: producer at depth bit 0 of class load
// bit 0: producer at depth bit 1 of class alu, mul or load
|
|
i0_rs1bypass := Cat((i0_rs1_depth_d(0) &(i0_rs1_class_d.alu | i0_rs1_class_d.mul)),(i0_rs1_depth_d(0) & (i0_rs1_class_d.load)), (i0_rs1_depth_d(1) & (i0_rs1_class_d.alu | i0_rs1_class_d.mul | i0_rs1_class_d.load)))
|
|
|
|
i0_rs2bypass := Cat((i0_rs2_depth_d(0) & (i0_rs2_class_d.alu | i0_rs2_class_d.mul)),(i0_rs2_depth_d(0) & (i0_rs2_class_d.load)),(i0_rs2_depth_d(1) & (i0_rs2_class_d.alu | i0_rs2_class_d.mul | i0_rs2_class_d.load)))
|
|
|
|
// Two-bit bypass enable sent to the exu:
//   upper bit = bypass bit 2;
//   lower bit = bypass bit 1 or bit 0, or the nonblocking-load match when bit 2 is not set.
io.decode_exu.dec_i0_rs1_bypass_en_d := Cat(i0_rs1bypass(2),(i0_rs1bypass(1) | i0_rs1bypass(0) | (!i0_rs1bypass(2) & i0_rs1_nonblock_load_bypass_en_d)))
|
|
io.decode_exu.dec_i0_rs2_bypass_en_d := Cat(i0_rs2bypass(2),(i0_rs2bypass(1) | i0_rs2bypass(0) | (!i0_rs2bypass(2) & i0_rs2_nonblock_load_bypass_en_d)))
|
|
|
|
|
|
// Bypass data for the lower enable bit: LSU m-stage result, r-stage result, or nonblocking load data.
// The nonblocking load data is selected only when neither bypass bit 1 nor bit 0 is set.
io.decode_exu.dec_i0_rs1_bypass_data_d := Mux1H(Seq(
|
|
i0_rs1bypass(1).asBool -> io.lsu_result_m,
|
|
i0_rs1bypass(0).asBool -> i0_result_r,
|
|
(!i0_rs1bypass(1) & !i0_rs1bypass(0) & i0_rs1_nonblock_load_bypass_en_d).asBool -> io.dctl_busbuff.lsu_nonblock_load_data,
|
|
))
|
|
io.decode_exu.dec_i0_rs2_bypass_data_d := Mux1H(Seq(
|
|
i0_rs2bypass(1).asBool -> io.lsu_result_m,
|
|
i0_rs2bypass(0).asBool -> i0_result_r,
|
|
(!i0_rs2bypass(1) & !i0_rs2bypass(0) & i0_rs2_nonblock_load_bypass_en_d).asBool -> io.dctl_busbuff.lsu_nonblock_load_data,
|
|
))
|
|
// Raw LSU valid: a load/store is valid in the instruction buffer with no DMA DCCM stall
// and no raw decode block, or dec_extint_stall is set.
io.dec_lsu_valid_raw_d := ((io.dec_ib0_valid_d & (i0_dp_raw.load | i0_dp_raw.store) & !io.dctl_dma.dma_dccm_stall_any & !i0_block_raw_d) | io.decode_exu.dec_extint_stall)
|
|
// 12-bit LSU offset: instruction bits 31:20 for a load, bits {31:25, 11:7} for a store.
// Both selects are off while dec_extint_stall is set.
io.dec_lsu_offset_d := Mux1H(Seq(
|
|
(!io.decode_exu.dec_extint_stall & i0_dp.lsu & i0_dp.load).asBool -> i0(31,20),
|
|
(!io.decode_exu.dec_extint_stall & i0_dp.lsu & i0_dp.store).asBool -> Cat(i0(31,25),i0(11,7))))
|
|
} |