diff --git a/Flow/design/dbg/el2_dbg.sv b/Flow/design/dbg/el2_dbg.sv index fcdb242..ed14ba6 100644 --- a/Flow/design/dbg/el2_dbg.sv +++ b/Flow/design/dbg/el2_dbg.sv @@ -22,532 +22,724 @@ // Author : //******************************************************************************** module el2_dbg -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - )( - // outputs to the core for command and data interface - output logic [31:0] dbg_cmd_addr, - output logic [31:0] dbg_cmd_wrdata, - output logic dbg_cmd_valid, - output logic dbg_cmd_write, // 1: write command, 0: read_command - output logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory - output logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command - output logic dbg_core_rst_l, // core reset from dm + `include "el2_param.vh" +) ( + // outputs to the core for command and data interface + output logic [31:0] dbg_cmd_addr, + output logic [31:0] dbg_cmd_wrdata, + output logic dbg_cmd_valid, + output logic dbg_cmd_write, // 1: write command, 0: read_command + output logic [ 1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + output logic [ 1:0] dbg_cmd_size, // size of the abstract mem access debug command + output logic dbg_core_rst_l, // core reset from dm - // inputs back from the core/dec - input logic [31:0] core_dbg_rddata, - input logic core_dbg_cmd_done, // This will be treated like a valid signal - input logic core_dbg_cmd_fail, // Exception during command run + // inputs back from the core/dec + input logic [31:0] core_dbg_rddata, + input logic core_dbg_cmd_done, // This will be treated like a valid signal + input logic core_dbg_cmd_fail, // Exception during command run - // Signals to dma to get a bubble - output logic dbg_dma_bubble, // Debug needs a bubble to send a valid - input logic dma_dbg_ready, // DMA is ready to accept debug request + // Signals to dma to get a bubble + output logic dbg_dma_bubble, // Debug needs a bubble to send a valid + input logic dma_dbg_ready, // 
DMA is ready to accept debug request - // interface with the rest of the core to halt/resume handshaking - output logic dbg_halt_req, // This is a pulse - output logic dbg_resume_req, // Debug sends a resume requests. Pulse - input logic dec_tlu_debug_mode, // Core is in debug mode - input logic dec_tlu_dbg_halted, // The core has finished the queiscing sequence. Core is halted now - input logic dec_tlu_mpc_halted_only, // Only halted due to MPC - input logic dec_tlu_resume_ack, // core sends back an ack for the resume (pulse) + // interface with the rest of the core to halt/resume handshaking + output logic dbg_halt_req, // This is a pulse + output logic dbg_resume_req, // Debug sends a resume requests. Pulse + input logic dec_tlu_debug_mode, // Core is in debug mode + input logic dec_tlu_dbg_halted, // The core has finished the queiscing sequence. Core is halted now + input logic dec_tlu_mpc_halted_only, // Only halted due to MPC + input logic dec_tlu_resume_ack, // core sends back an ack for the resume (pulse) - // inputs from the JTAG - input logic dmi_reg_en, // read or write - input logic [6:0] dmi_reg_addr, // address of DM register - input logic dmi_reg_wr_en, // write instruction - input logic [31:0] dmi_reg_wdata, // write data + // inputs from the JTAG + input logic dmi_reg_en, // read or write + input logic [ 6:0] dmi_reg_addr, // address of DM register + input logic dmi_reg_wr_en, // write instruction + input logic [31:0] dmi_reg_wdata, // write data - // output - output logic [31:0] dmi_reg_rdata, // read data + // output + output logic [31:0] dmi_reg_rdata, // read data - // AXI Write Channels - output logic sb_axi_awvalid, - input logic sb_axi_awready, - output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, - output logic [31:0] sb_axi_awaddr, - output logic [3:0] sb_axi_awregion, - output logic [7:0] sb_axi_awlen, - output logic [2:0] sb_axi_awsize, - output logic [1:0] sb_axi_awburst, - output logic sb_axi_awlock, - output logic [3:0] sb_axi_awcache, - 
output logic [2:0] sb_axi_awprot, - output logic [3:0] sb_axi_awqos, + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, + output logic [ 31:0] sb_axi_awaddr, + output logic [ 3:0] sb_axi_awregion, + output logic [ 7:0] sb_axi_awlen, + output logic [ 2:0] sb_axi_awsize, + output logic [ 1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [ 3:0] sb_axi_awcache, + output logic [ 2:0] sb_axi_awprot, + output logic [ 3:0] sb_axi_awqos, - output logic sb_axi_wvalid, - input logic sb_axi_wready, - output logic [63:0] sb_axi_wdata, - output logic [7:0] sb_axi_wstrb, - output logic sb_axi_wlast, + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [ 7:0] sb_axi_wstrb, + output logic sb_axi_wlast, - input logic sb_axi_bvalid, - output logic sb_axi_bready, - input logic [1:0] sb_axi_bresp, + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [1:0] sb_axi_bresp, - // AXI Read Channels - output logic sb_axi_arvalid, - input logic sb_axi_arready, - output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, - output logic [31:0] sb_axi_araddr, - output logic [3:0] sb_axi_arregion, - output logic [7:0] sb_axi_arlen, - output logic [2:0] sb_axi_arsize, - output logic [1:0] sb_axi_arburst, - output logic sb_axi_arlock, - output logic [3:0] sb_axi_arcache, - output logic [2:0] sb_axi_arprot, - output logic [3:0] sb_axi_arqos, + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, + output logic [ 31:0] sb_axi_araddr, + output logic [ 3:0] sb_axi_arregion, + output logic [ 7:0] sb_axi_arlen, + output logic [ 2:0] sb_axi_arsize, + output logic [ 1:0] sb_axi_arburst, + output logic sb_axi_arlock, + output logic [ 3:0] sb_axi_arcache, + output logic [ 2:0] sb_axi_arprot, + output logic [ 3:0] sb_axi_arqos, - input logic sb_axi_rvalid, - output logic 
sb_axi_rready, - input logic [63:0] sb_axi_rdata, - input logic [1:0] sb_axi_rresp, + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [63:0] sb_axi_rdata, + input logic [ 1:0] sb_axi_rresp, - input logic dbg_bus_clk_en, + input logic dbg_bus_clk_en, - // general inputs - input logic clk, - input logic free_clk, - input logic rst_l, // This includes both top rst and debug rst - input logic dbg_rst_l, - input logic clk_override, - input logic scan_mode + // general inputs + input logic clk, + input logic free_clk, + input logic rst_l, // This includes both top rst and debug rst + input logic dbg_rst_l, + input logic clk_override, + input logic scan_mode ); - typedef enum logic [3:0] {IDLE=4'h0, HALTING=4'h1, HALTED=4'h2, CORE_CMD_START=4'h3, CORE_CMD_WAIT=4'h4, SB_CMD_START=4'h5, SB_CMD_SEND=4'h6, SB_CMD_RESP=4'h7, CMD_DONE=4'h8, RESUMING=4'h9} state_t; - typedef enum logic [3:0] {SBIDLE=4'h0, WAIT_RD=4'h1, WAIT_WR=4'h2, CMD_RD=4'h3, CMD_WR=4'h4, CMD_WR_ADDR=4'h5, CMD_WR_DATA=4'h6, RSP_RD=4'h7, RSP_WR=4'h8, DONE=4'h9} sb_state_t; + typedef enum logic [3:0] { + IDLE = 4'h0, + HALTING = 4'h1, + HALTED = 4'h2, + CORE_CMD_START = 4'h3, + CORE_CMD_WAIT = 4'h4, + SB_CMD_START = 4'h5, + SB_CMD_SEND = 4'h6, + SB_CMD_RESP = 4'h7, + CMD_DONE = 4'h8, + RESUMING = 4'h9 + } state_t; + typedef enum logic [3:0] { + SBIDLE = 4'h0, + WAIT_RD = 4'h1, + WAIT_WR = 4'h2, + CMD_RD = 4'h3, + CMD_WR = 4'h4, + CMD_WR_ADDR = 4'h5, + CMD_WR_DATA = 4'h6, + RSP_RD = 4'h7, + RSP_WR = 4'h8, + DONE = 4'h9 + } sb_state_t; - state_t dbg_state; - state_t dbg_nxtstate; - logic dbg_state_en; - // these are the registers that the debug module implements - logic [31:0] dmstatus_reg; // [26:24]-dmerr, [17:16]-resume ack, [9:8]-halted, [3:0]-version - logic [31:0] dmcontrol_reg; // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. 
- logic [31:0] command_reg; - logic [31:0] abstractcs_reg; // bits implemted are [12] - busy and [10:8]= command error - logic [31:0] haltsum0_reg; - logic [31:0] data0_reg; - logic [31:0] data1_reg; + state_t dbg_state; + state_t dbg_nxtstate; + logic dbg_state_en; + // these are the registers that the debug module implements + logic [31:0] dmstatus_reg; // [26:24]-dmerr, [17:16]-resume ack, [9:8]-halted, [3:0]-version + logic [31:0] dmcontrol_reg; // dmcontrol register has only 6 bits implemented. 31: haltreq, 30: resumereq, 29: haltreset, 28: ackhavereset, 1: ndmreset, 0: dmactive. + logic [31:0] command_reg; + logic [31:0] abstractcs_reg; // bits implemted are [12] - busy and [10:8]= command error + logic [31:0] haltsum0_reg; + logic [31:0] data0_reg; + logic [31:0] data1_reg; - // data 0 - logic [31:0] data0_din; - logic data0_reg_wren, data0_reg_wren0, data0_reg_wren1, data0_reg_wren2; - // data 1 - logic [31:0] data1_din; - logic data1_reg_wren, data1_reg_wren0, data1_reg_wren1; - // abstractcs - logic abstractcs_busy_wren; - logic abstractcs_busy_din; - logic [2:0] abstractcs_error_din; - logic abstractcs_error_sel0, abstractcs_error_sel1, abstractcs_error_sel2, abstractcs_error_sel3, abstractcs_error_sel4, abstractcs_error_sel5, abstractcs_error_sel6; - logic dbg_sb_bus_error; - // abstractauto - logic abstractauto_reg_wren; - logic [1:0] abstractauto_reg; + // data 0 + logic [31:0] data0_din; + logic data0_reg_wren, data0_reg_wren0, data0_reg_wren1, data0_reg_wren2; + // data 1 + logic [31:0] data1_din; + logic data1_reg_wren, data1_reg_wren0, data1_reg_wren1; + // abstractcs + logic abstractcs_busy_wren; + logic abstractcs_busy_din; + logic [2:0] abstractcs_error_din; + logic + abstractcs_error_sel0, + abstractcs_error_sel1, + abstractcs_error_sel2, + abstractcs_error_sel3, + abstractcs_error_sel4, + abstractcs_error_sel5, + abstractcs_error_sel6; + logic dbg_sb_bus_error; + // abstractauto + logic abstractauto_reg_wren; + logic [1:0] abstractauto_reg; - 
// dmstatus - logic dmstatus_resumeack_wren; - logic dmstatus_resumeack_din; - logic dmstatus_haveresetn_wren; - logic dmstatus_resumeack; - logic dmstatus_unavail; - logic dmstatus_running; - logic dmstatus_halted; - logic dmstatus_havereset, dmstatus_haveresetn; + // dmstatus + logic dmstatus_resumeack_wren; + logic dmstatus_resumeack_din; + logic dmstatus_haveresetn_wren; + logic dmstatus_resumeack; + logic dmstatus_unavail; + logic dmstatus_running; + logic dmstatus_halted; + logic dmstatus_havereset, dmstatus_haveresetn; - // dmcontrol - logic resumereq; - logic dmcontrol_wren, dmcontrol_wren_Q; - // command - logic execute_command_ns, execute_command; - logic command_wren, command_regno_wren; - logic command_transfer_din; - logic command_postexec_din; - logic [31:0] command_din; - logic [3:0] dbg_cmd_addr_incr; - logic [31:0] dbg_cmd_curr_addr; - logic [31:0] dbg_cmd_next_addr; + // dmcontrol + logic resumereq; + logic dmcontrol_wren, dmcontrol_wren_Q; + // command + logic execute_command_ns, execute_command; + logic command_wren, command_regno_wren; + logic command_transfer_din; + logic command_postexec_din; + logic [ 31:0] command_din; + logic [ 3:0] dbg_cmd_addr_incr; + logic [ 31:0] dbg_cmd_curr_addr; + logic [ 31:0] dbg_cmd_next_addr; - // needed to send the read data back for dmi reads - logic [31:0] dmi_reg_rdata_din; + // needed to send the read data back for dmi reads + logic [ 31:0] dmi_reg_rdata_din; - sb_state_t sb_state; - sb_state_t sb_nxtstate; - logic sb_state_en; + sb_state_t sb_state; + sb_state_t sb_nxtstate; + logic sb_state_en; - //System bus section - logic sbcs_wren; - logic sbcs_sbbusy_wren; - logic sbcs_sbbusy_din; - logic sbcs_sbbusyerror_wren; - logic sbcs_sbbusyerror_din; + //System bus section + logic sbcs_wren; + logic sbcs_sbbusy_wren; + logic sbcs_sbbusy_din; + logic sbcs_sbbusyerror_wren; + logic sbcs_sbbusyerror_din; - logic sbcs_sberror_wren; - logic [2:0] sbcs_sberror_din; - logic sbcs_unaligned; - logic sbcs_illegal_size; 
- logic [19:15] sbcs_reg_int; + logic sbcs_sberror_wren; + logic [ 2:0] sbcs_sberror_din; + logic sbcs_unaligned; + logic sbcs_illegal_size; + logic [19:15] sbcs_reg_int; - // data - logic sbdata0_reg_wren0; - logic sbdata0_reg_wren1; - logic sbdata0_reg_wren; - logic [31:0] sbdata0_din; + // data + logic sbdata0_reg_wren0; + logic sbdata0_reg_wren1; + logic sbdata0_reg_wren; + logic [ 31:0] sbdata0_din; - logic sbdata1_reg_wren0; - logic sbdata1_reg_wren1; - logic sbdata1_reg_wren; - logic [31:0] sbdata1_din; + logic sbdata1_reg_wren0; + logic sbdata1_reg_wren1; + logic sbdata1_reg_wren; + logic [ 31:0] sbdata1_din; - logic sbaddress0_reg_wren0; - logic sbaddress0_reg_wren1; - logic sbaddress0_reg_wren; - logic [31:0] sbaddress0_reg_din; - logic [3:0] sbaddress0_incr; - logic sbreadonaddr_access; - logic sbreadondata_access; - logic sbdata0wr_access; + logic sbaddress0_reg_wren0; + logic sbaddress0_reg_wren1; + logic sbaddress0_reg_wren; + logic [ 31:0] sbaddress0_reg_din; + logic [ 3:0] sbaddress0_incr; + logic sbreadonaddr_access; + logic sbreadondata_access; + logic sbdata0wr_access; - logic sb_abmem_cmd_done_in, sb_abmem_data_done_in; - logic sb_abmem_cmd_done_en, sb_abmem_data_done_en; - logic sb_abmem_cmd_done, sb_abmem_data_done; - logic [31:0] abmem_addr; - logic abmem_addr_in_dccm_region, abmem_addr_in_iccm_region, abmem_addr_in_pic_region; - logic abmem_addr_core_local; - logic abmem_addr_external; + logic sb_abmem_cmd_done_in, sb_abmem_data_done_in; + logic sb_abmem_cmd_done_en, sb_abmem_data_done_en; + logic sb_abmem_cmd_done, sb_abmem_data_done; + logic [31:0] abmem_addr; + logic abmem_addr_in_dccm_region, abmem_addr_in_iccm_region, abmem_addr_in_pic_region; + logic abmem_addr_core_local; + logic abmem_addr_external; - logic sb_cmd_pending, sb_abmem_cmd_pending; - logic sb_abmem_cmd_write; - logic [2:0] sb_abmem_cmd_size; - logic [31:0] sb_abmem_cmd_addr; - logic [31:0] sb_abmem_cmd_wdata; + logic sb_cmd_pending, sb_abmem_cmd_pending; + logic 
sb_abmem_cmd_write; + logic [ 2:0] sb_abmem_cmd_size; + logic [31:0] sb_abmem_cmd_addr; + logic [31:0] sb_abmem_cmd_wdata; - logic [2:0] sb_cmd_size; - logic [31:0] sb_cmd_addr; - logic [63:0] sb_cmd_wdata; + logic [ 2:0] sb_cmd_size; + logic [31:0] sb_cmd_addr; + logic [63:0] sb_cmd_wdata; - logic sb_bus_cmd_read, sb_bus_cmd_write_addr, sb_bus_cmd_write_data; - logic sb_bus_rsp_read, sb_bus_rsp_write; - logic sb_bus_rsp_error; - logic [63:0] sb_bus_rdata; + logic sb_bus_cmd_read, sb_bus_cmd_write_addr, sb_bus_cmd_write_data; + logic sb_bus_rsp_read, sb_bus_rsp_write; + logic sb_bus_rsp_error; + logic [63:0] sb_bus_rdata; - //registers - logic [31:0] sbcs_reg; - logic [31:0] sbaddress0_reg; - logic [31:0] sbdata0_reg; - logic [31:0] sbdata1_reg; + //registers + logic [31:0] sbcs_reg; + logic [31:0] sbaddress0_reg; + logic [31:0] sbdata0_reg; + logic [31:0] sbdata1_reg; - logic sb_abmem_cmd_arvalid, sb_abmem_cmd_awvalid, sb_abmem_cmd_wvalid; - logic sb_abmem_read_pend; - logic sb_cmd_awvalid, sb_cmd_wvalid, sb_cmd_arvalid; - logic sb_read_pend; - logic [31:0] sb_axi_addr; - logic [63:0] sb_axi_wrdata; - logic [2:0] sb_axi_size; + logic sb_abmem_cmd_arvalid, sb_abmem_cmd_awvalid, sb_abmem_cmd_wvalid; + logic sb_abmem_read_pend; + logic sb_cmd_awvalid, sb_cmd_wvalid, sb_cmd_arvalid; + logic sb_read_pend; + logic [31:0] sb_axi_addr; + logic [63:0] sb_axi_wrdata; + logic [ 2:0] sb_axi_size; - logic dbg_dm_rst_l; - logic rst_l_sync; + logic dbg_dm_rst_l; + logic rst_l_sync; - //clken - logic dbg_free_clken; - logic dbg_free_clk; + //clken + logic dbg_free_clken; + logic dbg_free_clk; - logic sb_free_clken; - logic sb_free_clk; + logic sb_free_clken; + logic sb_free_clk; - // clocking - // used for the abstract commands. - assign dbg_free_clken = dmi_reg_en | execute_command | (dbg_state != IDLE) | dbg_state_en | dec_tlu_dbg_halted | dec_tlu_mpc_halted_only | dec_tlu_debug_mode | dbg_halt_req | clk_override; + // clocking + // used for the abstract commands. 
+ assign dbg_free_clken = dmi_reg_en | execute_command | (dbg_state != IDLE) | dbg_state_en | dec_tlu_dbg_halted | dec_tlu_mpc_halted_only | dec_tlu_debug_mode | dbg_halt_req | clk_override; - // used for the system bus - assign sb_free_clken = dmi_reg_en | execute_command | sb_state_en | (sb_state != SBIDLE) | clk_override; + // used for the system bus + assign sb_free_clken = dmi_reg_en | execute_command | sb_state_en | (sb_state != SBIDLE) | clk_override; - rvoclkhdr dbg_free_cgc (.en(dbg_free_clken), .l1clk(dbg_free_clk), .*); - rvoclkhdr sb_free_cgc (.en(sb_free_clken), .l1clk(sb_free_clk), .*); + rvoclkhdr dbg_free_cgc ( + .en(dbg_free_clken), + .l1clk(dbg_free_clk), + .* + ); + rvoclkhdr sb_free_cgc ( + .en(sb_free_clken), + .l1clk(sb_free_clk), + .* + ); - // end clocking section + // end clocking section - // Reset logic - assign dbg_dm_rst_l = dbg_rst_l & (dmcontrol_reg[0] | scan_mode); - assign dbg_core_rst_l = ~dmcontrol_reg[1] | scan_mode; + // Reset logic + assign dbg_dm_rst_l = dbg_rst_l & (dmcontrol_reg[0] | scan_mode); + assign dbg_core_rst_l = ~dmcontrol_reg[1] | scan_mode; - // synchronize the rst - rvsyncss #(1) rstl_syncff (.din(rst_l), .dout(rst_l_sync), .clk(free_clk), .rst_l(dbg_rst_l)); + // synchronize the rst + rvsyncss #(1) rstl_syncff ( + .din (rst_l), + .dout (rst_l_sync), + .clk (free_clk), + .rst_l(dbg_rst_l) + ); - // system bus register - // sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal - assign sbcs_reg[31:29] = 3'b1; - assign sbcs_reg[28:23] = '0; - assign sbcs_reg[19:15] = {sbcs_reg_int[19], ~sbcs_reg_int[18], sbcs_reg_int[17:15]}; - assign sbcs_reg[11:5] = 7'h20; - assign sbcs_reg[4:0] = 5'b01111; - assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); - assign sbcs_sbbusyerror_wren = (sbcs_wren & dmi_reg_wdata[22]) | + // system bus register + // 
sbcs[31:29], sbcs - [22]:sbbusyerror, [21]: sbbusy, [20]:sbreadonaddr, [19:17]:sbaccess, [16]:sbautoincrement, [15]:sbreadondata, [14:12]:sberror, sbsize=32, 128=0, 64/32/16/8 are legal + assign sbcs_reg[31:29] = 3'b1; + assign sbcs_reg[28:23] = '0; + assign sbcs_reg[19:15] = {sbcs_reg_int[19], ~sbcs_reg_int[18], sbcs_reg_int[17:15]}; + assign sbcs_reg[11:5] = 7'h20; + assign sbcs_reg[4:0] = 5'b01111; + assign sbcs_wren = (dmi_reg_addr == 7'h38) & dmi_reg_en & dmi_reg_wr_en & (sb_state == SBIDLE); + assign sbcs_sbbusyerror_wren = (sbcs_wren & dmi_reg_wdata[22]) | (sbcs_reg[21] & dmi_reg_en & ((dmi_reg_wr_en & (dmi_reg_addr == 7'h39)) | (dmi_reg_addr == 7'h3c) | (dmi_reg_addr == 7'h3d))); - assign sbcs_sbbusyerror_din = ~(sbcs_wren & dmi_reg_wdata[22]); // Clear when writing one + assign sbcs_sbbusyerror_din = ~(sbcs_wren & dmi_reg_wdata[22]); // Clear when writing one - rvdffs #(1) sbcs_sbbusyerror_reg (.din(sbcs_sbbusyerror_din), .dout(sbcs_reg[22]), .en(sbcs_sbbusyerror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); - rvdffs #(1) sbcs_sbbusy_reg (.din(sbcs_sbbusy_din), .dout(sbcs_reg[21]), .en(sbcs_sbbusy_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); - rvdffs #(1) sbcs_sbreadonaddr_reg (.din(dmi_reg_wdata[20]), .dout(sbcs_reg[20]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); - rvdffs #(5) sbcs_misc_reg (.din({dmi_reg_wdata[19],~dmi_reg_wdata[18],dmi_reg_wdata[17:15]}), - .dout(sbcs_reg_int[19:15]), .en(sbcs_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); - rvdffs #(3) sbcs_error_reg (.din(sbcs_sberror_din[2:0]), .dout(sbcs_reg[14:12]), .en(sbcs_sberror_wren), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #(1) sbcs_sbbusyerror_reg ( + .din(sbcs_sbbusyerror_din), + .dout(sbcs_reg[22]), + .en(sbcs_sbbusyerror_wren), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); + rvdffs #(1) sbcs_sbbusy_reg ( + .din(sbcs_sbbusy_din), + .dout(sbcs_reg[21]), + .en(sbcs_sbbusy_wren), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); + rvdffs #(1) 
sbcs_sbreadonaddr_reg ( + .din(dmi_reg_wdata[20]), + .dout(sbcs_reg[20]), + .en(sbcs_wren), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); + rvdffs #(5) sbcs_misc_reg ( + .din({dmi_reg_wdata[19], ~dmi_reg_wdata[18], dmi_reg_wdata[17:15]}), + .dout(sbcs_reg_int[19:15]), + .en(sbcs_wren), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); + rvdffs #(3) sbcs_error_reg ( + .din(sbcs_sberror_din[2:0]), + .dout(sbcs_reg[14:12]), + .en(sbcs_sberror_wren), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); - assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) | + assign sbcs_unaligned = ((sbcs_reg[19:17] == 3'b001) & sbaddress0_reg[0]) | ((sbcs_reg[19:17] == 3'b010) & (|sbaddress0_reg[1:0])) | ((sbcs_reg[19:17] == 3'b011) & (|sbaddress0_reg[2:0])); - assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal + assign sbcs_illegal_size = sbcs_reg[19]; // Anything bigger than 64 bits is illegal - assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'h0)}} & 4'b0001) | + assign sbaddress0_incr[3:0] = ({4{(sbcs_reg[19:17] == 3'h0)}} & 4'b0001) | ({4{(sbcs_reg[19:17] == 3'h1)}} & 4'b0010) | ({4{(sbcs_reg[19:17] == 3'h2)}} & 4'b0100) | ({4{(sbcs_reg[19:17] == 3'h3)}} & 4'b1000); - // sbdata - assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0 - assign sbdata0_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren; - assign sbdata0_reg_wren = sbdata0_reg_wren0 | sbdata0_reg_wren1; + // sbdata + assign sbdata0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write data only when single read is 0 + assign sbdata0_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren; + assign sbdata0_reg_wren = sbdata0_reg_wren0 | sbdata0_reg_wren1; - assign sbdata1_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3d); // write data only when single read is 0; - assign sbdata1_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & 
~sbcs_sberror_wren; - assign sbdata1_reg_wren = sbdata1_reg_wren0 | sbdata1_reg_wren1; + assign sbdata1_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3d); // write data only when single read is 0; + assign sbdata1_reg_wren1 = (sb_state == RSP_RD) & sb_state_en & ~sbcs_sberror_wren; + assign sbdata1_reg_wren = sbdata1_reg_wren0 | sbdata1_reg_wren1; - assign sbdata0_din[31:0] = ({32{sbdata0_reg_wren0}} & dmi_reg_wdata[31:0]) | + assign sbdata0_din[31:0] = ({32{sbdata0_reg_wren0}} & dmi_reg_wdata[31:0]) | ({32{sbdata0_reg_wren1}} & sb_bus_rdata[31:0]); - assign sbdata1_din[31:0] = ({32{sbdata1_reg_wren0}} & dmi_reg_wdata[31:0]) | + assign sbdata1_din[31:0] = ({32{sbdata1_reg_wren0}} & dmi_reg_wdata[31:0]) | ({32{sbdata1_reg_wren1}} & sb_bus_rdata[63:32]); - rvdffe #(32) dbg_sbdata0_reg (.*, .din(sbdata0_din[31:0]), .dout(sbdata0_reg[31:0]), .en(sbdata0_reg_wren), .rst_l(dbg_dm_rst_l)); - rvdffe #(32) dbg_sbdata1_reg (.*, .din(sbdata1_din[31:0]), .dout(sbdata1_reg[31:0]), .en(sbdata1_reg_wren), .rst_l(dbg_dm_rst_l)); + rvdffe #(32) dbg_sbdata0_reg ( + .*, + .din(sbdata0_din[31:0]), + .dout(sbdata0_reg[31:0]), + .en(sbdata0_reg_wren), + .rst_l(dbg_dm_rst_l) + ); + rvdffe #(32) dbg_sbdata1_reg ( + .*, + .din(sbdata1_din[31:0]), + .dout(sbdata1_reg[31:0]), + .en(sbdata1_reg_wren), + .rst_l(dbg_dm_rst_l) + ); - // sbaddress - assign sbaddress0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39); - assign sbaddress0_reg_wren = sbaddress0_reg_wren0 | sbaddress0_reg_wren1; - assign sbaddress0_reg_din[31:0]= ({32{sbaddress0_reg_wren0}} & dmi_reg_wdata[31:0]) | + // sbaddress + assign sbaddress0_reg_wren0 = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39); + assign sbaddress0_reg_wren = sbaddress0_reg_wren0 | sbaddress0_reg_wren1; + assign sbaddress0_reg_din[31:0]= ({32{sbaddress0_reg_wren0}} & dmi_reg_wdata[31:0]) | ({32{sbaddress0_reg_wren1}} & (sbaddress0_reg[31:0] + {28'b0,sbaddress0_incr[3:0]})); - rvdffe #(32) dbg_sbaddress0_reg (.*, 
.din(sbaddress0_reg_din[31:0]), .dout(sbaddress0_reg[31:0]), .en(sbaddress0_reg_wren), .rst_l(dbg_dm_rst_l)); + rvdffe #(32) dbg_sbaddress0_reg ( + .*, + .din(sbaddress0_reg_din[31:0]), + .dout(sbaddress0_reg[31:0]), + .en(sbaddress0_reg_wren), + .rst_l(dbg_dm_rst_l) + ); - assign sbreadonaddr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39) & sbcs_reg[20]; // if readonaddr is set the next command will start upon writing of addr0 - assign sbreadondata_access = dmi_reg_en & ~dmi_reg_wr_en & (dmi_reg_addr == 7'h3c) & sbcs_reg[15]; // if readondata is set the next command will start upon reading of data0 - assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus + assign sbreadonaddr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h39) & sbcs_reg[20]; // if readonaddr is set the next command will start upon writing of addr0 + assign sbreadondata_access = dmi_reg_en & ~dmi_reg_wr_en & (dmi_reg_addr == 7'h3c) & sbcs_reg[15]; // if readondata is set the next command will start upon reading of data0 + assign sbdata0wr_access = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h3c); // write to sbdata0 will start write command to system bus - // memory mapped registers - // dmcontrol register has only 5 bits implemented. 31: haltreq, 30: resumereq, 28: ackhavereset, 1: ndmreset, 0: dmactive. 
- // rest all the bits are zeroed out - // dmactive flop is reset based on core rst_l, all other flops use dm_rst_l - assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en; - assign dmcontrol_reg[29] = '0; - assign dmcontrol_reg[27:2] = '0; - assign resumereq = dmcontrol_reg[30] & ~dmcontrol_reg[31] & dmcontrol_wren_Q; - rvdffs #(4) dmcontrolff (.din({dmi_reg_wdata[31:30],dmi_reg_wdata[28],dmi_reg_wdata[1]}), .dout({dmcontrol_reg[31:30], dmcontrol_reg[28], dmcontrol_reg[1]}), .en(dmcontrol_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); - rvdffs #(1) dmcontrol_dmactive_ff (.din(dmi_reg_wdata[0]), .dout(dmcontrol_reg[0]), .en(dmcontrol_wren), .rst_l(dbg_rst_l), .clk(dbg_free_clk)); - rvdff #(1) dmcontrol_wrenff(.din(dmcontrol_wren), .dout(dmcontrol_wren_Q), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + // memory mapped registers + // dmcontrol register has only 5 bits implemented. 31: haltreq, 30: resumereq, 28: ackhavereset, 1: ndmreset, 0: dmactive. + // rest all the bits are zeroed out + // dmactive flop is reset based on core rst_l, all other flops use dm_rst_l + assign dmcontrol_wren = (dmi_reg_addr == 7'h10) & dmi_reg_en & dmi_reg_wr_en; + assign dmcontrol_reg[29] = '0; + assign dmcontrol_reg[27:2] = '0; + assign resumereq = dmcontrol_reg[30] & ~dmcontrol_reg[31] & dmcontrol_wren_Q; + rvdffs #(4) dmcontrolff ( + .din({dmi_reg_wdata[31:30], dmi_reg_wdata[28], dmi_reg_wdata[1]}), + .dout({dmcontrol_reg[31:30], dmcontrol_reg[28], dmcontrol_reg[1]}), + .en(dmcontrol_wren), + .rst_l(dbg_dm_rst_l), + .clk(dbg_free_clk) + ); + rvdffs #(1) dmcontrol_dmactive_ff ( + .din(dmi_reg_wdata[0]), + .dout(dmcontrol_reg[0]), + .en(dmcontrol_wren), + .rst_l(dbg_rst_l), + .clk(dbg_free_clk) + ); + rvdff #(1) dmcontrol_wrenff ( + .din (dmcontrol_wren), + .dout (dmcontrol_wren_Q), + .rst_l(dbg_dm_rst_l), + .clk (dbg_free_clk) + ); - // dmstatus register bits that are implemented - // [19:18]-havereset,[17:16]-resume ack, [9:8]-halted, [3:0]-version - // rest 
all the bits are zeroed out - //assign dmstatus_wren = (dmi_reg_addr[31:0] == 32'h11) & dmi_reg_en; - assign dmstatus_reg[31:20] = '0; - assign dmstatus_reg[19:18] = {2{dmstatus_havereset}}; - assign dmstatus_reg[15:14] = '0; - assign dmstatus_reg[7] = '1; - assign dmstatus_reg[6:4] = '0; - assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}}; - assign dmstatus_reg[13:12] = {2{dmstatus_unavail}}; - assign dmstatus_reg[11:10] = {2{dmstatus_running}}; - assign dmstatus_reg[9:8] = {2{dmstatus_halted}}; - assign dmstatus_reg[3:0] = 4'h2; + // dmstatus register bits that are implemented + // [19:18]-havereset,[17:16]-resume ack, [9:8]-halted, [3:0]-version + // rest all the bits are zeroed out + //assign dmstatus_wren = (dmi_reg_addr[31:0] == 32'h11) & dmi_reg_en; + assign dmstatus_reg[31:20] = '0; + assign dmstatus_reg[19:18] = {2{dmstatus_havereset}}; + assign dmstatus_reg[15:14] = '0; + assign dmstatus_reg[7] = '1; + assign dmstatus_reg[6:4] = '0; + assign dmstatus_reg[17:16] = {2{dmstatus_resumeack}}; + assign dmstatus_reg[13:12] = {2{dmstatus_unavail}}; + assign dmstatus_reg[11:10] = {2{dmstatus_running}}; + assign dmstatus_reg[9:8] = {2{dmstatus_halted}}; + assign dmstatus_reg[3:0] = 4'h2; - assign dmstatus_resumeack_wren = ((dbg_state == RESUMING) & dec_tlu_resume_ack) | (dmstatus_resumeack & resumereq & dmstatus_halted); - assign dmstatus_resumeack_din = (dbg_state == RESUMING) & dec_tlu_resume_ack; + assign dmstatus_resumeack_wren = ((dbg_state == RESUMING) & dec_tlu_resume_ack) | (dmstatus_resumeack & resumereq & dmstatus_halted); + assign dmstatus_resumeack_din = (dbg_state == RESUMING) & dec_tlu_resume_ack; - assign dmstatus_haveresetn_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en & dmcontrol_reg[0]; // clear the havereset - assign dmstatus_havereset = ~dmstatus_haveresetn; + assign dmstatus_haveresetn_wren = (dmi_reg_addr == 7'h10) & dmi_reg_wdata[28] & dmi_reg_en & dmi_reg_wr_en & dmcontrol_reg[0]; // clear the havereset + 
assign dmstatus_havereset = ~dmstatus_haveresetn; - assign dmstatus_unavail = dmcontrol_reg[1] | ~rst_l_sync; - assign dmstatus_running = ~(dmstatus_unavail | dmstatus_halted); + assign dmstatus_unavail = dmcontrol_reg[1] | ~rst_l_sync; + assign dmstatus_running = ~(dmstatus_unavail | dmstatus_halted); - rvdffs #(1) dmstatus_resumeack_reg (.din(dmstatus_resumeack_din), .dout(dmstatus_resumeack), .en(dmstatus_resumeack_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); - rvdff #(1) dmstatus_halted_reg (.din(dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), .dout(dmstatus_halted), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); - rvdffs #(1) dmstatus_haveresetn_reg (.din(1'b1), .dout(dmstatus_haveresetn), .en(dmstatus_haveresetn_wren), .rst_l(rst_l), .clk(dbg_free_clk)); + rvdffs #(1) dmstatus_resumeack_reg ( + .din(dmstatus_resumeack_din), + .dout(dmstatus_resumeack), + .en(dmstatus_resumeack_wren), + .rst_l(dbg_dm_rst_l), + .clk(dbg_free_clk) + ); + rvdff #(1) dmstatus_halted_reg ( + .din (dec_tlu_dbg_halted & ~dec_tlu_mpc_halted_only), + .dout (dmstatus_halted), + .rst_l(dbg_dm_rst_l), + .clk (dbg_free_clk) + ); + rvdffs #(1) dmstatus_haveresetn_reg ( + .din(1'b1), + .dout(dmstatus_haveresetn), + .en(dmstatus_haveresetn_wren), + .rst_l(rst_l), + .clk(dbg_free_clk) + ); - // haltsum0 register - assign haltsum0_reg[31:1] = '0; - assign haltsum0_reg[0] = dmstatus_halted; + // haltsum0 register + assign haltsum0_reg[31:1] = '0; + assign haltsum0_reg[0] = dmstatus_halted; - // abstractcs register - // bits implemted are [12] - busy and [10:8]= command error - assign abstractcs_reg[31:13] = '0; - assign abstractcs_reg[11] = '0; - assign abstractcs_reg[7:4] = '0; - assign abstractcs_reg[3:0] = 4'h2; // One data register + // abstractcs register + // bits implemted are [12] - busy and [10:8]= command error + assign abstractcs_reg[31:13] = '0; + assign abstractcs_reg[11] = '0; + assign abstractcs_reg[7:4] = '0; + assign abstractcs_reg[3:0] = 4'h2; // One data register - assign 
abstractcs_error_sel0 = abstractcs_reg[12] & ~(|abstractcs_reg[10:8]) & dmi_reg_en & ((dmi_reg_wr_en & ((dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17)) | (dmi_reg_addr == 7'h18)) | + assign abstractcs_error_sel0 = abstractcs_reg[12] & ~(|abstractcs_reg[10:8]) & dmi_reg_en & ((dmi_reg_wr_en & ((dmi_reg_addr == 7'h16) | (dmi_reg_addr == 7'h17)) | (dmi_reg_addr == 7'h18)) | (dmi_reg_addr == 7'h4) | (dmi_reg_addr == 7'h5)); - assign abstractcs_error_sel1 = execute_command & ~(|abstractcs_reg[10:8]) & + assign abstractcs_error_sel1 = execute_command & ~(|abstractcs_reg[10:8]) & ((~((command_reg[31:24] == 8'b0) | (command_reg[31:24] == 8'h2))) | // Illegal command - (((command_reg[22:20] == 3'b011) | (command_reg[22])) & (command_reg[31:24] == 8'h2)) | // Illegal abstract memory size (can't be DW or higher) - ((command_reg[22:20] != 3'b010) & ((command_reg[31:24] == 8'h0) & command_reg[17])) | // Illegal abstract reg size - ((command_reg[31:24] == 8'h0) & command_reg[18])); //postexec for abstract register access - assign abstractcs_error_sel2 = ((core_dbg_cmd_done & core_dbg_cmd_fail) | // exception from core - (execute_command & (command_reg[31:24] == 8'h0) & // unimplemented regs - (((command_reg[15:12] == 4'h1) & (command_reg[11:5] != 0)) | (command_reg[15:13] != 0)))) & ~(|abstractcs_reg[10:8]); - assign abstractcs_error_sel3 = execute_command & (dbg_state != HALTED) & ~(|abstractcs_reg[10:8]); - assign abstractcs_error_sel4 = dbg_sb_bus_error & dbg_bus_clk_en & ~(|abstractcs_reg[10:8]);// sb bus error for abstract memory command - assign abstractcs_error_sel5 = execute_command & (command_reg[31:24] == 8'h2) & ~(|abstractcs_reg[10:8]) & + (((command_reg[22:20] == 3'b011) | (command_reg[22])) & (command_reg[31:24] == 8'h2)) | // Illegal abstract memory size (can't be DW or higher) + ((command_reg[22:20] != 3'b010) & ((command_reg[31:24] == 8'h0) & command_reg[17])) | // Illegal abstract reg size + ((command_reg[31:24] == 8'h0) & command_reg[18])); //postexec for 
abstract register access + assign abstractcs_error_sel2 = ((core_dbg_cmd_done & core_dbg_cmd_fail) | // exception from core + (execute_command & (command_reg[31:24] == 8'h0) & // unimplemented regs + (((command_reg[15:12] == 4'h1) & (command_reg[11:5] != 0)) | (command_reg[15:13] != 0)))) & ~(|abstractcs_reg[10:8]); + assign abstractcs_error_sel3 = execute_command & (dbg_state != HALTED) & ~(|abstractcs_reg[10:8]); + assign abstractcs_error_sel4 = dbg_sb_bus_error & dbg_bus_clk_en & ~(|abstractcs_reg[10:8]);// sb bus error for abstract memory command + assign abstractcs_error_sel5 = execute_command & (command_reg[31:24] == 8'h2) & ~(|abstractcs_reg[10:8]) & (((command_reg[22:20] == 3'b001) & data1_reg[0]) | ((command_reg[22:20] == 3'b010) & (|data1_reg[1:0]))); //Unaligned address for abstract memory - assign abstractcs_error_sel6 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; + assign abstractcs_error_sel6 = (dmi_reg_addr == 7'h16) & dmi_reg_en & dmi_reg_wr_en; - assign abstractcs_error_din[2:0] = abstractcs_error_sel0 ? 3'b001 : // writing command or abstractcs while a command was executing. Or accessing data0 - abstractcs_error_sel1 ? 3'b010 : // writing a illegal command type to cmd field of command - abstractcs_error_sel2 ? 3'b011 : // exception while running command - abstractcs_error_sel3 ? 3'b100 : // writing a comnand when not in the halted state - abstractcs_error_sel4 ? 3'b101 : // Bus error - abstractcs_error_sel5 ? 3'b111 : // unaligned or illegal size abstract memory command - abstractcs_error_sel6 ? (~dmi_reg_wdata[10:8] & abstractcs_reg[10:8]) : //W1C - abstractcs_reg[10:8]; //hold + assign abstractcs_error_din[2:0] = abstractcs_error_sel0 ? 3'b001 : // writing command or abstractcs while a command was executing. Or accessing data0 + abstractcs_error_sel1 ? 3'b010 : // writing a illegal command type to cmd field of command + abstractcs_error_sel2 ? 3'b011 : // exception while running command + abstractcs_error_sel3 ? 
3'b100 : // writing a comnand when not in the halted state + abstractcs_error_sel4 ? 3'b101 : // Bus error + abstractcs_error_sel5 ? 3'b111 : // unaligned or illegal size abstract memory command + abstractcs_error_sel6 ? (~dmi_reg_wdata[10:8] & abstractcs_reg[10:8]) : //W1C + abstractcs_reg[10:8]; //hold - rvdffs #(1) dmabstractcs_busy_reg (.din(abstractcs_busy_din), .dout(abstractcs_reg[12]), .en(abstractcs_busy_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); - rvdff #(3) dmabstractcs_error_reg (.din(abstractcs_error_din[2:0]), .dout(abstractcs_reg[10:8]), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + rvdffs #(1) dmabstractcs_busy_reg ( + .din(abstractcs_busy_din), + .dout(abstractcs_reg[12]), + .en(abstractcs_busy_wren), + .rst_l(dbg_dm_rst_l), + .clk(dbg_free_clk) + ); + rvdff #(3) dmabstractcs_error_reg ( + .din (abstractcs_error_din[2:0]), + .dout (abstractcs_reg[10:8]), + .rst_l(dbg_dm_rst_l), + .clk (dbg_free_clk) + ); - // abstract auto reg - assign abstractauto_reg_wren = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h18) & ~abstractcs_reg[12]; - rvdffs #(2) dbg_abstractauto_reg (.*, .din(dmi_reg_wdata[1:0]), .dout(abstractauto_reg[1:0]), .en(abstractauto_reg_wren), .rst_l(dbg_dm_rst_l), .clk(dbg_free_clk)); + // abstract auto reg + assign abstractauto_reg_wren = dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h18) & ~abstractcs_reg[12]; + rvdffs #(2) dbg_abstractauto_reg ( + .*, + .din(dmi_reg_wdata[1:0]), + .dout(abstractauto_reg[1:0]), + .en(abstractauto_reg_wren), + .rst_l(dbg_dm_rst_l), + .clk(dbg_free_clk) + ); - // command register - implemented all the bits in this register - // command[16] = 1: write, 0: read - assign execute_command_ns = command_wren | + // command register - implemented all the bits in this register + // command[16] = 1: write, 0: read + assign execute_command_ns = command_wren | (dmi_reg_en & ~abstractcs_reg[12] & (((dmi_reg_addr == 7'h4) & abstractauto_reg[0]) | ((dmi_reg_addr == 7'h5) & abstractauto_reg[1]))); - assign 
command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en; - assign command_regno_wren = command_wren | ((command_reg[31:24] == 8'h0) & command_reg[19] & (dbg_state == CMD_DONE) & ~(|abstractcs_reg[10:8])); // aarpostincrement - assign command_postexec_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[18]; - assign command_transfer_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17]; - assign command_din[31:16] = {dmi_reg_wdata[31:24],1'b0,dmi_reg_wdata[22:19],command_postexec_din,command_transfer_din, dmi_reg_wdata[16]}; - assign command_din[15:0] = command_wren ? dmi_reg_wdata[15:0] : dbg_cmd_next_addr[15:0]; - rvdff #(1) execute_commandff (.*, .din(execute_command_ns), .dout(execute_command), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l)); - rvdffe #(16) dmcommand_reg (.*, .din(command_din[31:16]), .dout(command_reg[31:16]), .en(command_wren), .rst_l(dbg_dm_rst_l)); - rvdffe #(16) dmcommand_regno_reg (.*, .din(command_din[15:0]), .dout(command_reg[15:0]), .en(command_regno_wren), .rst_l(dbg_dm_rst_l)); + assign command_wren = (dmi_reg_addr == 7'h17) & dmi_reg_en & dmi_reg_wr_en; + assign command_regno_wren = command_wren | ((command_reg[31:24] == 8'h0) & command_reg[19] & (dbg_state == CMD_DONE) & ~(|abstractcs_reg[10:8])); // aarpostincrement + assign command_postexec_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[18]; + assign command_transfer_din = (dmi_reg_wdata[31:24] == 8'h0) & dmi_reg_wdata[17]; + assign command_din[31:16] = { + dmi_reg_wdata[31:24], + 1'b0, + dmi_reg_wdata[22:19], + command_postexec_din, + command_transfer_din, + dmi_reg_wdata[16] + }; + assign command_din[15:0] = command_wren ? 
dmi_reg_wdata[15:0] : dbg_cmd_next_addr[15:0]; + rvdff #(1) execute_commandff ( + .*, + .din (execute_command_ns), + .dout (execute_command), + .clk (dbg_free_clk), + .rst_l(dbg_dm_rst_l) + ); + rvdffe #(16) dmcommand_reg ( + .*, + .din(command_din[31:16]), + .dout(command_reg[31:16]), + .en(command_wren), + .rst_l(dbg_dm_rst_l) + ); + rvdffe #(16) dmcommand_regno_reg ( + .*, + .din(command_din[15:0]), + .dout(command_reg[15:0]), + .en(command_regno_wren), + .rst_l(dbg_dm_rst_l) + ); // data0 reg - assign data0_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h4) & (dbg_state == HALTED) & ~abstractcs_reg[12]); - assign data0_reg_wren1 = core_dbg_cmd_done & (dbg_state == CORE_CMD_WAIT) & ~command_reg[16]; - assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1 | data0_reg_wren2; + assign data0_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h4) & (dbg_state == HALTED) & ~abstractcs_reg[12]); + assign data0_reg_wren1 = core_dbg_cmd_done & (dbg_state == CORE_CMD_WAIT) & ~command_reg[16]; + assign data0_reg_wren = data0_reg_wren0 | data0_reg_wren1 | data0_reg_wren2; - assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) | + assign data0_din[31:0] = ({32{data0_reg_wren0}} & dmi_reg_wdata[31:0]) | ({32{data0_reg_wren1}} & core_dbg_rddata[31:0]) | ({32{data0_reg_wren2}} & sb_bus_rdata[31:0]); - rvdffe #(32) dbg_data0_reg (.*, .din(data0_din[31:0]), .dout(data0_reg[31:0]), .en(data0_reg_wren), .rst_l(dbg_dm_rst_l)); + rvdffe #(32) dbg_data0_reg ( + .*, + .din(data0_din[31:0]), + .dout(data0_reg[31:0]), + .en(data0_reg_wren), + .rst_l(dbg_dm_rst_l) + ); - // data 1 - assign data1_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h5) & (dbg_state == HALTED) & ~abstractcs_reg[12]); - assign data1_reg_wren1 = (dbg_state == CMD_DONE) & (command_reg[31:24] == 8'h2) & command_reg[19] & ~(|abstractcs_reg[10:8]); // aampostincrement - assign data1_reg_wren = data1_reg_wren0 | data1_reg_wren1; + // data 1 + assign 
data1_reg_wren0 = (dmi_reg_en & dmi_reg_wr_en & (dmi_reg_addr == 7'h5) & (dbg_state == HALTED) & ~abstractcs_reg[12]); + assign data1_reg_wren1 = (dbg_state == CMD_DONE) & (command_reg[31:24] == 8'h2) & command_reg[19] & ~(|abstractcs_reg[10:8]); // aampostincrement + assign data1_reg_wren = data1_reg_wren0 | data1_reg_wren1; - assign data1_din[31:0] = ({32{data1_reg_wren0}} & dmi_reg_wdata[31:0]) | + assign data1_din[31:0] = ({32{data1_reg_wren0}} & dmi_reg_wdata[31:0]) | ({32{data1_reg_wren1}} & dbg_cmd_next_addr[31:0]); - rvdffe #(32) dbg_data1_reg (.*, .din(data1_din[31:0]), .dout(data1_reg[31:0]), .en(data1_reg_wren), .rst_l(dbg_dm_rst_l)); + rvdffe #(32) dbg_data1_reg ( + .*, + .din(data1_din[31:0]), + .dout(data1_reg[31:0]), + .en(data1_reg_wren), + .rst_l(dbg_dm_rst_l) + ); - rvdffs #(1) sb_abmem_cmd_doneff (.din(sb_abmem_cmd_done_in), .dout(sb_abmem_cmd_done), .en(sb_abmem_cmd_done_en), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l), .*); - rvdffs #(1) sb_abmem_data_doneff (.din(sb_abmem_data_done_in), .dout(sb_abmem_data_done), .en(sb_abmem_data_done_en), .clk(dbg_free_clk), .rst_l(dbg_dm_rst_l), .*); + rvdffs #(1) sb_abmem_cmd_doneff ( + .din(sb_abmem_cmd_done_in), + .dout(sb_abmem_cmd_done), + .en(sb_abmem_cmd_done_en), + .clk(dbg_free_clk), + .rst_l(dbg_dm_rst_l), + .* + ); + rvdffs #(1) sb_abmem_data_doneff ( + .din(sb_abmem_data_done_in), + .dout(sb_abmem_data_done), + .en(sb_abmem_data_done_en), + .clk(dbg_free_clk), + .rst_l(dbg_dm_rst_l), + .* + ); - // FSM to control the debug mode entry, command send/recieve, and Resume flow. - always_comb begin - dbg_nxtstate = IDLE; - dbg_state_en = 1'b0; - abstractcs_busy_wren = 1'b0; - abstractcs_busy_din = 1'b0; - dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core. 
Need to drive every time this register is written since core might be halted due to MPC - dbg_resume_req = 1'b0; // single pulse output to the core - dbg_sb_bus_error = 1'b0; - data0_reg_wren2 = 1'b0; - sb_abmem_cmd_done_in = 1'b0; - sb_abmem_data_done_in = 1'b0; - sb_abmem_cmd_done_en = 1'b0; - sb_abmem_data_done_en = 1'b0; + // FSM to control the debug mode entry, command send/recieve, and Resume flow. + always_comb begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = dmcontrol_wren_Q & dmcontrol_reg[31]; // single pulse output to the core. Need to drive every time this register is written since core might be halted due to MPC + dbg_resume_req = 1'b0; // single pulse output to the core + dbg_sb_bus_error = 1'b0; + data0_reg_wren2 = 1'b0; + sb_abmem_cmd_done_in = 1'b0; + sb_abmem_data_done_in = 1'b0; + sb_abmem_cmd_done_en = 1'b0; + sb_abmem_data_done_en = 1'b0; - case (dbg_state) - IDLE: begin - dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? HALTED : HALTING; // initiate the halt command to the core - dbg_state_en = dmcontrol_reg[31] | dmstatus_reg[9] | dec_tlu_mpc_halted_only; // when the jtag writes the halt bit in the DM register, OR when the status indicates H - dbg_halt_req = dmcontrol_reg[31]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes - end - HALTING : begin - dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK - dbg_state_en = dmstatus_reg[9] | dec_tlu_mpc_halted_only; // core indicates halted - end - HALTED: begin - // wait for halted to go away before send to resume. Else start of new command - dbg_nxtstate = dmstatus_reg[9] ? (resumereq ? RESUMING : (((command_reg[31:24] == 8'h2) & abmem_addr_external) ? SB_CMD_START : CORE_CMD_START)) : + case (dbg_state) + IDLE: begin + dbg_nxtstate = (dmstatus_reg[9] | dec_tlu_mpc_halted_only) ? 
HALTED : HALTING; // initiate the halt command to the core + dbg_state_en = dmcontrol_reg[31] | dmstatus_reg[9] | dec_tlu_mpc_halted_only; // when the jtag writes the halt bit in the DM register, OR when the status indicates H + dbg_halt_req = dmcontrol_reg[31]; // only when jtag has written the halt_req bit in the control. Removed debug mode qualification during MPC changes + end + HALTING: begin + dbg_nxtstate = HALTED; // Goto HALTED once the core sends an ACK + dbg_state_en = dmstatus_reg[9] | dec_tlu_mpc_halted_only; // core indicates halted + end + HALTED: begin + // wait for halted to go away before send to resume. Else start of new command + dbg_nxtstate = dmstatus_reg[9] ? (resumereq ? RESUMING : (((command_reg[31:24] == 8'h2) & abmem_addr_external) ? SB_CMD_START : CORE_CMD_START)) : (dmcontrol_reg[31] ? HALTING : IDLE); // This is MPC halted case - dbg_state_en = (dmstatus_reg[9] & resumereq) | execute_command | ~(dmstatus_reg[9] | dec_tlu_mpc_halted_only); - abstractcs_busy_wren = dbg_state_en & ((dbg_nxtstate == CORE_CMD_START) | (dbg_nxtstate == SB_CMD_START)); // write busy when a new command was written by jtag - abstractcs_busy_din = 1'b1; - dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming - end - CORE_CMD_START: begin - // Don't execute the command if cmderror or transfer=0 for abstract register access - dbg_nxtstate = ((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) ? CMD_DONE : CORE_CMD_WAIT; // new command sent to the core - dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]); - end - CORE_CMD_WAIT: begin - dbg_nxtstate = CMD_DONE; - dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command - end - SB_CMD_START: begin - dbg_nxtstate = (|abstractcs_reg[10:8]) ? 
CMD_DONE : SB_CMD_SEND; - dbg_state_en = (dbg_bus_clk_en & ~sb_cmd_pending) | (|abstractcs_reg[10:8]); - end - SB_CMD_SEND: begin - sb_abmem_cmd_done_in = 1'b1; - sb_abmem_data_done_in= 1'b1; - sb_abmem_cmd_done_en = (sb_bus_cmd_read | sb_bus_cmd_write_addr) & dbg_bus_clk_en; - sb_abmem_data_done_en= (sb_bus_cmd_read | sb_bus_cmd_write_data) & dbg_bus_clk_en; - dbg_nxtstate = SB_CMD_RESP; - dbg_state_en = (sb_abmem_cmd_done | sb_abmem_cmd_done_en) & (sb_abmem_data_done | sb_abmem_data_done_en) & dbg_bus_clk_en; - end - SB_CMD_RESP: begin - dbg_nxtstate = CMD_DONE; - dbg_state_en = (sb_bus_rsp_read | sb_bus_rsp_write) & dbg_bus_clk_en; - dbg_sb_bus_error = (sb_bus_rsp_read | sb_bus_rsp_write) & sb_bus_rsp_error & dbg_bus_clk_en; - data0_reg_wren2 = dbg_state_en & ~sb_abmem_cmd_write & ~dbg_sb_bus_error; - end - CMD_DONE: begin - dbg_nxtstate = HALTED; - dbg_state_en = 1'b1; - abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 ) - abstractcs_busy_din = 1'b0; - sb_abmem_cmd_done_in = 1'b0; - sb_abmem_data_done_in= 1'b0; - sb_abmem_cmd_done_en = 1'b1; - sb_abmem_data_done_en= 1'b1; - end - RESUMING : begin - dbg_nxtstate = IDLE; - dbg_state_en = dmstatus_reg[17]; // resume ack has been updated in the dmstatus register - end - default : begin - dbg_nxtstate = IDLE; - dbg_state_en = 1'b0; - abstractcs_busy_wren = 1'b0; - abstractcs_busy_din = 1'b0; - dbg_halt_req = 1'b0; // single pulse output to the core - dbg_resume_req = 1'b0; // single pulse output to the core - dbg_sb_bus_error = 1'b0; - data0_reg_wren2 = 1'b0; - sb_abmem_cmd_done_in = 1'b0; - sb_abmem_data_done_in = 1'b0; - sb_abmem_cmd_done_en = 1'b0; - sb_abmem_data_done_en = 1'b0; - end - endcase - end // always_comb begin + dbg_state_en = (dmstatus_reg[9] & resumereq) | execute_command | ~(dmstatus_reg[9] | dec_tlu_mpc_halted_only); + abstractcs_busy_wren = dbg_state_en & ((dbg_nxtstate == CORE_CMD_START) | (dbg_nxtstate == SB_CMD_START)); // write busy when a new command 
was written by jtag + abstractcs_busy_din = 1'b1; + dbg_resume_req = dbg_state_en & (dbg_nxtstate == RESUMING); // single cycle pulse to core if resuming + end + CORE_CMD_START: begin + // Don't execute the command if cmderror or transfer=0 for abstract register access + dbg_nxtstate = ((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17])) ? CMD_DONE : CORE_CMD_WAIT; // new command sent to the core + dbg_state_en = dbg_cmd_valid | (|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]); + end + CORE_CMD_WAIT: begin + dbg_nxtstate = CMD_DONE; + dbg_state_en = core_dbg_cmd_done; // go to done state for one cycle after completing current command + end + SB_CMD_START: begin + dbg_nxtstate = (|abstractcs_reg[10:8]) ? CMD_DONE : SB_CMD_SEND; + dbg_state_en = (dbg_bus_clk_en & ~sb_cmd_pending) | (|abstractcs_reg[10:8]); + end + SB_CMD_SEND: begin + sb_abmem_cmd_done_in = 1'b1; + sb_abmem_data_done_in = 1'b1; + sb_abmem_cmd_done_en = (sb_bus_cmd_read | sb_bus_cmd_write_addr) & dbg_bus_clk_en; + sb_abmem_data_done_en = (sb_bus_cmd_read | sb_bus_cmd_write_data) & dbg_bus_clk_en; + dbg_nxtstate = SB_CMD_RESP; + dbg_state_en = (sb_abmem_cmd_done | sb_abmem_cmd_done_en) & (sb_abmem_data_done | sb_abmem_data_done_en) & dbg_bus_clk_en; + end + SB_CMD_RESP: begin + dbg_nxtstate = CMD_DONE; + dbg_state_en = (sb_bus_rsp_read | sb_bus_rsp_write) & dbg_bus_clk_en; + dbg_sb_bus_error = (sb_bus_rsp_read | sb_bus_rsp_write) & sb_bus_rsp_error & dbg_bus_clk_en; + data0_reg_wren2 = dbg_state_en & ~sb_abmem_cmd_write & ~dbg_sb_bus_error; + end + CMD_DONE: begin + dbg_nxtstate = HALTED; + dbg_state_en = 1'b1; + abstractcs_busy_wren = dbg_state_en; // remove the busy bit from the abstracts ( bit 12 ) + abstractcs_busy_din = 1'b0; + sb_abmem_cmd_done_in = 1'b0; + sb_abmem_data_done_in = 1'b0; + sb_abmem_cmd_done_en = 1'b1; + sb_abmem_data_done_en = 1'b1; + end + RESUMING: begin + dbg_nxtstate = IDLE; + dbg_state_en = dmstatus_reg[17]; // resume ack 
has been updated in the dmstatus register + end + default: begin + dbg_nxtstate = IDLE; + dbg_state_en = 1'b0; + abstractcs_busy_wren = 1'b0; + abstractcs_busy_din = 1'b0; + dbg_halt_req = 1'b0; // single pulse output to the core + dbg_resume_req = 1'b0; // single pulse output to the core + dbg_sb_bus_error = 1'b0; + data0_reg_wren2 = 1'b0; + sb_abmem_cmd_done_in = 1'b0; + sb_abmem_data_done_in = 1'b0; + sb_abmem_cmd_done_en = 1'b0; + sb_abmem_data_done_en = 1'b0; + end + endcase + end // always_comb begin - assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) | + assign dmi_reg_rdata_din[31:0] = ({32{dmi_reg_addr == 7'h4}} & data0_reg[31:0]) | ({32{dmi_reg_addr == 7'h5}} & data1_reg[31:0]) | ({32{dmi_reg_addr == 7'h10}} & {2'b0,dmcontrol_reg[29],1'b0,dmcontrol_reg[27:0]}) | // Read0 to Write only bits ({32{dmi_reg_addr == 7'h11}} & dmstatus_reg[31:0]) | @@ -561,188 +753,211 @@ import el2_pkg::*; ({32{dmi_reg_addr == 7'h3d}} & sbdata1_reg[31:0]); - rvdffs #($bits(state_t)) dbg_state_reg (.din(dbg_nxtstate), .dout({dbg_state}), .en(dbg_state_en), .rst_l(dbg_dm_rst_l & rst_l), .clk(dbg_free_clk)); - rvdffe #(32) dmi_rddata_reg (.din(dmi_reg_rdata_din[31:0]), .dout(dmi_reg_rdata[31:0]), .en(dmi_reg_en), .rst_l(dbg_dm_rst_l), .clk(clk), .*); + rvdffs #($bits( + state_t + )) dbg_state_reg ( + .din(dbg_nxtstate), + .dout({dbg_state}), + .en(dbg_state_en), + .rst_l(dbg_dm_rst_l & rst_l), + .clk(dbg_free_clk) + ); + rvdffe #(32) dmi_rddata_reg ( + .din(dmi_reg_rdata_din[31:0]), + .dout(dmi_reg_rdata[31:0]), + .en(dmi_reg_en), + .rst_l(dbg_dm_rst_l), + .clk(clk), + .* + ); - assign abmem_addr[31:0] = data1_reg[31:0]; - assign abmem_addr_core_local = (abmem_addr_in_dccm_region | abmem_addr_in_iccm_region | abmem_addr_in_pic_region); - assign abmem_addr_external = ~abmem_addr_core_local; + assign abmem_addr[31:0] = data1_reg[31:0]; + assign abmem_addr_core_local = (abmem_addr_in_dccm_region | abmem_addr_in_iccm_region | abmem_addr_in_pic_region); + 
assign abmem_addr_external = ~abmem_addr_core_local; - assign abmem_addr_in_dccm_region = (abmem_addr[31:28] == pt.DCCM_REGION) & pt.DCCM_ENABLE; - assign abmem_addr_in_iccm_region = (abmem_addr[31:28] == pt.ICCM_REGION) & pt.ICCM_ENABLE; - assign abmem_addr_in_pic_region = (abmem_addr[31:28] == pt.PIC_REGION); + assign abmem_addr_in_dccm_region = (abmem_addr[31:28] == pt.DCCM_REGION) & pt.DCCM_ENABLE; + assign abmem_addr_in_iccm_region = (abmem_addr[31:28] == pt.ICCM_REGION) & pt.ICCM_ENABLE; + assign abmem_addr_in_pic_region = (abmem_addr[31:28] == pt.PIC_REGION); - // interface for the core - assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? data1_reg[31:0] : {20'b0, command_reg[11:0]}; - assign dbg_cmd_wrdata[31:0] = data0_reg[31:0]; - assign dbg_cmd_valid = (dbg_state == CORE_CMD_START) & ~((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]) | ((command_reg[31:24] == 8'h2) & abmem_addr_external)) & dma_dbg_ready; - assign dbg_cmd_write = command_reg[16]; - assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)}; - assign dbg_cmd_size[1:0] = command_reg[21:20]; + // interface for the core + assign dbg_cmd_addr[31:0] = (command_reg[31:24] == 8'h2) ? data1_reg[31:0] : {20'b0, command_reg[11:0]}; + assign dbg_cmd_wrdata[31:0] = data0_reg[31:0]; + assign dbg_cmd_valid = (dbg_state == CORE_CMD_START) & ~((|abstractcs_reg[10:8]) | ((command_reg[31:24] == 8'h0) & ~command_reg[17]) | ((command_reg[31:24] == 8'h2) & abmem_addr_external)) & dma_dbg_ready; + assign dbg_cmd_write = command_reg[16]; + assign dbg_cmd_type[1:0] = (command_reg[31:24] == 8'h2) ? 2'b10 : {1'b0, (command_reg[15:12] == 4'b0)}; + assign dbg_cmd_size[1:0] = command_reg[21:20]; - assign dbg_cmd_addr_incr[3:0] = (command_reg[31:24] == 8'h2) ? (4'h1 << sb_abmem_cmd_size[1:0]) : 4'h1; - assign dbg_cmd_curr_addr[31:0] = (command_reg[31:24] == 8'h2) ? 
data1_reg[31:0] : {16'b0, command_reg[15:0]}; - assign dbg_cmd_next_addr[31:0] = dbg_cmd_curr_addr[31:0] + {28'h0,dbg_cmd_addr_incr[3:0]}; + assign dbg_cmd_addr_incr[3:0] = (command_reg[31:24] == 8'h2) ? (4'h1 << sb_abmem_cmd_size[1:0]) : 4'h1; + assign dbg_cmd_curr_addr[31:0] = (command_reg[31:24] == 8'h2) ? data1_reg[31:0] : {16'b0, command_reg[15:0]}; + assign dbg_cmd_next_addr[31:0] = dbg_cmd_curr_addr[31:0] + {28'h0, dbg_cmd_addr_incr[3:0]}; - // Ask DMA to stop taking bus trxns since debug request is done - assign dbg_dma_bubble = ((dbg_state == CORE_CMD_START) & ~(|abstractcs_reg[10:8])) | (dbg_state == CORE_CMD_WAIT); + // Ask DMA to stop taking bus trxns since debug request is done + assign dbg_dma_bubble = ((dbg_state == CORE_CMD_START) & ~(|abstractcs_reg[10:8])) | (dbg_state == CORE_CMD_WAIT); - assign sb_cmd_pending = (sb_state == CMD_RD) | (sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR) | (sb_state == CMD_WR_DATA) | (sb_state == RSP_RD) | (sb_state == RSP_WR); - assign sb_abmem_cmd_pending = (dbg_state == SB_CMD_START) | (dbg_state == SB_CMD_SEND) | (dbg_state== SB_CMD_RESP); + assign sb_cmd_pending = (sb_state == CMD_RD) | (sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR) | (sb_state == CMD_WR_DATA) | (sb_state == RSP_RD) | (sb_state == RSP_WR); + assign sb_abmem_cmd_pending = (dbg_state == SB_CMD_START) | (dbg_state == SB_CMD_SEND) | (dbg_state== SB_CMD_RESP); // system bus FSM always_comb begin - sb_nxtstate = SBIDLE; - sb_state_en = 1'b0; - sbcs_sbbusy_wren = 1'b0; - sbcs_sbbusy_din = 1'b0; - sbcs_sberror_wren = 1'b0; - sbcs_sberror_din[2:0] = 3'b0; - sbaddress0_reg_wren1 = 1'b0; - case (sb_state) - SBIDLE: begin - sb_nxtstate = sbdata0wr_access ? 
WAIT_WR : WAIT_RD; - sb_state_en = (sbdata0wr_access | sbreadondata_access | sbreadonaddr_access) & ~(|sbcs_reg[14:12]) & ~sbcs_reg[22]; - sbcs_sbbusy_wren = sb_state_en; // set the single read bit if it is a singlread command - sbcs_sbbusy_din = 1'b1; - sbcs_sberror_wren = sbcs_wren & (|dmi_reg_wdata[14:12]); // write to clear the error bits - sbcs_sberror_din[2:0] = ~dmi_reg_wdata[14:12] & sbcs_reg[14:12]; - end - WAIT_RD: begin - sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_RD; - sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size; - sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; - sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; - end - WAIT_WR: begin - sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_WR; - sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size; - sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; - sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; - end - CMD_RD : begin - sb_nxtstate = RSP_RD; - sb_state_en = sb_bus_cmd_read & dbg_bus_clk_en; - end - CMD_WR : begin - sb_nxtstate = (sb_bus_cmd_write_addr & sb_bus_cmd_write_data) ? RSP_WR : (sb_bus_cmd_write_data ? 
CMD_WR_ADDR : CMD_WR_DATA); - sb_state_en = (sb_bus_cmd_write_addr | sb_bus_cmd_write_data) & dbg_bus_clk_en; - end - CMD_WR_ADDR : begin - sb_nxtstate = RSP_WR; - sb_state_en = sb_bus_cmd_write_addr & dbg_bus_clk_en; - end - CMD_WR_DATA : begin - sb_nxtstate = RSP_WR; - sb_state_en = sb_bus_cmd_write_data & dbg_bus_clk_en; - end - RSP_RD: begin - sb_nxtstate = DONE; - sb_state_en = sb_bus_rsp_read & dbg_bus_clk_en; - sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; - sbcs_sberror_din[2:0] = 3'b010; - end - RSP_WR: begin - sb_nxtstate = DONE; - sb_state_en = sb_bus_rsp_write & dbg_bus_clk_en; - sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; - sbcs_sberror_din[2:0] = 3'b010; - end - DONE: begin - sb_nxtstate = SBIDLE; - sb_state_en = 1'b1; - sbcs_sbbusy_wren = 1'b1; // reset the single read - sbcs_sbbusy_din = 1'b0; - sbaddress0_reg_wren1 = sbcs_reg[16] & (sbcs_reg[14:12] == 3'b0); // auto increment was set and no error. Update to new address after completing the current command - end - default : begin - sb_nxtstate = SBIDLE; - sb_state_en = 1'b0; - sbcs_sbbusy_wren = 1'b0; - sbcs_sbbusy_din = 1'b0; - sbcs_sberror_wren = 1'b0; - sbcs_sberror_din[2:0] = 3'b0; - sbaddress0_reg_wren1 = 1'b0; - end - endcase - end // always_comb begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + case (sb_state) + SBIDLE: begin + sb_nxtstate = sbdata0wr_access ? 
WAIT_WR : WAIT_RD; + sb_state_en = (sbdata0wr_access | sbreadondata_access | sbreadonaddr_access) & ~(|sbcs_reg[14:12]) & ~sbcs_reg[22]; + sbcs_sbbusy_wren = sb_state_en; // set the single read bit if it is a singlread command + sbcs_sbbusy_din = 1'b1; + sbcs_sberror_wren = sbcs_wren & (|dmi_reg_wdata[14:12]); // write to clear the error bits + sbcs_sberror_din[2:0] = ~dmi_reg_wdata[14:12] & sbcs_reg[14:12]; + end + WAIT_RD: begin + sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_RD; + sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; + end + WAIT_WR: begin + sb_nxtstate = (sbcs_unaligned | sbcs_illegal_size) ? DONE : CMD_WR; + sb_state_en = (dbg_bus_clk_en & ~sb_abmem_cmd_pending) | sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_wren = sbcs_unaligned | sbcs_illegal_size; + sbcs_sberror_din[2:0] = sbcs_unaligned ? 3'b011 : 3'b100; + end + CMD_RD: begin + sb_nxtstate = RSP_RD; + sb_state_en = sb_bus_cmd_read & dbg_bus_clk_en; + end + CMD_WR: begin + sb_nxtstate = (sb_bus_cmd_write_addr & sb_bus_cmd_write_data) ? RSP_WR : (sb_bus_cmd_write_data ? 
CMD_WR_ADDR : CMD_WR_DATA); + sb_state_en = (sb_bus_cmd_write_addr | sb_bus_cmd_write_data) & dbg_bus_clk_en; + end + CMD_WR_ADDR: begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_bus_cmd_write_addr & dbg_bus_clk_en; + end + CMD_WR_DATA: begin + sb_nxtstate = RSP_WR; + sb_state_en = sb_bus_cmd_write_data & dbg_bus_clk_en; + end + RSP_RD: begin + sb_nxtstate = DONE; + sb_state_en = sb_bus_rsp_read & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; + sbcs_sberror_din[2:0] = 3'b010; + end + RSP_WR: begin + sb_nxtstate = DONE; + sb_state_en = sb_bus_rsp_write & dbg_bus_clk_en; + sbcs_sberror_wren = sb_state_en & sb_bus_rsp_error; + sbcs_sberror_din[2:0] = 3'b010; + end + DONE: begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b1; + sbcs_sbbusy_wren = 1'b1; // reset the single read + sbcs_sbbusy_din = 1'b0; + sbaddress0_reg_wren1 = sbcs_reg[16] & (sbcs_reg[14:12] == 3'b0); // auto increment was set and no error. Update to new address after completing the current command + end + default: begin + sb_nxtstate = SBIDLE; + sb_state_en = 1'b0; + sbcs_sbbusy_wren = 1'b0; + sbcs_sbbusy_din = 1'b0; + sbcs_sberror_wren = 1'b0; + sbcs_sberror_din[2:0] = 3'b0; + sbaddress0_reg_wren1 = 1'b0; + end + endcase + end // always_comb begin - rvdffs #($bits(sb_state_t)) sb_state_reg (.din(sb_nxtstate), .dout({sb_state}), .en(sb_state_en), .rst_l(dbg_dm_rst_l), .clk(sb_free_clk)); + rvdffs #($bits( + sb_state_t + )) sb_state_reg ( + .din(sb_nxtstate), + .dout({sb_state}), + .en(sb_state_en), + .rst_l(dbg_dm_rst_l), + .clk(sb_free_clk) + ); - assign sb_abmem_cmd_write = command_reg[16]; - assign sb_abmem_cmd_size[2:0] = {1'b0, command_reg[21:20]}; - assign sb_abmem_cmd_addr[31:0] = abmem_addr[31:0]; - assign sb_abmem_cmd_wdata[31:0] = data0_reg[31:0]; + assign sb_abmem_cmd_write = command_reg[16]; + assign sb_abmem_cmd_size[2:0] = {1'b0, command_reg[21:20]}; + assign sb_abmem_cmd_addr[31:0] = abmem_addr[31:0]; + assign sb_abmem_cmd_wdata[31:0] = data0_reg[31:0]; - assign 
sb_cmd_size[2:0] = sbcs_reg[19:17]; - assign sb_cmd_wdata[63:0] = {sbdata1_reg[31:0], sbdata0_reg[31:0]}; - assign sb_cmd_addr[31:0] = sbaddress0_reg[31:0]; + assign sb_cmd_size[2:0] = sbcs_reg[19:17]; + assign sb_cmd_wdata[63:0] = {sbdata1_reg[31:0], sbdata0_reg[31:0]}; + assign sb_cmd_addr[31:0] = sbaddress0_reg[31:0]; - assign sb_abmem_cmd_awvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_cmd_done; - assign sb_abmem_cmd_wvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_data_done; - assign sb_abmem_cmd_arvalid = (dbg_state == SB_CMD_SEND) & ~sb_abmem_cmd_write & ~sb_abmem_cmd_done & ~sb_abmem_data_done; - assign sb_abmem_read_pend = (dbg_state == SB_CMD_RESP) & ~sb_abmem_cmd_write; + assign sb_abmem_cmd_awvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_cmd_done; + assign sb_abmem_cmd_wvalid = (dbg_state == SB_CMD_SEND) & sb_abmem_cmd_write & ~sb_abmem_data_done; + assign sb_abmem_cmd_arvalid = (dbg_state == SB_CMD_SEND) & ~sb_abmem_cmd_write & ~sb_abmem_cmd_done & ~sb_abmem_data_done; + assign sb_abmem_read_pend = (dbg_state == SB_CMD_RESP) & ~sb_abmem_cmd_write; - assign sb_cmd_awvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR)); - assign sb_cmd_wvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_DATA)); - assign sb_cmd_arvalid = (sb_state == CMD_RD); - assign sb_read_pend = (sb_state == RSP_RD); + assign sb_cmd_awvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_ADDR)); + assign sb_cmd_wvalid = ((sb_state == CMD_WR) | (sb_state == CMD_WR_DATA)); + assign sb_cmd_arvalid = (sb_state == CMD_RD); + assign sb_read_pend = (sb_state == RSP_RD); - assign sb_axi_size[2:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? sb_abmem_cmd_size[2:0] : sb_cmd_size[2:0]; - assign sb_axi_addr[31:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? 
sb_abmem_cmd_addr[31:0] : sb_cmd_addr[31:0]; - assign sb_axi_wrdata[63:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid) ? {2{sb_abmem_cmd_wdata[31:0]}} : sb_cmd_wdata[63:0]; + assign sb_axi_size[2:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? sb_abmem_cmd_size[2:0] : sb_cmd_size[2:0]; + assign sb_axi_addr[31:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid | sb_abmem_cmd_arvalid | sb_abmem_read_pend) ? sb_abmem_cmd_addr[31:0] : sb_cmd_addr[31:0]; + assign sb_axi_wrdata[63:0] = (sb_abmem_cmd_awvalid | sb_abmem_cmd_wvalid) ? {2{sb_abmem_cmd_wdata[31:0]}} : sb_cmd_wdata[63:0]; - // Generic bus response signals - assign sb_bus_cmd_read = sb_axi_arvalid & sb_axi_arready; - assign sb_bus_cmd_write_addr = sb_axi_awvalid & sb_axi_awready; - assign sb_bus_cmd_write_data = sb_axi_wvalid & sb_axi_wready; + // Generic bus response signals + assign sb_bus_cmd_read = sb_axi_arvalid & sb_axi_arready; + assign sb_bus_cmd_write_addr = sb_axi_awvalid & sb_axi_awready; + assign sb_bus_cmd_write_data = sb_axi_wvalid & sb_axi_wready; - assign sb_bus_rsp_read = sb_axi_rvalid & sb_axi_rready; - assign sb_bus_rsp_write = sb_axi_bvalid & sb_axi_bready; - assign sb_bus_rsp_error = (sb_bus_rsp_read & (|(sb_axi_rresp[1:0]))) | (sb_bus_rsp_write & (|(sb_axi_bresp[1:0]))); + assign sb_bus_rsp_read = sb_axi_rvalid & sb_axi_rready; + assign sb_bus_rsp_write = sb_axi_bvalid & sb_axi_bready; + assign sb_bus_rsp_error = (sb_bus_rsp_read & (|(sb_axi_rresp[1:0]))) | (sb_bus_rsp_write & (|(sb_axi_bresp[1:0]))); - // AXI Request signals - assign sb_axi_awvalid = sb_abmem_cmd_awvalid | sb_cmd_awvalid; - assign sb_axi_awaddr[31:0] = sb_axi_addr[31:0]; - assign sb_axi_awid[pt.SB_BUS_TAG-1:0] = '0; - assign sb_axi_awsize[2:0] = sb_axi_size[2:0]; - assign sb_axi_awprot[2:0] = 3'b001; - assign sb_axi_awcache[3:0] = 4'b1111; - assign sb_axi_awregion[3:0] = sb_axi_addr[31:28]; - assign sb_axi_awlen[7:0] = '0; - assign sb_axi_awburst[1:0] = 2'b01; - assign 
sb_axi_awqos[3:0] = '0; - assign sb_axi_awlock = '0; + // AXI Request signals + assign sb_axi_awvalid = sb_abmem_cmd_awvalid | sb_cmd_awvalid; + assign sb_axi_awaddr[31:0] = sb_axi_addr[31:0]; + assign sb_axi_awid[pt.SB_BUS_TAG-1:0] = '0; + assign sb_axi_awsize[2:0] = sb_axi_size[2:0]; + assign sb_axi_awprot[2:0] = 3'b001; + assign sb_axi_awcache[3:0] = 4'b1111; + assign sb_axi_awregion[3:0] = sb_axi_addr[31:28]; + assign sb_axi_awlen[7:0] = '0; + assign sb_axi_awburst[1:0] = 2'b01; + assign sb_axi_awqos[3:0] = '0; + assign sb_axi_awlock = '0; - assign sb_axi_wvalid = sb_abmem_cmd_wvalid | sb_cmd_wvalid; - assign sb_axi_wdata[63:0] = ({64{(sb_axi_size[2:0] == 3'h0)}} & {8{sb_axi_wrdata[7:0]}}) | + assign sb_axi_wvalid = sb_abmem_cmd_wvalid | sb_cmd_wvalid; + assign sb_axi_wdata[63:0] = ({64{(sb_axi_size[2:0] == 3'h0)}} & {8{sb_axi_wrdata[7:0]}}) | ({64{(sb_axi_size[2:0] == 3'h1)}} & {4{sb_axi_wrdata[15:0]}}) | ({64{(sb_axi_size[2:0] == 3'h2)}} & {2{sb_axi_wrdata[31:0]}}) | ({64{(sb_axi_size[2:0] == 3'h3)}} & {sb_axi_wrdata[63:0]}); - assign sb_axi_wstrb[7:0] = ({8{(sb_axi_size[2:0] == 3'h0)}} & (8'h1 << sb_axi_addr[2:0])) | + assign sb_axi_wstrb[7:0] = ({8{(sb_axi_size[2:0] == 3'h0)}} & (8'h1 << sb_axi_addr[2:0])) | ({8{(sb_axi_size[2:0] == 3'h1)}} & (8'h3 << {sb_axi_addr[2:1],1'b0})) | ({8{(sb_axi_size[2:0] == 3'h2)}} & (8'hf << {sb_axi_addr[2],2'b0})) | ({8{(sb_axi_size[2:0] == 3'h3)}} & 8'hff); - assign sb_axi_wlast = '1; + assign sb_axi_wlast = '1; - assign sb_axi_arvalid = sb_abmem_cmd_arvalid | sb_cmd_arvalid; - assign sb_axi_araddr[31:0] = sb_axi_addr[31:0]; - assign sb_axi_arid[pt.SB_BUS_TAG-1:0] = '0; - assign sb_axi_arsize[2:0] = sb_axi_size[2:0]; - assign sb_axi_arprot[2:0] = 3'b001; - assign sb_axi_arcache[3:0] = 4'b0; - assign sb_axi_arregion[3:0] = sb_axi_addr[31:28]; - assign sb_axi_arlen[7:0] = '0; - assign sb_axi_arburst[1:0] = 2'b01; - assign sb_axi_arqos[3:0] = '0; - assign sb_axi_arlock = '0; + assign sb_axi_arvalid = sb_abmem_cmd_arvalid | 
sb_cmd_arvalid; + assign sb_axi_araddr[31:0] = sb_axi_addr[31:0]; + assign sb_axi_arid[pt.SB_BUS_TAG-1:0] = '0; + assign sb_axi_arsize[2:0] = sb_axi_size[2:0]; + assign sb_axi_arprot[2:0] = 3'b001; + assign sb_axi_arcache[3:0] = 4'b0; + assign sb_axi_arregion[3:0] = sb_axi_addr[31:28]; + assign sb_axi_arlen[7:0] = '0; + assign sb_axi_arburst[1:0] = 2'b01; + assign sb_axi_arqos[3:0] = '0; + assign sb_axi_arlock = '0; - // AXI Response signals - assign sb_axi_bready = 1'b1; + // AXI Response signals + assign sb_axi_bready = 1'b1; - assign sb_axi_rready = 1'b1; - assign sb_bus_rdata[63:0] = ({64{sb_axi_size == 3'h0}} & ((sb_axi_rdata[63:0] >> 8*sb_axi_addr[2:0]) & 64'hff)) | + assign sb_axi_rready = 1'b1; + assign sb_bus_rdata[63:0] = ({64{sb_axi_size == 3'h0}} & ((sb_axi_rdata[63:0] >> 8*sb_axi_addr[2:0]) & 64'hff)) | ({64{sb_axi_size == 3'h1}} & ((sb_axi_rdata[63:0] >> 16*sb_axi_addr[2:1]) & 64'hffff)) | ({64{sb_axi_size == 3'h2}} & ((sb_axi_rdata[63:0] >> 32*sb_axi_addr[2]) & 64'hffff_ffff)) | ({64{sb_axi_size == 3'h3}} & sb_axi_rdata[63:0]); diff --git a/Flow/design/dec/el2_dec.sv b/Flow/design/dec/el2_dec.sv index d22997a..4be201a 100644 --- a/Flow/design/dec/el2_dec.sv +++ b/Flow/design/dec/el2_dec.sv @@ -28,419 +28,430 @@ //******************************************************************************** module el2_dec -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic free_clk, // Clock always. Through two clock headers. For flops without second clock header built in. - input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. 
- - input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle - - output logic dec_extint_stall, // Stall on external interrupt - - output logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked - output logic dec_pause_state_cg, // to top for active state clock gating + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic free_clk, // Clock always. Through two clock headers. For flops without second clock header built in. + input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. + + input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle + + output logic dec_extint_stall, // Stall on external interrupt + + output logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked + output logic dec_pause_state_cg, // to top for active state clock gating - output logic dec_tlu_core_empty, - - input logic rst_l, // reset, active low - input logic [31:1] rst_vec, // reset vector, from core pins - - input logic nmi_int, // NMI pin - input logic [31:1] nmi_vec, // NMI vector, from pins + output logic dec_tlu_core_empty, + + input logic rst_l, // reset, active low + input logic [31:1] rst_vec, // reset vector, from core pins - input logic i_cpu_halt_req, // Asynchronous Halt request to CPU - input logic i_cpu_run_req, // Asynchronous Restart request to CPU + input logic nmi_int, // NMI pin + input logic [31:1] nmi_vec, // NMI vector, from pins - output logic o_cpu_halt_status, // Halt status of core (pmu/fw) - output logic o_cpu_halt_ack, // Halt request ack - output logic o_cpu_run_ack, // Run request ack - output logic 
o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU - input logic [31:4] core_id, // CORE ID + output logic o_cpu_halt_status, // Halt status of core (pmu/fw) + output logic o_cpu_halt_ack, // Halt request ack + output logic o_cpu_run_ack, // Run request ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request - // external MPC halt/run interface - input logic mpc_debug_halt_req, // Async halt request - input logic mpc_debug_run_req, // Async run request - input logic mpc_reset_run_req, // Run/halt after reset - output logic mpc_debug_halt_ack, // Halt ack - output logic mpc_debug_run_ack, // Run ack - output logic debug_brkpt_status, // debug breakpoint + input logic [31:4] core_id, // CORE ID - input logic exu_pmu_i0_br_misp, // slot 0 branch misp - input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken - input logic exu_pmu_i0_pc4, // slot 0 4 byte branch + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + input logic exu_pmu_i0_br_misp, // slot 0 branch misp + input logic exu_pmu_i0_br_ataken, // slot 0 branch actual taken + input logic exu_pmu_i0_pc4, // slot 0 4 byte branch - input logic lsu_nonblock_load_valid_m, // valid nonblock load at m - input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag - input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r 
- input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag - input logic lsu_nonblock_load_data_valid, // valid nonblock load data back - input logic lsu_nonblock_load_data_error, // nonblock load bus error - input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag - input logic [31:0] lsu_nonblock_load_data, // nonblock load data - input logic lsu_pmu_bus_trxn, // D side bus transaction - input logic lsu_pmu_bus_misaligned, // D side bus misaligned - input logic lsu_pmu_bus_error, // D side bus error - input logic lsu_pmu_bus_busy, // D side bus busy - input logic lsu_pmu_misaligned_m, // D side load or store misaligned - input logic lsu_pmu_load_external_m, // D side bus load - input logic lsu_pmu_store_external_m, // D side bus store - input logic dma_pmu_dccm_read, // DMA DCCM read - input logic dma_pmu_dccm_write, // DMA DCCM write - input logic dma_pmu_any_read, // DMA read - input logic dma_pmu_any_write, // DMA write + input logic lsu_nonblock_load_valid_m, // valid nonblock load at m + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag + input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + input logic [31:0] lsu_nonblock_load_data, // nonblock load data - input logic [31:1] lsu_fir_addr, // Fast int address - input logic [1:0] lsu_fir_error, // Fast int lookup error + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus 
busy + input logic lsu_pmu_misaligned_m, // D side load or store misaligned + input logic lsu_pmu_load_external_m, // D side bus load + input logic lsu_pmu_store_external_m, // D side bus store + input logic dma_pmu_dccm_read, // DMA DCCM read + input logic dma_pmu_dccm_write, // DMA DCCM write + input logic dma_pmu_any_read, // DMA read + input logic dma_pmu_any_write, // DMA write - input logic ifu_pmu_instr_aligned, // aligned instructions - input logic ifu_pmu_fetch_stall, // fetch unit stalled - input logic ifu_pmu_ic_miss, // icache miss - input logic ifu_pmu_ic_hit, // icache hit - input logic ifu_pmu_bus_error, // Instruction side bus error - input logic ifu_pmu_bus_busy, // Instruction side bus busy - input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + input logic [31:1] lsu_fir_addr, // Fast int address + input logic [ 1:0] lsu_fir_error, // Fast int lookup error - input logic ifu_ic_error_start, // IC single bit error - input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error + input logic ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction - input logic [3:0] lsu_trigger_match_m, - input logic dbg_cmd_valid, // debugger abstract command valid - input logic dbg_cmd_write, // command is a write - input logic [1:0] dbg_cmd_type, // command type - input logic [31:0] dbg_cmd_addr, // command address - input logic [1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i + input logic ifu_ic_error_start, // IC single bit error + input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error + input logic [ 3:0] lsu_trigger_match_m, + input logic dbg_cmd_valid, // debugger abstract command valid + 
input logic dbg_cmd_write, // command is a write + input logic [ 1:0] dbg_cmd_type, // command type + input logic [31:0] dbg_cmd_addr, // command address + input logic [ 1:0] dbg_cmd_wrdata, // command write data, for fence/fence_i - input logic ifu_i0_icaf, // icache access fault - input logic [1:0] ifu_i0_icaf_type, // icache access fault type - input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst - input logic ifu_i0_dbecc, // icache/iccm double-bit error + input logic ifu_i0_icaf, // icache access fault + input logic [1:0] ifu_i0_icaf_type, // icache access fault type - input logic lsu_idle_any, // lsu idle for halting + input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst + input logic ifu_i0_dbecc, // icache/iccm double-bit error - input el2_br_pkt_t i0_brp, // branch packet - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index - input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR - input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag - input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index + input logic lsu_idle_any, // lsu idle for halting - input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet - input logic lsu_single_ecc_error_incr, // LSU inc SB error counter + input el2_br_pkt_t i0_brp, // branch packet + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + input logic [ pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + input logic [ pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + input logic [ $clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index - input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error - input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error - input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address + input el2_lsu_error_pkt_t lsu_error_pkt_r, // LSU exception/error packet + input logic 
lsu_single_ecc_error_incr, // LSU inc SB error counter - input logic [31:0] exu_div_result, // final div result - input logic exu_div_wren, // Divide write enable to GPR + input logic lsu_imprecise_error_load_any, // LSU imprecise load bus error + input logic lsu_imprecise_error_store_any, // LSU imprecise store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // LSU imprecise bus error address - input logic [31:0] exu_csr_rs1_x, // rs1 for csr instruction + input logic [31:0] exu_div_result, // final div result + input logic exu_div_wren, // Divide write enable to GPR - input logic [31:0] lsu_result_m, // load result - input logic [31:0] lsu_result_corr_r, // load result - corrected load data + input logic [31:0] exu_csr_rs1_x, // rs1 for csr instruction - input logic lsu_load_stall_any, // This is for blocking loads - input logic lsu_store_stall_any, // This is for blocking stores - input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event - input logic dma_iccm_stall_any, // iccm stalled, pmu event + input logic [31:0] lsu_result_m, // load result + input logic [31:0] lsu_result_corr_r, // load result - corrected load data - input logic iccm_dma_sb_error, // ICCM DMA single bit error + input logic lsu_load_stall_any, // This is for blocking loads + input logic lsu_store_stall_any, // This is for blocking stores + input logic dma_dccm_stall_any, // stall any load/store at decode, pmu event + input logic dma_iccm_stall_any, // iccm stalled, pmu event - input logic exu_flush_final, // slot0 flush + input logic iccm_dma_sb_error, // ICCM DMA single bit error - input logic [31:1] exu_npc_r, // next PC + input logic exu_flush_final, // slot0 flush - input logic [31:0] exu_i0_result_x, // alu result x + input logic [31:1] exu_npc_r, // next PC + input logic [31:0] exu_i0_result_x, // alu result x - input logic ifu_i0_valid, // fetch valids to instruction buffer - input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer - 
input logic [31:1] ifu_i0_pc, // pc's for instruction buffer - input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst - input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's - input logic mexintpend, // External interrupt pending - input logic timer_int, // Timer interrupt pending (from pin) - input logic soft_int, // Software interrupt pending (from pin) + input logic ifu_i0_valid, // fetch valids to instruction buffer + input logic [31:0] ifu_i0_instr, // fetch inst's to instruction buffer + input logic [31:1] ifu_i0_pc, // pc's for instruction buffer + input logic ifu_i0_pc4, // indication of 4B or 2B for corresponding inst + input logic [31:1] exu_i0_pc_x, // pc's for e1 from the alu's - input logic [7:0] pic_claimid, // PIC claimid - input logic [3:0] pic_pl, // PIC priv level - input logic mhwakeup, // High priority wakeup + input logic mexintpend, // External interrupt pending + input logic timer_int, // Timer interrupt pending (from pin) + input logic soft_int, // Software interrupt pending (from pin) - output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level - output logic [3:0] dec_tlu_meipt, // to PIC + input logic [7:0] pic_claimid, // PIC claimid + input logic [3:0] pic_pl, // PIC priv level + input logic mhwakeup, // High priority wakeup - input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data - input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid - output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + output logic [3:0] dec_tlu_meicurpl, // to PIC, Current priv level + output logic [3:0] dec_tlu_meipt, // to PIC + input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics -// Debug start - input logic 
dbg_halt_req, // DM requests a halt - input logic dbg_resume_req, // DM requests a resume - input logic ifu_miss_state_idle, // I-side miss buffer empty - output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command - output logic dec_tlu_debug_mode, // Core is in debug mode - output logic dec_tlu_resume_ack, // Resume acknowledge - output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush - output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC - output logic dec_tlu_flush_leak_one_r, // single step - output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc - output logic [31:2] dec_tlu_meihap, // Fast ext int base + // Debug start + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty - output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic dec_tlu_debug_mode, // Core is in debug mode + output logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_flush_leak_one_r, // single step + output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc + output logic [31:2] dec_tlu_meihap, // Fast ext int base - output logic [31:0] dec_dbg_rddata, // debug command read data + output logic dec_debug_wdata_rs1_d, // insert debug write data into rs1 at decode - output logic dec_dbg_cmd_done, // abstract command is done - output logic dec_dbg_cmd_fail, // abstract command failed (illegal reg address) + output logic [31:0] dec_dbg_rddata, // debug command read data - output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks + output logic dec_dbg_cmd_done, // abstract command is done + output logic 
dec_dbg_cmd_fail, // abstract command failed (illegal reg address) - output logic dec_tlu_force_halt, // halt has been forced -// Debug end - // branch info from pipe0 for errors or counter updates - input logic [1:0] exu_i0_br_hist_r, // history - input logic exu_i0_br_error_r, // error - input logic exu_i0_br_start_error_r, // start error - input logic exu_i0_br_valid_r, // valid - input logic exu_i0_br_mp_r, // mispredict - input logic exu_i0_br_middle_r, // middle of bank + output el2_trigger_pkt_t [3:0] trigger_pkt_any, // info needed by debug trigger blocks - // branch info from pipe1 for errors or counter updates + output logic dec_tlu_force_halt, // halt has been forced + // Debug end + // branch info from pipe0 for errors or counter updates + input logic [1:0] exu_i0_br_hist_r, // history + input logic exu_i0_br_error_r, // error + input logic exu_i0_br_start_error_r, // start error + input logic exu_i0_br_valid_r, // valid + input logic exu_i0_br_mp_r, // mispredict + input logic exu_i0_br_middle_r, // middle of bank - input logic exu_i0_br_way_r, // way hit or repl + // branch info from pipe1 for errors or counter updates - output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data - output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data - output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data - output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data + input logic exu_i0_br_way_r, // way hit or repl - output logic [31:0] dec_i0_immed_d, // immediate data - output logic [12:1] dec_i0_br_immed_d, // br immediate data + output logic dec_i0_rs1_en_d, // Qualify GPR RS1 data + output logic dec_i0_rs2_en_d, // Qualify GPR RS2 data + output logic [31:0] gpr_i0_rs1_d, // gpr rs1 data + output logic [31:0] gpr_i0_rs2_d, // gpr rs2 data - output el2_alu_pkt_t i0_ap, // alu packet + output logic [31:0] dec_i0_immed_d, // immediate data + output logic [12:1] dec_i0_br_immed_d, // br immediate data - output logic dec_i0_alu_decode_d, // schedule on D-stage alu - output logic 
dec_i0_branch_d, // Branch in D-stage + output el2_alu_pkt_t i0_ap, // alu packet - output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's + output logic dec_i0_alu_decode_d, // schedule on D-stage alu + output logic dec_i0_branch_d, // Branch in D-stage - output logic [31:1] dec_i0_pc_d, // pc's at decode - output logic [3:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable - output logic [3:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable + output logic dec_i0_select_pc_d, // select pc onto rs1 for jal's - output logic [31:0] dec_i0_result_r, // Result R-stage + output logic [31:1] dec_i0_pc_d, // pc's at decode + output logic [ 3:0] dec_i0_rs1_bypass_en_d, // rs1 bypass enable + output logic [ 3:0] dec_i0_rs2_bypass_en_d, // rs2 bypass enable - output el2_lsu_pkt_t lsu_p, // lsu packet - output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands - output el2_mul_pkt_t mul_p, // mul packet - output el2_div_pkt_t div_p, // div packet - output logic dec_div_cancel, // cancel divide operation + output logic [31:0] dec_i0_result_r, // Result R-stage - output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses + output el2_lsu_pkt_t lsu_p, // lsu packet + output logic dec_qual_lsu_d, // LSU instruction at D. 
Use to quiet LSU operands + output el2_mul_pkt_t mul_p, // mul packet + output el2_div_pkt_t div_p, // div packet + output logic dec_div_cancel, // cancel divide operation - output logic dec_csr_ren_d, // CSR read enable - output logic [31:0] dec_csr_rddata_d, // CSR read data + output logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses - output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int - output logic dec_tlu_flush_lower_wb, - output logic [31:1] dec_tlu_flush_path_r, // tlu flush target - output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state - output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache + output logic dec_csr_ren_d, // CSR read enable + output logic [31:0] dec_csr_rddata_d, // CSR read data - output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage + output logic dec_tlu_flush_lower_r, // tlu flush due to late mp, exception, rfpc, or int + output logic dec_tlu_flush_lower_wb, + output logic [31:1] dec_tlu_flush_path_r, // tlu flush target + output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache - output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet + output logic [31:1] pred_correct_npc_x, // npc if prediction is correct at e2 stage - output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc - output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc - output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc - output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc + output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot 0 branch predictor update packet - output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus - output logic 
[pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index - output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag + output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, // toggles when slot0 perf counter 1 has an event inc + output logic dec_tlu_perfcnt2, // toggles when slot0 perf counter 2 has an event inc + output logic dec_tlu_perfcnt3, // toggles when slot0 perf counter 3 has an event inc - output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index + output el2_predict_pkt_t dec_i0_predict_p_d, // prediction packet to alus + output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index + output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag - output logic dec_lsu_valid_raw_d, + output logic [$clog2( +pt.BTB_SIZE +)-1:0] dec_fa_error_index, // Fully associt btb error index - output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + output logic dec_lsu_valid_raw_d, - output logic [1:0] dec_data_en, // clock-gate control logic - output logic [1:0] dec_ctl_en, + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control - input logic [15:0] ifu_i0_cinst, // 16b compressed instruction + output logic [1:0] dec_data_en, // clock-gate control logic + output logic [1:0] dec_ctl_en, - output el2_trace_pkt_t trace_rv_trace_pkt, // trace packet + input logic [15:0] ifu_i0_cinst, // 16b compressed instruction - // feature disable from mfdc - output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding - output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address - output logic dec_tlu_core_ecc_disable, // disable core ECC - output logic 
dec_tlu_bpred_disable, // disable branch prediction - output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing - output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + output el2_trace_pkt_t trace_rv_trace_pkt, // trace packet - // clock gating overrides from mcgc - output logic dec_tlu_misc_clk_override, // override misc clock domain gating - output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating - output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating - output logic dec_tlu_bus_clk_override, // override bus clock domain gating - output logic dec_tlu_pic_clk_override, // override PIC clock domain gating - output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating - output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating - output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating + // feature disable from mfdc + output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding + output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] - output logic dec_tlu_i0_commit_cmt, // committed i0 instruction - input logic scan_mode // Flop scan mode control + // clock gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic dec_tlu_bus_clk_override, // override bus clock domain gating + output logic dec_tlu_pic_clk_override, // override PIC clock 
domain gating + output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override, // override ICCM clock domain gating - ); + output logic dec_tlu_i0_commit_cmt, // committed i0 instruction + input logic scan_mode // Flop scan mode control +); - logic dec_tlu_dec_clk_override; // to and from dec blocks - logic clk_override; - logic dec_ib0_valid_d; + logic dec_tlu_dec_clk_override; // to and from dec blocks + logic clk_override; - logic dec_pmu_instr_decoded; - logic dec_pmu_decode_stall; - logic dec_pmu_presync_stall; - logic dec_pmu_postsync_stall; + logic dec_ib0_valid_d; - logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R. + logic dec_pmu_instr_decoded; + logic dec_pmu_decode_stall; + logic dec_pmu_presync_stall; + logic dec_pmu_postsync_stall; - logic [4:0] dec_i0_rs1_d; - logic [4:0] dec_i0_rs2_d; + logic dec_tlu_wr_pause_r; // CSR write to pause reg is at R. 
- logic [31:0] dec_i0_instr_d; + logic [4:0] dec_i0_rs1_d; + logic [4:0] dec_i0_rs2_d; - logic dec_tlu_trace_disable; - logic dec_tlu_pipelining_disable; + logic [31:0] dec_i0_instr_d; + logic dec_tlu_trace_disable; + logic dec_tlu_pipelining_disable; - logic [4:0] dec_i0_waddr_r; - logic dec_i0_wen_r; - logic [31:0] dec_i0_wdata_r; - logic dec_csr_wen_r; // csr write enable at wb - logic [11:0] dec_csr_wraddr_r; // write address for csryes - logic [31:0] dec_csr_wrdata_r; // csr write data at wb - logic [11:0] dec_csr_rdaddr_d; // read address for csr - logic dec_csr_legal_d; // csr indicates legal operation + logic [4:0] dec_i0_waddr_r; + logic dec_i0_wen_r; + logic [31:0] dec_i0_wdata_r; + logic dec_csr_wen_r; // csr write enable at wb + logic [11:0] dec_csr_wraddr_r; // write address for csryes + logic [31:0] dec_csr_wrdata_r; // csr write data at wb - logic dec_csr_wen_unq_d; // valid csr with write - for csr legal - logic dec_csr_any_unq_d; // valid csr - for csr legal - logic dec_csr_stall_int_ff; // csr is mie/mstatus + logic [11:0] dec_csr_rdaddr_d; // read address for csr + logic dec_csr_legal_d; // csr indicates legal operation - el2_trap_pkt_t dec_tlu_packet_r; + logic dec_csr_wen_unq_d; // valid csr with write - for csr legal + logic dec_csr_any_unq_d; // valid csr - for csr legal + logic dec_csr_stall_int_ff; // csr is mie/mstatus - logic dec_i0_pc4_d; - logic dec_tlu_presync_d; - logic dec_tlu_postsync_d; - logic dec_tlu_debug_stall; + el2_trap_pkt_t dec_tlu_packet_r; - logic [31:0] dec_illegal_inst; + logic dec_i0_pc4_d; + logic dec_tlu_presync_d; + logic dec_tlu_postsync_d; + logic dec_tlu_debug_stall; - logic dec_i0_icaf_d; + logic [31:0] dec_illegal_inst; - logic dec_i0_dbecc_d; - logic dec_i0_icaf_second_d; - logic [3:0] dec_i0_trigger_match_d; - logic dec_debug_fence_d; - logic dec_nonblock_load_wen; - logic [4:0] dec_nonblock_load_waddr; - logic dec_tlu_flush_pause_r; - el2_br_pkt_t dec_i0_brp; - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] 
dec_i0_bp_index; - logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr; - logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag; - logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index; // Fully associt btb index + logic dec_i0_icaf_d; - logic [31:1] dec_tlu_i0_pc_r; - logic dec_tlu_i0_kill_writeb_wb; - logic dec_tlu_i0_valid_r; + logic dec_i0_dbecc_d; + logic dec_i0_icaf_second_d; + logic [3:0] dec_i0_trigger_match_d; + logic dec_debug_fence_d; + logic dec_nonblock_load_wen; + logic [4:0] dec_nonblock_load_waddr; + logic dec_tlu_flush_pause_r; + el2_br_pkt_t dec_i0_brp; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index; + logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr; + logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag; + logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index; // Fully associt btb index - logic dec_pause_state; + logic [31:1] dec_tlu_i0_pc_r; + logic dec_tlu_i0_kill_writeb_wb; + logic dec_tlu_i0_valid_r; - logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type + logic dec_pause_state; - logic dec_tlu_flush_extint; // Fast ext int started + logic [1:0] dec_i0_icaf_type_d; // i0 instruction access fault type - logic [31:0] dec_i0_inst_wb; - logic [31:1] dec_i0_pc_wb; - logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1; - logic [4:0] dec_tlu_exc_cause_wb1; - logic [31:0] dec_tlu_mtval_wb1; - logic dec_tlu_i0_exc_valid_wb1; + logic dec_tlu_flush_extint; // Fast ext int started - logic [4:0] div_waddr_wb; - logic dec_div_active; + logic [31:0] dec_i0_inst_wb; + logic [31:1] dec_i0_pc_wb; + logic dec_tlu_i0_valid_wb1, dec_tlu_int_valid_wb1; + logic [ 4:0] dec_tlu_exc_cause_wb1; + logic [31:0] dec_tlu_mtval_wb1; + logic dec_tlu_i0_exc_valid_wb1; - logic dec_debug_valid_d; + logic [ 4:0] div_waddr_wb; + logic dec_div_active; - assign clk_override = dec_tlu_dec_clk_override; + logic dec_debug_valid_d; + assign clk_override = dec_tlu_dec_clk_override; - assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0]; + assign dec_dbg_rddata[31:0] = dec_i0_wdata_r[31:0]; - 
el2_dec_ib_ctl #(.pt(pt)) instbuff (.*); + el2_dec_ib_ctl #(.pt(pt)) instbuff (.*); - el2_dec_decode_ctl #(.pt(pt)) decode (.*); + el2_dec_decode_ctl #(.pt(pt)) decode (.*); - el2_dec_tlu_ctl #(.pt(pt)) tlu (.*); + el2_dec_tlu_ctl #(.pt(pt)) tlu (.*); - el2_dec_gpr_ctl #(.pt(pt)) arf (.*, - // inputs - .raddr0(dec_i0_rs1_d[4:0]), - .raddr1(dec_i0_rs2_d[4:0]), - .wen0(dec_i0_wen_r), .waddr0(dec_i0_waddr_r[4:0]), .wd0(dec_i0_wdata_r[31:0]), - .wen1(dec_nonblock_load_wen), .waddr1(dec_nonblock_load_waddr[4:0]), .wd1(lsu_nonblock_load_data[31:0]), - .wen2(exu_div_wren), .waddr2(div_waddr_wb), .wd2(exu_div_result[31:0]), + el2_dec_gpr_ctl #( + .pt(pt) + ) arf ( + .*, + // inputs + .raddr0(dec_i0_rs1_d[4:0]), + .raddr1(dec_i0_rs2_d[4:0]), - // outputs - .rd0(gpr_i0_rs1_d[31:0]), .rd1(gpr_i0_rs2_d[31:0]) - ); + .wen0(dec_i0_wen_r), + .waddr0(dec_i0_waddr_r[4:0]), + .wd0(dec_i0_wdata_r[31:0]), + .wen1(dec_nonblock_load_wen), + .waddr1(dec_nonblock_load_waddr[4:0]), + .wd1(lsu_nonblock_load_data[31:0]), + .wen2(exu_div_wren), + .waddr2(div_waddr_wb), + .wd2(exu_div_result[31:0]), + // outputs + .rd0(gpr_i0_rs1_d[31:0]), + .rd1(gpr_i0_rs2_d[31:0]) + ); -// Trigger - el2_dec_trigger #(.pt(pt)) dec_trigger (.*); + // Trigger + el2_dec_trigger #(.pt(pt)) dec_trigger (.*); -// trace - assign trace_rv_trace_pkt.trace_rv_i_insn_ip = dec_i0_inst_wb[31:0]; - assign trace_rv_trace_pkt.trace_rv_i_address_ip = { dec_i0_pc_wb[31:1], 1'b0}; - assign trace_rv_trace_pkt.trace_rv_i_valid_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1; - assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_exc_valid_wb1; - assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports - assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1; - assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports + // trace + assign 
trace_rv_trace_pkt.trace_rv_i_insn_ip = dec_i0_inst_wb[31:0]; + assign trace_rv_trace_pkt.trace_rv_i_address_ip = {dec_i0_pc_wb[31:1], 1'b0}; + assign trace_rv_trace_pkt.trace_rv_i_valid_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_valid_wb1 | dec_tlu_i0_exc_valid_wb1; + assign trace_rv_trace_pkt.trace_rv_i_exception_ip = dec_tlu_int_valid_wb1 | dec_tlu_i0_exc_valid_wb1; + assign trace_rv_trace_pkt.trace_rv_i_ecause_ip = dec_tlu_exc_cause_wb1[4:0]; // replicate across ports + assign trace_rv_trace_pkt.trace_rv_i_interrupt_ip = dec_tlu_int_valid_wb1; + assign trace_rv_trace_pkt.trace_rv_i_tval_ip = dec_tlu_mtval_wb1[31:0]; // replicate across ports -// end trace + // end trace -endmodule // el2_dec + +endmodule // el2_dec diff --git a/Flow/design/dec/el2_dec_decode_ctl.sv b/Flow/design/dec/el2_dec_decode_ctl.sv index b4c2a2a..69c3eb2 100644 --- a/Flow/design/dec/el2_dec_decode_ctl.sv +++ b/Flow/design/dec/el2_dec_decode_ctl.sv @@ -15,1021 +15,1078 @@ module el2_dec_decode_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic dec_tlu_trace_disable, - input logic dec_debug_valid_d, + `include "el2_param.vh" +) ( + input logic dec_tlu_trace_disable, + input logic dec_debug_valid_d, - input logic dec_tlu_flush_extint, // Flush external interrupt + input logic dec_tlu_flush_extint, // Flush external interrupt - input logic dec_tlu_force_halt, // invalidate nonblock load cam on a force halt event + input logic dec_tlu_force_halt, // invalidate nonblock load cam on a force halt event - output logic dec_extint_stall, // Stall from external interrupt + output logic dec_extint_stall, // Stall from external interrupt - input logic [15:0] ifu_i0_cinst, // 16b compressed instruction - output logic [31:0] dec_i0_inst_wb, // 32b instruction at wb+1 for trace encoder - output logic [31:1] dec_i0_pc_wb, // 31b pc at wb+1 for trace encoder + input logic [15:0] ifu_i0_cinst, // 16b compressed instruction + output logic [31:0] dec_i0_inst_wb, // 32b 
instruction at wb+1 for trace encoder + output logic [31:1] dec_i0_pc_wb, // 31b pc at wb+1 for trace encoder - input logic lsu_nonblock_load_valid_m, // valid nonblock load at m - input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag - input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r - input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag - input logic lsu_nonblock_load_data_valid, // valid nonblock load data back - input logic lsu_nonblock_load_data_error, // nonblock load bus error - input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag + input logic lsu_nonblock_load_valid_m, // valid nonblock load at m + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // -> corresponding tag + input logic lsu_nonblock_load_inv_r, // invalidate request for nonblock load r + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // -> corresponding tag + input logic lsu_nonblock_load_data_valid, // valid nonblock load data back + input logic lsu_nonblock_load_data_error, // nonblock load bus error + input logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // -> corresponding tag - input logic [3:0] dec_i0_trigger_match_d, // i0 decode trigger matches + input logic [3:0] dec_i0_trigger_match_d, // i0 decode trigger matches - input logic dec_tlu_wr_pause_r, // pause instruction at r - input logic dec_tlu_pipelining_disable, // pipeline disable - presync, i0 decode only + input logic dec_tlu_wr_pause_r, // pause instruction at r + input logic dec_tlu_pipelining_disable, // pipeline disable - presync, i0 decode only - input logic [3:0] lsu_trigger_match_m, // lsu trigger matches + input logic [3:0] lsu_trigger_match_m, // lsu trigger matches - input logic lsu_pmu_misaligned_m, // perf mon: load/store misalign - input logic dec_tlu_debug_stall, // debug stall decode - input logic 
dec_tlu_flush_leak_one_r, // leak1 instruction + input logic lsu_pmu_misaligned_m, // perf mon: load/store misalign + input logic dec_tlu_debug_stall, // debug stall decode + input logic dec_tlu_flush_leak_one_r, // leak1 instruction - input logic dec_debug_fence_d, // debug fence instruction + input logic dec_debug_fence_d, // debug fence instruction - input logic [1:0] dbg_cmd_wrdata, // disambiguate fence, fence_i + input logic [1:0] dbg_cmd_wrdata, // disambiguate fence, fence_i - input logic dec_i0_icaf_d, // icache access fault - input logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst - input logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type + input logic dec_i0_icaf_d, // icache access fault + input logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst + input logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type - input logic dec_i0_dbecc_d, // icache/iccm double-bit error + input logic dec_i0_dbecc_d, // icache/iccm double-bit error - input el2_br_pkt_t dec_i0_brp, // branch packet - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index - input logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR - input logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag - input logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index + input el2_br_pkt_t dec_i0_brp, // branch packet + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index + input logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR + input logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag + input logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index - input logic lsu_idle_any, // lsu idle: if fence instr & ~lsu_idle then stall decode + input logic lsu_idle_any, // lsu idle: if fence instr & ~lsu_idle then stall decode - input logic lsu_load_stall_any, // stall any load at decode - input logic 
lsu_store_stall_any, // stall any store at decode - input logic dma_dccm_stall_any, // stall any load/store at decode + input logic lsu_load_stall_any, // stall any load at decode + input logic lsu_store_stall_any, // stall any store at decode + input logic dma_dccm_stall_any, // stall any load/store at decode - input logic exu_div_wren, // nonblocking divide write enable to GPR. + input logic exu_div_wren, // nonblocking divide write enable to GPR. - input logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state - input logic dec_tlu_flush_lower_wb, // trap lower flush - input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state - input logic dec_tlu_flush_lower_r, // trap lower flush - input logic dec_tlu_flush_pause_r, // don't clear pause state on initial lower flush - input logic dec_tlu_presync_d, // CSR read needs to be presync'd - input logic dec_tlu_postsync_d, // CSR ops that need to be postsync'd + input logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_flush_lower_wb, // trap lower flush + input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_flush_lower_r, // trap lower flush + input logic dec_tlu_flush_pause_r, // don't clear pause state on initial lower flush + input logic dec_tlu_presync_d, // CSR read needs to be presync'd + input logic dec_tlu_postsync_d, // CSR ops that need to be postsync'd - input logic dec_i0_pc4_d, // inst is 4B inst else 2B + input logic dec_i0_pc4_d, // inst is 4B inst else 2B - input logic [31:0] dec_csr_rddata_d, // csr read data at wb - input logic dec_csr_legal_d, // csr indicates legal operation + input logic [31:0] dec_csr_rddata_d, // csr read data at wb + input logic dec_csr_legal_d, // csr indicates legal operation - input logic [31:0] exu_csr_rs1_x, // rs1 for csr instr + input logic [31:0] exu_csr_rs1_x, // rs1 
for csr instr - input logic [31:0] lsu_result_m, // load result - input logic [31:0] lsu_result_corr_r, // load result - corrected data for writing gpr's, not for bypassing + input logic [31:0] lsu_result_m, // load result + input logic [31:0] lsu_result_corr_r, // load result - corrected data for writing gpr's, not for bypassing - input logic exu_flush_final, // lower flush or i0 flush at X or D + input logic exu_flush_final, // lower flush or i0 flush at X or D - input logic [31:1] exu_i0_pc_x, // pcs at e1 + input logic [31:1] exu_i0_pc_x, // pcs at e1 - input logic [31:0] dec_i0_instr_d, // inst at decode + input logic [31:0] dec_i0_instr_d, // inst at decode - input logic dec_ib0_valid_d, // inst valid at decode + input logic dec_ib0_valid_d, // inst valid at decode - input logic [31:0] exu_i0_result_x, // from primary alu's + input logic [31:0] exu_i0_result_x, // from primary alu's - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. 
- input logic clk_override, // Override non-functional clock gating - input logic rst_l, // Flop reset + input logic clk_override, // Override non-functional clock gating + input logic rst_l, // Flop reset - output logic dec_i0_rs1_en_d, // rs1 enable at decode - output logic dec_i0_rs2_en_d, // rs2 enable at decode + output logic dec_i0_rs1_en_d, // rs1 enable at decode + output logic dec_i0_rs2_en_d, // rs2 enable at decode - output logic [4:0] dec_i0_rs1_d, // rs1 logical source - output logic [4:0] dec_i0_rs2_d, // rs2 logical source + output logic [4:0] dec_i0_rs1_d, // rs1 logical source + output logic [4:0] dec_i0_rs2_d, // rs2 logical source - output logic [31:0] dec_i0_immed_d, // 32b immediate data decode + output logic [31:0] dec_i0_immed_d, // 32b immediate data decode - output logic [12:1] dec_i0_br_immed_d, // 12b branch immediate + output logic [12:1] dec_i0_br_immed_d, // 12b branch immediate - output el2_alu_pkt_t i0_ap, // alu packets + output el2_alu_pkt_t i0_ap, // alu packets - output logic dec_i0_decode_d, // i0 decode + output logic dec_i0_decode_d, // i0 decode - output logic dec_i0_alu_decode_d, // decode to D-stage alu - output logic dec_i0_branch_d, // Branch in D-stage + output logic dec_i0_alu_decode_d, // decode to D-stage alu + output logic dec_i0_branch_d, // Branch in D-stage - output logic [4:0] dec_i0_waddr_r, // i0 logical source to write to gpr's - output logic dec_i0_wen_r, // i0 write enable - output logic [31:0] dec_i0_wdata_r, // i0 write data + output logic [ 4:0] dec_i0_waddr_r, // i0 logical source to write to gpr's + output logic dec_i0_wen_r, // i0 write enable + output logic [31:0] dec_i0_wdata_r, // i0 write data - output logic dec_i0_select_pc_d, // i0 select pc for rs1 - branches + output logic dec_i0_select_pc_d, // i0 select pc for rs1 - branches - output logic [3:0] dec_i0_rs1_bypass_en_d, // i0 rs1 bypass enable - output logic [3:0] dec_i0_rs2_bypass_en_d, // i0 rs2 bypass enable - output logic [31:0] 
dec_i0_result_r, // Result R-stage + output logic [ 3:0] dec_i0_rs1_bypass_en_d, // i0 rs1 bypass enable + output logic [ 3:0] dec_i0_rs2_bypass_en_d, // i0 rs2 bypass enable + output logic [31:0] dec_i0_result_r, // Result R-stage - output el2_lsu_pkt_t lsu_p, // load/store packet - output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands + output el2_lsu_pkt_t lsu_p, // load/store packet + output logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands - output el2_mul_pkt_t mul_p, // multiply packet + output el2_mul_pkt_t mul_p, // multiply packet - output el2_div_pkt_t div_p, // divide packet - output logic [4:0] div_waddr_wb, // DIV write address to GPR - output logic dec_div_cancel, // cancel the divide operation + output el2_div_pkt_t div_p, // divide packet + output logic [4:0] div_waddr_wb, // DIV write address to GPR + output logic dec_div_cancel, // cancel the divide operation - output logic dec_lsu_valid_raw_d, - output logic [11:0] dec_lsu_offset_d, + output logic dec_lsu_valid_raw_d, + output logic [11:0] dec_lsu_offset_d, - output logic dec_csr_ren_d, // valid csr decode - output logic dec_csr_wen_unq_d, // valid csr with write - for csr legal - output logic dec_csr_any_unq_d, // valid csr - for csr legal - output logic [11:0] dec_csr_rdaddr_d, // read address for csr - output logic dec_csr_wen_r, // csr write enable at r - output logic [11:0] dec_csr_wraddr_r, // write address for csr - output logic [31:0] dec_csr_wrdata_r, // csr write data at r - output logic dec_csr_stall_int_ff, // csr is mie/mstatus + output logic dec_csr_ren_d, // valid csr decode + output logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + output logic dec_csr_any_unq_d, // valid csr - for csr legal + output logic [11:0] dec_csr_rdaddr_d, // read address for csr + output logic dec_csr_wen_r, // csr write enable at r + output logic [11:0] dec_csr_wraddr_r, // write address for csr + output logic [31:0] dec_csr_wrdata_r, // 
csr write data at r + output logic dec_csr_stall_int_ff, // csr is mie/mstatus - output dec_tlu_i0_valid_r, // i0 valid inst at c + output dec_tlu_i0_valid_r, // i0 valid inst at c - output el2_trap_pkt_t dec_tlu_packet_r, // trap packet + output el2_trap_pkt_t dec_tlu_packet_r, // trap packet - output logic [31:1] dec_tlu_i0_pc_r, // i0 trap pc + output logic [31:1] dec_tlu_i0_pc_r, // i0 trap pc - output logic [31:0] dec_illegal_inst, // illegal inst - output logic [31:1] pred_correct_npc_x, // npc e2 if the prediction is correct + output logic [31:0] dec_illegal_inst, // illegal inst + output logic [31:1] pred_correct_npc_x, // npc e2 if the prediction is correct - output el2_predict_pkt_t dec_i0_predict_p_d, // i0 predict packet decode - output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // i0 predict fghr - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // i0 predict index - output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // i0_predict branch tag + output el2_predict_pkt_t dec_i0_predict_p_d, // i0 predict packet decode + output logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // i0 predict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // i0 predict index + output logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // i0_predict branch tag - output logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index + output logic [$clog2( +pt.BTB_SIZE +)-1:0] dec_fa_error_index, // Fully associt btb error index - output logic [1:0] dec_data_en, // clock-gating logic - output logic [1:0] dec_ctl_en, + output logic [1:0] dec_data_en, // clock-gating logic + output logic [1:0] dec_ctl_en, - output logic dec_pmu_instr_decoded, // number of instructions decode this cycle encoded - output logic dec_pmu_decode_stall, // decode is stalled - output logic dec_pmu_presync_stall, // decode has presync stall - output logic dec_pmu_postsync_stall, // decode has postsync stall + output logic 
dec_pmu_instr_decoded, // number of instructions decode this cycle encoded + output logic dec_pmu_decode_stall, // decode is stalled + output logic dec_pmu_presync_stall, // decode has presync stall + output logic dec_pmu_postsync_stall, // decode has postsync stall - output logic dec_nonblock_load_wen, // write enable for nonblock load - output logic [4:0] dec_nonblock_load_waddr, // logical write addr for nonblock load - output logic dec_pause_state, // core in pause state - output logic dec_pause_state_cg, // pause state for clock-gating + output logic dec_nonblock_load_wen, // write enable for nonblock load + output logic [4:0] dec_nonblock_load_waddr, // logical write addr for nonblock load + output logic dec_pause_state, // core in pause state + output logic dec_pause_state_cg, // pause state for clock-gating - output logic dec_div_active, // non-block divide is active + output logic dec_div_active, // non-block divide is active - input logic scan_mode - ); + input logic scan_mode +); - el2_dec_pkt_t i0_dp_raw, i0_dp; + el2_dec_pkt_t i0_dp_raw, i0_dp; - logic [31:0] i0; - logic i0_valid_d; + logic [31:0] i0; + logic i0_valid_d; - logic [31:0] i0_result_r; + logic [31:0] i0_result_r; - logic [2:0] i0_rs1bypass, i0_rs2bypass; + logic [2:0] i0_rs1bypass, i0_rs2bypass; - logic i0_jalimm20; - logic i0_uiimm20; + logic i0_jalimm20; + logic i0_uiimm20; - logic lsu_decode_d; - logic [31:0] i0_immed_d; - logic i0_presync; - logic i0_postsync; + logic lsu_decode_d; + logic [31:0] i0_immed_d; + logic i0_presync; + logic i0_postsync; - logic postsync_stall; - logic ps_stall; + logic postsync_stall; + logic ps_stall; - logic prior_inflight, prior_inflight_wb; + logic prior_inflight, prior_inflight_wb; - logic csr_clr_d, csr_set_d, csr_write_d; + logic csr_clr_d, csr_set_d, csr_write_d; - logic csr_clr_x,csr_set_x,csr_write_x,csr_imm_x; - logic [31:0] csr_mask_x; - logic [31:0] write_csr_data_x; - logic [31:0] write_csr_data_in; - logic [31:0] write_csr_data; - logic 
csr_data_wen; + logic csr_clr_x, csr_set_x, csr_write_x, csr_imm_x; + logic [31:0] csr_mask_x; + logic [31:0] write_csr_data_x; + logic [31:0] write_csr_data_in; + logic [31:0] write_csr_data; + logic csr_data_wen; - logic [4:0] csrimm_x; + logic [ 4:0] csrimm_x; - logic [31:0] csr_rddata_x; + logic [31:0] csr_rddata_x; - logic mul_decode_d; - logic div_decode_d; - logic div_e1_to_r; - logic div_flush; - logic div_active_in; - logic div_active; - logic i0_nonblock_div_stall; - logic i0_div_prior_div_stall; - logic nonblock_div_cancel; + logic mul_decode_d; + logic div_decode_d; + logic div_e1_to_r; + logic div_flush; + logic div_active_in; + logic div_active; + logic i0_nonblock_div_stall; + logic i0_div_prior_div_stall; + logic nonblock_div_cancel; - logic i0_legal; - logic shift_illegal; - logic illegal_inst_en; - logic illegal_lockout_in, illegal_lockout; - logic i0_legal_decode_d; - logic i0_exulegal_decode_d, i0_exudecode_d, i0_exublock_d; + logic i0_legal; + logic shift_illegal; + logic illegal_inst_en; + logic illegal_lockout_in, illegal_lockout; + logic i0_legal_decode_d; + logic i0_exulegal_decode_d, i0_exudecode_d, i0_exublock_d; - logic [12:1] last_br_immed_d; - logic i0_rs1_depend_i0_x, i0_rs1_depend_i0_r; - logic i0_rs2_depend_i0_x, i0_rs2_depend_i0_r; + logic [12:1] last_br_immed_d; + logic i0_rs1_depend_i0_x, i0_rs1_depend_i0_r; + logic i0_rs2_depend_i0_x, i0_rs2_depend_i0_r; - logic i0_div_decode_d; - logic i0_load_block_d; - logic [1:0] i0_rs1_depth_d, i0_rs2_depth_d; + logic i0_div_decode_d; + logic i0_load_block_d; + logic [1:0] i0_rs1_depth_d, i0_rs2_depth_d; - logic i0_load_stall_d; - logic i0_store_stall_d; + logic i0_load_stall_d; + logic i0_store_stall_d; - logic i0_predict_nt, i0_predict_t; + logic i0_predict_nt, i0_predict_t; - logic i0_notbr_error, i0_br_toffset_error; - logic i0_ret_error; - logic i0_br_error; - logic i0_br_error_all; - logic [11:0] i0_br_offset; + logic i0_notbr_error, i0_br_toffset_error; + logic i0_ret_error; + logic 
i0_br_error; + logic i0_br_error_all; + logic [11:0] i0_br_offset; - logic [20:1] i0_pcall_imm; // predicted jal's - logic i0_pcall_12b_offset; - logic i0_pcall_raw; - logic i0_pcall_case; - logic i0_pcall; + logic [20:1] i0_pcall_imm; // predicted jal's + logic i0_pcall_12b_offset; + logic i0_pcall_raw; + logic i0_pcall_case; + logic i0_pcall; - logic i0_pja_raw; - logic i0_pja_case; - logic i0_pja; + logic i0_pja_raw; + logic i0_pja_case; + logic i0_pja; - logic i0_pret_case; - logic i0_pret_raw, i0_pret; + logic i0_pret_case; + logic i0_pret_raw, i0_pret; - logic i0_jal; // jal's that are not predicted + logic i0_jal; // jal's that are not predicted - logic i0_predict_br; + logic i0_predict_br; - logic store_data_bypass_d, store_data_bypass_m; + logic store_data_bypass_d, store_data_bypass_m; - el2_class_pkt_t i0_rs1_class_d, i0_rs2_class_d; + el2_class_pkt_t i0_rs1_class_d, i0_rs2_class_d; - el2_class_pkt_t i0_d_c, i0_x_c, i0_r_c; + el2_class_pkt_t i0_d_c, i0_x_c, i0_r_c; - logic i0_ap_pc2, i0_ap_pc4; + logic i0_ap_pc2, i0_ap_pc4; - logic i0_rd_en_d; + logic i0_rd_en_d; - logic load_ldst_bypass_d; + logic load_ldst_bypass_d; - logic leak1_i0_stall_in, leak1_i0_stall; - logic leak1_i1_stall_in, leak1_i1_stall; - logic leak1_mode; + logic leak1_i0_stall_in, leak1_i0_stall; + logic leak1_i1_stall_in, leak1_i1_stall; + logic leak1_mode; - logic i0_csr_write_only_d; + logic i0_csr_write_only_d; - logic prior_inflight_x, prior_inflight_eff; - logic any_csr_d; + logic prior_inflight_x, prior_inflight_eff; + logic any_csr_d; - logic prior_csr_write; + logic prior_csr_write; - logic [3:0] i0_pipe_en; - logic i0_r_ctl_en, i0_x_ctl_en, i0_wb_ctl_en; - logic i0_x_data_en, i0_r_data_en, i0_wb_data_en; + logic [3:0] i0_pipe_en; + logic i0_r_ctl_en, i0_x_ctl_en, i0_wb_ctl_en; + logic i0_x_data_en, i0_r_data_en, i0_wb_data_en; - logic debug_fence_i; - logic debug_fence; + logic debug_fence_i; + logic debug_fence; - logic i0_csr_write; - logic presync_stall; + logic 
i0_csr_write; + logic presync_stall; - logic i0_instr_error; - logic i0_icaf_d; + logic i0_instr_error; + logic i0_icaf_d; - logic clear_pause; - logic pause_state_in, pause_state; - logic pause_stall; + logic clear_pause; + logic pause_state_in, pause_state; + logic pause_stall; - logic i0_brp_valid; - logic nonblock_load_cancel; - logic lsu_idle; - logic lsu_pmu_misaligned_r; - logic csr_ren_qual_d; - logic csr_read_x; - logic i0_block_d; - logic i0_block_raw_d; // This is use to create the raw valid - logic ps_stall_in; - logic [31:0] i0_result_x; + logic i0_brp_valid; + logic nonblock_load_cancel; + logic lsu_idle; + logic lsu_pmu_misaligned_r; + logic csr_ren_qual_d; + logic csr_read_x; + logic i0_block_d; + logic i0_block_raw_d; // This is use to create the raw valid + logic ps_stall_in; + logic [31:0] i0_result_x; - el2_dest_pkt_t d_d, x_d, r_d, wbd; - el2_dest_pkt_t x_d_in, r_d_in; + el2_dest_pkt_t d_d, x_d, r_d, wbd; + el2_dest_pkt_t x_d_in, r_d_in; - el2_trap_pkt_t d_t, x_t, x_t_in, r_t_in, r_t; + el2_trap_pkt_t d_t, x_t, x_t_in, r_t_in, r_t; - logic [3:0] lsu_trigger_match_r; + logic [ 3:0] lsu_trigger_match_r; - logic [31:1] dec_i0_pc_r; + logic [31:1] dec_i0_pc_r; - logic csr_read, csr_write; - logic i0_br_unpred; + logic csr_read, csr_write; + logic i0_br_unpred; - logic nonblock_load_valid_m_delay; - logic i0_wen_r; + logic nonblock_load_valid_m_delay; + logic i0_wen_r; - logic tlu_wr_pause_r1; - logic tlu_wr_pause_r2; + logic tlu_wr_pause_r1; + logic tlu_wr_pause_r2; - logic flush_final_r; + logic flush_final_r; - logic bitmanip_zbb_legal; - logic bitmanip_zbs_legal; - logic bitmanip_zbe_legal; - logic bitmanip_zbc_legal; - logic bitmanip_zbp_legal; - logic bitmanip_zbr_legal; - logic bitmanip_zbf_legal; - logic bitmanip_zba_legal; - logic bitmanip_zbb_zbp_legal; - logic bitmanip_zbp_zbe_zbf_legal; - logic bitmanip_zbb_zbp_zbe_zbf_legal; - logic bitmanip_legal; + logic bitmanip_zbb_legal; + logic bitmanip_zbs_legal; + logic bitmanip_zbe_legal; + 
logic bitmanip_zbc_legal; + logic bitmanip_zbp_legal; + logic bitmanip_zbr_legal; + logic bitmanip_zbf_legal; + logic bitmanip_zba_legal; + logic bitmanip_zbb_zbp_legal; + logic bitmanip_zbp_zbe_zbf_legal; + logic bitmanip_zbb_zbp_zbe_zbf_legal; + logic bitmanip_legal; - logic data_gate_en; - logic data_gate_clk; + logic data_gate_en; + logic data_gate_clk; - localparam NBLOAD_SIZE = pt.LSU_NUM_NBLOAD; - localparam NBLOAD_SIZE_MSB = int'(pt.LSU_NUM_NBLOAD)-1; - localparam NBLOAD_TAG_MSB = pt.LSU_NUM_NBLOAD_WIDTH-1; + localparam NBLOAD_SIZE = pt.LSU_NUM_NBLOAD; + localparam NBLOAD_SIZE_MSB = int'(pt.LSU_NUM_NBLOAD) - 1; + localparam NBLOAD_TAG_MSB = pt.LSU_NUM_NBLOAD_WIDTH - 1; - logic cam_write, cam_inv_reset, cam_data_reset; - logic [NBLOAD_TAG_MSB:0] cam_write_tag, cam_inv_reset_tag, cam_data_reset_tag; - logic [NBLOAD_SIZE_MSB:0] cam_wen; + logic cam_write, cam_inv_reset, cam_data_reset; + logic [NBLOAD_TAG_MSB:0] cam_write_tag, cam_inv_reset_tag, cam_data_reset_tag; + logic [NBLOAD_SIZE_MSB:0] cam_wen; - logic [NBLOAD_TAG_MSB:0] load_data_tag; - logic [NBLOAD_SIZE_MSB:0] nonblock_load_write; + logic [NBLOAD_TAG_MSB:0] load_data_tag; + logic [NBLOAD_SIZE_MSB:0] nonblock_load_write; - el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam; - el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_in; - el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_raw; + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam; + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_in; + el2_load_cam_pkt_t [NBLOAD_SIZE_MSB:0] cam_raw; - logic [4:0] nonblock_load_rd; - logic i0_nonblock_load_stall; - logic i0_nonblock_boundary_stall; + logic [4:0] nonblock_load_rd; + logic i0_nonblock_load_stall; + logic i0_nonblock_boundary_stall; - logic i0_rs1_nonblock_load_bypass_en_d, i0_rs2_nonblock_load_bypass_en_d; + logic i0_rs1_nonblock_load_bypass_en_d, i0_rs2_nonblock_load_bypass_en_d; - logic i0_load_kill_wen_r; + logic i0_load_kill_wen_r; - logic found; + logic found; + + logic [NBLOAD_SIZE_MSB:0] cam_inv_reset_val, 
cam_data_reset_val; - logic [NBLOAD_SIZE_MSB:0] cam_inv_reset_val, cam_data_reset_val; + logic debug_fence_raw; - logic debug_fence_raw; + logic [31:0] i0_result_r_raw; + logic [31:0] i0_result_corr_r; - logic [31:0] i0_result_r_raw; - logic [31:0] i0_result_corr_r; + logic [12:1] last_br_immed_x; + + logic [31:0] i0_inst_d; + logic [31:0] i0_inst_x; + logic [31:0] i0_inst_r; + logic [31:0] i0_inst_wb_in; + logic [31:0] i0_inst_wb; + + logic [31:1] i0_pc_wb; + + logic i0_wb_en; + + logic trace_enable; + + logic debug_valid_x; + + el2_inst_pkt_t i0_itype; + el2_reg_pkt_t i0r; + + + rvdffie #(8) misc1ff ( + .*, + .clk(free_l2clk), + .din({ + leak1_i1_stall_in, + leak1_i0_stall_in, + dec_tlu_flush_extint, + pause_state_in, + dec_tlu_wr_pause_r, + tlu_wr_pause_r1, + illegal_lockout_in, + ps_stall_in + }), + .dout({ + leak1_i1_stall, + leak1_i0_stall, + dec_extint_stall, + pause_state, + tlu_wr_pause_r1, + tlu_wr_pause_r2, + illegal_lockout, + ps_stall + }) + ); + + rvdffie #(8) misc2ff ( + .*, + .clk(free_l2clk), + .din({ + lsu_trigger_match_m[3:0], + lsu_pmu_misaligned_m, + div_active_in, + exu_flush_final, + dec_debug_valid_d + }), + .dout({ + lsu_trigger_match_r[3:0], lsu_pmu_misaligned_r, div_active, flush_final_r, debug_valid_x + }) + ); + + if (pt.BTB_ENABLE == 1) begin + // branch prediction - logic [12:1] last_br_immed_x; - - logic [31:0] i0_inst_d; - logic [31:0] i0_inst_x; - logic [31:0] i0_inst_r; - logic [31:0] i0_inst_wb_in; - logic [31:0] i0_inst_wb; - - logic [31:1] i0_pc_wb; - - logic i0_wb_en; - - logic trace_enable; - - logic debug_valid_x; - - el2_inst_pkt_t i0_itype; - el2_reg_pkt_t i0r; - - - rvdffie #(8) misc1ff (.*, - .clk(free_l2clk), - .din( {leak1_i1_stall_in,leak1_i0_stall_in,dec_tlu_flush_extint,pause_state_in ,dec_tlu_wr_pause_r, tlu_wr_pause_r1,illegal_lockout_in,ps_stall_in}), - .dout({leak1_i1_stall, leak1_i0_stall, dec_extint_stall, pause_state, tlu_wr_pause_r1,tlu_wr_pause_r2,illegal_lockout, ps_stall }) - ); - - rvdffie #(8) misc2ff 
(.*, - .clk(free_l2clk), - .din( {lsu_trigger_match_m[3:0],lsu_pmu_misaligned_m,div_active_in,exu_flush_final, dec_debug_valid_d}), - .dout({lsu_trigger_match_r[3:0],lsu_pmu_misaligned_r,div_active, flush_final_r, debug_valid_x}) - ); - -if(pt.BTB_ENABLE==1) begin -// branch prediction - - - // in leak1_mode, ignore any predictions for i0, treat branch as if we haven't seen it before - // in leak1 mode, also ignore branch errors for i0 - assign i0_brp_valid = dec_i0_brp.valid & ~leak1_mode & ~i0_icaf_d; - - assign dec_i0_predict_p_d.misp = '0; - assign dec_i0_predict_p_d.ataken = '0; - assign dec_i0_predict_p_d.boffset = '0; - - assign dec_i0_predict_p_d.pcall = i0_pcall; // don't mark as pcall if branch error - assign dec_i0_predict_p_d.pja = i0_pja; - assign dec_i0_predict_p_d.pret = i0_pret; - assign dec_i0_predict_p_d.prett[31:1] = dec_i0_brp.prett[31:1]; - assign dec_i0_predict_p_d.pc4 = dec_i0_pc4_d; - assign dec_i0_predict_p_d.hist[1:0] = dec_i0_brp.hist[1:0]; - assign dec_i0_predict_p_d.valid = i0_brp_valid & i0_legal_decode_d; - assign i0_notbr_error = i0_brp_valid & ~(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw); - - // no toffset error for a pret - assign i0_br_toffset_error = i0_brp_valid & dec_i0_brp.hist[1] & (dec_i0_brp.toffset[11:0] != i0_br_offset[11:0]) & ~i0_pret_raw; - assign i0_ret_error = i0_brp_valid & (dec_i0_brp.ret ^ i0_pret_raw); - assign i0_br_error = dec_i0_brp.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error; - assign dec_i0_predict_p_d.br_error = i0_br_error & i0_legal_decode_d & ~leak1_mode; - assign dec_i0_predict_p_d.br_start_error = dec_i0_brp.br_start_error & i0_legal_decode_d & ~leak1_mode; - assign i0_predict_index_d[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_i0_bp_index; - - assign i0_predict_btag_d[pt.BTB_BTAG_SIZE-1:0] = dec_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0]; - assign i0_br_error_all = (i0_br_error | dec_i0_brp.br_start_error) & ~leak1_mode; - assign dec_i0_predict_p_d.toffset[11:0] = 
i0_br_offset[11:0]; - assign i0_predict_fghr_d[pt.BHT_GHR_SIZE-1:0] = dec_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0]; - assign dec_i0_predict_p_d.way = dec_i0_brp.way; - - - if(pt.BTB_FULLYA) begin + + // in leak1_mode, ignore any predictions for i0, treat branch as if we haven't seen it before + // in leak1 mode, also ignore branch errors for i0 + assign i0_brp_valid = dec_i0_brp.valid & ~leak1_mode & ~i0_icaf_d; + + assign dec_i0_predict_p_d.misp = '0; + assign dec_i0_predict_p_d.ataken = '0; + assign dec_i0_predict_p_d.boffset = '0; + + assign dec_i0_predict_p_d.pcall = i0_pcall; // don't mark as pcall if branch error + assign dec_i0_predict_p_d.pja = i0_pja; + assign dec_i0_predict_p_d.pret = i0_pret; + assign dec_i0_predict_p_d.prett[31:1] = dec_i0_brp.prett[31:1]; + assign dec_i0_predict_p_d.pc4 = dec_i0_pc4_d; + assign dec_i0_predict_p_d.hist[1:0] = dec_i0_brp.hist[1:0]; + assign dec_i0_predict_p_d.valid = i0_brp_valid & i0_legal_decode_d; + assign i0_notbr_error = i0_brp_valid & ~(i0_dp_raw.condbr | i0_pcall_raw | i0_pja_raw | i0_pret_raw); + + // no toffset error for a pret + assign i0_br_toffset_error = i0_brp_valid & dec_i0_brp.hist[1] & (dec_i0_brp.toffset[11:0] != i0_br_offset[11:0]) & ~i0_pret_raw; + assign i0_ret_error = i0_brp_valid & (dec_i0_brp.ret ^ i0_pret_raw); + assign i0_br_error = dec_i0_brp.br_error | i0_notbr_error | i0_br_toffset_error | i0_ret_error; + assign dec_i0_predict_p_d.br_error = i0_br_error & i0_legal_decode_d & ~leak1_mode; + assign dec_i0_predict_p_d.br_start_error = dec_i0_brp.br_start_error & i0_legal_decode_d & ~leak1_mode; + assign i0_predict_index_d[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_i0_bp_index; + + assign i0_predict_btag_d[pt.BTB_BTAG_SIZE-1:0] = dec_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0]; + assign i0_br_error_all = (i0_br_error | dec_i0_brp.br_start_error) & ~leak1_mode; + assign dec_i0_predict_p_d.toffset[11:0] = i0_br_offset[11:0]; + assign i0_predict_fghr_d[pt.BHT_GHR_SIZE-1:0] = dec_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0]; + assign 
dec_i0_predict_p_d.way = dec_i0_brp.way; + + + if (pt.BTB_FULLYA) begin logic btb_error_found, btb_error_found_f; logic [$clog2(pt.BTB_SIZE)-1:0] fa_error_index_ns; assign btb_error_found = (i0_br_error_all | btb_error_found_f) & ~dec_tlu_flush_lower_r; assign fa_error_index_ns = (i0_br_error_all & ~btb_error_found_f) ? dec_i0_bp_fa_index : dec_fa_error_index; - rvdff #($clog2(pt.BTB_SIZE)+1) btberrorfa_f (.*, .clk(active_clk), - .din({btb_error_found, fa_error_index_ns}), - .dout({btb_error_found_f, dec_fa_error_index})); + rvdff #($clog2( + pt.BTB_SIZE + ) + 1) btberrorfa_f ( + .*, + .clk (active_clk), + .din ({btb_error_found, fa_error_index_ns}), + .dout({btb_error_found_f, dec_fa_error_index}) + ); - end - else - assign dec_fa_error_index = 'b0; + end else assign dec_fa_error_index = 'b0; - // end -end // if (pt.BTB_ENABLE==1) + // end + end // if (pt.BTB_ENABLE==1) else begin - always_comb begin - dec_i0_predict_p_d = '0; - dec_i0_predict_p_d.pcall = i0_pcall; // don't mark as pcall if branch error - dec_i0_predict_p_d.pja = i0_pja; - dec_i0_predict_p_d.pret = i0_pret; - dec_i0_predict_p_d.pc4 = dec_i0_pc4_d; - end + always_comb begin + dec_i0_predict_p_d = '0; + dec_i0_predict_p_d.pcall = i0_pcall; // don't mark as pcall if branch error + dec_i0_predict_p_d.pja = i0_pja; + dec_i0_predict_p_d.pret = i0_pret; + dec_i0_predict_p_d.pc4 = dec_i0_pc4_d; + end - assign i0_br_error_all = '0; - assign i0_predict_index_d = '0; - assign i0_predict_btag_d = '0; - assign i0_predict_fghr_d = '0; - assign i0_brp_valid = '0; -end // else: !if(pt.BTB_ENABLE==1) + assign i0_br_error_all = '0; + assign i0_predict_index_d = '0; + assign i0_predict_btag_d = '0; + assign i0_predict_fghr_d = '0; + assign i0_brp_valid = '0; + end // else: !if(pt.BTB_ENABLE==1) - // on br error turn anything into a nop - // on i0 instruction fetch access fault turn anything into a nop - // nop => alu rs1 imm12 rd lor + // on br error turn anything into a nop + // on i0 instruction fetch access fault 
turn anything into a nop + // nop => alu rs1 imm12 rd lor - assign i0_icaf_d = dec_i0_icaf_d | dec_i0_dbecc_d; + assign i0_icaf_d = dec_i0_icaf_d | dec_i0_dbecc_d; - assign i0_instr_error = i0_icaf_d; + assign i0_instr_error = i0_icaf_d; - always_comb begin - i0_dp = i0_dp_raw; - if (i0_br_error_all | i0_instr_error) begin - i0_dp = '0; - i0_dp.alu = 1'b1; - i0_dp.rs1 = 1'b1; - i0_dp.rs2 = 1'b1; - i0_dp.lor = 1'b1; - i0_dp.legal = 1'b1; - i0_dp.postsync = 1'b1; + always_comb begin + i0_dp = i0_dp_raw; + if (i0_br_error_all | i0_instr_error) begin + i0_dp = '0; + i0_dp.alu = 1'b1; + i0_dp.rs1 = 1'b1; + i0_dp.rs2 = 1'b1; + i0_dp.lor = 1'b1; + i0_dp.legal = 1'b1; + i0_dp.postsync = 1'b1; + end + end + + assign i0[31:0] = dec_i0_instr_d[31:0]; + + assign dec_i0_select_pc_d = i0_dp.pc; + + // branches that can be predicted + + assign i0_predict_br = i0_dp.condbr | i0_pcall | i0_pja | i0_pret; + + assign i0_predict_nt = ~(dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + assign i0_predict_t = (dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; + + assign i0_ap.add = i0_dp.add; + assign i0_ap.sub = i0_dp.sub; + assign i0_ap.land = i0_dp.land; + assign i0_ap.lor = i0_dp.lor; + assign i0_ap.lxor = i0_dp.lxor; + assign i0_ap.sll = i0_dp.sll; + assign i0_ap.srl = i0_dp.srl; + assign i0_ap.sra = i0_dp.sra; + assign i0_ap.slt = i0_dp.slt; + assign i0_ap.unsign = i0_dp.unsign; + assign i0_ap.beq = i0_dp.beq; + assign i0_ap.bne = i0_dp.bne; + assign i0_ap.blt = i0_dp.blt; + assign i0_ap.bge = i0_dp.bge; + + assign i0_ap.clz = i0_dp.clz; + assign i0_ap.ctz = i0_dp.ctz; + assign i0_ap.cpop = i0_dp.cpop; + assign i0_ap.sext_b = i0_dp.sext_b; + assign i0_ap.sext_h = i0_dp.sext_h; + assign i0_ap.sh1add = i0_dp.sh1add; + assign i0_ap.sh2add = i0_dp.sh2add; + assign i0_ap.sh3add = i0_dp.sh3add; + assign i0_ap.zba = i0_dp.zba; + assign i0_ap.min = i0_dp.min; + assign i0_ap.max = i0_dp.max; + assign i0_ap.pack = i0_dp.pack; + assign i0_ap.packu = i0_dp.packu; + assign i0_ap.packh = 
i0_dp.packh; + assign i0_ap.rol = i0_dp.rol; + assign i0_ap.ror = i0_dp.ror; + assign i0_ap.grev = i0_dp.grev; + assign i0_ap.gorc = i0_dp.gorc; + assign i0_ap.zbb = i0_dp.zbb; + assign i0_ap.bset = i0_dp.bset; + assign i0_ap.bclr = i0_dp.bclr; + assign i0_ap.binv = i0_dp.binv; + assign i0_ap.bext = i0_dp.bext; + + assign i0_ap.csr_write = i0_csr_write_only_d; + assign i0_ap.csr_imm = i0_dp.csr_imm; + assign i0_ap.jal = i0_jal; + + assign i0_ap_pc2 = ~dec_i0_pc4_d; + assign i0_ap_pc4 = dec_i0_pc4_d; + + assign i0_ap.predict_nt = i0_predict_nt; + assign i0_ap.predict_t = i0_predict_t; + + + // non block load cam logic + + always_comb begin + found = 0; + for (int i = 0; i < NBLOAD_SIZE; i++) begin + if (~found) begin + if (~cam[i].valid) begin + cam_wen[i] = cam_write; + found = 1'b1; + end else begin + cam_wen[i] = 0; + end + end else cam_wen[i] = 0; + end + end + + assign cam_write = lsu_nonblock_load_valid_m; + assign cam_write_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_tag_m[NBLOAD_TAG_MSB:0]; + + assign cam_inv_reset = lsu_nonblock_load_inv_r; + assign cam_inv_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_inv_tag_r[NBLOAD_TAG_MSB:0]; + + assign cam_data_reset = lsu_nonblock_load_data_valid | lsu_nonblock_load_data_error; + assign cam_data_reset_tag[NBLOAD_TAG_MSB:0] = lsu_nonblock_load_data_tag[NBLOAD_TAG_MSB:0]; + + assign nonblock_load_rd[4:0] = (x_d.i0load) ? x_d.i0rd[4:0] : 5'b0; // rd data + + + // case of multiple loads to same dest ie. x1 ... 
you have to invalidate the older one + + for (genvar i = 0; i < NBLOAD_SIZE; i++) begin : cam_array + + assign cam_inv_reset_val[i] = cam_inv_reset & (cam_inv_reset_tag[NBLOAD_TAG_MSB:0] == cam[i].tag[NBLOAD_TAG_MSB:0]) & cam[i].valid; + + assign cam_data_reset_val[i] = cam_data_reset & (cam_data_reset_tag[NBLOAD_TAG_MSB:0] == cam_raw[i].tag[NBLOAD_TAG_MSB:0]) & cam_raw[i].valid; + + always_comb begin + + cam[i] = cam_raw[i]; + + if (cam_data_reset_val[i]) cam[i].valid = 1'b0; + + cam_in[i] = '0; + + if (cam_wen[i]) begin + cam_in[i].valid = 1'b1; + cam_in[i].wb = 1'b0; + cam_in[i].tag[NBLOAD_TAG_MSB:0] = cam_write_tag[NBLOAD_TAG_MSB:0]; + cam_in[i].rd[4:0] = nonblock_load_rd[4:0]; end - end - - assign i0[31:0] = dec_i0_instr_d[31:0]; - - assign dec_i0_select_pc_d = i0_dp.pc; - - // branches that can be predicted - - assign i0_predict_br = i0_dp.condbr | i0_pcall | i0_pja | i0_pret; - - assign i0_predict_nt = ~(dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; - assign i0_predict_t = (dec_i0_brp.hist[1] & i0_brp_valid) & i0_predict_br; - - assign i0_ap.add = i0_dp.add; - assign i0_ap.sub = i0_dp.sub; - assign i0_ap.land = i0_dp.land; - assign i0_ap.lor = i0_dp.lor; - assign i0_ap.lxor = i0_dp.lxor; - assign i0_ap.sll = i0_dp.sll; - assign i0_ap.srl = i0_dp.srl; - assign i0_ap.sra = i0_dp.sra; - assign i0_ap.slt = i0_dp.slt; - assign i0_ap.unsign = i0_dp.unsign; - assign i0_ap.beq = i0_dp.beq; - assign i0_ap.bne = i0_dp.bne; - assign i0_ap.blt = i0_dp.blt; - assign i0_ap.bge = i0_dp.bge; - - assign i0_ap.clz = i0_dp.clz; - assign i0_ap.ctz = i0_dp.ctz; - assign i0_ap.cpop = i0_dp.cpop; - assign i0_ap.sext_b = i0_dp.sext_b; - assign i0_ap.sext_h = i0_dp.sext_h; - assign i0_ap.sh1add = i0_dp.sh1add; - assign i0_ap.sh2add = i0_dp.sh2add; - assign i0_ap.sh3add = i0_dp.sh3add; - assign i0_ap.zba = i0_dp.zba; - assign i0_ap.min = i0_dp.min; - assign i0_ap.max = i0_dp.max; - assign i0_ap.pack = i0_dp.pack; - assign i0_ap.packu = i0_dp.packu; - assign i0_ap.packh = 
i0_dp.packh; - assign i0_ap.rol = i0_dp.rol; - assign i0_ap.ror = i0_dp.ror; - assign i0_ap.grev = i0_dp.grev; - assign i0_ap.gorc = i0_dp.gorc; - assign i0_ap.zbb = i0_dp.zbb; - assign i0_ap.bset = i0_dp.bset; - assign i0_ap.bclr = i0_dp.bclr; - assign i0_ap.binv = i0_dp.binv; - assign i0_ap.bext = i0_dp.bext; - - assign i0_ap.csr_write = i0_csr_write_only_d; - assign i0_ap.csr_imm = i0_dp.csr_imm; - assign i0_ap.jal = i0_jal; - - assign i0_ap_pc2 = ~dec_i0_pc4_d; - assign i0_ap_pc4 = dec_i0_pc4_d; - - assign i0_ap.predict_nt = i0_predict_nt; - assign i0_ap.predict_t = i0_predict_t; - - -// non block load cam logic - - always_comb begin - found = 0; - for (int i=0; i legal_equation module el2_dec_dec_ctl -import el2_pkg::*; - ( - input logic [31:0] inst, + import el2_pkg::*; +( + input logic [31:0] inst, - output el2_dec_pkt_t out - ); + output el2_dec_pkt_t out +); - logic [31:0] i; + logic [31:0] i; - assign i[31:0] = inst[31:0]; + assign i[31:0] = inst[31:0]; -assign out.alu = (i[30]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[5]&i[4]) | (i[30] + assign out.alu = (i[30]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[5]&i[4]) | (i[30] &!i[27]&!i[24]&i[4]) | (!i[30]&!i[25]&i[13]&i[12]) | (!i[29]&!i[27] &!i[5]&i[4]) | (i[27]&i[25]&i[14]&i[4]) | (!i[29]&!i[25]&!i[13]&!i[12] &i[4]) | (i[29]&i[27]&!i[14]&i[12]&i[4]) | (!i[27]&i[14]&!i[5]&i[4]) | ( @@ -1512,116 +1657,116 @@ assign out.alu = (i[30]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14]&!i[5]&i[4]) | (i[ &i[4]) | (i[2]) | (i[6]) | (!i[30]&i[29]&!i[24]&!i[23]&i[22]&i[21] &i[20]&!i[5]&i[4]) | (!i[12]&!i[5]&i[4]); -assign out.rs1 = (!i[13]&i[11]&!i[2]) | (!i[13]&i[10]&!i[2]) | (i[19]&i[13]&!i[2]) | ( + assign out.rs1 = (!i[13]&i[11]&!i[2]) | (!i[13]&i[10]&!i[2]) | (i[19]&i[13]&!i[2]) | ( !i[13]&i[9]&!i[2]) | (i[18]&i[13]&!i[2]) | (!i[13]&i[8]&!i[2]) | ( i[17]&i[13]&!i[2]) | (!i[13]&i[7]&!i[2]) | (i[16]&i[13]&!i[2]) | ( i[15]&i[13]&!i[2]) | (!i[4]&!i[2]) | (!i[14]&!i[13]&i[6]&!i[3]) | ( !i[6]&!i[2]); -assign out.rs2 = 
(i[5]&!i[4]&!i[2]) | (!i[6]&i[5]&!i[2]); + assign out.rs2 = (i[5] & !i[4] & !i[2]) | (!i[6] & i[5] & !i[2]); -assign out.imm12 = (!i[4]&!i[3]&i[2]) | (i[13]&!i[5]&i[4]&!i[2]) | (!i[13]&!i[12] + assign out.imm12 = (!i[4]&!i[3]&i[2]) | (i[13]&!i[5]&i[4]&!i[2]) | (!i[13]&!i[12] &i[6]&i[4]) | (!i[12]&!i[5]&i[4]&!i[2]); -assign out.rd = (!i[5]&!i[2]) | (i[5]&i[2]) | (i[4]); + assign out.rd = (!i[5] & !i[2]) | (i[5] & i[2]) | (i[4]); -assign out.shimm5 = (!i[29]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[27]&!i[13]&i[12] + assign out.shimm5 = (!i[29]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[27]&!i[13]&i[12] &!i[5]&i[4]&!i[2]) | (i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.imm20 = (i[5]&i[3]) | (i[4]&i[2]); + assign out.imm20 = (i[5] & i[3]) | (i[4] & i[2]); -assign out.pc = (!i[5]&!i[3]&i[2]) | (i[5]&i[3]); + assign out.pc = (!i[5] & !i[3] & i[2]) | (i[5] & i[3]); -assign out.load = (!i[5]&!i[4]&!i[2]); + assign out.load = (!i[5] & !i[4] & !i[2]); -assign out.store = (!i[6]&i[5]&!i[4]); + assign out.store = (!i[6] & i[5] & !i[4]); -assign out.lsu = (!i[6]&!i[4]&!i[2]); + assign out.lsu = (!i[6] & !i[4] & !i[2]); -assign out.add = (!i[14]&!i[13]&!i[12]&!i[5]&i[4]) | (!i[5]&!i[3]&i[2]) | (!i[30] + assign out.add = (!i[14]&!i[13]&!i[12]&!i[5]&i[4]) | (!i[5]&!i[3]&i[2]) | (!i[30] &!i[25]&!i[14]&!i[13]&!i[12]&!i[6]&i[4]&!i[2]); -assign out.sub = (i[30]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[29]&!i[25]&!i[14] + assign out.sub = (i[30]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[29]&!i[25]&!i[14] &i[13]&!i[6]&i[4]&!i[2]) | (i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]) | ( !i[14]&i[13]&!i[5]&i[4]&!i[2]) | (i[6]&!i[4]&!i[2]); -assign out.land = (!i[27]&!i[25]&i[14]&i[13]&i[12]&!i[6]&!i[2]) | (i[14]&i[13]&i[12] + assign out.land = (!i[27]&!i[25]&i[14]&i[13]&i[12]&!i[6]&!i[2]) | (i[14]&i[13]&i[12] &!i[5]&!i[2]); -assign out.lor = (!i[6]&i[3]) | (!i[29]&!i[27]&!i[25]&i[14]&i[13]&!i[12]&!i[6]&!i[2]) | ( + assign out.lor = (!i[6]&i[3]) | (!i[29]&!i[27]&!i[25]&i[14]&i[13]&!i[12]&!i[6]&!i[2]) | ( 
i[5]&i[4]&i[2]) | (!i[13]&!i[12]&i[6]&i[4]) | (i[14]&i[13]&!i[12] &!i[5]&!i[2]); -assign out.lxor = (!i[29]&!i[27]&!i[25]&i[14]&!i[13]&!i[12]&i[4]&!i[2]) | (i[14] + assign out.lxor = (!i[29]&!i[27]&!i[25]&i[14]&!i[13]&!i[12]&i[4]&!i[2]) | (i[14] &!i[13]&!i[12]&!i[5]&i[4]&!i[2]); -assign out.sll = (!i[29]&!i[27]&!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.sll = (!i[29] & !i[27] & !i[25] & !i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.sra = (i[30]&!i[29]&!i[27]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.sra = (i[30] & !i[29] & !i[27] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.srl = (!i[30]&!i[27]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.srl = (!i[30] & !i[27] & !i[25] & i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.slt = (!i[29]&!i[25]&!i[14]&i[13]&!i[6]&i[4]&!i[2]) | (!i[14]&i[13]&!i[5] + assign out.slt = (!i[29]&!i[25]&!i[14]&i[13]&!i[6]&i[4]&!i[2]) | (!i[14]&i[13]&!i[5] &i[4]&!i[2]); -assign out.unsign = (!i[14]&i[13]&i[12]&!i[5]&!i[2]) | (i[13]&i[6]&!i[4]&!i[2]) | ( + assign out.unsign = (!i[14]&i[13]&i[12]&!i[5]&!i[2]) | (i[13]&i[6]&!i[4]&!i[2]) | ( i[14]&!i[5]&!i[4]) | (!i[25]&!i[14]&i[13]&i[12]&!i[6]&!i[2]) | ( i[25]&i[14]&i[12]&!i[6]&i[5]&!i[2]); -assign out.condbr = (i[6]&!i[4]&!i[2]); + assign out.condbr = (i[6] & !i[4] & !i[2]); -assign out.beq = (!i[14]&!i[12]&i[6]&!i[4]&!i[2]); + assign out.beq = (!i[14] & !i[12] & i[6] & !i[4] & !i[2]); -assign out.bne = (!i[14]&i[12]&i[6]&!i[4]&!i[2]); + assign out.bne = (!i[14] & i[12] & i[6] & !i[4] & !i[2]); -assign out.bge = (i[14]&i[12]&i[5]&!i[4]&!i[2]); + assign out.bge = (i[14] & i[12] & i[5] & !i[4] & !i[2]); -assign out.blt = (i[14]&!i[12]&i[5]&!i[4]&!i[2]); + assign out.blt = (i[14] & !i[12] & i[5] & !i[4] & !i[2]); -assign out.jal = (i[6]&i[2]); + assign out.jal = (i[6] & i[2]); -assign out.by = (!i[13]&!i[12]&!i[6]&!i[4]&!i[2]); + assign out.by = (!i[13] & !i[12] & !i[6] & !i[4] & !i[2]); -assign out.half = (i[12]&!i[6]&!i[4]&!i[2]); 
+ assign out.half = (i[12] & !i[6] & !i[4] & !i[2]); -assign out.word = (i[13]&!i[6]&!i[4]); + assign out.word = (i[13] & !i[6] & !i[4]); -assign out.csr_read = (i[13]&i[6]&i[4]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | ( + assign out.csr_read = (i[13]&i[6]&i[4]) | (i[7]&i[6]&i[4]) | (i[8]&i[6]&i[4]) | ( i[9]&i[6]&i[4]) | (i[10]&i[6]&i[4]) | (i[11]&i[6]&i[4]); -assign out.csr_clr = (i[15]&i[13]&i[12]&i[6]&i[4]) | (i[16]&i[13]&i[12]&i[6]&i[4]) | ( + assign out.csr_clr = (i[15]&i[13]&i[12]&i[6]&i[4]) | (i[16]&i[13]&i[12]&i[6]&i[4]) | ( i[17]&i[13]&i[12]&i[6]&i[4]) | (i[18]&i[13]&i[12]&i[6]&i[4]) | ( i[19]&i[13]&i[12]&i[6]&i[4]); -assign out.csr_set = (i[15]&!i[12]&i[6]&i[4]) | (i[16]&!i[12]&i[6]&i[4]) | (i[17] + assign out.csr_set = (i[15]&!i[12]&i[6]&i[4]) | (i[16]&!i[12]&i[6]&i[4]) | (i[17] &!i[12]&i[6]&i[4]) | (i[18]&!i[12]&i[6]&i[4]) | (i[19]&!i[12]&i[6] &i[4]); -assign out.csr_write = (!i[13]&i[12]&i[6]&i[4]); + assign out.csr_write = (!i[13] & i[12] & i[6] & i[4]); -assign out.csr_imm = (i[14]&!i[13]&i[6]&i[4]) | (i[15]&i[14]&i[6]&i[4]) | (i[16] + assign out.csr_imm = (i[14]&!i[13]&i[6]&i[4]) | (i[15]&i[14]&i[6]&i[4]) | (i[16] &i[14]&i[6]&i[4]) | (i[17]&i[14]&i[6]&i[4]) | (i[18]&i[14]&i[6]&i[4]) | ( i[19]&i[14]&i[6]&i[4]); -assign out.presync = (!i[5]&i[3]) | (!i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | ( + assign out.presync = (!i[5]&i[3]) | (!i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | ( !i[13]&i[9]&i[6]&i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11] &i[6]&i[4]) | (i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | ( i[17]&i[13]&i[6]&i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6] &i[4]); -assign out.postsync = (i[12]&!i[5]&i[3]) | (!i[22]&!i[13]&!i[12]&i[6]&i[4]) | ( + assign out.postsync = (i[12]&!i[5]&i[3]) | (!i[22]&!i[13]&!i[12]&i[6]&i[4]) | ( !i[13]&i[7]&i[6]&i[4]) | (!i[13]&i[8]&i[6]&i[4]) | (!i[13]&i[9]&i[6] &i[4]) | (!i[13]&i[10]&i[6]&i[4]) | (!i[13]&i[11]&i[6]&i[4]) | ( i[15]&i[13]&i[6]&i[4]) | (i[16]&i[13]&i[6]&i[4]) | 
(i[17]&i[13]&i[6] &i[4]) | (i[18]&i[13]&i[6]&i[4]) | (i[19]&i[13]&i[6]&i[4]); -assign out.ebreak = (!i[22]&i[20]&!i[13]&!i[12]&i[6]&i[4]); + assign out.ebreak = (!i[22] & i[20] & !i[13] & !i[12] & i[6] & i[4]); -assign out.ecall = (!i[21]&!i[20]&!i[13]&!i[12]&i[6]&i[4]); + assign out.ecall = (!i[21] & !i[20] & !i[13] & !i[12] & i[6] & i[4]); -assign out.mret = (i[29]&!i[13]&!i[12]&i[6]&i[4]); + assign out.mret = (i[29] & !i[13] & !i[12] & i[6] & i[4]); -assign out.mul = (i[29]&!i[27]&i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[30] + assign out.mul = (i[29]&!i[27]&i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[30] &i[27]&i[13]&!i[6]&i[5]&i[4]&!i[2]) | (i[29]&i[27]&!i[23]&!i[20] &i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[29]&i[27]&!i[21]&i[20] &i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[29]&i[27]&i[24]&i[21] @@ -1633,48 +1778,48 @@ assign out.mul = (i[29]&!i[27]&i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[ &i[4]&!i[2]) | (i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]) | (i[29]&i[27] &i[14]&!i[6]&i[5]&!i[2]); -assign out.rs1_sign = (!i[27]&i[25]&!i[14]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | ( + assign out.rs1_sign = (!i[27]&i[25]&!i[14]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]) | ( !i[27]&i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); -assign out.rs2_sign = (!i[27]&i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.rs2_sign = (!i[27] & i[25] & !i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.low = (i[25]&!i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + assign out.low = (i[25] & !i[14] & !i[13] & !i[12] & i[5] & i[4] & !i[2]); -assign out.div = (!i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]); + assign out.div = (!i[27] & i[25] & i[14] & !i[6] & i[5] & !i[2]); -assign out.rem = (!i[27]&i[25]&i[14]&i[13]&!i[6]&i[5]&!i[2]); + assign out.rem = (!i[27] & i[25] & i[14] & i[13] & !i[6] & i[5] & !i[2]); -assign out.fence = (!i[5]&i[3]); + assign out.fence = (!i[5] & i[3]); -assign out.fence_i = (i[12]&!i[5]&i[3]); + assign out.fence_i = (i[12] & !i[5] & i[3]); -assign out.clz = 
(i[29]&!i[27]&!i[24]&!i[22]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5] + assign out.clz = (i[29]&!i[27]&!i[24]&!i[22]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5] &i[4]&!i[2]); -assign out.ctz = (i[29]&!i[27]&!i[24]&!i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4] + assign out.ctz = (i[29]&!i[27]&!i[24]&!i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4] &!i[2]); -assign out.cpop = (i[29]&!i[27]&!i[24]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.cpop = (i[29]&!i[27]&!i[24]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.sext_b = (i[29]&!i[27]&i[22]&!i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.sext_b = (i[29]&!i[27]&i[22]&!i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.sext_h = (i[29]&!i[27]&i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.sext_h = (i[29]&!i[27]&i[22]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.min = (i[27]&i[25]&i[14]&!i[13]&!i[6]&i[5]&!i[2]); + assign out.min = (i[27] & i[25] & i[14] & !i[13] & !i[6] & i[5] & !i[2]); -assign out.max = (i[27]&i[25]&i[14]&i[13]&!i[6]&i[5]&!i[2]); + assign out.max = (i[27] & i[25] & i[14] & i[13] & !i[6] & i[5] & !i[2]); -assign out.pack = (!i[30]&!i[29]&i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + assign out.pack = (!i[30] & !i[29] & i[27] & !i[25] & !i[13] & !i[12] & i[5] & i[4] & !i[2]); -assign out.packu = (i[30]&i[27]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + assign out.packu = (i[30] & i[27] & !i[13] & !i[12] & i[5] & i[4] & !i[2]); -assign out.packh = (!i[30]&i[27]&!i[25]&i[13]&i[12]&!i[6]&i[5]&!i[2]); + assign out.packh = (!i[30] & i[27] & !i[25] & i[13] & i[12] & !i[6] & i[5] & !i[2]); -assign out.rol = (i[29]&!i[27]&!i[14]&i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.rol = (i[29] & !i[27] & !i[14] & i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.ror = (i[29]&!i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.ror = (i[29] & !i[27] & i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.zbb = 
(!i[30]&!i[29]&i[27]&!i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[13] + assign out.zbb = (!i[30]&!i[29]&i[27]&!i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[13] &!i[12]&i[5]&i[4]&!i[2]) | (i[29]&!i[27]&!i[24]&!i[13]&i[12]&!i[5] &i[4]&!i[2]) | (i[29]&!i[27]&i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]) | ( i[30]&!i[27]&i[14]&!i[12]&!i[6]&i[5]&!i[2]) | (i[30]&!i[27]&i[13] @@ -1683,92 +1828,92 @@ assign out.zbb = (!i[30]&!i[29]&i[27]&!i[24]&!i[23]&!i[22]&!i[21]&!i[20]&!i[13] &i[4]&!i[2]) | (i[30]&i[29]&i[24]&i[23]&!i[22]&!i[21]&!i[20]&i[14] &!i[13]&i[12]&!i[5]&i[4]&!i[2]) | (i[27]&i[25]&i[14]&!i[6]&i[5]&!i[2]); -assign out.bset = (!i[30]&i[29]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.bset = (!i[30] & i[29] & !i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.bclr = (i[30]&!i[29]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.bclr = (i[30] & !i[29] & !i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.binv = (i[30]&i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.binv = (i[30] & i[29] & i[27] & !i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.bext = (i[30]&!i[29]&i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.bext = (i[30] & !i[29] & i[27] & i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.zbs = (i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]) | (i[30]&!i[29] + assign out.zbs = (i[29]&i[27]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]) | (i[30]&!i[29] &i[27]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); -assign out.bcompress = (!i[30]&!i[29]&i[27]&!i[25]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.bcompress = (!i[30]&!i[29]&i[27]&!i[25]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); -assign out.bdecompress = (i[30]&i[27]&i[13]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.bdecompress = (i[30] & i[27] & i[13] & !i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.zbe = (i[30]&i[27]&i[14]&i[13]&!i[12]&!i[6]&i[5]&!i[2]) | (!i[30]&i[27] + assign out.zbe = (i[30]&i[27]&i[14]&i[13]&!i[12]&!i[6]&i[5]&!i[2]) | (!i[30]&i[27] 
&!i[25]&i[13]&i[12]&!i[6]&i[5]&!i[2]) | (!i[30]&!i[29]&i[27]&!i[25] &!i[12]&!i[6]&i[5]&i[4]&!i[2]); -assign out.clmul = (i[27]&i[25]&!i[14]&!i[13]&!i[6]&i[5]&i[4]&!i[2]); + assign out.clmul = (i[27] & i[25] & !i[14] & !i[13] & !i[6] & i[5] & i[4] & !i[2]); -assign out.clmulh = (i[27]&!i[14]&i[13]&i[12]&!i[6]&i[5]&!i[2]); + assign out.clmulh = (i[27] & !i[14] & i[13] & i[12] & !i[6] & i[5] & !i[2]); -assign out.clmulr = (i[27]&i[25]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.clmulr = (i[27] & i[25] & !i[14] & !i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.zbc = (i[27]&i[25]&!i[14]&!i[6]&i[5]&i[4]&!i[2]); + assign out.zbc = (i[27] & i[25] & !i[14] & !i[6] & i[5] & i[4] & !i[2]); -assign out.grev = (i[30]&i[29]&i[27]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.grev = (i[30] & i[29] & i[27] & i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.gorc = (!i[30]&i[29]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.gorc = (!i[30] & i[29] & i[14] & !i[13] & i[12] & !i[6] & i[4] & !i[2]); -assign out.shfl = (!i[30]&!i[29]&i[27]&!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.shfl = (!i[30]&!i[29]&i[27]&!i[25]&!i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); -assign out.unshfl = (!i[30]&!i[29]&i[27]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); + assign out.unshfl = (!i[30]&!i[29]&i[27]&!i[25]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); -assign out.xperm_n = (i[29]&i[27]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.xperm_n = (i[29] & i[27] & !i[14] & !i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.xperm_b = (i[29]&i[27]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + assign out.xperm_b = (i[29] & i[27] & !i[13] & !i[12] & i[5] & i[4] & !i[2]); -assign out.xperm_h = (i[29]&i[27]&i[14]&i[13]&!i[6]&i[5]&!i[2]); + assign out.xperm_h = (i[29] & i[27] & i[14] & i[13] & !i[6] & i[5] & !i[2]); -assign out.zbp = (i[30]&!i[27]&!i[14]&i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[30]&i[27] + assign out.zbp = (i[30]&!i[27]&!i[14]&i[12]&!i[6]&i[5]&i[4]&!i[2]) | (!i[30]&i[27] 
&!i[25]&i[13]&i[12]&!i[6]&i[5]&!i[2]) | (i[30]&!i[27]&i[13]&!i[6] &i[5]&i[4]&!i[2]) | (i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | ( i[30]&i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | (i[29]&i[27]&!i[12]&!i[6] &i[5]&i[4]&!i[2]) | (!i[30]&!i[29]&i[27]&!i[25]&!i[13]&i[12]&!i[6] &i[4]&!i[2]) | (i[29]&i[14]&!i[13]&i[12]&!i[6]&i[4]&!i[2]); -assign out.crc32_b = (i[29]&!i[27]&i[24]&!i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12] + assign out.crc32_b = (i[29]&!i[27]&i[24]&!i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12] &!i[5]&i[4]&!i[2]); -assign out.crc32_h = (i[29]&!i[27]&i[24]&!i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4] + assign out.crc32_h = (i[29]&!i[27]&i[24]&!i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4] &!i[2]); -assign out.crc32_w = (i[29]&!i[27]&i[24]&!i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4] + assign out.crc32_w = (i[29]&!i[27]&i[24]&!i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4] &!i[2]); -assign out.crc32c_b = (i[29]&!i[27]&i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5] + assign out.crc32c_b = (i[29]&!i[27]&i[23]&!i[21]&!i[20]&!i[14]&!i[13]&i[12]&!i[5] &i[4]&!i[2]); -assign out.crc32c_h = (i[29]&!i[27]&i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.crc32c_h = (i[29]&!i[27]&i[23]&i[20]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.crc32c_w = (i[29]&!i[27]&i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.crc32c_w = (i[29]&!i[27]&i[23]&i[21]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); -assign out.zbr = (i[29]&!i[27]&i[24]&!i[14]&!i[13]&i[12]&!i[5]&i[4]&!i[2]); + assign out.zbr = (i[29] & !i[27] & i[24] & !i[14] & !i[13] & i[12] & !i[5] & i[4] & !i[2]); -assign out.bfp = (i[30]&i[27]&i[13]&i[12]&!i[6]&i[5]&!i[2]); + assign out.bfp = (i[30] & i[27] & i[13] & i[12] & !i[6] & i[5] & !i[2]); -assign out.zbf = (!i[30]&!i[29]&i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | ( + assign out.zbf = (!i[30]&!i[29]&i[27]&!i[25]&!i[13]&!i[12]&i[5]&i[4]&!i[2]) | ( i[27]&!i[25]&i[13]&i[12]&!i[6]&i[5]&!i[2]); -assign out.sh1add = 
(i[29]&!i[27]&!i[14]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.sh1add = (i[29] & !i[27] & !i[14] & !i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.sh2add = (i[29]&!i[27]&i[14]&!i[13]&!i[12]&i[5]&i[4]&!i[2]); + assign out.sh2add = (i[29] & !i[27] & i[14] & !i[13] & !i[12] & i[5] & i[4] & !i[2]); -assign out.sh3add = (i[29]&!i[27]&i[14]&i[13]&!i[6]&i[5]&!i[2]); + assign out.sh3add = (i[29] & !i[27] & i[14] & i[13] & !i[6] & i[5] & !i[2]); -assign out.zba = (i[29]&!i[27]&!i[12]&!i[6]&i[5]&i[4]&!i[2]); + assign out.zba = (i[29] & !i[27] & !i[12] & !i[6] & i[5] & i[4] & !i[2]); -assign out.pm_alu = (i[28]&i[20]&!i[13]&!i[12]&i[4]) | (!i[30]&!i[29]&!i[27]&!i[25] + assign out.pm_alu = (i[28]&i[20]&!i[13]&!i[12]&i[4]) | (!i[30]&!i[29]&!i[27]&!i[25] &!i[6]&i[4]) | (!i[29]&!i[27]&!i[25]&!i[13]&i[12]&!i[6]&i[4]) | ( !i[29]&!i[27]&!i[25]&!i[14]&!i[6]&i[4]) | (i[13]&!i[5]&i[4]) | (i[4] &i[2]) | (!i[12]&!i[5]&i[4]); -assign out.legal = (!i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23] + assign out.legal = (!i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23] &!i[22]&i[21]&!i[20]&!i[19]&!i[18]&!i[17]&!i[16]&!i[15]&!i[14]&!i[11] &!i[10]&!i[9]&!i[8]&!i[7]&i[6]&i[5]&i[4]&!i[3]&!i[2]&i[1]&i[0]) | ( !i[31]&!i[30]&!i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23]&i[22] @@ -1817,4 +1962,4 @@ assign out.legal = (!i[31]&!i[30]&i[29]&i[28]&!i[27]&!i[26]&!i[25]&!i[24]&!i[23] -endmodule // el2_dec_dec_ctl +endmodule // el2_dec_dec_ctl diff --git a/Flow/design/dec/el2_dec_gpr_ctl.sv b/Flow/design/dec/el2_dec_gpr_ctl.sv index 1dc957b..636576e 100644 --- a/Flow/design/dec/el2_dec_gpr_ctl.sv +++ b/Flow/design/dec/el2_dec_gpr_ctl.sv @@ -14,69 +14,74 @@ // limitations under the License. 
module el2_dec_gpr_ctl -import el2_pkg::*; + import el2_pkg::*; #( - `include "el2_param.vh" - ) ( - input logic [4:0] raddr0, // logical read addresses - input logic [4:0] raddr1, + `include "el2_param.vh" +) ( + input logic [4:0] raddr0, // logical read addresses + input logic [4:0] raddr1, - input logic wen0, // write enable - input logic [4:0] waddr0, // write address - input logic [31:0] wd0, // write data + input logic wen0, // write enable + input logic [ 4:0] waddr0, // write address + input logic [31:0] wd0, // write data - input logic wen1, // write enable - input logic [4:0] waddr1, // write address - input logic [31:0] wd1, // write data + input logic wen1, // write enable + input logic [ 4:0] waddr1, // write address + input logic [31:0] wd1, // write data - input logic wen2, // write enable - input logic [4:0] waddr2, // write address - input logic [31:0] wd2, // write data + input logic wen2, // write enable + input logic [ 4:0] waddr2, // write address + input logic [31:0] wd2, // write data - input logic clk, - input logic rst_l, + input logic clk, + input logic rst_l, - output logic [31:0] rd0, // read data + output logic [31:0] rd0, // read data output logic [31:0] rd1, - input logic scan_mode + input logic scan_mode ); - logic [31:1] [31:0] gpr_out; // 31 x 32 bit GPRs - logic [31:1] [31:0] gpr_in; - logic [31:1] w0v,w1v,w2v; - logic [31:1] gpr_wr_en; + logic [31:1][31:0] gpr_out; // 31 x 32 bit GPRs + logic [31:1][31:0] gpr_in; + logic [31:1] w0v, w1v, w2v; + logic [31:1] gpr_wr_en; - // GPR Write Enables - assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]); - for ( genvar j=1; j<32; j++ ) begin : gpr - rvdffe #(32) gprff (.*, .en(gpr_wr_en[j]), .din(gpr_in[j][31:0]), .dout(gpr_out[j][31:0])); - end : gpr + // GPR Write Enables + assign gpr_wr_en[31:1] = (w0v[31:1] | w1v[31:1] | w2v[31:1]); + for (genvar j = 1; j < 32; j++) begin : gpr + rvdffe #(32) gprff ( + .*, + .en (gpr_wr_en[j]), + .din (gpr_in[j][31:0]), + 
.dout(gpr_out[j][31:0]) + ); + end : gpr - // the read out - always_comb begin - rd0[31:0] = 32'b0; - rd1[31:0] = 32'b0; - w0v[31:1] = 31'b0; - w1v[31:1] = 31'b0; - w2v[31:1] = 31'b0; - gpr_in[31:1] = '0; + // the read out + always_comb begin + rd0[31:0] = 32'b0; + rd1[31:0] = 32'b0; + w0v[31:1] = 31'b0; + w1v[31:1] = 31'b0; + w2v[31:1] = 31'b0; + gpr_in[31:1] = '0; - // GPR Read logic - for (int j=1; j<32; j++ ) begin - rd0[31:0] |= ({32{(raddr0[4:0]== 5'(j))}} & gpr_out[j][31:0]); - rd1[31:0] |= ({32{(raddr1[4:0]== 5'(j))}} & gpr_out[j][31:0]); - end + // GPR Read logic + for (int j = 1; j < 32; j++) begin + rd0[31:0] |= ({32{(raddr0[4:0] == 5'(j))}} & gpr_out[j][31:0]); + rd1[31:0] |= ({32{(raddr1[4:0] == 5'(j))}} & gpr_out[j][31:0]); + end - // GPR Write logic - for (int j=1; j<32; j++ ) begin - w0v[j] = wen0 & (waddr0[4:0]== 5'(j) ); - w1v[j] = wen1 & (waddr1[4:0]== 5'(j) ); - w2v[j] = wen2 & (waddr2[4:0]== 5'(j) ); - gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) | + // GPR Write logic + for (int j = 1; j < 32; j++) begin + w0v[j] = wen0 & (waddr0[4:0] == 5'(j)); + w1v[j] = wen1 & (waddr1[4:0] == 5'(j)); + w2v[j] = wen2 & (waddr2[4:0] == 5'(j)); + gpr_in[j] = ({32{w0v[j]}} & wd0[31:0]) | ({32{w1v[j]}} & wd1[31:0]) | ({32{w2v[j]}} & wd2[31:0]); - end - end // always_comb begin + end + end // always_comb begin endmodule diff --git a/Flow/design/dec/el2_dec_ib_ctl.sv b/Flow/design/dec/el2_dec_ib_ctl.sv index 8dbaffd..1f13822 100644 --- a/Flow/design/dec/el2_dec_ib_ctl.sv +++ b/Flow/design/dec/el2_dec_ib_ctl.sv @@ -14,151 +14,151 @@ // limitations under the License. 
module el2_dec_ib_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic dbg_cmd_valid, // valid dbg cmd + `include "el2_param.vh" +) ( + input logic dbg_cmd_valid, // valid dbg cmd - input logic dbg_cmd_write, // dbg cmd is write - input logic [1:0] dbg_cmd_type, // dbg type - input logic [31:0] dbg_cmd_addr, // expand to 31:0 + input logic dbg_cmd_write, // dbg cmd is write + input logic [ 1:0] dbg_cmd_type, // dbg type + input logic [31:0] dbg_cmd_addr, // expand to 31:0 - input el2_br_pkt_t i0_brp, // i0 branch packet from aligner - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index - input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR - input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag - input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index + input el2_br_pkt_t i0_brp, // i0 branch packet from aligner + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + input logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + input logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + input logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index - input logic ifu_i0_pc4, // i0 is 4B inst else 2B - input logic ifu_i0_valid, // i0 valid from ifu - input logic ifu_i0_icaf, // i0 instruction access fault - input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type + input logic ifu_i0_pc4, // i0 is 4B inst else 2B + input logic ifu_i0_valid, // i0 valid from ifu + input logic ifu_i0_icaf, // i0 instruction access fault + input logic [1:0] ifu_i0_icaf_type, // i0 instruction access fault type - input logic ifu_i0_icaf_second, // i0 has access fault on second 2B of 4B inst - input logic ifu_i0_dbecc, // i0 double-bit error - input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner - input logic [31:1] ifu_i0_pc, // i0 pc from the aligner + input logic ifu_i0_icaf_second, // i0 has access fault on 
second 2B of 4B inst + input logic ifu_i0_dbecc, // i0 double-bit error + input logic [31:0] ifu_i0_instr, // i0 instruction from the aligner + input logic [31:1] ifu_i0_pc, // i0 pc from the aligner - output logic dec_ib0_valid_d, // ib0 valid - output logic dec_debug_valid_d, // Debug read or write at D-stage + output logic dec_ib0_valid_d, // ib0 valid + output logic dec_debug_valid_d, // Debug read or write at D-stage - output logic [31:0] dec_i0_instr_d, // i0 inst at decode + output logic [31:0] dec_i0_instr_d, // i0 inst at decode - output logic [31:1] dec_i0_pc_d, // i0 pc at decode + output logic [31:1] dec_i0_pc_d, // i0 pc at decode - output logic dec_i0_pc4_d, // i0 is 4B inst else 2B + output logic dec_i0_pc4_d, // i0 is 4B inst else 2B - output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index - output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR - output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag - output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index + output el2_br_pkt_t dec_i0_brp, // i0 branch packet at decode + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_i0_bp_index, // i0 branch index + output logic [pt.BHT_GHR_SIZE-1:0] dec_i0_bp_fghr, // BP FGHR + output logic [pt.BTB_BTAG_SIZE-1:0] dec_i0_bp_btag, // BP tag + output logic [$clog2(pt.BTB_SIZE)-1:0] dec_i0_bp_fa_index, // Fully associt btb index - output logic dec_i0_icaf_d, // i0 instruction access fault at decode - output logic dec_i0_icaf_second_d, // i0 instruction access fault on second 2B of 4B inst - output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type - output logic dec_i0_dbecc_d, // i0 double-bit error at decode - output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted + output logic dec_i0_icaf_d, // i0 instruction access fault at decode + output logic dec_i0_icaf_second_d, // 
i0 instruction access fault on second 2B of 4B inst + output logic [1:0] dec_i0_icaf_type_d, // i0 instruction access fault type + output logic dec_i0_dbecc_d, // i0 double-bit error at decode + output logic dec_debug_wdata_rs1_d, // put debug write data onto rs1 source: machine is halted - output logic dec_debug_fence_d // debug fence inst + output logic dec_debug_fence_d // debug fence inst - ); +); - logic debug_valid; - logic [4:0] dreg; - logic [11:0] dcsr; - logic [31:0] ib0, ib0_debug_in; + logic debug_valid; + logic [ 4:0] dreg; + logic [11:0] dcsr; + logic [31:0] ib0, ib0_debug_in; - logic debug_read; - logic debug_write; - logic debug_read_gpr; - logic debug_write_gpr; - logic debug_read_csr; - logic debug_write_csr; + logic debug_read; + logic debug_write; + logic debug_read_gpr; + logic debug_write_gpr; + logic debug_read_csr; + logic debug_write_csr; - logic [34:0] ifu_i0_pcdata, pc0; + logic [34:0] ifu_i0_pcdata, pc0; - assign ifu_i0_pcdata[34:0] = { ifu_i0_icaf_second, ifu_i0_dbecc, ifu_i0_icaf, - ifu_i0_pc[31:1], ifu_i0_pc4 }; + assign ifu_i0_pcdata[34:0] = { + ifu_i0_icaf_second, ifu_i0_dbecc, ifu_i0_icaf, ifu_i0_pc[31:1], ifu_i0_pc4 + }; - assign pc0[34:0] = ifu_i0_pcdata[34:0]; + assign pc0[34:0] = ifu_i0_pcdata[34:0]; - assign dec_i0_icaf_second_d = pc0[34]; // icaf's can only decode as i0 + assign dec_i0_icaf_second_d = pc0[34]; // icaf's can only decode as i0 - assign dec_i0_dbecc_d = pc0[33]; + assign dec_i0_dbecc_d = pc0[33]; - assign dec_i0_icaf_d = pc0[32]; - assign dec_i0_pc_d[31:1] = pc0[31:1]; - assign dec_i0_pc4_d = pc0[0]; + assign dec_i0_icaf_d = pc0[32]; + assign dec_i0_pc_d[31:1] = pc0[31:1]; + assign dec_i0_pc4_d = pc0[0]; - assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0]; + assign dec_i0_icaf_type_d[1:0] = ifu_i0_icaf_type[1:0]; -// GPR accesses + // GPR accesses -// put reg to read on rs1 -// read -> or %x0, %reg,%x0 {000000000000,reg[4:0],110000000110011} + // put reg to read on rs1 + // read -> or %x0, %reg,%x0 
{000000000000,reg[4:0],110000000110011} -// put write date on rs1 -// write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011} + // put write date on rs1 + // write -> or %reg, %x0, %x0 {00000000000000000110,reg[4:0],0110011} -// CSR accesses -// csr is of form rd, csr, rs1 + // CSR accesses + // csr is of form rd, csr, rs1 -// read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011} + // read -> csrrs %x0, %csr, %x0 {csr[11:0],00000010000001110011} -// put write data on rs1 -// write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011} + // put write data on rs1 + // write -> csrrw %x0, %csr, %x0 {csr[11:0],00000001000001110011} -// abstract memory command not done here - assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2); + // abstract memory command not done here + assign debug_valid = dbg_cmd_valid & (dbg_cmd_type[1:0] != 2'h2); - assign debug_read = debug_valid & ~dbg_cmd_write; - assign debug_write = debug_valid & dbg_cmd_write; + assign debug_read = debug_valid & ~dbg_cmd_write; + assign debug_write = debug_valid & dbg_cmd_write; - assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0]==2'h0); - assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0]==2'h0); - assign debug_read_csr = debug_read & (dbg_cmd_type[1:0]==2'h1); - assign debug_write_csr = debug_write & (dbg_cmd_type[1:0]==2'h1); + assign debug_read_gpr = debug_read & (dbg_cmd_type[1:0] == 2'h0); + assign debug_write_gpr = debug_write & (dbg_cmd_type[1:0] == 2'h0); + assign debug_read_csr = debug_read & (dbg_cmd_type[1:0] == 2'h1); + assign debug_write_csr = debug_write & (dbg_cmd_type[1:0] == 2'h1); - assign dreg[4:0] = dbg_cmd_addr[4:0]; - assign dcsr[11:0] = dbg_cmd_addr[11:0]; + assign dreg[4:0] = dbg_cmd_addr[4:0]; + assign dcsr[11:0] = dbg_cmd_addr[11:0]; - assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & {12'b000000000000,dreg[4:0],15'b110000000110011}) | + assign ib0_debug_in[31:0] = ({32{debug_read_gpr}} & 
{12'b000000000000,dreg[4:0],15'b110000000110011}) | ({32{debug_write_gpr}} & {20'b00000000000000000110,dreg[4:0],7'b0110011}) | ({32{debug_read_csr}} & {dcsr[11:0],20'b00000010000001110011}) | ({32{debug_write_csr}} & {dcsr[11:0],20'b00000001000001110011}); - // machine is in halted state, pipe empty, write will always happen next cycle + // machine is in halted state, pipe empty, write will always happen next cycle - assign dec_debug_wdata_rs1_d = debug_write_gpr | debug_write_csr; + assign dec_debug_wdata_rs1_d = debug_write_gpr | debug_write_csr; - // special fence csr for use only in debug mode + // special fence csr for use only in debug mode - assign dec_debug_fence_d = debug_write_csr & (dcsr[11:0] == 12'h7c4); + assign dec_debug_fence_d = debug_write_csr & (dcsr[11:0] == 12'h7c4); - assign ib0[31:0] = (debug_valid) ? ib0_debug_in[31:0] : ifu_i0_instr[31:0]; + assign ib0[31:0] = (debug_valid) ? ib0_debug_in[31:0] : ifu_i0_instr[31:0]; - assign dec_ib0_valid_d = ifu_i0_valid | debug_valid; + assign dec_ib0_valid_d = ifu_i0_valid | debug_valid; - assign dec_debug_valid_d = debug_valid; + assign dec_debug_valid_d = debug_valid; - assign dec_i0_instr_d[31:0] = ib0[31:0]; + assign dec_i0_instr_d[31:0] = ib0[31:0]; - assign dec_i0_brp = i0_brp; - assign dec_i0_bp_index = ifu_i0_bp_index; - assign dec_i0_bp_fghr = ifu_i0_bp_fghr; - assign dec_i0_bp_btag = ifu_i0_bp_btag; - assign dec_i0_bp_fa_index = ifu_i0_fa_index; + assign dec_i0_brp = i0_brp; + assign dec_i0_bp_index = ifu_i0_bp_index; + assign dec_i0_bp_fghr = ifu_i0_bp_fghr; + assign dec_i0_bp_btag = ifu_i0_bp_btag; + assign dec_i0_bp_fa_index = ifu_i0_fa_index; endmodule diff --git a/Flow/design/dec/el2_dec_tlu_ctl.sv b/Flow/design/dec/el2_dec_tlu_ctl.sv index 46dba59..c61c0d2 100644 --- a/Flow/design/dec/el2_dec_tlu_ctl.sv +++ b/Flow/design/dec/el2_dec_tlu_ctl.sv @@ -24,848 +24,1184 @@ //******************************************************************************** module el2_dec_tlu_ctl -import 
el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, - input logic free_clk, - input logic free_l2clk, - input logic rst_l, - input logic scan_mode, - - input logic [31:1] rst_vec, // reset vector, from core pins - input logic nmi_int, // nmi pin - input logic [31:1] nmi_vec, // nmi vector - input logic i_cpu_halt_req, // Asynchronous Halt request to CPU - input logic i_cpu_run_req, // Asynchronous Restart request to CPU - - input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle - - - // perf counter inputs - input logic ifu_pmu_instr_aligned, // aligned instructions - input logic ifu_pmu_fetch_stall, // fetch unit stalled - input logic ifu_pmu_ic_miss, // icache miss - input logic ifu_pmu_ic_hit, // icache hit - input logic ifu_pmu_bus_error, // Instruction side bus error - input logic ifu_pmu_bus_busy, // Instruction side bus busy - input logic ifu_pmu_bus_trxn, // Instruction side bus transaction - input logic dec_pmu_instr_decoded, // decoded instructions - input logic dec_pmu_decode_stall, // decode stall - input logic dec_pmu_presync_stall, // decode stall due to presync'd inst - input logic dec_pmu_postsync_stall,// decode stall due to postsync'd inst - input logic lsu_store_stall_any, // SB or WB is full, stall decode - input logic dma_dccm_stall_any, // DMA stall of lsu - input logic dma_iccm_stall_any, // DMA stall of ifu - input logic exu_pmu_i0_br_misp, // pipe 0 branch misp - input logic exu_pmu_i0_br_ataken, // pipe 0 branch actual taken - input logic exu_pmu_i0_pc4, // pipe 0 4 byte branch - input logic lsu_pmu_bus_trxn, // D side bus transaction - input logic lsu_pmu_bus_misaligned, // D side bus misaligned - input logic lsu_pmu_bus_error, // D side bus error - input logic lsu_pmu_bus_busy, // D side bus busy - input logic lsu_pmu_load_external_m, // D side bus load - input logic lsu_pmu_store_external_m, // D side bus store - input logic dma_pmu_dccm_read, // 
DMA DCCM read - input logic dma_pmu_dccm_write, // DMA DCCM write - input logic dma_pmu_any_read, // DMA read - input logic dma_pmu_any_write, // DMA write - - input logic [31:1] lsu_fir_addr, // Fast int address - input logic [1:0] lsu_fir_error, // Fast int lookup error - - input logic iccm_dma_sb_error, // I side dma single bit error - - input el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu precise exception/error packet - input logic lsu_single_ecc_error_incr, // LSU inc SB error counter - - input logic dec_pause_state, // Pause counter not zero - input logic lsu_imprecise_error_store_any, // store bus error - input logic lsu_imprecise_error_load_any, // store bus error - input logic [31:0] lsu_imprecise_error_addr_any, // store bus error address - - input logic dec_csr_wen_unq_d, // valid csr with write - for csr legal - input logic dec_csr_any_unq_d, // valid csr - for csr legal - input logic [11:0] dec_csr_rdaddr_d, // read address for csr - - input logic dec_csr_wen_r, // csr write enable at wb - input logic [11:0] dec_csr_wraddr_r, // write address for csr - input logic [31:0] dec_csr_wrdata_r, // csr write data at wb - - input logic dec_csr_stall_int_ff, // csr is mie/mstatus - - input logic dec_tlu_i0_valid_r, // pipe 0 op at e4 is valid - - input logic [31:1] exu_npc_r, // for NPC tracking - - input logic [31:1] dec_tlu_i0_pc_r, // for PC/NPC tracking - - input el2_trap_pkt_t dec_tlu_packet_r, // exceptions known at decode - - input logic [31:0] dec_illegal_inst, // For mtval - input logic dec_i0_decode_d, // decode valid, used for clean icache diagnostics - - // branch info from pipe0 for errors or counter updates - input logic [1:0] exu_i0_br_hist_r, // history - input logic exu_i0_br_error_r, // error - input logic exu_i0_br_start_error_r, // start error - input logic exu_i0_br_valid_r, // valid - input logic exu_i0_br_mp_r, // mispredict - input logic exu_i0_br_middle_r, // middle of bank - - // branch info from pipe1 for errors or counter updates - - 
input logic exu_i0_br_way_r, // way hit or repl - - output logic dec_tlu_core_empty, // core is empty - // Debug start - output logic dec_dbg_cmd_done, // abstract command done - output logic dec_dbg_cmd_fail, // abstract command failed - output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command - output logic dec_tlu_debug_mode, // Core is in debug mode - output logic dec_tlu_resume_ack, // Resume acknowledge - output logic dec_tlu_debug_stall, // stall decode while waiting on core to empty - - output logic dec_tlu_flush_noredir_r , // Tell fetch to idle on this flush - output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC - output logic dec_tlu_flush_leak_one_r, // single step - output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc. This is the D stage of the error - - output logic dec_tlu_flush_extint, // fast ext int started - output logic [31:2] dec_tlu_meihap, // meihap for fast int - - input logic dbg_halt_req, // DM requests a halt - input logic dbg_resume_req, // DM requests a resume - input logic ifu_miss_state_idle, // I-side miss buffer empty - input logic lsu_idle_any, // lsu is idle - input logic dec_div_active, // oop div is active - output el2_trigger_pkt_t [3:0] trigger_pkt_any, // trigger info for trigger blocks - - input logic ifu_ic_error_start, // IC single bit error - input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error - - - input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data - input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid - output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics - // Debug end - - input logic [7:0] pic_claimid, // pic claimid for csr - input logic [3:0] pic_pl, // pic priv level for csr - input logic mhwakeup, // high priority external int, wakeup if halted - - input logic mexintpend, // external interrupt pending - input logic timer_int, // timer interrupt 
pending - input logic soft_int, // software interrupt pending - - output logic o_cpu_halt_status, // PMU interface, halted - output logic o_cpu_halt_ack, // halt req ack - output logic o_cpu_run_ack, // run req ack - output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request - - input logic [31:4] core_id, // Core ID - - // external MPC halt/run interface - input logic mpc_debug_halt_req, // Async halt request - input logic mpc_debug_run_req, // Async run request - input logic mpc_reset_run_req, // Run/halt after reset - output logic mpc_debug_halt_ack, // Halt ack - output logic mpc_debug_run_ack, // Run ack - output logic debug_brkpt_status, // debug breakpoint - - output logic [3:0] dec_tlu_meicurpl, // to PIC - output logic [3:0] dec_tlu_meipt, // to PIC - - - output logic [31:0] dec_csr_rddata_d, // csr read data at wb - output logic dec_csr_legal_d, // csr indicates legal operation - - output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // branch pkt to bp - - output logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state - output logic dec_tlu_flush_lower_wb, // commit has a flush (exception, int, mispredict at e4) - output logic dec_tlu_i0_commit_cmt, // committed an instruction - - output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state - output logic dec_tlu_flush_lower_r, // commit has a flush (exception, int) - output logic [31:1] dec_tlu_flush_path_r, // flush pc - output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache - output logic dec_tlu_wr_pause_r, // CSR write to pause reg is at R. 
- output logic dec_tlu_flush_pause_r, // Flush is due to pause - - output logic dec_tlu_presync_d, // CSR read needs to be presync'd - output logic dec_tlu_postsync_d, // CSR needs to be presync'd - - - output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control - - output logic dec_tlu_force_halt, // halt has been forced - - output logic dec_tlu_perfcnt0, // toggles when pipe0 perf counter 0 has an event inc - output logic dec_tlu_perfcnt1, // toggles when pipe0 perf counter 1 has an event inc - output logic dec_tlu_perfcnt2, // toggles when pipe0 perf counter 2 has an event inc - output logic dec_tlu_perfcnt3, // toggles when pipe0 perf counter 3 has an event inc - - output logic dec_tlu_i0_exc_valid_wb1, // pipe 0 exception valid - output logic dec_tlu_i0_valid_wb1, // pipe 0 valid - output logic dec_tlu_int_valid_wb1, // pipe 2 int valid - output logic [4:0] dec_tlu_exc_cause_wb1, // exception or int cause - output logic [31:0] dec_tlu_mtval_wb1, // MTVAL value - - // feature disable from mfdc - output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding - output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address - output logic dec_tlu_core_ecc_disable, // disable core ECC - output logic dec_tlu_bpred_disable, // disable branch prediction - output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing - output logic dec_tlu_pipelining_disable, // disable pipelining - output logic dec_tlu_trace_disable, // disable trace - output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] - - // clock gating overrides from mcgc - output logic dec_tlu_misc_clk_override, // override misc clock domain gating - output logic dec_tlu_dec_clk_override, // override decode clock domain gating - output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating - output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating - output logic 
dec_tlu_bus_clk_override, // override bus clock domain gating - output logic dec_tlu_pic_clk_override, // override PIC clock domain gating - output logic dec_tlu_picio_clk_override,// override PICIO clock domain gating - output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating - output logic dec_tlu_icm_clk_override // override ICCM clock domain gating - - ); - - logic clk_override, e4e5_int_clk, nmi_fir_type, nmi_lsu_load_type, nmi_lsu_store_type, nmi_int_detected_f, nmi_lsu_load_type_f, - nmi_lsu_store_type_f, allow_dbg_halt_csr_write, dbg_cmd_done_ns, i_cpu_run_req_d1_raw, debug_mode_status, lsu_single_ecc_error_r_d1, - sel_npc_r, sel_npc_resume, ce_int, - nmi_in_debug_mode, dpc_capture_npc, dpc_capture_pc, tdata_load, tdata_opcode, tdata_action, perfcnt_halted, tdata_chain, - tdata_kill_write; - - - logic reset_delayed, reset_detect, reset_detected; - logic wr_mstatus_r, wr_mtvec_r, wr_mcyclel_r, wr_mcycleh_r, - wr_minstretl_r, wr_minstreth_r, wr_mscratch_r, wr_mepc_r, wr_mcause_r, wr_mscause_r, wr_mtval_r, - wr_mrac_r, wr_meihap_r, wr_meicurpl_r, wr_meipt_r, wr_dcsr_r, - wr_dpc_r, wr_meicidpl_r, wr_meivt_r, wr_meicpct_r, wr_micect_r, wr_miccmect_r, wr_mfdht_r, wr_mfdhs_r, - wr_mdccmect_r,wr_mhpme3_r, wr_mhpme4_r, wr_mhpme5_r, wr_mhpme6_r; - logic wr_mpmc_r; - logic [1:1] mpmc_b_ns, mpmc, mpmc_b; - logic set_mie_pmu_fw_halt, fw_halted_ns, fw_halted; - logic wr_mcountinhibit_r; - logic [6:0] mcountinhibit; - logic wr_mtsel_r, wr_mtdata1_t0_r, wr_mtdata1_t1_r, wr_mtdata1_t2_r, wr_mtdata1_t3_r, wr_mtdata2_t0_r, wr_mtdata2_t1_r, wr_mtdata2_t2_r, wr_mtdata2_t3_r; - logic [31:0] mtdata2_t0, mtdata2_t1, mtdata2_t2, mtdata2_t3, mtdata2_tsel_out, mtdata1_tsel_out; - logic [9:0] mtdata1_t0_ns, mtdata1_t0, mtdata1_t1_ns, mtdata1_t1, mtdata1_t2_ns, mtdata1_t2, mtdata1_t3_ns, mtdata1_t3; - logic [9:0] tdata_wrdata_r; - logic [1:0] mtsel_ns, mtsel; - logic tlu_i0_kill_writeb_r; - logic [1:0] mstatus_ns, mstatus; - logic [1:0] mfdhs_ns, mfdhs; - logic 
[31:0] force_halt_ctr, force_halt_ctr_f; - logic force_halt; - logic [5:0] mfdht, mfdht_ns; - logic mstatus_mie_ns; - logic [30:0] mtvec_ns, mtvec; - logic [15:2] dcsr_ns, dcsr; - logic [5:0] mip_ns, mip; - logic [5:0] mie_ns, mie; - logic [31:0] mcyclel_ns, mcyclel; - logic [31:0] mcycleh_ns, mcycleh; - logic [31:0] minstretl_ns, minstretl; - logic [31:0] minstreth_ns, minstreth; - logic [31:0] micect_ns, micect, miccmect_ns, miccmect, mdccmect_ns, mdccmect; - logic [26:0] micect_inc, miccmect_inc, mdccmect_inc; - logic [31:0] mscratch; - logic [31:0] mhpmc3, mhpmc3_ns, mhpmc4, mhpmc4_ns, mhpmc5, mhpmc5_ns, mhpmc6, mhpmc6_ns; - logic [31:0] mhpmc3h, mhpmc3h_ns, mhpmc4h, mhpmc4h_ns, mhpmc5h, mhpmc5h_ns, mhpmc6h, mhpmc6h_ns; - logic [9:0] mhpme3, mhpme4, mhpme5, mhpme6; - logic [31:0] mrac; - logic [9:2] meihap; - logic [31:10] meivt; - logic [3:0] meicurpl_ns, meicurpl; - logic [3:0] meicidpl_ns, meicidpl; - logic [3:0] meipt_ns, meipt; - logic [31:0] mdseac; - logic mdseac_locked_ns, mdseac_locked_f, mdseac_en, nmi_lsu_detected; - logic [31:1] mepc_ns, mepc; - logic [31:1] dpc_ns, dpc; - logic [31:0] mcause_ns, mcause; - logic [3:0] mscause_ns, mscause, mscause_type; - logic [31:0] mtval_ns, mtval; - logic dec_pause_state_f, dec_tlu_wr_pause_r_d1, pause_expired_r, pause_expired_wb; - logic tlu_flush_lower_r, tlu_flush_lower_r_d1; - logic [31:1] tlu_flush_path_r, tlu_flush_path_r_d1; - logic i0_valid_wb; - logic tlu_i0_commit_cmt; - logic [31:1] vectored_path, interrupt_path; - logic [16:0] dicawics_ns, dicawics; - logic wr_dicawics_r, wr_dicad0_r, wr_dicad1_r, wr_dicad0h_r; - logic [31:0] dicad0_ns, dicad0, dicad0h_ns, dicad0h; - - logic [6:0] dicad1_ns, dicad1_raw; - logic [31:0] dicad1; - logic ebreak_r, ebreak_to_debug_mode_r, ecall_r, illegal_r, mret_r, inst_acc_r, fence_i_r, - ic_perr_r, iccm_sbecc_r, ebreak_to_debug_mode_r_d1, kill_ebreak_count_r, inst_acc_second_r; - logic ce_int_ready, ext_int_ready, timer_int_ready, soft_int_ready, int_timer0_int_ready, 
int_timer1_int_ready, mhwakeup_ready, - take_ext_int, take_ce_int, take_timer_int, take_soft_int, take_int_timer0_int, take_int_timer1_int, take_nmi, take_nmi_r_d1, int_timer0_int_possible, int_timer1_int_possible; - logic i0_exception_valid_r, interrupt_valid_r, i0_exception_valid_r_d1, interrupt_valid_r_d1, exc_or_int_valid_r, exc_or_int_valid_r_d1, mdccme_ce_req, miccme_ce_req, mice_ce_req; - logic synchronous_flush_r; - logic [4:0] exc_cause_r, exc_cause_wb; - logic mcyclel_cout, mcyclel_cout_f, mcyclela_cout; - logic [31:0] mcyclel_inc; - logic [31:0] mcycleh_inc; - - logic minstretl_cout, minstretl_cout_f, minstret_enable, minstretl_cout_ns, minstretl_couta; - - logic [31:0] minstretl_inc, minstretl_read; - logic [31:0] minstreth_inc, minstreth_read; - logic [31:1] pc_r, pc_r_d1, npc_r, npc_r_d1; - logic valid_csr; - logic rfpc_i0_r; - logic lsu_i0_rfnpc_r; - logic dec_tlu_br0_error_r, dec_tlu_br0_start_error_r, dec_tlu_br0_v_r; - logic lsu_i0_exc_r, lsu_i0_exc_r_raw, lsu_exc_ma_r, lsu_exc_acc_r, lsu_exc_st_r, - lsu_exc_valid_r, lsu_exc_valid_r_raw, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1, block_interrupts; - logic i0_trigger_eval_r; - - logic request_debug_mode_r, request_debug_mode_r_d1, request_debug_mode_done, request_debug_mode_done_f; - logic take_halt, halt_taken, halt_taken_f, internal_dbg_halt_mode, dbg_tlu_halted_f, take_reset, - dbg_tlu_halted, core_empty, lsu_idle_any_f, ifu_miss_state_idle_f, resume_ack_ns, - debug_halt_req_f, debug_resume_req_f_raw, debug_resume_req_f, enter_debug_halt_req, dcsr_single_step_done, dcsr_single_step_done_f, - debug_halt_req_d1, debug_halt_req_ns, dcsr_single_step_running, dcsr_single_step_running_f, internal_dbg_halt_timers; - - logic [3:0] i0_trigger_r, trigger_action, trigger_enabled, - i0_trigger_chain_masked_r; - logic i0_trigger_hit_r, i0_trigger_hit_raw_r, i0_trigger_action_r, - trigger_hit_r_d1, - mepc_trigger_hit_sel_pc_r; - logic [3:0] update_hit_bit_r, i0_iside_trigger_has_pri_r,i0trigger_qual_r, 
i0_lsu_trigger_has_pri_r; - logic cpu_halt_status, cpu_halt_ack, cpu_run_ack, ext_halt_pulse, i_cpu_halt_req_d1, i_cpu_run_req_d1; - - logic inst_acc_r_raw, trigger_hit_dmode_r, trigger_hit_dmode_r_d1; - logic [9:0] mcgc, mcgc_ns, mcgc_int; - logic [18:0] mfdc; - logic i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, pmu_fw_halt_req_ns, pmu_fw_halt_req_f, int_timer_stalled, - fw_halt_req, enter_pmu_fw_halt_req, pmu_fw_tlu_halted, pmu_fw_tlu_halted_f, internal_pmu_fw_halt_mode, - internal_pmu_fw_halt_mode_f, int_timer0_int_hold, int_timer1_int_hold, int_timer0_int_hold_f, int_timer1_int_hold_f; - logic nmi_int_delayed, nmi_int_detected; - logic [3:0] trigger_execute, trigger_data, trigger_store; - logic dec_tlu_pmu_fw_halted; - - logic mpc_run_state_ns, debug_brkpt_status_ns, mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, dbg_halt_state_ns, dbg_run_state_ns, - dbg_halt_state_f, mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, mpc_halt_state_f, mpc_halt_state_ns, mpc_run_state_f, debug_brkpt_status_f, - mpc_debug_halt_ack_f, mpc_debug_run_ack_f, dbg_run_state_f, mpc_debug_halt_req_sync_pulse, - mpc_debug_run_req_sync_pulse, debug_brkpt_valid, debug_halt_req, debug_resume_req, dec_tlu_mpc_halted_only_ns; - logic take_ext_int_start, ext_int_freeze, take_ext_int_start_d1, take_ext_int_start_d2, - take_ext_int_start_d3, ext_int_freeze_d1, csr_meicpct, ignore_ext_int_due_to_lsu_stall; - logic mcause_sel_nmi_store, mcause_sel_nmi_load, mcause_sel_nmi_ext, fast_int_meicpct; - logic [1:0] mcause_fir_error_type; - logic dbg_halt_req_held_ns, dbg_halt_req_held, dbg_halt_req_final; - logic iccm_repair_state_ns, iccm_repair_state_d1, iccm_repair_state_rfnpc; - - - // internal timer, isolated for size reasons - logic [31:0] dec_timer_rddata_d; - logic dec_timer_read_d, dec_timer_t0_pulse, dec_timer_t1_pulse; - logic csr_mitctl0; - logic csr_mitctl1; - logic csr_mitb0; - logic csr_mitb1; - logic csr_mitcnt0; - logic csr_mitcnt1; - - logic nmi_int_sync, timer_int_sync, 
soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync, mpc_debug_run_req_sync, mpc_debug_halt_req_sync_raw; - logic csr_wr_clk; - logic e4e5_clk, e4_valid, e5_valid, e4e5_valid, internal_dbg_halt_mode_f, internal_dbg_halt_mode_f2; - logic lsu_pmu_load_external_r, lsu_pmu_store_external_r; - logic dec_tlu_flush_noredir_r_d1, dec_tlu_flush_pause_r_d1; - logic lsu_single_ecc_error_r; - logic [31:0] lsu_error_pkt_addr_r; - logic mcyclel_cout_in; - logic i0_valid_no_ebreak_ecall_r; - logic minstret_enable_f; - logic sel_exu_npc_r, sel_flush_npc_r, sel_hold_npc_r; - logic pc0_valid_r; - logic [15:0] mfdc_int, mfdc_ns; - logic [31:0] mrac_in; - logic [31:27] csr_sat; - logic [8:6] dcsr_cause; - logic enter_debug_halt_req_le, dcsr_cause_upgradeable; - logic icache_rd_valid, icache_wr_valid, icache_rd_valid_f, icache_wr_valid_f; - logic [3:0] mhpmc_inc_r, mhpmc_inc_r_d1; - - logic [3:0][9:0] mhpme_vec; - logic mhpmc3_wr_en0, mhpmc3_wr_en1, mhpmc3_wr_en; - logic mhpmc4_wr_en0, mhpmc4_wr_en1, mhpmc4_wr_en; - logic mhpmc5_wr_en0, mhpmc5_wr_en1, mhpmc5_wr_en; - logic mhpmc6_wr_en0, mhpmc6_wr_en1, mhpmc6_wr_en; - logic mhpmc3h_wr_en0, mhpmc3h_wr_en; - logic mhpmc4h_wr_en0, mhpmc4h_wr_en; - logic mhpmc5h_wr_en0, mhpmc5h_wr_en; - logic mhpmc6h_wr_en0, mhpmc6h_wr_en; - logic [63:0] mhpmc3_incr, mhpmc4_incr, mhpmc5_incr, mhpmc6_incr; - logic perfcnt_halted_d1, zero_event_r; - logic [3:0] perfcnt_during_sleep; - logic [9:0] event_r; - - el2_inst_pkt_t pmu_i0_itype_qual; - - logic csr_mfdht; - logic csr_mfdhs; - logic csr_misa; - logic csr_mvendorid; - logic csr_marchid; - logic csr_mimpid; - logic csr_mhartid; - logic csr_mstatus; - logic csr_mtvec; - logic csr_mip; - logic csr_mie; - logic csr_mcyclel; - logic csr_mcycleh; - logic csr_minstretl; - logic csr_minstreth; - logic csr_mscratch; - logic csr_mepc; - logic csr_mcause; - logic csr_mscause; - logic csr_mtval; - logic csr_mrac; - logic csr_dmst; - logic csr_mdseac; - logic csr_meihap; - logic 
csr_meivt; - logic csr_meipt; - logic csr_meicurpl; - logic csr_meicidpl; - logic csr_dcsr; - logic csr_mcgc; - logic csr_mfdc; - logic csr_dpc; - logic csr_mtsel; - logic csr_mtdata1; - logic csr_mtdata2; - logic csr_mhpmc3; - logic csr_mhpmc4; - logic csr_mhpmc5; - logic csr_mhpmc6; - logic csr_mhpmc3h; - logic csr_mhpmc4h; - logic csr_mhpmc5h; - logic csr_mhpmc6h; - logic csr_mhpme3; - logic csr_mhpme4; - logic csr_mhpme5; - logic csr_mhpme6; - logic csr_mcountinhibit; - logic csr_mpmc; - logic csr_micect; - logic csr_miccmect; - logic csr_mdccmect; - logic csr_dicawics; - logic csr_dicad0h; - logic csr_dicad0; - logic csr_dicad1; - logic csr_dicago; - logic presync; - logic postsync; - logic legal; - logic dec_csr_wen_r_mod; - - logic flush_clkvalid; - logic sel_fir_addr; - logic wr_mie_r; - logic mtval_capture_pc_r; - logic mtval_capture_pc_plus2_r; - logic mtval_capture_inst_r; - logic mtval_capture_lsu_r; - logic mtval_clear_r; - logic wr_mcgc_r; - logic wr_mfdc_r; - logic wr_mdeau_r; - logic trigger_hit_for_dscr_cause_r_d1; - logic conditionally_illegal; - - logic [3:0] ifu_mscause ; - logic ifu_ic_error_start_f, ifu_iccm_rd_ecc_single_err_f; - - el2_dec_timer_ctl #(.pt(pt)) int_timers(.*); - // end of internal timers - - assign clk_override = dec_tlu_dec_clk_override; - - // Async inputs to the core have to be sync'd to the core clock. 
- rvsyncss #(7) syncro_ff(.*, - .clk(free_clk), - .din ({nmi_int, timer_int, soft_int, i_cpu_halt_req, i_cpu_run_req, mpc_debug_halt_req, mpc_debug_run_req}), - .dout({nmi_int_sync, timer_int_sync, soft_int_sync, i_cpu_halt_req_sync, i_cpu_run_req_sync, mpc_debug_halt_req_sync_raw, mpc_debug_run_req_sync})); - - // for CSRs that have inpipe writes only - - rvoclkhdr csrwr_r_cgc ( .en(dec_csr_wen_r_mod | clk_override), .l1clk(csr_wr_clk), .* ); - - assign e4_valid = dec_tlu_i0_valid_r; - assign e4e5_valid = e4_valid | e5_valid; - assign flush_clkvalid = internal_dbg_halt_mode_f | i_cpu_run_req_d1 | interrupt_valid_r | interrupt_valid_r_d1 | + `include "el2_param.vh" +) ( + input logic clk, + input logic free_clk, + input logic free_l2clk, + input logic rst_l, + input logic scan_mode, + + input logic [31:1] rst_vec, // reset vector, from core pins + input logic nmi_int, // nmi pin + input logic [31:1] nmi_vec, // nmi vector + input logic i_cpu_halt_req, // Asynchronous Halt request to CPU + input logic i_cpu_run_req, // Asynchronous Restart request to CPU + + input logic lsu_fastint_stall_any, // needed by lsu for 2nd pass of dma with ecc correction, stall next cycle + + + // perf counter inputs + input logic ifu_pmu_instr_aligned, // aligned instructions + input logic ifu_pmu_fetch_stall, // fetch unit stalled + input logic ifu_pmu_ic_miss, // icache miss + input logic ifu_pmu_ic_hit, // icache hit + input logic ifu_pmu_bus_error, // Instruction side bus error + input logic ifu_pmu_bus_busy, // Instruction side bus busy + input logic ifu_pmu_bus_trxn, // Instruction side bus transaction + input logic dec_pmu_instr_decoded, // decoded instructions + input logic dec_pmu_decode_stall, // decode stall + input logic dec_pmu_presync_stall, // decode stall due to presync'd inst + input logic dec_pmu_postsync_stall, // decode stall due to postsync'd inst + input logic lsu_store_stall_any, // SB or WB is full, stall decode + input logic dma_dccm_stall_any, // DMA stall of 
lsu + input logic dma_iccm_stall_any, // DMA stall of ifu + input logic exu_pmu_i0_br_misp, // pipe 0 branch misp + input logic exu_pmu_i0_br_ataken, // pipe 0 branch actual taken + input logic exu_pmu_i0_pc4, // pipe 0 4 byte branch + input logic lsu_pmu_bus_trxn, // D side bus transaction + input logic lsu_pmu_bus_misaligned, // D side bus misaligned + input logic lsu_pmu_bus_error, // D side bus error + input logic lsu_pmu_bus_busy, // D side bus busy + input logic lsu_pmu_load_external_m, // D side bus load + input logic lsu_pmu_store_external_m, // D side bus store + input logic dma_pmu_dccm_read, // DMA DCCM read + input logic dma_pmu_dccm_write, // DMA DCCM write + input logic dma_pmu_any_read, // DMA read + input logic dma_pmu_any_write, // DMA write + + input logic [31:1] lsu_fir_addr, // Fast int address + input logic [ 1:0] lsu_fir_error, // Fast int lookup error + + input logic iccm_dma_sb_error, // I side dma single bit error + + input el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu precise exception/error packet + input logic lsu_single_ecc_error_incr, // LSU inc SB error counter + + input logic dec_pause_state, // Pause counter not zero + input logic lsu_imprecise_error_store_any, // store bus error + input logic lsu_imprecise_error_load_any, // store bus error + input logic [31:0] lsu_imprecise_error_addr_any, // store bus error address + + input logic dec_csr_wen_unq_d, // valid csr with write - for csr legal + input logic dec_csr_any_unq_d, // valid csr - for csr legal + input logic [11:0] dec_csr_rdaddr_d, // read address for csr + + input logic dec_csr_wen_r, // csr write enable at wb + input logic [11:0] dec_csr_wraddr_r, // write address for csr + input logic [31:0] dec_csr_wrdata_r, // csr write data at wb + + input logic dec_csr_stall_int_ff, // csr is mie/mstatus + + input logic dec_tlu_i0_valid_r, // pipe 0 op at e4 is valid + + input logic [31:1] exu_npc_r, // for NPC tracking + + input logic [31:1] dec_tlu_i0_pc_r, // for PC/NPC tracking + + 
input el2_trap_pkt_t dec_tlu_packet_r, // exceptions known at decode + + input logic [31:0] dec_illegal_inst, // For mtval + input logic dec_i0_decode_d, // decode valid, used for clean icache diagnostics + + // branch info from pipe0 for errors or counter updates + input logic [1:0] exu_i0_br_hist_r, // history + input logic exu_i0_br_error_r, // error + input logic exu_i0_br_start_error_r, // start error + input logic exu_i0_br_valid_r, // valid + input logic exu_i0_br_mp_r, // mispredict + input logic exu_i0_br_middle_r, // middle of bank + + // branch info from pipe1 for errors or counter updates + + input logic exu_i0_br_way_r, // way hit or repl + + output logic dec_tlu_core_empty, // core is empty + // Debug start + output logic dec_dbg_cmd_done, // abstract command done + output logic dec_dbg_cmd_fail, // abstract command failed + output logic dec_tlu_dbg_halted, // Core is halted and ready for debug command + output logic dec_tlu_debug_mode, // Core is in debug mode + output logic dec_tlu_resume_ack, // Resume acknowledge + output logic dec_tlu_debug_stall, // stall decode while waiting on core to empty + + output logic dec_tlu_flush_noredir_r, // Tell fetch to idle on this flush + output logic dec_tlu_mpc_halted_only, // Core is halted only due to MPC + output logic dec_tlu_flush_leak_one_r, // single step + output logic dec_tlu_flush_err_r, // iside perr/ecc rfpc. 
This is the D stage of the error + + output logic dec_tlu_flush_extint, // fast ext int started + output logic [31:2] dec_tlu_meihap, // meihap for fast int + + input logic dbg_halt_req, // DM requests a halt + input logic dbg_resume_req, // DM requests a resume + input logic ifu_miss_state_idle, // I-side miss buffer empty + input logic lsu_idle_any, // lsu is idle + input logic dec_div_active, // oop div is active + output el2_trigger_pkt_t [3:0] trigger_pkt_any, // trigger info for trigger blocks + + input logic ifu_ic_error_start, // IC single bit error + input logic ifu_iccm_rd_ecc_single_err, // ICCM single bit error + + + input logic [70:0] ifu_ic_debug_rd_data, // diagnostic icache read data + input logic ifu_ic_debug_rd_data_valid, // diagnostic icache read data valid + output el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // packet of DICAWICS, DICAD0/1, DICAGO info for icache diagnostics + // Debug end + + input logic [7:0] pic_claimid, // pic claimid for csr + input logic [3:0] pic_pl, // pic priv level for csr + input logic mhwakeup, // high priority external int, wakeup if halted + + input logic mexintpend, // external interrupt pending + input logic timer_int, // timer interrupt pending + input logic soft_int, // software interrupt pending + + output logic o_cpu_halt_status, // PMU interface, halted + output logic o_cpu_halt_ack, // halt req ack + output logic o_cpu_run_ack, // run req ack + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. 
When core is in debug mode, the PMU should refrain from sendng a halt or run request + + input logic [31:4] core_id, // Core ID + + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint + + output logic [3:0] dec_tlu_meicurpl, // to PIC + output logic [3:0] dec_tlu_meipt, // to PIC + + + output logic [31:0] dec_csr_rddata_d, // csr read data at wb + output logic dec_csr_legal_d, // csr indicates legal operation + + output el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // branch pkt to bp + + output logic dec_tlu_i0_kill_writeb_wb, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_flush_lower_wb, // commit has a flush (exception, int, mispredict at e4) + output logic dec_tlu_i0_commit_cmt, // committed an instruction + + output logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + output logic dec_tlu_flush_lower_r, // commit has a flush (exception, int) + output logic [31:1] dec_tlu_flush_path_r, // flush pc + output logic dec_tlu_fence_i_r, // flush is a fence_i rfnpc, flush icache + output logic dec_tlu_wr_pause_r, // CSR write to pause reg is at R. 
+ output logic dec_tlu_flush_pause_r, // Flush is due to pause + + output logic dec_tlu_presync_d, // CSR read needs to be presync'd + output logic dec_tlu_postsync_d, // CSR needs to be presync'd + + + output logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + + output logic dec_tlu_force_halt, // halt has been forced + + output logic dec_tlu_perfcnt0, // toggles when pipe0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, // toggles when pipe0 perf counter 1 has an event inc + output logic dec_tlu_perfcnt2, // toggles when pipe0 perf counter 2 has an event inc + output logic dec_tlu_perfcnt3, // toggles when pipe0 perf counter 3 has an event inc + + output logic dec_tlu_i0_exc_valid_wb1, // pipe 0 exception valid + output logic dec_tlu_i0_valid_wb1, // pipe 0 valid + output logic dec_tlu_int_valid_wb1, // pipe 2 int valid + output logic [4:0] dec_tlu_exc_cause_wb1, // exception or int cause + output logic [31:0] dec_tlu_mtval_wb1, // MTVAL value + + // feature disable from mfdc + output logic dec_tlu_external_ldfwd_disable, // disable external load forwarding + output logic dec_tlu_sideeffect_posted_disable, // disable posted stores to side-effect address + output logic dec_tlu_core_ecc_disable, // disable core ECC + output logic dec_tlu_bpred_disable, // disable branch prediction + output logic dec_tlu_wb_coalescing_disable, // disable writebuffer coalescing + output logic dec_tlu_pipelining_disable, // disable pipelining + output logic dec_tlu_trace_disable, // disable trace + output logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:16] + + // clock gating overrides from mcgc + output logic dec_tlu_misc_clk_override, // override misc clock domain gating + output logic dec_tlu_dec_clk_override, // override decode clock domain gating + output logic dec_tlu_ifu_clk_override, // override fetch clock domain gating + output logic dec_tlu_lsu_clk_override, // override load/store clock domain gating + output logic 
dec_tlu_bus_clk_override, // override bus clock domain gating + output logic dec_tlu_pic_clk_override, // override PIC clock domain gating + output logic dec_tlu_picio_clk_override, // override PICIO clock domain gating + output logic dec_tlu_dccm_clk_override, // override DCCM clock domain gating + output logic dec_tlu_icm_clk_override // override ICCM clock domain gating + +); + + logic + clk_override, + e4e5_int_clk, + nmi_fir_type, + nmi_lsu_load_type, + nmi_lsu_store_type, + nmi_int_detected_f, + nmi_lsu_load_type_f, + nmi_lsu_store_type_f, + allow_dbg_halt_csr_write, + dbg_cmd_done_ns, + i_cpu_run_req_d1_raw, + debug_mode_status, + lsu_single_ecc_error_r_d1, + sel_npc_r, + sel_npc_resume, + ce_int, + nmi_in_debug_mode, + dpc_capture_npc, + dpc_capture_pc, + tdata_load, + tdata_opcode, + tdata_action, + perfcnt_halted, + tdata_chain, + tdata_kill_write; + + + logic reset_delayed, reset_detect, reset_detected; + logic + wr_mstatus_r, + wr_mtvec_r, + wr_mcyclel_r, + wr_mcycleh_r, + wr_minstretl_r, + wr_minstreth_r, + wr_mscratch_r, + wr_mepc_r, + wr_mcause_r, + wr_mscause_r, + wr_mtval_r, + wr_mrac_r, + wr_meihap_r, + wr_meicurpl_r, + wr_meipt_r, + wr_dcsr_r, + wr_dpc_r, + wr_meicidpl_r, + wr_meivt_r, + wr_meicpct_r, + wr_micect_r, + wr_miccmect_r, + wr_mfdht_r, + wr_mfdhs_r, + wr_mdccmect_r, + wr_mhpme3_r, + wr_mhpme4_r, + wr_mhpme5_r, + wr_mhpme6_r; + logic wr_mpmc_r; + logic [1:1] mpmc_b_ns, mpmc, mpmc_b; + logic set_mie_pmu_fw_halt, fw_halted_ns, fw_halted; + logic wr_mcountinhibit_r; + logic [6:0] mcountinhibit; + logic + wr_mtsel_r, + wr_mtdata1_t0_r, + wr_mtdata1_t1_r, + wr_mtdata1_t2_r, + wr_mtdata1_t3_r, + wr_mtdata2_t0_r, + wr_mtdata2_t1_r, + wr_mtdata2_t2_r, + wr_mtdata2_t3_r; + logic [31:0] mtdata2_t0, mtdata2_t1, mtdata2_t2, mtdata2_t3, mtdata2_tsel_out, mtdata1_tsel_out; + logic [9:0] + mtdata1_t0_ns, + mtdata1_t0, + mtdata1_t1_ns, + mtdata1_t1, + mtdata1_t2_ns, + mtdata1_t2, + mtdata1_t3_ns, + mtdata1_t3; + logic [9:0] tdata_wrdata_r; + logic 
[1:0] mtsel_ns, mtsel; + logic tlu_i0_kill_writeb_r; + logic [1:0] mstatus_ns, mstatus; + logic [1:0] mfdhs_ns, mfdhs; + logic [31:0] force_halt_ctr, force_halt_ctr_f; + logic force_halt; + logic [5:0] mfdht, mfdht_ns; + logic mstatus_mie_ns; + logic [30:0] mtvec_ns, mtvec; + logic [15:2] dcsr_ns, dcsr; + logic [5:0] mip_ns, mip; + logic [5:0] mie_ns, mie; + logic [31:0] mcyclel_ns, mcyclel; + logic [31:0] mcycleh_ns, mcycleh; + logic [31:0] minstretl_ns, minstretl; + logic [31:0] minstreth_ns, minstreth; + logic [31:0] micect_ns, micect, miccmect_ns, miccmect, mdccmect_ns, mdccmect; + logic [26:0] micect_inc, miccmect_inc, mdccmect_inc; + logic [31:0] mscratch; + logic [31:0] mhpmc3, mhpmc3_ns, mhpmc4, mhpmc4_ns, mhpmc5, mhpmc5_ns, mhpmc6, mhpmc6_ns; + logic [31:0] mhpmc3h, mhpmc3h_ns, mhpmc4h, mhpmc4h_ns, mhpmc5h, mhpmc5h_ns, mhpmc6h, mhpmc6h_ns; + logic [9:0] mhpme3, mhpme4, mhpme5, mhpme6; + logic [ 31:0] mrac; + logic [ 9:2] meihap; + logic [31:10] meivt; + logic [3:0] meicurpl_ns, meicurpl; + logic [3:0] meicidpl_ns, meicidpl; + logic [3:0] meipt_ns, meipt; + logic [31:0] mdseac; + logic mdseac_locked_ns, mdseac_locked_f, mdseac_en, nmi_lsu_detected; + logic [31:1] mepc_ns, mepc; + logic [31:1] dpc_ns, dpc; + logic [31:0] mcause_ns, mcause; + logic [3:0] mscause_ns, mscause, mscause_type; + logic [31:0] mtval_ns, mtval; + logic dec_pause_state_f, dec_tlu_wr_pause_r_d1, pause_expired_r, pause_expired_wb; + logic tlu_flush_lower_r, tlu_flush_lower_r_d1; + logic [31:1] tlu_flush_path_r, tlu_flush_path_r_d1; + logic i0_valid_wb; + logic tlu_i0_commit_cmt; + logic [31:1] vectored_path, interrupt_path; + logic [16:0] dicawics_ns, dicawics; + logic wr_dicawics_r, wr_dicad0_r, wr_dicad1_r, wr_dicad0h_r; + logic [31:0] dicad0_ns, dicad0, dicad0h_ns, dicad0h; + + logic [6:0] dicad1_ns, dicad1_raw; + logic [31:0] dicad1; + logic + ebreak_r, + ebreak_to_debug_mode_r, + ecall_r, + illegal_r, + mret_r, + inst_acc_r, + fence_i_r, + ic_perr_r, + iccm_sbecc_r, + 
ebreak_to_debug_mode_r_d1, + kill_ebreak_count_r, + inst_acc_second_r; + logic + ce_int_ready, + ext_int_ready, + timer_int_ready, + soft_int_ready, + int_timer0_int_ready, + int_timer1_int_ready, + mhwakeup_ready, + take_ext_int, + take_ce_int, + take_timer_int, + take_soft_int, + take_int_timer0_int, + take_int_timer1_int, + take_nmi, + take_nmi_r_d1, + int_timer0_int_possible, + int_timer1_int_possible; + logic + i0_exception_valid_r, + interrupt_valid_r, + i0_exception_valid_r_d1, + interrupt_valid_r_d1, + exc_or_int_valid_r, + exc_or_int_valid_r_d1, + mdccme_ce_req, + miccme_ce_req, + mice_ce_req; + logic synchronous_flush_r; + logic [4:0] exc_cause_r, exc_cause_wb; + logic mcyclel_cout, mcyclel_cout_f, mcyclela_cout; + logic [31:0] mcyclel_inc; + logic [31:0] mcycleh_inc; + + logic minstretl_cout, minstretl_cout_f, minstret_enable, minstretl_cout_ns, minstretl_couta; + + logic [31:0] minstretl_inc, minstretl_read; + logic [31:0] minstreth_inc, minstreth_read; + logic [31:1] pc_r, pc_r_d1, npc_r, npc_r_d1; + logic valid_csr; + logic rfpc_i0_r; + logic lsu_i0_rfnpc_r; + logic dec_tlu_br0_error_r, dec_tlu_br0_start_error_r, dec_tlu_br0_v_r; + logic + lsu_i0_exc_r, + lsu_i0_exc_r_raw, + lsu_exc_ma_r, + lsu_exc_acc_r, + lsu_exc_st_r, + lsu_exc_valid_r, + lsu_exc_valid_r_raw, + lsu_exc_valid_r_d1, + lsu_i0_exc_r_d1, + block_interrupts; + logic i0_trigger_eval_r; + + logic + request_debug_mode_r, + request_debug_mode_r_d1, + request_debug_mode_done, + request_debug_mode_done_f; + logic + take_halt, + halt_taken, + halt_taken_f, + internal_dbg_halt_mode, + dbg_tlu_halted_f, + take_reset, + dbg_tlu_halted, + core_empty, + lsu_idle_any_f, + ifu_miss_state_idle_f, + resume_ack_ns, + debug_halt_req_f, + debug_resume_req_f_raw, + debug_resume_req_f, + enter_debug_halt_req, + dcsr_single_step_done, + dcsr_single_step_done_f, + debug_halt_req_d1, + debug_halt_req_ns, + dcsr_single_step_running, + dcsr_single_step_running_f, + internal_dbg_halt_timers; + + logic [3:0] 
i0_trigger_r, trigger_action, trigger_enabled, i0_trigger_chain_masked_r; + logic + i0_trigger_hit_r, + i0_trigger_hit_raw_r, + i0_trigger_action_r, + trigger_hit_r_d1, + mepc_trigger_hit_sel_pc_r; + logic [3:0] + update_hit_bit_r, i0_iside_trigger_has_pri_r, i0trigger_qual_r, i0_lsu_trigger_has_pri_r; + logic + cpu_halt_status, + cpu_halt_ack, + cpu_run_ack, + ext_halt_pulse, + i_cpu_halt_req_d1, + i_cpu_run_req_d1; + + logic inst_acc_r_raw, trigger_hit_dmode_r, trigger_hit_dmode_r_d1; + logic [9:0] mcgc, mcgc_ns, mcgc_int; + logic [18:0] mfdc; + logic + i_cpu_halt_req_sync_qual, + i_cpu_run_req_sync_qual, + pmu_fw_halt_req_ns, + pmu_fw_halt_req_f, + int_timer_stalled, + fw_halt_req, + enter_pmu_fw_halt_req, + pmu_fw_tlu_halted, + pmu_fw_tlu_halted_f, + internal_pmu_fw_halt_mode, + internal_pmu_fw_halt_mode_f, + int_timer0_int_hold, + int_timer1_int_hold, + int_timer0_int_hold_f, + int_timer1_int_hold_f; + logic nmi_int_delayed, nmi_int_detected; + logic [3:0] trigger_execute, trigger_data, trigger_store; + logic dec_tlu_pmu_fw_halted; + + logic + mpc_run_state_ns, + debug_brkpt_status_ns, + mpc_debug_halt_ack_ns, + mpc_debug_run_ack_ns, + dbg_halt_state_ns, + dbg_run_state_ns, + dbg_halt_state_f, + mpc_debug_halt_req_sync_f, + mpc_debug_run_req_sync_f, + mpc_halt_state_f, + mpc_halt_state_ns, + mpc_run_state_f, + debug_brkpt_status_f, + mpc_debug_halt_ack_f, + mpc_debug_run_ack_f, + dbg_run_state_f, + mpc_debug_halt_req_sync_pulse, + mpc_debug_run_req_sync_pulse, + debug_brkpt_valid, + debug_halt_req, + debug_resume_req, + dec_tlu_mpc_halted_only_ns; + logic + take_ext_int_start, + ext_int_freeze, + take_ext_int_start_d1, + take_ext_int_start_d2, + take_ext_int_start_d3, + ext_int_freeze_d1, + csr_meicpct, + ignore_ext_int_due_to_lsu_stall; + logic mcause_sel_nmi_store, mcause_sel_nmi_load, mcause_sel_nmi_ext, fast_int_meicpct; + logic [1:0] mcause_fir_error_type; + logic dbg_halt_req_held_ns, dbg_halt_req_held, dbg_halt_req_final; + logic iccm_repair_state_ns, 
iccm_repair_state_d1, iccm_repair_state_rfnpc; + + + // internal timer, isolated for size reasons + logic [31:0] dec_timer_rddata_d; + logic dec_timer_read_d, dec_timer_t0_pulse, dec_timer_t1_pulse; + logic csr_mitctl0; + logic csr_mitctl1; + logic csr_mitb0; + logic csr_mitb1; + logic csr_mitcnt0; + logic csr_mitcnt1; + + logic + nmi_int_sync, + timer_int_sync, + soft_int_sync, + i_cpu_halt_req_sync, + i_cpu_run_req_sync, + mpc_debug_halt_req_sync, + mpc_debug_run_req_sync, + mpc_debug_halt_req_sync_raw; + logic csr_wr_clk; + logic + e4e5_clk, e4_valid, e5_valid, e4e5_valid, internal_dbg_halt_mode_f, internal_dbg_halt_mode_f2; + logic lsu_pmu_load_external_r, lsu_pmu_store_external_r; + logic dec_tlu_flush_noredir_r_d1, dec_tlu_flush_pause_r_d1; + logic lsu_single_ecc_error_r; + logic [31:0] lsu_error_pkt_addr_r; + logic mcyclel_cout_in; + logic i0_valid_no_ebreak_ecall_r; + logic minstret_enable_f; + logic sel_exu_npc_r, sel_flush_npc_r, sel_hold_npc_r; + logic pc0_valid_r; + logic [15:0] mfdc_int, mfdc_ns; + logic [ 31:0] mrac_in; + logic [31:27] csr_sat; + logic [ 8:6] dcsr_cause; + logic enter_debug_halt_req_le, dcsr_cause_upgradeable; + logic icache_rd_valid, icache_wr_valid, icache_rd_valid_f, icache_wr_valid_f; + logic [3:0] mhpmc_inc_r, mhpmc_inc_r_d1; + + logic [3:0][9:0] mhpme_vec; + logic mhpmc3_wr_en0, mhpmc3_wr_en1, mhpmc3_wr_en; + logic mhpmc4_wr_en0, mhpmc4_wr_en1, mhpmc4_wr_en; + logic mhpmc5_wr_en0, mhpmc5_wr_en1, mhpmc5_wr_en; + logic mhpmc6_wr_en0, mhpmc6_wr_en1, mhpmc6_wr_en; + logic mhpmc3h_wr_en0, mhpmc3h_wr_en; + logic mhpmc4h_wr_en0, mhpmc4h_wr_en; + logic mhpmc5h_wr_en0, mhpmc5h_wr_en; + logic mhpmc6h_wr_en0, mhpmc6h_wr_en; + logic [63:0] mhpmc3_incr, mhpmc4_incr, mhpmc5_incr, mhpmc6_incr; + logic perfcnt_halted_d1, zero_event_r; + logic [3:0] perfcnt_during_sleep; + logic [9:0] event_r; + + el2_inst_pkt_t pmu_i0_itype_qual; + + logic csr_mfdht; + logic csr_mfdhs; + logic csr_misa; + logic csr_mvendorid; + logic csr_marchid; + logic 
csr_mimpid; + logic csr_mhartid; + logic csr_mstatus; + logic csr_mtvec; + logic csr_mip; + logic csr_mie; + logic csr_mcyclel; + logic csr_mcycleh; + logic csr_minstretl; + logic csr_minstreth; + logic csr_mscratch; + logic csr_mepc; + logic csr_mcause; + logic csr_mscause; + logic csr_mtval; + logic csr_mrac; + logic csr_dmst; + logic csr_mdseac; + logic csr_meihap; + logic csr_meivt; + logic csr_meipt; + logic csr_meicurpl; + logic csr_meicidpl; + logic csr_dcsr; + logic csr_mcgc; + logic csr_mfdc; + logic csr_dpc; + logic csr_mtsel; + logic csr_mtdata1; + logic csr_mtdata2; + logic csr_mhpmc3; + logic csr_mhpmc4; + logic csr_mhpmc5; + logic csr_mhpmc6; + logic csr_mhpmc3h; + logic csr_mhpmc4h; + logic csr_mhpmc5h; + logic csr_mhpmc6h; + logic csr_mhpme3; + logic csr_mhpme4; + logic csr_mhpme5; + logic csr_mhpme6; + logic csr_mcountinhibit; + logic csr_mpmc; + logic csr_micect; + logic csr_miccmect; + logic csr_mdccmect; + logic csr_dicawics; + logic csr_dicad0h; + logic csr_dicad0; + logic csr_dicad1; + logic csr_dicago; + logic presync; + logic postsync; + logic legal; + logic dec_csr_wen_r_mod; + + logic flush_clkvalid; + logic sel_fir_addr; + logic wr_mie_r; + logic mtval_capture_pc_r; + logic mtval_capture_pc_plus2_r; + logic mtval_capture_inst_r; + logic mtval_capture_lsu_r; + logic mtval_clear_r; + logic wr_mcgc_r; + logic wr_mfdc_r; + logic wr_mdeau_r; + logic trigger_hit_for_dscr_cause_r_d1; + logic conditionally_illegal; + + logic [3:0] ifu_mscause; + logic ifu_ic_error_start_f, ifu_iccm_rd_ecc_single_err_f; + + el2_dec_timer_ctl #(.pt(pt)) int_timers (.*); + // end of internal timers + + assign clk_override = dec_tlu_dec_clk_override; + + // Async inputs to the core have to be sync'd to the core clock. 
+ rvsyncss #(7) syncro_ff ( + .*, + .clk(free_clk), + .din({ + nmi_int, + timer_int, + soft_int, + i_cpu_halt_req, + i_cpu_run_req, + mpc_debug_halt_req, + mpc_debug_run_req + }), + .dout({ + nmi_int_sync, + timer_int_sync, + soft_int_sync, + i_cpu_halt_req_sync, + i_cpu_run_req_sync, + mpc_debug_halt_req_sync_raw, + mpc_debug_run_req_sync + }) + ); + + // for CSRs that have inpipe writes only + + rvoclkhdr csrwr_r_cgc ( + .en(dec_csr_wen_r_mod | clk_override), + .l1clk(csr_wr_clk), + .* + ); + + assign e4_valid = dec_tlu_i0_valid_r; + assign e4e5_valid = e4_valid | e5_valid; + assign flush_clkvalid = internal_dbg_halt_mode_f | i_cpu_run_req_d1 | interrupt_valid_r | interrupt_valid_r_d1 | reset_delayed | pause_expired_r | pause_expired_wb | ic_perr_r | iccm_sbecc_r | clk_override; - rvoclkhdr e4e5_cgc ( .en(e4e5_valid | clk_override), .l1clk(e4e5_clk), .* ); - rvoclkhdr e4e5_int_cgc ( .en(e4e5_valid | flush_clkvalid), .l1clk(e4e5_int_clk), .* ); + rvoclkhdr e4e5_cgc ( + .en(e4e5_valid | clk_override), + .l1clk(e4e5_clk), + .* + ); + rvoclkhdr e4e5_int_cgc ( + .en(e4e5_valid | flush_clkvalid), + .l1clk(e4e5_int_clk), + .* + ); - rvdffie #(11) freeff (.*, .clk(free_l2clk), - .din ({ifu_ic_error_start, ifu_iccm_rd_ecc_single_err, iccm_repair_state_ns, e4_valid, internal_dbg_halt_mode, - lsu_pmu_load_external_m, lsu_pmu_store_external_m, tlu_flush_lower_r, tlu_i0_kill_writeb_r, - internal_dbg_halt_mode_f, force_halt}), - .dout({ifu_ic_error_start_f, ifu_iccm_rd_ecc_single_err_f, iccm_repair_state_d1, e5_valid, internal_dbg_halt_mode_f, - lsu_pmu_load_external_r, lsu_pmu_store_external_r, tlu_flush_lower_r_d1, dec_tlu_i0_kill_writeb_wb, - internal_dbg_halt_mode_f2, dec_tlu_force_halt})); + rvdffie #(11) freeff ( + .*, + .clk(free_l2clk), + .din({ + ifu_ic_error_start, + ifu_iccm_rd_ecc_single_err, + iccm_repair_state_ns, + e4_valid, + internal_dbg_halt_mode, + lsu_pmu_load_external_m, + lsu_pmu_store_external_m, + tlu_flush_lower_r, + tlu_i0_kill_writeb_r, + 
internal_dbg_halt_mode_f, + force_halt + }), + .dout({ + ifu_ic_error_start_f, + ifu_iccm_rd_ecc_single_err_f, + iccm_repair_state_d1, + e5_valid, + internal_dbg_halt_mode_f, + lsu_pmu_load_external_r, + lsu_pmu_store_external_r, + tlu_flush_lower_r_d1, + dec_tlu_i0_kill_writeb_wb, + internal_dbg_halt_mode_f2, + dec_tlu_force_halt + }) + ); - assign dec_tlu_i0_kill_writeb_r = tlu_i0_kill_writeb_r; + assign dec_tlu_i0_kill_writeb_r = tlu_i0_kill_writeb_r; - assign nmi_int_detected = (nmi_int_sync & ~nmi_int_delayed) | nmi_lsu_detected | (nmi_int_detected_f & ~take_nmi_r_d1) | nmi_fir_type; - // if the first nmi is a lsu type, note it. If there's already an nmi pending, ignore. Simultaneous with FIR, drop. - assign nmi_lsu_load_type = (nmi_lsu_detected & lsu_imprecise_error_load_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | + assign nmi_int_detected = (nmi_int_sync & ~nmi_int_delayed) | nmi_lsu_detected | (nmi_int_detected_f & ~take_nmi_r_d1) | nmi_fir_type; + // if the first nmi is a lsu type, note it. If there's already an nmi pending, ignore. Simultaneous with FIR, drop. 
+ assign nmi_lsu_load_type = (nmi_lsu_detected & lsu_imprecise_error_load_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | (nmi_lsu_load_type_f & ~take_nmi_r_d1); - assign nmi_lsu_store_type = (nmi_lsu_detected & lsu_imprecise_error_store_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | + assign nmi_lsu_store_type = (nmi_lsu_detected & lsu_imprecise_error_store_any & ~(nmi_int_detected_f & ~take_nmi_r_d1)) | (nmi_lsu_store_type_f & ~take_nmi_r_d1); - assign nmi_fir_type = ~nmi_int_detected_f & take_ext_int_start_d3 & |lsu_fir_error[1:0]; + assign nmi_fir_type = ~nmi_int_detected_f & take_ext_int_start_d3 & |lsu_fir_error[1:0]; - // Filter subsequent bus errors after the first, until the lock on MDSEAC is cleared - assign nmi_lsu_detected = ~mdseac_locked_f & (lsu_imprecise_error_load_any | lsu_imprecise_error_store_any) & ~nmi_fir_type; + // Filter subsequent bus errors after the first, until the lock on MDSEAC is cleared + assign nmi_lsu_detected = ~mdseac_locked_f & (lsu_imprecise_error_load_any | lsu_imprecise_error_store_any) & ~nmi_fir_type; -localparam MSTATUS_MIE = 0; -localparam MIP_MCEIP = 5; -localparam MIP_MITIP0 = 4; -localparam MIP_MITIP1 = 3; -localparam MIP_MEIP = 2; -localparam MIP_MTIP = 1; -localparam MIP_MSIP = 0; + localparam MSTATUS_MIE = 0; + localparam MIP_MCEIP = 5; + localparam MIP_MITIP0 = 4; + localparam MIP_MITIP1 = 3; + localparam MIP_MEIP = 2; + localparam MIP_MTIP = 1; + localparam MIP_MSIP = 0; -localparam MIE_MCEIE = 5; -localparam MIE_MITIE0 = 4; -localparam MIE_MITIE1 = 3; -localparam MIE_MEIE = 2; -localparam MIE_MTIE = 1; -localparam MIE_MSIE = 0; + localparam MIE_MCEIE = 5; + localparam MIE_MITIE0 = 4; + localparam MIE_MITIE1 = 3; + localparam MIE_MEIE = 2; + localparam MIE_MTIE = 1; + localparam MIE_MSIE = 0; -localparam DCSR_EBREAKM = 15; -localparam DCSR_STEPIE = 11; -localparam DCSR_STOPC = 10; -localparam DCSR_STEP = 2; + localparam DCSR_EBREAKM = 15; + localparam DCSR_STEPIE = 11; + localparam DCSR_STOPC = 10; + localparam 
DCSR_STEP = 2; - assign reset_delayed = reset_detect ^ reset_detected; + assign reset_delayed = reset_detect ^ reset_detected; - // ---------------------------------------------------------------------- - // MPC halt - // - can interact with debugger halt and v-v + // ---------------------------------------------------------------------- + // MPC halt + // - can interact with debugger halt and v-v - // fast ints in progress have priority - assign mpc_debug_halt_req_sync = mpc_debug_halt_req_sync_raw & ~ext_int_freeze_d1; + // fast ints in progress have priority + assign mpc_debug_halt_req_sync = mpc_debug_halt_req_sync_raw & ~ext_int_freeze_d1; - rvdffie #(16) mpvhalt_ff (.*, .clk(free_l2clk), - .din({1'b1, reset_detect, - nmi_int_sync, nmi_int_detected, nmi_lsu_load_type, nmi_lsu_store_type, - mpc_debug_halt_req_sync, mpc_debug_run_req_sync, - mpc_halt_state_ns, mpc_run_state_ns, debug_brkpt_status_ns, - mpc_debug_halt_ack_ns, mpc_debug_run_ack_ns, - dbg_halt_state_ns, dbg_run_state_ns, - dec_tlu_mpc_halted_only_ns}), - .dout({reset_detect, reset_detected, - nmi_int_delayed, nmi_int_detected_f, nmi_lsu_load_type_f, nmi_lsu_store_type_f, - mpc_debug_halt_req_sync_f, mpc_debug_run_req_sync_f, - mpc_halt_state_f, mpc_run_state_f, debug_brkpt_status_f, - mpc_debug_halt_ack_f, mpc_debug_run_ack_f, - dbg_halt_state_f, dbg_run_state_f, - dec_tlu_mpc_halted_only})); + rvdffie #(16) mpvhalt_ff ( + .*, + .clk(free_l2clk), + .din({ + 1'b1, + reset_detect, + nmi_int_sync, + nmi_int_detected, + nmi_lsu_load_type, + nmi_lsu_store_type, + mpc_debug_halt_req_sync, + mpc_debug_run_req_sync, + mpc_halt_state_ns, + mpc_run_state_ns, + debug_brkpt_status_ns, + mpc_debug_halt_ack_ns, + mpc_debug_run_ack_ns, + dbg_halt_state_ns, + dbg_run_state_ns, + dec_tlu_mpc_halted_only_ns + }), + .dout({ + reset_detect, + reset_detected, + nmi_int_delayed, + nmi_int_detected_f, + nmi_lsu_load_type_f, + nmi_lsu_store_type_f, + mpc_debug_halt_req_sync_f, + mpc_debug_run_req_sync_f, + 
mpc_halt_state_f, + mpc_run_state_f, + debug_brkpt_status_f, + mpc_debug_halt_ack_f, + mpc_debug_run_ack_f, + dbg_halt_state_f, + dbg_run_state_f, + dec_tlu_mpc_halted_only + }) + ); - // turn level sensitive requests into pulses - assign mpc_debug_halt_req_sync_pulse = mpc_debug_halt_req_sync & ~mpc_debug_halt_req_sync_f; - assign mpc_debug_run_req_sync_pulse = mpc_debug_run_req_sync & ~mpc_debug_run_req_sync_f; + // turn level sensitive requests into pulses + assign mpc_debug_halt_req_sync_pulse = mpc_debug_halt_req_sync & ~mpc_debug_halt_req_sync_f; + assign mpc_debug_run_req_sync_pulse = mpc_debug_run_req_sync & ~mpc_debug_run_req_sync_f; - // states - assign mpc_halt_state_ns = (mpc_halt_state_f | mpc_debug_halt_req_sync_pulse | (reset_delayed & ~mpc_reset_run_req)) & ~mpc_debug_run_req_sync; - assign mpc_run_state_ns = (mpc_run_state_f | (mpc_debug_run_req_sync_pulse & ~mpc_debug_run_ack_f)) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + // states + assign mpc_halt_state_ns = (mpc_halt_state_f | mpc_debug_halt_req_sync_pulse | (reset_delayed & ~mpc_reset_run_req)) & ~mpc_debug_run_req_sync; + assign mpc_run_state_ns = (mpc_run_state_f | (mpc_debug_run_req_sync_pulse & ~mpc_debug_run_ack_f)) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); - // note, MPC halt can allow the jtag debugger to just start sending commands. When that happens, set the interal debugger halt state to prevent - // MPC run from starting the core. - assign dbg_halt_state_ns = (dbg_halt_state_f | (dbg_halt_req_final | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1)) & ~dbg_resume_req; - assign dbg_run_state_ns = (dbg_run_state_f | dbg_resume_req) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); + // note, MPC halt can allow the jtag debugger to just start sending commands. When that happens, set the interal debugger halt state to prevent + // MPC run from starting the core. 
+ assign dbg_halt_state_ns = (dbg_halt_state_f | (dbg_halt_req_final | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1)) & ~dbg_resume_req; + assign dbg_run_state_ns = (dbg_run_state_f | dbg_resume_req) & (internal_dbg_halt_mode_f & ~dcsr_single_step_running_f); - // tell dbg we are only MPC halted - assign dec_tlu_mpc_halted_only_ns = ~dbg_halt_state_f & mpc_halt_state_f; + // tell dbg we are only MPC halted + assign dec_tlu_mpc_halted_only_ns = ~dbg_halt_state_f & mpc_halt_state_f; - // this asserts from detection of bkpt until after we leave debug mode - assign debug_brkpt_valid = ebreak_to_debug_mode_r_d1 | trigger_hit_dmode_r_d1; - assign debug_brkpt_status_ns = (debug_brkpt_valid | debug_brkpt_status_f) & (internal_dbg_halt_mode & ~dcsr_single_step_running_f); + // this asserts from detection of bkpt until after we leave debug mode + assign debug_brkpt_valid = ebreak_to_debug_mode_r_d1 | trigger_hit_dmode_r_d1; + assign debug_brkpt_status_ns = (debug_brkpt_valid | debug_brkpt_status_f) & (internal_dbg_halt_mode & ~dcsr_single_step_running_f); - // acks back to interface - assign mpc_debug_halt_ack_ns = (mpc_halt_state_f & internal_dbg_halt_mode_f & mpc_debug_halt_req_sync & core_empty) | (mpc_debug_halt_ack_f & mpc_debug_halt_req_sync); - assign mpc_debug_run_ack_ns = (mpc_debug_run_req_sync & ~internal_dbg_halt_mode & ~mpc_debug_halt_req_sync) | (mpc_debug_run_ack_f & mpc_debug_run_req_sync) ; + // acks back to interface + assign mpc_debug_halt_ack_ns = (mpc_halt_state_f & internal_dbg_halt_mode_f & mpc_debug_halt_req_sync & core_empty) | (mpc_debug_halt_ack_f & mpc_debug_halt_req_sync); + assign mpc_debug_run_ack_ns = (mpc_debug_run_req_sync & ~internal_dbg_halt_mode & ~mpc_debug_halt_req_sync) | (mpc_debug_run_ack_f & mpc_debug_run_req_sync) ; - // Pins - assign mpc_debug_halt_ack = mpc_debug_halt_ack_f; - assign mpc_debug_run_ack = mpc_debug_run_ack_f; - assign debug_brkpt_status = debug_brkpt_status_f; + // Pins + assign 
mpc_debug_halt_ack = mpc_debug_halt_ack_f; + assign mpc_debug_run_ack = mpc_debug_run_ack_f; + assign debug_brkpt_status = debug_brkpt_status_f; - // DBG halt req is a pulse, fast ext int in progress has priority - assign dbg_halt_req_held_ns = (dbg_halt_req | dbg_halt_req_held) & ext_int_freeze_d1; - assign dbg_halt_req_final = (dbg_halt_req | dbg_halt_req_held) & ~ext_int_freeze_d1; + // DBG halt req is a pulse, fast ext int in progress has priority + assign dbg_halt_req_held_ns = (dbg_halt_req | dbg_halt_req_held) & ext_int_freeze_d1; + assign dbg_halt_req_final = (dbg_halt_req | dbg_halt_req_held) & ~ext_int_freeze_d1; - // combine MPC and DBG halt requests - assign debug_halt_req = (dbg_halt_req_final | mpc_debug_halt_req_sync | (reset_delayed & ~mpc_reset_run_req)) & ~internal_dbg_halt_mode_f & ~ext_int_freeze_d1; + // combine MPC and DBG halt requests + assign debug_halt_req = (dbg_halt_req_final | mpc_debug_halt_req_sync | (reset_delayed & ~mpc_reset_run_req)) & ~internal_dbg_halt_mode_f & ~ext_int_freeze_d1; - assign debug_resume_req = ~debug_resume_req_f & // squash back to back resumes - ((mpc_run_state_ns & ~dbg_halt_state_ns) | // MPC run req - (dbg_run_state_ns & ~mpc_halt_state_ns)); // dbg request is a pulse + assign debug_resume_req = ~debug_resume_req_f & // squash back to back resumes + ((mpc_run_state_ns & ~dbg_halt_state_ns) | // MPC run req + (dbg_run_state_ns & ~mpc_halt_state_ns)); // dbg request is a pulse - // HALT - // dbg/pmu/fw requests halt, service as soon as lsu is not blocking interrupts - assign take_halt = (debug_halt_req_f | pmu_fw_halt_req_f) & ~synchronous_flush_r & ~mret_r & ~halt_taken_f & ~dec_tlu_flush_noredir_r_d1 & ~take_reset; + // HALT + // dbg/pmu/fw requests halt, service as soon as lsu is not blocking interrupts + assign take_halt = (debug_halt_req_f | pmu_fw_halt_req_f) & ~synchronous_flush_r & ~mret_r & ~halt_taken_f & ~dec_tlu_flush_noredir_r_d1 & ~take_reset; - // hold after we take a halt, so we don't keep 
taking halts - assign halt_taken = (dec_tlu_flush_noredir_r_d1 & ~dec_tlu_flush_pause_r_d1 & ~take_ext_int_start_d1) | (halt_taken_f & ~dbg_tlu_halted_f & ~pmu_fw_tlu_halted_f & ~interrupt_valid_r_d1); + // hold after we take a halt, so we don't keep taking halts + assign halt_taken = (dec_tlu_flush_noredir_r_d1 & ~dec_tlu_flush_pause_r_d1 & ~take_ext_int_start_d1) | (halt_taken_f & ~dbg_tlu_halted_f & ~pmu_fw_tlu_halted_f & ~interrupt_valid_r_d1); - // After doing halt flush (RFNPC) wait until core is idle before asserting a particular halt mode - // It takes a cycle for mb_empty to assert after a fetch, take_halt covers that cycle - assign core_empty = force_halt | + // After doing halt flush (RFNPC) wait until core is idle before asserting a particular halt mode + // It takes a cycle for mb_empty to assert after a fetch, take_halt covers that cycle + assign core_empty = force_halt | (lsu_idle_any & lsu_idle_any_f & ifu_miss_state_idle & ifu_miss_state_idle_f & ~debug_halt_req & ~debug_halt_req_d1 & ~dec_div_active); - assign dec_tlu_core_empty = core_empty; + assign dec_tlu_core_empty = core_empty; -//-------------------------------------------------------------------------------- -// Debug start -// + //-------------------------------------------------------------------------------- + // Debug start + // - assign enter_debug_halt_req = (~internal_dbg_halt_mode_f & debug_halt_req) | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1; + assign enter_debug_halt_req = (~internal_dbg_halt_mode_f & debug_halt_req) | dcsr_single_step_done_f | trigger_hit_dmode_r_d1 | ebreak_to_debug_mode_r_d1; - // dbg halt state active from request until non-step resume - assign internal_dbg_halt_mode = debug_halt_req_ns | (internal_dbg_halt_mode_f & ~(debug_resume_req_f & ~dcsr[DCSR_STEP])); - // dbg halt can access csrs as long as we are not stepping - assign allow_dbg_halt_csr_write = internal_dbg_halt_mode_f & ~dcsr_single_step_running_f; + // dbg halt 
state active from request until non-step resume + assign internal_dbg_halt_mode = debug_halt_req_ns | (internal_dbg_halt_mode_f & ~(debug_resume_req_f & ~dcsr[DCSR_STEP])); + // dbg halt can access csrs as long as we are not stepping + assign allow_dbg_halt_csr_write = internal_dbg_halt_mode_f & ~dcsr_single_step_running_f; - // hold debug_halt_req_ns high until we enter debug halt - assign debug_halt_req_ns = enter_debug_halt_req | (debug_halt_req_f & ~dbg_tlu_halted); + // hold debug_halt_req_ns high until we enter debug halt + assign debug_halt_req_ns = enter_debug_halt_req | (debug_halt_req_f & ~dbg_tlu_halted); - assign dbg_tlu_halted = (debug_halt_req_f & core_empty & halt_taken) | (dbg_tlu_halted_f & ~debug_resume_req_f); + assign dbg_tlu_halted = (debug_halt_req_f & core_empty & halt_taken) | (dbg_tlu_halted_f & ~debug_resume_req_f); - assign resume_ack_ns = (debug_resume_req_f & dbg_tlu_halted_f & dbg_run_state_ns); + assign resume_ack_ns = (debug_resume_req_f & dbg_tlu_halted_f & dbg_run_state_ns); - assign dcsr_single_step_done = dec_tlu_i0_valid_r & ~dec_tlu_dbg_halted & dcsr[DCSR_STEP] & ~rfpc_i0_r; + assign dcsr_single_step_done = dec_tlu_i0_valid_r & ~dec_tlu_dbg_halted & dcsr[DCSR_STEP] & ~rfpc_i0_r; - assign dcsr_single_step_running = (debug_resume_req_f & dcsr[DCSR_STEP]) | (dcsr_single_step_running_f & ~dcsr_single_step_done_f); + assign dcsr_single_step_running = (debug_resume_req_f & dcsr[DCSR_STEP]) | (dcsr_single_step_running_f & ~dcsr_single_step_done_f); - assign dbg_cmd_done_ns = dec_tlu_i0_valid_r & dec_tlu_dbg_halted; + assign dbg_cmd_done_ns = dec_tlu_i0_valid_r & dec_tlu_dbg_halted; - // used to hold off commits after an in-pipe debug mode request (triggers, DCSR) - assign request_debug_mode_r = (trigger_hit_dmode_r | ebreak_to_debug_mode_r) | (request_debug_mode_r_d1 & ~dec_tlu_flush_lower_wb); + // used to hold off commits after an in-pipe debug mode request (triggers, DCSR) + assign request_debug_mode_r = (trigger_hit_dmode_r | 
ebreak_to_debug_mode_r) | (request_debug_mode_r_d1 & ~dec_tlu_flush_lower_wb); - assign request_debug_mode_done = (request_debug_mode_r_d1 | request_debug_mode_done_f) & ~dbg_tlu_halted_f; + assign request_debug_mode_done = (request_debug_mode_r_d1 | request_debug_mode_done_f) & ~dbg_tlu_halted_f; - rvdffie #(18) halt_ff (.*, .clk(free_l2clk), - .din({dec_tlu_flush_noredir_r, halt_taken, lsu_idle_any, ifu_miss_state_idle, dbg_tlu_halted, - resume_ack_ns, debug_halt_req_ns, debug_resume_req, trigger_hit_dmode_r, - dcsr_single_step_done, debug_halt_req, dec_tlu_wr_pause_r, dec_pause_state, - request_debug_mode_r, request_debug_mode_done, dcsr_single_step_running, dec_tlu_flush_pause_r, - dbg_halt_req_held_ns}), - .dout({dec_tlu_flush_noredir_r_d1, halt_taken_f, lsu_idle_any_f, ifu_miss_state_idle_f, dbg_tlu_halted_f, - dec_tlu_resume_ack , debug_halt_req_f, debug_resume_req_f_raw, trigger_hit_dmode_r_d1, - dcsr_single_step_done_f, debug_halt_req_d1, dec_tlu_wr_pause_r_d1, dec_pause_state_f, - request_debug_mode_r_d1, request_debug_mode_done_f, dcsr_single_step_running_f, dec_tlu_flush_pause_r_d1, - dbg_halt_req_held})); + rvdffie #(18) halt_ff ( + .*, + .clk(free_l2clk), + .din({ + dec_tlu_flush_noredir_r, + halt_taken, + lsu_idle_any, + ifu_miss_state_idle, + dbg_tlu_halted, + resume_ack_ns, + debug_halt_req_ns, + debug_resume_req, + trigger_hit_dmode_r, + dcsr_single_step_done, + debug_halt_req, + dec_tlu_wr_pause_r, + dec_pause_state, + request_debug_mode_r, + request_debug_mode_done, + dcsr_single_step_running, + dec_tlu_flush_pause_r, + dbg_halt_req_held_ns + }), + .dout({ + dec_tlu_flush_noredir_r_d1, + halt_taken_f, + lsu_idle_any_f, + ifu_miss_state_idle_f, + dbg_tlu_halted_f, + dec_tlu_resume_ack, + debug_halt_req_f, + debug_resume_req_f_raw, + trigger_hit_dmode_r_d1, + dcsr_single_step_done_f, + debug_halt_req_d1, + dec_tlu_wr_pause_r_d1, + dec_pause_state_f, + request_debug_mode_r_d1, + request_debug_mode_done_f, + dcsr_single_step_running_f, + 
dec_tlu_flush_pause_r_d1, + dbg_halt_req_held + }) + ); - // MPC run collides with DBG halt, fix it here - assign debug_resume_req_f = debug_resume_req_f_raw & ~dbg_halt_req; + // MPC run collides with DBG halt, fix it here + assign debug_resume_req_f = debug_resume_req_f_raw & ~dbg_halt_req; - assign dec_tlu_debug_stall = debug_halt_req_f; - assign dec_tlu_dbg_halted = dbg_tlu_halted_f; - assign dec_tlu_debug_mode = internal_dbg_halt_mode_f; - assign dec_tlu_pmu_fw_halted = pmu_fw_tlu_halted_f; + assign dec_tlu_debug_stall = debug_halt_req_f; + assign dec_tlu_dbg_halted = dbg_tlu_halted_f; + assign dec_tlu_debug_mode = internal_dbg_halt_mode_f; + assign dec_tlu_pmu_fw_halted = pmu_fw_tlu_halted_f; - // kill fetch redirection on flush if going to halt, or if there's a fence during db-halt - assign dec_tlu_flush_noredir_r = take_halt | (fence_i_r & internal_dbg_halt_mode) | dec_tlu_flush_pause_r | (i0_trigger_hit_r & trigger_hit_dmode_r) | take_ext_int_start; + // kill fetch redirection on flush if going to halt, or if there's a fence during db-halt + assign dec_tlu_flush_noredir_r = take_halt | (fence_i_r & internal_dbg_halt_mode) | dec_tlu_flush_pause_r | (i0_trigger_hit_r & trigger_hit_dmode_r) | take_ext_int_start; - assign dec_tlu_flush_extint = take_ext_int_start; + assign dec_tlu_flush_extint = take_ext_int_start; - // 1 cycle after writing the PAUSE counter, flush with noredir to idle F1-D. - assign dec_tlu_flush_pause_r = dec_tlu_wr_pause_r_d1 & ~interrupt_valid_r & ~take_ext_int_start; + // 1 cycle after writing the PAUSE counter, flush with noredir to idle F1-D. 
+ assign dec_tlu_flush_pause_r = dec_tlu_wr_pause_r_d1 & ~interrupt_valid_r & ~take_ext_int_start; - // detect end of pause counter and rfpc - assign pause_expired_r = ~dec_pause_state & dec_pause_state_f & ~(ext_int_ready | ce_int_ready | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | int_timer1_int_hold_f | nmi_int_detected | ext_int_freeze_d1) & ~interrupt_valid_r_d1 & ~debug_halt_req_f & ~pmu_fw_halt_req_f & ~halt_taken_f; + // detect end of pause counter and rfpc + assign pause_expired_r = ~dec_pause_state & dec_pause_state_f & ~(ext_int_ready | ce_int_ready | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | int_timer1_int_hold_f | nmi_int_detected | ext_int_freeze_d1) & ~interrupt_valid_r_d1 & ~debug_halt_req_f & ~pmu_fw_halt_req_f & ~halt_taken_f; - assign dec_tlu_flush_leak_one_r = dec_tlu_flush_lower_r & dcsr[DCSR_STEP] & (dec_tlu_resume_ack | dcsr_single_step_running) & ~dec_tlu_flush_noredir_r; - assign dec_tlu_flush_err_r = dec_tlu_flush_lower_r & (ic_perr_r | iccm_sbecc_r); + assign dec_tlu_flush_leak_one_r = dec_tlu_flush_lower_r & dcsr[DCSR_STEP] & (dec_tlu_resume_ack | dcsr_single_step_running) & ~dec_tlu_flush_noredir_r; + assign dec_tlu_flush_err_r = dec_tlu_flush_lower_r & (ic_perr_r | iccm_sbecc_r); - // If DM attempts to access an illegal CSR, send cmd_fail back - assign dec_dbg_cmd_done = dbg_cmd_done_ns; - assign dec_dbg_cmd_fail = illegal_r & dec_dbg_cmd_done; + // If DM attempts to access an illegal CSR, send cmd_fail back + assign dec_dbg_cmd_done = dbg_cmd_done_ns; + assign dec_dbg_cmd_fail = illegal_r & dec_dbg_cmd_done; - //-------------------------------------------------------------------------------- - //-------------------------------------------------------------------------------- - // Triggers - // -localparam MTDATA1_DMODE = 9; -localparam MTDATA1_SEL = 7; -localparam MTDATA1_ACTION = 6; -localparam MTDATA1_CHAIN = 5; -localparam MTDATA1_MATCH = 4; -localparam MTDATA1_M_ENABLED = 3; -localparam MTDATA1_EXE 
= 2; -localparam MTDATA1_ST = 1; -localparam MTDATA1_LD = 0; + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + // Triggers + // + localparam MTDATA1_DMODE = 9; + localparam MTDATA1_SEL = 7; + localparam MTDATA1_ACTION = 6; + localparam MTDATA1_CHAIN = 5; + localparam MTDATA1_MATCH = 4; + localparam MTDATA1_M_ENABLED = 3; + localparam MTDATA1_EXE = 2; + localparam MTDATA1_ST = 1; + localparam MTDATA1_LD = 0; - // Prioritize trigger hits with other exceptions. - // - // Trigger should have highest priority except: - // - trigger is an execute-data and there is an inst_access exception (lsu triggers won't fire, inst. is nop'd by decode) - // - trigger is a store-data and there is a lsu_acc_exc or lsu_ma_exc. - assign trigger_execute[3:0] = {mtdata1_t3[MTDATA1_EXE], mtdata1_t2[MTDATA1_EXE], mtdata1_t1[MTDATA1_EXE], mtdata1_t0[MTDATA1_EXE]}; - assign trigger_data[3:0] = {mtdata1_t3[MTDATA1_SEL], mtdata1_t2[MTDATA1_SEL], mtdata1_t1[MTDATA1_SEL], mtdata1_t0[MTDATA1_SEL]}; - assign trigger_store[3:0] = {mtdata1_t3[MTDATA1_ST], mtdata1_t2[MTDATA1_ST], mtdata1_t1[MTDATA1_ST], mtdata1_t0[MTDATA1_ST]}; + // Prioritize trigger hits with other exceptions. + // + // Trigger should have highest priority except: + // - trigger is an execute-data and there is an inst_access exception (lsu triggers won't fire, inst. is nop'd by decode) + // - trigger is a store-data and there is a lsu_acc_exc or lsu_ma_exc. 
+ assign trigger_execute[3:0] = { + mtdata1_t3[MTDATA1_EXE], + mtdata1_t2[MTDATA1_EXE], + mtdata1_t1[MTDATA1_EXE], + mtdata1_t0[MTDATA1_EXE] + }; + assign trigger_data[3:0] = { + mtdata1_t3[MTDATA1_SEL], + mtdata1_t2[MTDATA1_SEL], + mtdata1_t1[MTDATA1_SEL], + mtdata1_t0[MTDATA1_SEL] + }; + assign trigger_store[3:0] = { + mtdata1_t3[MTDATA1_ST], mtdata1_t2[MTDATA1_ST], mtdata1_t1[MTDATA1_ST], mtdata1_t0[MTDATA1_ST] + }; - // MSTATUS[MIE] needs to be on to take triggers unless the action is trigger to debug mode. - assign trigger_enabled[3:0] = {(mtdata1_t3[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t3[MTDATA1_M_ENABLED], - (mtdata1_t2[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t2[MTDATA1_M_ENABLED], - (mtdata1_t1[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t1[MTDATA1_M_ENABLED], - (mtdata1_t0[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t0[MTDATA1_M_ENABLED]}; + // MSTATUS[MIE] needs to be on to take triggers unless the action is trigger to debug mode. + assign trigger_enabled[3:0] = { + (mtdata1_t3[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t3[MTDATA1_M_ENABLED], + (mtdata1_t2[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t2[MTDATA1_M_ENABLED], + (mtdata1_t1[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t1[MTDATA1_M_ENABLED], + (mtdata1_t0[MTDATA1_ACTION] | mstatus[MSTATUS_MIE]) & mtdata1_t0[MTDATA1_M_ENABLED] + }; - // iside exceptions are always in i0 - assign i0_iside_trigger_has_pri_r[3:0] = ~( (trigger_execute[3:0] & trigger_data[3:0] & {4{inst_acc_r_raw}}) | // exe-data with inst_acc + // iside exceptions are always in i0 + assign i0_iside_trigger_has_pri_r[3:0] = ~( (trigger_execute[3:0] & trigger_data[3:0] & {4{inst_acc_r_raw}}) | // exe-data with inst_acc ({4{exu_i0_br_error_r | exu_i0_br_start_error_r}})); // branch error in i0 - // lsu excs have to line up with their respective triggers since the lsu op can be i0 - assign i0_lsu_trigger_has_pri_r[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & 
{4{lsu_i0_exc_r_raw}}); + // lsu excs have to line up with their respective triggers since the lsu op can be i0 + assign i0_lsu_trigger_has_pri_r[3:0] = ~(trigger_store[3:0] & trigger_data[3:0] & {4{lsu_i0_exc_r_raw}}); - // trigger hits have to be eval'd to cancel side effect lsu ops even though the pipe is already frozen - assign i0_trigger_eval_r = dec_tlu_i0_valid_r; + // trigger hits have to be eval'd to cancel side effect lsu ops even though the pipe is already frozen + assign i0_trigger_eval_r = dec_tlu_i0_valid_r; - assign i0trigger_qual_r[3:0] = {4{i0_trigger_eval_r}} & dec_tlu_packet_r.i0trigger[3:0] & i0_iside_trigger_has_pri_r[3:0] & i0_lsu_trigger_has_pri_r[3:0] & trigger_enabled[3:0]; + assign i0trigger_qual_r[3:0] = {4{i0_trigger_eval_r}} & dec_tlu_packet_r.i0trigger[3:0] & i0_iside_trigger_has_pri_r[3:0] & i0_lsu_trigger_has_pri_r[3:0] & trigger_enabled[3:0]; - // Qual trigger hits - assign i0_trigger_r[3:0] = ~{4{dec_tlu_flush_lower_wb | dec_tlu_dbg_halted}} & i0trigger_qual_r[3:0]; + // Qual trigger hits + assign i0_trigger_r[3:0] = ~{4{dec_tlu_flush_lower_wb | dec_tlu_dbg_halted}} & i0trigger_qual_r[3:0]; - // chaining can mask raw trigger info - assign i0_trigger_chain_masked_r[3:0] = {i0_trigger_r[3] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[2]), - i0_trigger_r[2] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[3]), - i0_trigger_r[1] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[0]), - i0_trigger_r[0] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[1])}; + // chaining can mask raw trigger info + assign i0_trigger_chain_masked_r[3:0] = { + i0_trigger_r[3] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[2]), + i0_trigger_r[2] & (~mtdata1_t2[MTDATA1_CHAIN] | i0_trigger_r[3]), + i0_trigger_r[1] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[0]), + i0_trigger_r[0] & (~mtdata1_t0[MTDATA1_CHAIN] | i0_trigger_r[1]) + }; - // This is the highest priority by this point. 
- assign i0_trigger_hit_raw_r = |i0_trigger_chain_masked_r[3:0]; + // This is the highest priority by this point. + assign i0_trigger_hit_raw_r = |i0_trigger_chain_masked_r[3:0]; - assign i0_trigger_hit_r = i0_trigger_hit_raw_r; + assign i0_trigger_hit_r = i0_trigger_hit_raw_r; - // Actions include breakpoint, or dmode. Dmode is only possible if the DMODE bit is set. - // Otherwise, take a breakpoint. - assign trigger_action[3:0] = {mtdata1_t3[MTDATA1_ACTION] & mtdata1_t3[MTDATA1_DMODE], - mtdata1_t2[MTDATA1_ACTION] & mtdata1_t2[MTDATA1_DMODE] & ~mtdata1_t2[MTDATA1_CHAIN], - mtdata1_t1[MTDATA1_ACTION] & mtdata1_t1[MTDATA1_DMODE], - mtdata1_t0[MTDATA1_ACTION] & mtdata1_t0[MTDATA1_DMODE] & ~mtdata1_t0[MTDATA1_CHAIN]}; + // Actions include breakpoint, or dmode. Dmode is only possible if the DMODE bit is set. + // Otherwise, take a breakpoint. + assign trigger_action[3:0] = { + mtdata1_t3[MTDATA1_ACTION] & mtdata1_t3[MTDATA1_DMODE], + mtdata1_t2[MTDATA1_ACTION] & mtdata1_t2[MTDATA1_DMODE] & ~mtdata1_t2[MTDATA1_CHAIN], + mtdata1_t1[MTDATA1_ACTION] & mtdata1_t1[MTDATA1_DMODE], + mtdata1_t0[MTDATA1_ACTION] & mtdata1_t0[MTDATA1_DMODE] & ~mtdata1_t0[MTDATA1_CHAIN] + }; - // this is needed to set the HIT bit in the triggers - assign update_hit_bit_r[3:0] = ({4{|i0_trigger_r[3:0] & ~rfpc_i0_r}} & {i0_trigger_chain_masked_r[3], i0_trigger_r[2], i0_trigger_chain_masked_r[1], i0_trigger_r[0]}); + // this is needed to set the HIT bit in the triggers + assign update_hit_bit_r[3:0] = ({4{|i0_trigger_r[3:0] & ~rfpc_i0_r}} & {i0_trigger_chain_masked_r[3], i0_trigger_r[2], i0_trigger_chain_masked_r[1], i0_trigger_r[0]}); - // action, 1 means dmode. Simultaneous triggers with at least 1 set for dmode force entire action to dmode. - assign i0_trigger_action_r = |(i0_trigger_chain_masked_r[3:0] & trigger_action[3:0]); + // action, 1 means dmode. Simultaneous triggers with at least 1 set for dmode force entire action to dmode. 
+ assign i0_trigger_action_r = |(i0_trigger_chain_masked_r[3:0] & trigger_action[3:0]); - assign trigger_hit_dmode_r = (i0_trigger_hit_r & i0_trigger_action_r); + assign trigger_hit_dmode_r = (i0_trigger_hit_r & i0_trigger_action_r); - assign mepc_trigger_hit_sel_pc_r = i0_trigger_hit_r & ~trigger_hit_dmode_r; + assign mepc_trigger_hit_sel_pc_r = i0_trigger_hit_r & ~trigger_hit_dmode_r; -// -// Debug end -//-------------------------------------------------------------------------------- + // + // Debug end + //-------------------------------------------------------------------------------- - //---------------------------------------------------------------------- - // - // Commit - // - //---------------------------------------------------------------------- + //---------------------------------------------------------------------- + // + // Commit + // + //---------------------------------------------------------------------- - //-------------------------------------------------------------------------------- - // External halt (not debug halt) - // - Fully interlocked handshake - // i_cpu_halt_req ____|--------------|_______________ - // core_empty ---------------|___________ - // o_cpu_halt_ack _________________|----|__________ - // o_cpu_halt_status _______________|---------------------|_________ - // i_cpu_run_req ______|----------|____ - // o_cpu_run_ack ____________|------|________ - // + //-------------------------------------------------------------------------------- + // External halt (not debug halt) + // - Fully interlocked handshake + // i_cpu_halt_req ____|--------------|_______________ + // core_empty ---------------|___________ + // o_cpu_halt_ack _________________|----|__________ + // o_cpu_halt_status _______________|---------------------|_________ + // i_cpu_run_req ______|----------|____ + // o_cpu_run_ack ____________|------|________ + // - // debug mode has priority, ignore PMU/FW halt/run while in debug mode - assign i_cpu_halt_req_sync_qual 
= i_cpu_halt_req_sync & ~dec_tlu_debug_mode & ~ext_int_freeze_d1; - assign i_cpu_run_req_sync_qual = i_cpu_run_req_sync & ~dec_tlu_debug_mode & pmu_fw_tlu_halted_f & ~ext_int_freeze_d1; + // debug mode has priority, ignore PMU/FW halt/run while in debug mode + assign i_cpu_halt_req_sync_qual = i_cpu_halt_req_sync & ~dec_tlu_debug_mode & ~ext_int_freeze_d1; + assign i_cpu_run_req_sync_qual = i_cpu_run_req_sync & ~dec_tlu_debug_mode & pmu_fw_tlu_halted_f & ~ext_int_freeze_d1; - rvdffie #(10) exthaltff (.*, .clk(free_l2clk), .din({i_cpu_halt_req_sync_qual, i_cpu_run_req_sync_qual, cpu_halt_status, - cpu_halt_ack, cpu_run_ack, internal_pmu_fw_halt_mode, - pmu_fw_halt_req_ns, pmu_fw_tlu_halted, - int_timer0_int_hold, int_timer1_int_hold}), - .dout({i_cpu_halt_req_d1, i_cpu_run_req_d1_raw, o_cpu_halt_status, - o_cpu_halt_ack, o_cpu_run_ack, internal_pmu_fw_halt_mode_f, - pmu_fw_halt_req_f, pmu_fw_tlu_halted_f, - int_timer0_int_hold_f, int_timer1_int_hold_f})); + rvdffie #(10) exthaltff ( + .*, + .clk(free_l2clk), + .din({ + i_cpu_halt_req_sync_qual, + i_cpu_run_req_sync_qual, + cpu_halt_status, + cpu_halt_ack, + cpu_run_ack, + internal_pmu_fw_halt_mode, + pmu_fw_halt_req_ns, + pmu_fw_tlu_halted, + int_timer0_int_hold, + int_timer1_int_hold + }), + .dout({ + i_cpu_halt_req_d1, + i_cpu_run_req_d1_raw, + o_cpu_halt_status, + o_cpu_halt_ack, + o_cpu_run_ack, + internal_pmu_fw_halt_mode_f, + pmu_fw_halt_req_f, + pmu_fw_tlu_halted_f, + int_timer0_int_hold_f, + int_timer1_int_hold_f + }) + ); - // only happens if we aren't in dgb_halt - assign ext_halt_pulse = i_cpu_halt_req_sync_qual & ~i_cpu_halt_req_d1; + // only happens if we aren't in dgb_halt + assign ext_halt_pulse = i_cpu_halt_req_sync_qual & ~i_cpu_halt_req_d1; - assign enter_pmu_fw_halt_req = ext_halt_pulse | fw_halt_req; + assign enter_pmu_fw_halt_req = ext_halt_pulse | fw_halt_req; - assign pmu_fw_halt_req_ns = (enter_pmu_fw_halt_req | (pmu_fw_halt_req_f & ~pmu_fw_tlu_halted)) & ~debug_halt_req_f; + assign 
pmu_fw_halt_req_ns = (enter_pmu_fw_halt_req | (pmu_fw_halt_req_f & ~pmu_fw_tlu_halted)) & ~debug_halt_req_f; - assign internal_pmu_fw_halt_mode = pmu_fw_halt_req_ns | (internal_pmu_fw_halt_mode_f & ~i_cpu_run_req_d1 & ~debug_halt_req_f); + assign internal_pmu_fw_halt_mode = pmu_fw_halt_req_ns | (internal_pmu_fw_halt_mode_f & ~i_cpu_run_req_d1 & ~debug_halt_req_f); - // debug halt has priority - assign pmu_fw_tlu_halted = ((pmu_fw_halt_req_f & core_empty & halt_taken & ~enter_debug_halt_req) | (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1)) & ~debug_halt_req_f; + // debug halt has priority + assign pmu_fw_tlu_halted = ((pmu_fw_halt_req_f & core_empty & halt_taken & ~enter_debug_halt_req) | (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1)) & ~debug_halt_req_f; - assign cpu_halt_ack = (i_cpu_halt_req_d1 & pmu_fw_tlu_halted_f) | (o_cpu_halt_ack & i_cpu_halt_req_sync); - assign cpu_halt_status = (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1) | (o_cpu_halt_status & ~i_cpu_run_req_d1 & ~internal_dbg_halt_mode_f); - assign cpu_run_ack = (~pmu_fw_tlu_halted_f & i_cpu_run_req_sync) | (o_cpu_halt_status & i_cpu_run_req_d1_raw) | (o_cpu_run_ack & i_cpu_run_req_sync); - assign debug_mode_status = internal_dbg_halt_mode_f; - assign o_debug_mode_status = debug_mode_status; + assign cpu_halt_ack = (i_cpu_halt_req_d1 & pmu_fw_tlu_halted_f) | (o_cpu_halt_ack & i_cpu_halt_req_sync); + assign cpu_halt_status = (pmu_fw_tlu_halted_f & ~i_cpu_run_req_d1) | (o_cpu_halt_status & ~i_cpu_run_req_d1 & ~internal_dbg_halt_mode_f); + assign cpu_run_ack = (~pmu_fw_tlu_halted_f & i_cpu_run_req_sync) | (o_cpu_halt_status & i_cpu_run_req_d1_raw) | (o_cpu_run_ack & i_cpu_run_req_sync); + assign debug_mode_status = internal_dbg_halt_mode_f; + assign o_debug_mode_status = debug_mode_status; - // high priority interrupts can wakeup from external halt, so can unmasked timer interrupts - assign i_cpu_run_req_d1 = i_cpu_run_req_d1_raw | ((nmi_int_detected | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | 
int_timer1_int_hold_f | (mhwakeup & mhwakeup_ready)) & o_cpu_halt_status & ~i_cpu_halt_req_d1); + // high priority interrupts can wakeup from external halt, so can unmasked timer interrupts + assign i_cpu_run_req_d1 = i_cpu_run_req_d1_raw | ((nmi_int_detected | timer_int_ready | soft_int_ready | int_timer0_int_hold_f | int_timer1_int_hold_f | (mhwakeup & mhwakeup_ready)) & o_cpu_halt_status & ~i_cpu_halt_req_d1); - //-------------------------------------------------------------------------------- - //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- + //-------------------------------------------------------------------------------- - assign lsu_single_ecc_error_r = lsu_single_ecc_error_incr; + assign lsu_single_ecc_error_r = lsu_single_ecc_error_incr; - assign lsu_error_pkt_addr_r[31:0] = lsu_error_pkt_r.addr[31:0]; + assign lsu_error_pkt_addr_r[31:0] = lsu_error_pkt_r.addr[31:0]; - assign lsu_exc_valid_r_raw = lsu_error_pkt_r.exc_valid & ~dec_tlu_flush_lower_wb; + assign lsu_exc_valid_r_raw = lsu_error_pkt_r.exc_valid & ~dec_tlu_flush_lower_wb; - assign lsu_i0_exc_r_raw = lsu_error_pkt_r.exc_valid; + assign lsu_i0_exc_r_raw = lsu_error_pkt_r.exc_valid; - assign lsu_i0_exc_r = lsu_i0_exc_r_raw & lsu_exc_valid_r_raw & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign lsu_i0_exc_r = lsu_i0_exc_r_raw & lsu_exc_valid_r_raw & ~i0_trigger_hit_r & ~rfpc_i0_r; - assign lsu_exc_valid_r = lsu_i0_exc_r; + assign lsu_exc_valid_r = lsu_i0_exc_r; - assign lsu_exc_ma_r = lsu_i0_exc_r & ~lsu_error_pkt_r.exc_type; - assign lsu_exc_acc_r = lsu_i0_exc_r & lsu_error_pkt_r.exc_type; - assign lsu_exc_st_r = lsu_i0_exc_r & lsu_error_pkt_r.inst_type; + assign lsu_exc_ma_r = lsu_i0_exc_r & ~lsu_error_pkt_r.exc_type; + assign lsu_exc_acc_r = lsu_i0_exc_r & lsu_error_pkt_r.exc_type; + assign lsu_exc_st_r = lsu_i0_exc_r & lsu_error_pkt_r.inst_type; - // Single bit ECC errors on loads 
are RFNPC corrected, with the corrected data written to the GPR. - // LSU turns the load into a store and patches the data in the DCCM - assign lsu_i0_rfnpc_r = dec_tlu_i0_valid_r & ~i0_trigger_hit_r & + // Single bit ECC errors on loads are RFNPC corrected, with the corrected data written to the GPR. + // LSU turns the load into a store and patches the data in the DCCM + assign lsu_i0_rfnpc_r = dec_tlu_i0_valid_r & ~i0_trigger_hit_r & (~lsu_error_pkt_r.inst_type & lsu_error_pkt_r.single_ecc_error); - // Final commit valids - assign tlu_i0_commit_cmt = dec_tlu_i0_valid_r & + // Final commit valids + assign tlu_i0_commit_cmt = dec_tlu_i0_valid_r & ~rfpc_i0_r & ~lsu_i0_exc_r & ~inst_acc_r & @@ -873,91 +1209,94 @@ localparam MTDATA1_LD = 0; ~request_debug_mode_r_d1 & ~i0_trigger_hit_r; - // unified place to manage the killing of arch state writebacks - assign tlu_i0_kill_writeb_r = rfpc_i0_r | lsu_i0_exc_r | inst_acc_r | (illegal_r & dec_tlu_dbg_halted) | i0_trigger_hit_r; - assign dec_tlu_i0_commit_cmt = tlu_i0_commit_cmt; + // unified place to manage the killing of arch state writebacks + assign tlu_i0_kill_writeb_r = rfpc_i0_r | lsu_i0_exc_r | inst_acc_r | (illegal_r & dec_tlu_dbg_halted) | i0_trigger_hit_r; + assign dec_tlu_i0_commit_cmt = tlu_i0_commit_cmt; - // refetch PC, microarch flush - // ic errors only in pipe0 - assign rfpc_i0_r = ((dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (exu_i0_br_error_r | exu_i0_br_start_error_r)) | // inst commit with rfpc - ((ic_perr_r | iccm_sbecc_r) & ~ext_int_freeze_d1)) & // ic/iccm without inst commit - ~i0_trigger_hit_r & // unless there's a trigger. Err signal to ic/iccm will assert anyway to clear the error. 
- ~lsu_i0_rfnpc_r; + // refetch PC, microarch flush + // ic errors only in pipe0 + assign rfpc_i0_r = ((dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (exu_i0_br_error_r | exu_i0_br_start_error_r)) | // inst commit with rfpc + ((ic_perr_r | iccm_sbecc_r) & ~ext_int_freeze_d1)) & // ic/iccm without inst commit + ~i0_trigger_hit_r & // unless there's a trigger. Err signal to ic/iccm will assert anyway to clear the error. + ~lsu_i0_rfnpc_r; - // From the indication of a iccm single bit error until the first commit or flush, maintain a repair state. In the repair state, rfnpc i0 commits. - assign iccm_repair_state_ns = iccm_sbecc_r | (iccm_repair_state_d1 & ~dec_tlu_flush_lower_r); + // From the indication of a iccm single bit error until the first commit or flush, maintain a repair state. In the repair state, rfnpc i0 commits. + assign iccm_repair_state_ns = iccm_sbecc_r | (iccm_repair_state_d1 & ~dec_tlu_flush_lower_r); - localparam MCPC = 12'h7c2; + localparam MCPC = 12'h7c2; - // this is a flush of last resort, meaning only assert it if there is no other flush happening. - assign iccm_repair_state_rfnpc = tlu_i0_commit_cmt & iccm_repair_state_d1 & + // this is a flush of last resort, meaning only assert it if there is no other flush happening. 
+ assign iccm_repair_state_rfnpc = tlu_i0_commit_cmt & iccm_repair_state_d1 & ~(ebreak_r | ecall_r | mret_r | take_reset | illegal_r | (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCPC))); -if(pt.BTB_ENABLE==1) begin - // go ahead and repair the branch error on other flushes, doesn't have to be the rfpc flush - assign dec_tlu_br0_error_r = exu_i0_br_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; - assign dec_tlu_br0_start_error_r = exu_i0_br_start_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; - assign dec_tlu_br0_v_r = exu_i0_br_valid_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (~exu_i0_br_mp_r | ~exu_pmu_i0_br_ataken); + if (pt.BTB_ENABLE == 1) begin + // go ahead and repair the branch error on other flushes, doesn't have to be the rfpc flush + assign dec_tlu_br0_error_r = exu_i0_br_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; + assign dec_tlu_br0_start_error_r = exu_i0_br_start_error_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1; + assign dec_tlu_br0_v_r = exu_i0_br_valid_r & dec_tlu_i0_valid_r & ~tlu_flush_lower_r_d1 & (~exu_i0_br_mp_r | ~exu_pmu_i0_br_ataken); - assign dec_tlu_br0_r_pkt.hist[1:0] = exu_i0_br_hist_r[1:0]; - assign dec_tlu_br0_r_pkt.br_error = dec_tlu_br0_error_r; - assign dec_tlu_br0_r_pkt.br_start_error = dec_tlu_br0_start_error_r; - assign dec_tlu_br0_r_pkt.valid = dec_tlu_br0_v_r; - assign dec_tlu_br0_r_pkt.way = exu_i0_br_way_r; - assign dec_tlu_br0_r_pkt.middle = exu_i0_br_middle_r; -end // if (pt.BTB_ENABLE==1) + assign dec_tlu_br0_r_pkt.hist[1:0] = exu_i0_br_hist_r[1:0]; + assign dec_tlu_br0_r_pkt.br_error = dec_tlu_br0_error_r; + assign dec_tlu_br0_r_pkt.br_start_error = dec_tlu_br0_start_error_r; + assign dec_tlu_br0_r_pkt.valid = dec_tlu_br0_v_r; + assign dec_tlu_br0_r_pkt.way = exu_i0_br_way_r; + assign dec_tlu_br0_r_pkt.middle = exu_i0_br_middle_r; + end // if (pt.BTB_ENABLE==1) else begin - assign dec_tlu_br0_error_r = '0; - assign dec_tlu_br0_start_error_r = '0; - assign dec_tlu_br0_v_r = '0; - 
assign dec_tlu_br0_r_pkt = '0; -end // else: !if(pt.BTB_ENABLE==1) + assign dec_tlu_br0_error_r = '0; + assign dec_tlu_br0_start_error_r = '0; + assign dec_tlu_br0_v_r = '0; + assign dec_tlu_br0_r_pkt = '0; + end // else: !if(pt.BTB_ENABLE==1) - // only expect these in pipe 0 - assign ebreak_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~dcsr[DCSR_EBREAKM] & ~rfpc_i0_r; - assign ecall_r = (dec_tlu_packet_r.pmu_i0_itype == ECALL) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; - assign illegal_r = ~dec_tlu_packet_r.legal & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; - assign mret_r = (dec_tlu_packet_r.pmu_i0_itype == MRET) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; - // fence_i includes debug only fence_i's - assign fence_i_r = (dec_tlu_packet_r.fence_i & dec_tlu_i0_valid_r & ~i0_trigger_hit_r) & ~rfpc_i0_r; - assign ic_perr_r = ifu_ic_error_start_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; - assign iccm_sbecc_r = ifu_iccm_rd_ecc_single_err_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; - assign inst_acc_r_raw = dec_tlu_packet_r.icaf & dec_tlu_i0_valid_r; - assign inst_acc_r = inst_acc_r_raw & ~rfpc_i0_r & ~i0_trigger_hit_r; - assign inst_acc_second_r = dec_tlu_packet_r.icaf_second; + // only expect these in pipe 0 + assign ebreak_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~dcsr[DCSR_EBREAKM] & ~rfpc_i0_r; + assign ecall_r = (dec_tlu_packet_r.pmu_i0_itype == ECALL) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign illegal_r = ~dec_tlu_packet_r.legal & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign mret_r = (dec_tlu_packet_r.pmu_i0_itype == MRET) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + // fence_i includes debug only fence_i's + assign fence_i_r = (dec_tlu_packet_r.fence_i & 
dec_tlu_i0_valid_r & ~i0_trigger_hit_r) & ~rfpc_i0_r; + assign ic_perr_r = ifu_ic_error_start_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; + assign iccm_sbecc_r = ifu_iccm_rd_ecc_single_err_f & ~ext_int_freeze_d1 & (~internal_dbg_halt_mode_f | dcsr_single_step_running) & ~internal_pmu_fw_halt_mode_f; + assign inst_acc_r_raw = dec_tlu_packet_r.icaf & dec_tlu_i0_valid_r; + assign inst_acc_r = inst_acc_r_raw & ~rfpc_i0_r & ~i0_trigger_hit_r; + assign inst_acc_second_r = dec_tlu_packet_r.icaf_second; - assign ebreak_to_debug_mode_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & dcsr[DCSR_EBREAKM] & ~rfpc_i0_r; + assign ebreak_to_debug_mode_r = (dec_tlu_packet_r.pmu_i0_itype == EBREAK) & dec_tlu_i0_valid_r & ~i0_trigger_hit_r & dcsr[DCSR_EBREAKM] & ~rfpc_i0_r; - rvdff #(1) exctype_wb_ff (.*, .clk(e4e5_clk), - .din (ebreak_to_debug_mode_r ), - .dout(ebreak_to_debug_mode_r_d1)); + rvdff #(1) exctype_wb_ff ( + .*, + .clk (e4e5_clk), + .din (ebreak_to_debug_mode_r), + .dout(ebreak_to_debug_mode_r_d1) + ); - assign dec_tlu_fence_i_r = fence_i_r; - // - // Exceptions - // - // - MEPC <- PC - // - PC <- MTVEC, assert flush_lower - // - MCAUSE <- cause - // - MSCAUSE <- secondary cause - // - MTVAL <- - // - MPIE <- MIE - // - MIE <- 0 - // - assign i0_exception_valid_r = (ebreak_r | ecall_r | illegal_r | inst_acc_r) & ~rfpc_i0_r & ~dec_tlu_dbg_halted; + assign dec_tlu_fence_i_r = fence_i_r; + // + // Exceptions + // + // - MEPC <- PC + // - PC <- MTVEC, assert flush_lower + // - MCAUSE <- cause + // - MSCAUSE <- secondary cause + // - MTVAL <- + // - MPIE <- MIE + // - MIE <- 0 + // + assign i0_exception_valid_r = (ebreak_r | ecall_r | illegal_r | inst_acc_r) & ~rfpc_i0_r & ~dec_tlu_dbg_halted; - // Cause: - // - // 0x2 : illegal - // 0x3 : breakpoint - // 0xb : Environment call M-mode + // Cause: + // + // 0x2 : illegal + // 0x3 : breakpoint + // 0xb : Environment call 
M-mode - assign exc_cause_r[4:0] = ( ({5{take_ext_int}} & 5'h0b) | + assign exc_cause_r[4:0] = ( ({5{take_ext_int}} & 5'h0b) | ({5{take_timer_int}} & 5'h07) | ({5{take_soft_int}} & 5'h03) | ({5{take_int_timer0_int}} & 5'h1d) | @@ -973,112 +1312,111 @@ end // else: !if(pt.BTB_ENABLE==1) ({5{lsu_exc_acc_r & lsu_exc_st_r}} & 5'h07) ) & ~{5{take_nmi}}; - // - // Interrupts - // - // exceptions that are committed have already happened and will cause an int at E4 to wait a cycle - // or more if MSTATUS[MIE] is cleared. - // - // -in priority order, highest to lowest - // -single cycle window where a csr write to MIE/MSTATUS is at E4 when the other conditions for externals are met. - // Hold off externals for a cycle to make sure we are consistent with what was just written - assign mhwakeup_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP] & mie_ns[MIE_MEIE]; - assign ext_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP] & mie_ns[MIE_MEIE] & ~ignore_ext_int_due_to_lsu_stall; - assign ce_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MCEIP] & mie_ns[MIE_MCEIE]; - assign soft_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MSIP] & mie_ns[MIE_MSIE]; - assign timer_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MTIP] & mie_ns[MIE_MTIE]; + // + // Interrupts + // + // exceptions that are committed have already happened and will cause an int at E4 to wait a cycle + // or more if MSTATUS[MIE] is cleared. + // + // -in priority order, highest to lowest + // -single cycle window where a csr write to MIE/MSTATUS is at E4 when the other conditions for externals are met. 
+ // Hold off externals for a cycle to make sure we are consistent with what was just written + assign mhwakeup_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP] & mie_ns[MIE_MEIE]; + assign ext_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MEIP] & mie_ns[MIE_MEIE] & ~ignore_ext_int_due_to_lsu_stall; + assign ce_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MCEIP] & mie_ns[MIE_MCEIE]; + assign soft_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MSIP] & mie_ns[MIE_MSIE]; + assign timer_int_ready = ~dec_csr_stall_int_ff & mstatus_mie_ns & mip[MIP_MTIP] & mie_ns[MIE_MTIE]; - // MIP for internal timers pulses for 1 clock, resets the timer counter. Mip won't hold past the various stall conditions. - assign int_timer0_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE0]; - assign int_timer0_int_ready = mip[MIP_MITIP0] & int_timer0_int_possible; - assign int_timer1_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE1]; - assign int_timer1_int_ready = mip[MIP_MITIP1] & int_timer1_int_possible; + // MIP for internal timers pulses for 1 clock, resets the timer counter. Mip won't hold past the various stall conditions. + assign int_timer0_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE0]; + assign int_timer0_int_ready = mip[MIP_MITIP0] & int_timer0_int_possible; + assign int_timer1_int_possible = mstatus_mie_ns & mie_ns[MIE_MITIE1]; + assign int_timer1_int_ready = mip[MIP_MITIP1] & int_timer1_int_possible; - // Internal timers pulse and reset. If core is PMU/FW halted, the pulse will cause an exit from halt, but won't stick around - // Make it sticky, also for 1 cycle stall conditions. - assign int_timer_stalled = dec_csr_stall_int_ff | synchronous_flush_r | exc_or_int_valid_r_d1 | mret_r; + // Internal timers pulse and reset. If core is PMU/FW halted, the pulse will cause an exit from halt, but won't stick around + // Make it sticky, also for 1 cycle stall conditions. 
+ assign int_timer_stalled = dec_csr_stall_int_ff | synchronous_flush_r | exc_or_int_valid_r_d1 | mret_r; - assign int_timer0_int_hold = (int_timer0_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer0_int_possible & int_timer0_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f); - assign int_timer1_int_hold = (int_timer1_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer1_int_possible & int_timer1_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f); + assign int_timer0_int_hold = (int_timer0_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer0_int_possible & int_timer0_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f); + assign int_timer1_int_hold = (int_timer1_int_ready & (pmu_fw_tlu_halted_f | int_timer_stalled)) | (int_timer1_int_possible & int_timer1_int_hold_f & ~interrupt_valid_r & ~take_ext_int_start & ~internal_dbg_halt_mode_f); - assign internal_dbg_halt_timers = internal_dbg_halt_mode_f & ~dcsr_single_step_running; + assign internal_dbg_halt_timers = internal_dbg_halt_mode_f & ~dcsr_single_step_running; - assign block_interrupts = ( (internal_dbg_halt_mode & (~dcsr_single_step_running | dec_tlu_i0_valid_r)) | // No ints in db-halt unless we are single stepping - internal_pmu_fw_halt_mode | i_cpu_halt_req_d1 |// No ints in PMU/FW halt. 
First we exit halt - take_nmi | // NMI is top priority - ebreak_to_debug_mode_r | // Heading to debug mode, hold off ints - synchronous_flush_r | // exception flush this cycle - exc_or_int_valid_r_d1 | // ext/int past cycle (need time for MIE to update) - mret_r | // mret in progress, for cases were ISR enables ints before mret - ext_int_freeze_d1 // Fast interrupt in progress (optional) - ); + assign block_interrupts = ( (internal_dbg_halt_mode & (~dcsr_single_step_running | dec_tlu_i0_valid_r)) | // No ints in db-halt unless we are single stepping + internal_pmu_fw_halt_mode | i_cpu_halt_req_d1 | // No ints in PMU/FW halt. First we exit halt + take_nmi | // NMI is top priority + ebreak_to_debug_mode_r | // Heading to debug mode, hold off ints + synchronous_flush_r | // exception flush this cycle + exc_or_int_valid_r_d1 | // ext/int past cycle (need time for MIE to update) + mret_r | // mret in progress, for cases were ISR enables ints before mret + ext_int_freeze_d1 // Fast interrupt in progress (optional) + ); -if (pt.FAST_INTERRUPT_REDIRECT) begin + if (pt.FAST_INTERRUPT_REDIRECT) begin - assign take_ext_int_start = ext_int_ready & ~block_interrupts; + assign take_ext_int_start = ext_int_ready & ~block_interrupts; - assign ext_int_freeze = take_ext_int_start | take_ext_int_start_d1 | take_ext_int_start_d2 | take_ext_int_start_d3; - assign take_ext_int = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; - assign fast_int_meicpct = csr_meicpct & dec_csr_any_unq_d; // MEICPCT becomes illegal if fast ints are enabled + assign ext_int_freeze = take_ext_int_start | take_ext_int_start_d1 | take_ext_int_start_d2 | take_ext_int_start_d3; + assign take_ext_int = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; + assign fast_int_meicpct = csr_meicpct & dec_csr_any_unq_d; // MEICPCT becomes illegal if fast ints are enabled - assign ignore_ext_int_due_to_lsu_stall = lsu_fastint_stall_any; -end -else begin - assign take_ext_int_start = 1'b0; - assign ext_int_freeze = 1'b0; - assign 
ext_int_freeze_d1 = 1'b0; - assign take_ext_int_start_d1 = 1'b0; - assign take_ext_int_start_d2 = 1'b0; - assign take_ext_int_start_d3 = 1'b0; - assign fast_int_meicpct = 1'b0; - assign ignore_ext_int_due_to_lsu_stall = 1'b0; + assign ignore_ext_int_due_to_lsu_stall = lsu_fastint_stall_any; + end else begin + assign take_ext_int_start = 1'b0; + assign ext_int_freeze = 1'b0; + assign ext_int_freeze_d1 = 1'b0; + assign take_ext_int_start_d1 = 1'b0; + assign take_ext_int_start_d2 = 1'b0; + assign take_ext_int_start_d3 = 1'b0; + assign fast_int_meicpct = 1'b0; + assign ignore_ext_int_due_to_lsu_stall = 1'b0; - assign take_ext_int = ext_int_ready & ~block_interrupts; -end + assign take_ext_int = ext_int_ready & ~block_interrupts; + end - assign take_ce_int = ce_int_ready & ~ext_int_ready & ~block_interrupts; - assign take_soft_int = soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; - assign take_timer_int = timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; - assign take_int_timer0_int = (int_timer0_int_ready | int_timer0_int_hold_f) & int_timer0_int_possible & ~dec_csr_stall_int_ff & + assign take_ce_int = ce_int_ready & ~ext_int_ready & ~block_interrupts; + assign take_soft_int = soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; + assign take_timer_int = timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; + assign take_int_timer0_int = (int_timer0_int_ready | int_timer0_int_hold_f) & int_timer0_int_possible & ~dec_csr_stall_int_ff & ~timer_int_ready & ~soft_int_ready & ~ext_int_ready & ~ce_int_ready & ~block_interrupts; - assign take_int_timer1_int = (int_timer1_int_ready | int_timer1_int_hold_f) & int_timer1_int_possible & ~dec_csr_stall_int_ff & + assign take_int_timer1_int = (int_timer1_int_ready | int_timer1_int_hold_f) & int_timer1_int_possible & ~dec_csr_stall_int_ff & ~(int_timer0_int_ready | int_timer0_int_hold_f) & ~timer_int_ready & ~soft_int_ready 
& ~ext_int_ready & ~ce_int_ready & ~block_interrupts; - assign take_reset = reset_delayed & mpc_reset_run_req; - assign take_nmi = nmi_int_detected & ~internal_pmu_fw_halt_mode & (~internal_dbg_halt_mode | (dcsr_single_step_running_f & dcsr[DCSR_STEPIE] & ~dec_tlu_i0_valid_r & ~dcsr_single_step_done_f)) & + assign take_reset = reset_delayed & mpc_reset_run_req; + assign take_nmi = nmi_int_detected & ~internal_pmu_fw_halt_mode & (~internal_dbg_halt_mode | (dcsr_single_step_running_f & dcsr[DCSR_STEPIE] & ~dec_tlu_i0_valid_r & ~dcsr_single_step_done_f)) & ~synchronous_flush_r & ~mret_r & ~take_reset & ~ebreak_to_debug_mode_r & (~ext_int_freeze_d1 | (take_ext_int_start_d3 & |lsu_fir_error[1:0])); - assign interrupt_valid_r = take_ext_int | take_timer_int | take_soft_int | take_nmi | take_ce_int | take_int_timer0_int | take_int_timer1_int; + assign interrupt_valid_r = take_ext_int | take_timer_int | take_soft_int | take_nmi | take_ce_int | take_int_timer0_int | take_int_timer1_int; - // Compute interrupt path: - // If vectored async is set in mtvec, flush path for interrupts is MTVEC + (4 * CAUSE); - assign vectored_path[31:1] = {mtvec[30:1], 1'b0} + {25'b0, exc_cause_r[4:0], 1'b0}; - assign interrupt_path[31:1] = take_nmi ? nmi_vec[31:1] : ((mtvec[0] == 1'b1) ? vectored_path[31:1] : {mtvec[30:1], 1'b0}); + // Compute interrupt path: + // If vectored async is set in mtvec, flush path for interrupts is MTVEC + (4 * CAUSE); + assign vectored_path[31:1] = {mtvec[30:1], 1'b0} + {25'b0, exc_cause_r[4:0], 1'b0}; + assign interrupt_path[31:1] = take_nmi ? nmi_vec[31:1] : ((mtvec[0] == 1'b1) ? 
vectored_path[31:1] : {mtvec[30:1], 1'b0}); - assign sel_npc_r = lsu_i0_rfnpc_r | fence_i_r | iccm_repair_state_rfnpc | (i_cpu_run_req_d1 & ~interrupt_valid_r) | (rfpc_i0_r & ~dec_tlu_i0_valid_r); - assign sel_npc_resume = (i_cpu_run_req_d1 & pmu_fw_tlu_halted_f) | pause_expired_r; + assign sel_npc_r = lsu_i0_rfnpc_r | fence_i_r | iccm_repair_state_rfnpc | (i_cpu_run_req_d1 & ~interrupt_valid_r) | (rfpc_i0_r & ~dec_tlu_i0_valid_r); + assign sel_npc_resume = (i_cpu_run_req_d1 & pmu_fw_tlu_halted_f) | pause_expired_r; - assign sel_fir_addr = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; + assign sel_fir_addr = take_ext_int_start_d3 & ~|lsu_fir_error[1:0]; - assign synchronous_flush_r = i0_exception_valid_r | // exception - rfpc_i0_r | // rfpc - lsu_exc_valid_r | // lsu exception in either pipe 0 or pipe 1 - fence_i_r | // fence, a rfnpc - lsu_i0_rfnpc_r | // lsu dccm sb ecc - iccm_repair_state_rfnpc | // Iccm sb ecc - debug_resume_req_f | // resume from debug halt, fetch the dpc - sel_npc_resume | // resume from pmu/fw halt, or from pause and fetch the NPC - dec_tlu_wr_pause_r_d1 | // flush at start of pause - i0_trigger_hit_r; // trigger hit, ebreak or goto debug mode + assign synchronous_flush_r = i0_exception_valid_r | // exception + rfpc_i0_r | // rfpc + lsu_exc_valid_r | // lsu exception in either pipe 0 or pipe 1 + fence_i_r | // fence, a rfnpc + lsu_i0_rfnpc_r | // lsu dccm sb ecc + iccm_repair_state_rfnpc | // Iccm sb ecc + debug_resume_req_f | // resume from debug halt, fetch the dpc + sel_npc_resume | // resume from pmu/fw halt, or from pause and fetch the NPC + dec_tlu_wr_pause_r_d1 | // flush at start of pause + i0_trigger_hit_r; // trigger hit, ebreak or goto debug mode - assign tlu_flush_lower_r = interrupt_valid_r | mret_r | synchronous_flush_r | take_halt | take_reset | take_ext_int_start; + assign tlu_flush_lower_r = interrupt_valid_r | mret_r | synchronous_flush_r | take_halt | take_reset | take_ext_int_start; - assign tlu_flush_path_r[31:1] = 
take_reset ? rst_vec[31:1] : + assign tlu_flush_path_r[31:1] = take_reset ? rst_vec[31:1] : ( ({31{sel_fir_addr}} & lsu_fir_addr[31:1]) | ({31{~take_nmi & sel_npc_r}} & npc_r[31:1]) | @@ -1090,303 +1428,395 @@ end ({31{~take_nmi & debug_resume_req_f}} & dpc[31:1]) | ({31{~take_nmi & sel_npc_resume}} & npc_r_d1[31:1]) ); - rvdffpcie #(31) flush_lower_ff (.*, .en(tlu_flush_lower_r), - .din({tlu_flush_path_r[31:1]}), - .dout({tlu_flush_path_r_d1[31:1]})); + rvdffpcie #(31) flush_lower_ff ( + .*, + .en (tlu_flush_lower_r), + .din ({tlu_flush_path_r[31:1]}), + .dout({tlu_flush_path_r_d1[31:1]}) + ); - assign dec_tlu_flush_lower_wb = tlu_flush_lower_r_d1; - assign dec_tlu_flush_lower_r = tlu_flush_lower_r; - assign dec_tlu_flush_path_r[31:1] = tlu_flush_path_r[31:1]; + assign dec_tlu_flush_lower_wb = tlu_flush_lower_r_d1; + assign dec_tlu_flush_lower_r = tlu_flush_lower_r; + assign dec_tlu_flush_path_r[31:1] = tlu_flush_path_r[31:1]; - // this is used to capture mepc, etc. - assign exc_or_int_valid_r = lsu_exc_valid_r | i0_exception_valid_r | interrupt_valid_r | (i0_trigger_hit_r & ~trigger_hit_dmode_r); + // this is used to capture mepc, etc. 
+ assign exc_or_int_valid_r = lsu_exc_valid_r | i0_exception_valid_r | interrupt_valid_r | (i0_trigger_hit_r & ~trigger_hit_dmode_r); - rvdffie #(12) excinfo_wb_ff (.*, - .din({interrupt_valid_r, i0_exception_valid_r, exc_or_int_valid_r, - exc_cause_r[4:0], tlu_i0_commit_cmt & ~illegal_r, i0_trigger_hit_r, - take_nmi, pause_expired_r }), - .dout({interrupt_valid_r_d1, i0_exception_valid_r_d1, exc_or_int_valid_r_d1, - exc_cause_wb[4:0], i0_valid_wb, trigger_hit_r_d1, - take_nmi_r_d1, pause_expired_wb})); + rvdffie #(12) excinfo_wb_ff ( + .*, + .din({ + interrupt_valid_r, + i0_exception_valid_r, + exc_or_int_valid_r, + exc_cause_r[4:0], + tlu_i0_commit_cmt & ~illegal_r, + i0_trigger_hit_r, + take_nmi, + pause_expired_r + }), + .dout({ + interrupt_valid_r_d1, + i0_exception_valid_r_d1, + exc_or_int_valid_r_d1, + exc_cause_wb[4:0], + i0_valid_wb, + trigger_hit_r_d1, + take_nmi_r_d1, + pause_expired_wb + }) + ); - //---------------------------------------------------------------------- - // - // CSRs - // - //---------------------------------------------------------------------- + //---------------------------------------------------------------------- + // + // CSRs + // + //---------------------------------------------------------------------- - // ---------------------------------------------------------------------- - // MISA (RO) - // [31:30] XLEN - implementation width, 2'b01 - 32 bits - // [12] M - integer mul/div - // [8] I - RV32I - // [2] C - Compressed extension - localparam MISA = 12'h301; + // ---------------------------------------------------------------------- + // MISA (RO) + // [31:30] XLEN - implementation width, 2'b01 - 32 bits + // [12] M - integer mul/div + // [8] I - RV32I + // [2] C - Compressed extension + localparam MISA = 12'h301; - // MVENDORID, MARCHID, MIMPID, MHARTID - localparam MVENDORID = 12'hf11; - localparam MARCHID = 12'hf12; - localparam MIMPID = 12'hf13; - localparam MHARTID = 12'hf14; + // MVENDORID, MARCHID, MIMPID, MHARTID + 
localparam MVENDORID = 12'hf11; + localparam MARCHID = 12'hf12; + localparam MIMPID = 12'hf13; + localparam MHARTID = 12'hf14; - // ---------------------------------------------------------------------- - // MSTATUS (RW) - // [12:11] MPP : Prior priv level, always 2'b11, not flopped - // [7] MPIE : Int enable previous [1] - // [3] MIE : Int enable [0] - localparam MSTATUS = 12'h300; + // ---------------------------------------------------------------------- + // MSTATUS (RW) + // [12:11] MPP : Prior priv level, always 2'b11, not flopped + // [7] MPIE : Int enable previous [1] + // [3] MIE : Int enable [0] + localparam MSTATUS = 12'h300; - //When executing a MRET instruction, supposing MPP holds the value 3, MIE - //is set to MPIE; the privilege mode is changed to 3; MPIE is set to 1; and MPP is set to 3 + //When executing a MRET instruction, supposing MPP holds the value 3, MIE + //is set to MPIE; the privilege mode is changed to 3; MPIE is set to 1; and MPP is set to 3 - assign dec_csr_wen_r_mod = dec_csr_wen_r & ~i0_trigger_hit_r & ~rfpc_i0_r; - assign wr_mstatus_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSTATUS); + assign dec_csr_wen_r_mod = dec_csr_wen_r & ~i0_trigger_hit_r & ~rfpc_i0_r; + assign wr_mstatus_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSTATUS); - // set this even if we don't go to fwhalt due to debug halt. We committed the inst, so ... - assign set_mie_pmu_fw_halt = ~mpmc_b_ns[1] & fw_halt_req; + // set this even if we don't go to fwhalt due to debug halt. We committed the inst, so ... 
+ assign set_mie_pmu_fw_halt = ~mpmc_b_ns[1] & fw_halt_req; - assign mstatus_ns[1:0] = ( ({2{~wr_mstatus_r & exc_or_int_valid_r}} & {mstatus[MSTATUS_MIE], 1'b0}) | + assign mstatus_ns[1:0] = ( ({2{~wr_mstatus_r & exc_or_int_valid_r}} & {mstatus[MSTATUS_MIE], 1'b0}) | ({2{ wr_mstatus_r & exc_or_int_valid_r}} & {dec_csr_wrdata_r[3], 1'b0}) | ({2{mret_r & ~exc_or_int_valid_r}} & {1'b1, mstatus[1]}) | ({2{set_mie_pmu_fw_halt}} & {mstatus[1], 1'b1}) | ({2{wr_mstatus_r & ~exc_or_int_valid_r}} & {dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]}) | ({2{~wr_mstatus_r & ~exc_or_int_valid_r & ~mret_r & ~set_mie_pmu_fw_halt}} & mstatus[1:0]) ); - // gate MIE if we are single stepping and DCSR[STEPIE] is off - assign mstatus_mie_ns = mstatus[MSTATUS_MIE] & (~dcsr_single_step_running_f | dcsr[DCSR_STEPIE]); + // gate MIE if we are single stepping and DCSR[STEPIE] is off + assign mstatus_mie_ns = mstatus[MSTATUS_MIE] & (~dcsr_single_step_running_f | dcsr[DCSR_STEPIE]); - // ---------------------------------------------------------------------- - // MTVEC (RW) - // [31:2] BASE : Trap vector base address - // [1] - Reserved, not implemented, reads zero - // [0] MODE : 0 = Direct, 1 = Asyncs are vectored to BASE + (4 * CAUSE) - localparam MTVEC = 12'h305; + // ---------------------------------------------------------------------- + // MTVEC (RW) + // [31:2] BASE : Trap vector base address + // [1] - Reserved, not implemented, reads zero + // [0] MODE : 0 = Direct, 1 = Asyncs are vectored to BASE + (4 * CAUSE) + localparam MTVEC = 12'h305; - assign wr_mtvec_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVEC); - assign mtvec_ns[30:0] = {dec_csr_wrdata_r[31:2], dec_csr_wrdata_r[0]} ; - rvdffe #(31) mtvec_ff (.*, .en(wr_mtvec_r), .din(mtvec_ns[30:0]), .dout(mtvec[30:0])); + assign wr_mtvec_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVEC); + assign mtvec_ns[30:0] = {dec_csr_wrdata_r[31:2], dec_csr_wrdata_r[0]}; + rvdffe #(31) mtvec_ff ( + .*, + .en (wr_mtvec_r), + .din 
(mtvec_ns[30:0]), + .dout(mtvec[30:0]) + ); - // ---------------------------------------------------------------------- - // MIP (RW) - // - // [30] MCEIP : (RO) M-Mode Correctable Error interrupt pending - // [29] MITIP0 : (RO) M-Mode Internal Timer0 interrupt pending - // [28] MITIP1 : (RO) M-Mode Internal Timer1 interrupt pending - // [11] MEIP : (RO) M-Mode external interrupt pending - // [7] MTIP : (RO) M-Mode timer interrupt pending - // [3] MSIP : (RO) M-Mode software interrupt pending - localparam MIP = 12'h344; + // ---------------------------------------------------------------------- + // MIP (RW) + // + // [30] MCEIP : (RO) M-Mode Correctable Error interrupt pending + // [29] MITIP0 : (RO) M-Mode Internal Timer0 interrupt pending + // [28] MITIP1 : (RO) M-Mode Internal Timer1 interrupt pending + // [11] MEIP : (RO) M-Mode external interrupt pending + // [7] MTIP : (RO) M-Mode timer interrupt pending + // [3] MSIP : (RO) M-Mode software interrupt pending + localparam MIP = 12'h344; - assign ce_int = (mdccme_ce_req | miccme_ce_req | mice_ce_req); + assign ce_int = (mdccme_ce_req | miccme_ce_req | mice_ce_req); - assign mip_ns[5:0] = {ce_int, dec_timer_t0_pulse, dec_timer_t1_pulse, mexintpend, timer_int_sync, soft_int_sync}; + assign mip_ns[5:0] = { + ce_int, dec_timer_t0_pulse, dec_timer_t1_pulse, mexintpend, timer_int_sync, soft_int_sync + }; - // ---------------------------------------------------------------------- - // MIE (RW) - // [30] MCEIE : (RO) M-Mode Correctable Error interrupt enable - // [29] MITIE0 : (RO) M-Mode Internal Timer0 interrupt enable - // [28] MITIE1 : (RO) M-Mode Internal Timer1 interrupt enable - // [11] MEIE : (RW) M-Mode external interrupt enable - // [7] MTIE : (RW) M-Mode timer interrupt enable - // [3] MSIE : (RW) M-Mode software interrupt enable - localparam MIE = 12'h304; + // ---------------------------------------------------------------------- + // MIE (RW) + // [30] MCEIE : (RO) M-Mode Correctable Error interrupt 
enable + // [29] MITIE0 : (RO) M-Mode Internal Timer0 interrupt enable + // [28] MITIE1 : (RO) M-Mode Internal Timer1 interrupt enable + // [11] MEIE : (RW) M-Mode external interrupt enable + // [7] MTIE : (RW) M-Mode timer interrupt enable + // [3] MSIE : (RW) M-Mode software interrupt enable + localparam MIE = 12'h304; - assign wr_mie_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MIE); - assign mie_ns[5:0] = wr_mie_r ? {dec_csr_wrdata_r[30:28], dec_csr_wrdata_r[11], dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]} : mie[5:0]; - rvdff #(6) mie_ff (.*, .clk(csr_wr_clk), .din(mie_ns[5:0]), .dout(mie[5:0])); + assign wr_mie_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MIE); + assign mie_ns[5:0] = wr_mie_r ? {dec_csr_wrdata_r[30:28], dec_csr_wrdata_r[11], dec_csr_wrdata_r[7], dec_csr_wrdata_r[3]} : mie[5:0]; + rvdff #(6) mie_ff ( + .*, + .clk (csr_wr_clk), + .din (mie_ns[5:0]), + .dout(mie[5:0]) + ); - // ---------------------------------------------------------------------- - // MCYCLEL (RW) - // [31:0] : Lower Cycle count + // ---------------------------------------------------------------------- + // MCYCLEL (RW) + // [31:0] : Lower Cycle count - localparam MCYCLEL = 12'hb00; + localparam MCYCLEL = 12'hb00; - assign kill_ebreak_count_r = ebreak_to_debug_mode_r & dcsr[DCSR_STOPC]; + assign kill_ebreak_count_r = ebreak_to_debug_mode_r & dcsr[DCSR_STOPC]; - assign wr_mcyclel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEL); + assign wr_mcyclel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEL); - assign mcyclel_cout_in = ~(kill_ebreak_count_r | (dec_tlu_dbg_halted & dcsr[DCSR_STOPC]) | dec_tlu_pmu_fw_halted | mcountinhibit[0]); + assign mcyclel_cout_in = ~(kill_ebreak_count_r | (dec_tlu_dbg_halted & dcsr[DCSR_STOPC]) | dec_tlu_pmu_fw_halted | mcountinhibit[0]); - // split for power - assign {mcyclela_cout, mcyclel_inc[7:0]} = mcyclel[7:0] + {7'b0, 1'b1}; - assign {mcyclel_cout, mcyclel_inc[31:8]} = mcyclel[31:8] + {23'b0, mcyclela_cout}; + // 
split for power + assign {mcyclela_cout, mcyclel_inc[7:0]} = mcyclel[7:0] + {7'b0, 1'b1}; + assign {mcyclel_cout, mcyclel_inc[31:8]} = mcyclel[31:8] + {23'b0, mcyclela_cout}; - assign mcyclel_ns[31:0] = wr_mcyclel_r ? dec_csr_wrdata_r[31:0] : mcyclel_inc[31:0]; + assign mcyclel_ns[31:0] = wr_mcyclel_r ? dec_csr_wrdata_r[31:0] : mcyclel_inc[31:0]; - rvdffe #(24) mcyclel_bff (.*, .clk(free_l2clk), .en(wr_mcyclel_r | (mcyclela_cout & mcyclel_cout_in)), .din(mcyclel_ns[31:8]), .dout(mcyclel[31:8])); - rvdffe #(8) mcyclel_aff (.*, .clk(free_l2clk), .en(wr_mcyclel_r | mcyclel_cout_in), .din(mcyclel_ns[7:0]), .dout(mcyclel[7:0])); + rvdffe #(24) mcyclel_bff ( + .*, + .clk (free_l2clk), + .en (wr_mcyclel_r | (mcyclela_cout & mcyclel_cout_in)), + .din (mcyclel_ns[31:8]), + .dout(mcyclel[31:8]) + ); + rvdffe #(8) mcyclel_aff ( + .*, + .clk (free_l2clk), + .en (wr_mcyclel_r | mcyclel_cout_in), + .din (mcyclel_ns[7:0]), + .dout(mcyclel[7:0]) + ); - // ---------------------------------------------------------------------- - // MCYCLEH (RW) - // [63:32] : Higher Cycle count - // Chained with mcyclel. Note: mcyclel overflow due to a mcycleh write gets ignored. + // ---------------------------------------------------------------------- + // MCYCLEH (RW) + // [63:32] : Higher Cycle count + // Chained with mcyclel. Note: mcyclel overflow due to a mcycleh write gets ignored. - localparam MCYCLEH = 12'hb80; + localparam MCYCLEH = 12'hb80; - assign wr_mcycleh_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEH); + assign wr_mcycleh_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCYCLEH); - assign mcycleh_inc[31:0] = mcycleh[31:0] + {31'b0, mcyclel_cout_f}; - assign mcycleh_ns[31:0] = wr_mcycleh_r ? dec_csr_wrdata_r[31:0] : mcycleh_inc[31:0]; + assign mcycleh_inc[31:0] = mcycleh[31:0] + {31'b0, mcyclel_cout_f}; + assign mcycleh_ns[31:0] = wr_mcycleh_r ? 
dec_csr_wrdata_r[31:0] : mcycleh_inc[31:0]; - rvdffe #(32) mcycleh_ff (.*, .clk(free_l2clk), .en(wr_mcycleh_r | mcyclel_cout_f), .din(mcycleh_ns[31:0]), .dout(mcycleh[31:0])); + rvdffe #(32) mcycleh_ff ( + .*, + .clk (free_l2clk), + .en (wr_mcycleh_r | mcyclel_cout_f), + .din (mcycleh_ns[31:0]), + .dout(mcycleh[31:0]) + ); - // ---------------------------------------------------------------------- - // MINSTRETL (RW) - // [31:0] : Lower Instruction retired count - // From the spec "Some CSRs, such as the instructions retired counter, instret, may be modified as side effects - // of instruction execution. In these cases, if a CSR access instruction reads a CSR, it reads the - // value prior to the execution of the instruction. If a CSR access instruction writes a CSR, the - // update occurs after the execution of the instruction. In particular, a value written to instret by - // one instruction will be the value read by the following instruction (i.e., the increment of instret - // caused by the first instruction retiring happens before the write of the new value)." - localparam MINSTRETL = 12'hb02; + // ---------------------------------------------------------------------- + // MINSTRETL (RW) + // [31:0] : Lower Instruction retired count + // From the spec "Some CSRs, such as the instructions retired counter, instret, may be modified as side effects + // of instruction execution. In these cases, if a CSR access instruction reads a CSR, it reads the + // value prior to the execution of the instruction. If a CSR access instruction writes a CSR, the + // update occurs after the execution of the instruction. In particular, a value written to instret by + // one instruction will be the value read by the following instruction (i.e., the increment of instret + // caused by the first instruction retiring happens before the write of the new value)." 
+ localparam MINSTRETL = 12'hb02; - assign i0_valid_no_ebreak_ecall_r = dec_tlu_i0_valid_r & ~(ebreak_r | ecall_r | ebreak_to_debug_mode_r | illegal_r | mcountinhibit[2]); + assign i0_valid_no_ebreak_ecall_r = dec_tlu_i0_valid_r & ~(ebreak_r | ecall_r | ebreak_to_debug_mode_r | illegal_r | mcountinhibit[2]); - assign wr_minstretl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETL); + assign wr_minstretl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETL); - assign {minstretl_couta, minstretl_inc[7:0]} = minstretl[7:0] + {7'b0,1'b1}; - assign {minstretl_cout, minstretl_inc[31:8]} = minstretl[31:8] + {23'b0, minstretl_couta}; + assign {minstretl_couta, minstretl_inc[7:0]} = minstretl[7:0] + {7'b0, 1'b1}; + assign {minstretl_cout, minstretl_inc[31:8]} = minstretl[31:8] + {23'b0, minstretl_couta}; - assign minstret_enable = (i0_valid_no_ebreak_ecall_r & tlu_i0_commit_cmt) | wr_minstretl_r; + assign minstret_enable = (i0_valid_no_ebreak_ecall_r & tlu_i0_commit_cmt) | wr_minstretl_r; - assign minstretl_cout_ns = minstretl_cout & ~wr_minstreth_r & i0_valid_no_ebreak_ecall_r & ~dec_tlu_dbg_halted; + assign minstretl_cout_ns = minstretl_cout & ~wr_minstreth_r & i0_valid_no_ebreak_ecall_r & ~dec_tlu_dbg_halted; - assign minstretl_ns[31:0] = wr_minstretl_r ? dec_csr_wrdata_r[31:0] : minstretl_inc[31:0]; - rvdffe #(24) minstretl_bff (.*, .en(wr_minstretl_r | (minstretl_couta & minstret_enable)), - .din(minstretl_ns[31:8]), .dout(minstretl[31:8])); - rvdffe #(8) minstretl_aff (.*, .en(minstret_enable), - .din(minstretl_ns[7:0]), .dout(minstretl[7:0])); + assign minstretl_ns[31:0] = wr_minstretl_r ? 
dec_csr_wrdata_r[31:0] : minstretl_inc[31:0]; + rvdffe #(24) minstretl_bff ( + .*, + .en (wr_minstretl_r | (minstretl_couta & minstret_enable)), + .din (minstretl_ns[31:8]), + .dout(minstretl[31:8]) + ); + rvdffe #(8) minstretl_aff ( + .*, + .en (minstret_enable), + .din (minstretl_ns[7:0]), + .dout(minstretl[7:0]) + ); - assign minstretl_read[31:0] = minstretl[31:0]; - // ---------------------------------------------------------------------- - // MINSTRETH (RW) - // [63:32] : Higher Instret count - // Chained with minstretl. Note: minstretl overflow due to a minstreth write gets ignored. + assign minstretl_read[31:0] = minstretl[31:0]; + // ---------------------------------------------------------------------- + // MINSTRETH (RW) + // [63:32] : Higher Instret count + // Chained with minstretl. Note: minstretl overflow due to a minstreth write gets ignored. - localparam MINSTRETH = 12'hb82; + localparam MINSTRETH = 12'hb82; - assign wr_minstreth_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETH); + assign wr_minstreth_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MINSTRETH); - assign minstreth_inc[31:0] = minstreth[31:0] + {31'b0, minstretl_cout_f}; - assign minstreth_ns[31:0] = wr_minstreth_r ? dec_csr_wrdata_r[31:0] : minstreth_inc[31:0]; - rvdffe #(32) minstreth_ff (.*, .en((minstret_enable_f & minstretl_cout_f) | wr_minstreth_r), .din(minstreth_ns[31:0]), .dout(minstreth[31:0])); + assign minstreth_inc[31:0] = minstreth[31:0] + {31'b0, minstretl_cout_f}; + assign minstreth_ns[31:0] = wr_minstreth_r ? 
dec_csr_wrdata_r[31:0] : minstreth_inc[31:0]; + rvdffe #(32) minstreth_ff ( + .*, + .en ((minstret_enable_f & minstretl_cout_f) | wr_minstreth_r), + .din (minstreth_ns[31:0]), + .dout(minstreth[31:0]) + ); - assign minstreth_read[31:0] = minstreth_inc[31:0]; + assign minstreth_read[31:0] = minstreth_inc[31:0]; - // ---------------------------------------------------------------------- - // MSCRATCH (RW) - // [31:0] : Scratch register - localparam MSCRATCH = 12'h340; + // ---------------------------------------------------------------------- + // MSCRATCH (RW) + // [31:0] : Scratch register + localparam MSCRATCH = 12'h340; - assign wr_mscratch_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCRATCH); + assign wr_mscratch_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCRATCH); - rvdffe #(32) mscratch_ff (.*, .en(wr_mscratch_r), .din(dec_csr_wrdata_r[31:0]), .dout(mscratch[31:0])); + rvdffe #(32) mscratch_ff ( + .*, + .en (wr_mscratch_r), + .din (dec_csr_wrdata_r[31:0]), + .dout(mscratch[31:0]) + ); - // ---------------------------------------------------------------------- - // MEPC (RW) - // [31:1] : Exception PC - localparam MEPC = 12'h341; + // ---------------------------------------------------------------------- + // MEPC (RW) + // [31:1] : Exception PC + localparam MEPC = 12'h341; - // NPC + // NPC - assign sel_exu_npc_r = ~dec_tlu_dbg_halted & ~tlu_flush_lower_r_d1 & dec_tlu_i0_valid_r; - assign sel_flush_npc_r = ~dec_tlu_dbg_halted & tlu_flush_lower_r_d1 & ~dec_tlu_flush_noredir_r_d1; - assign sel_hold_npc_r = ~sel_exu_npc_r & ~sel_flush_npc_r; + assign sel_exu_npc_r = ~dec_tlu_dbg_halted & ~tlu_flush_lower_r_d1 & dec_tlu_i0_valid_r; + assign sel_flush_npc_r = ~dec_tlu_dbg_halted & tlu_flush_lower_r_d1 & ~dec_tlu_flush_noredir_r_d1; + assign sel_hold_npc_r = ~sel_exu_npc_r & ~sel_flush_npc_r; - assign npc_r[31:1] = ( ({31{sel_exu_npc_r}} & exu_npc_r[31:1]) | + assign npc_r[31:1] = ( ({31{sel_exu_npc_r}} & exu_npc_r[31:1]) | ({31{~mpc_reset_run_req & 
reset_delayed}} & rst_vec[31:1]) | // init to reset vector for mpc halt on reset case ({31{(sel_flush_npc_r)}} & tlu_flush_path_r_d1[31:1]) | ({31{(sel_hold_npc_r)}} & npc_r_d1[31:1]) ); - rvdffpcie #(31) npwbc_ff (.*, .en(sel_exu_npc_r | sel_flush_npc_r | reset_delayed), .din(npc_r[31:1]), .dout(npc_r_d1[31:1])); + rvdffpcie #(31) npwbc_ff ( + .*, + .en (sel_exu_npc_r | sel_flush_npc_r | reset_delayed), + .din (npc_r[31:1]), + .dout(npc_r_d1[31:1]) + ); - // PC has to be captured for exceptions and interrupts. For MRET, we could execute it and then take an - // interrupt before the next instruction. - assign pc0_valid_r = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_r; + // PC has to be captured for exceptions and interrupts. For MRET, we could execute it and then take an + // interrupt before the next instruction. + assign pc0_valid_r = ~dec_tlu_dbg_halted & dec_tlu_i0_valid_r; - assign pc_r[31:1] = ( ({31{ pc0_valid_r}} & dec_tlu_i0_pc_r[31:1]) | + assign pc_r[31:1] = ( ({31{ pc0_valid_r}} & dec_tlu_i0_pc_r[31:1]) | ({31{~pc0_valid_r}} & pc_r_d1[31:1])); - rvdffpcie #(31) pwbc_ff (.*, .en(pc0_valid_r), .din(pc_r[31:1]), .dout(pc_r_d1[31:1])); + rvdffpcie #(31) pwbc_ff ( + .*, + .en (pc0_valid_r), + .din (pc_r[31:1]), + .dout(pc_r_d1[31:1]) + ); - assign wr_mepc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEPC); + assign wr_mepc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEPC); - assign mepc_ns[31:1] = ( ({31{i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r}} & pc_r[31:1]) | + assign mepc_ns[31:1] = ( ({31{i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r}} & pc_r[31:1]) | ({31{interrupt_valid_r}} & npc_r[31:1]) | ({31{wr_mepc_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:1]) | ({31{~wr_mepc_r & ~exc_or_int_valid_r}} & mepc[31:1]) ); - rvdffe #(31) mepc_ff (.*, .en(i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r | interrupt_valid_r | wr_mepc_r), .din(mepc_ns[31:1]), .dout(mepc[31:1])); + rvdffe 
#(31) mepc_ff ( + .*, + .en(i0_exception_valid_r | lsu_exc_valid_r | mepc_trigger_hit_sel_pc_r | interrupt_valid_r | wr_mepc_r), + .din(mepc_ns[31:1]), + .dout(mepc[31:1]) + ); - // ---------------------------------------------------------------------- - // MCAUSE (RW) - // [31:0] : Exception Cause - localparam MCAUSE = 12'h342; + // ---------------------------------------------------------------------- + // MCAUSE (RW) + // [31:0] : Exception Cause + localparam MCAUSE = 12'h342; - assign wr_mcause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCAUSE); - assign mcause_sel_nmi_store = exc_or_int_valid_r & take_nmi & nmi_lsu_store_type; - assign mcause_sel_nmi_load = exc_or_int_valid_r & take_nmi & nmi_lsu_load_type; - assign mcause_sel_nmi_ext = exc_or_int_valid_r & take_nmi & take_ext_int_start_d3 & |lsu_fir_error[1:0] & ~nmi_int_detected_f; - // FIR value decoder - // 0 –no error - // 1 –uncorrectable ecc => f000_1000 - // 2 –dccm region access error => f000_1001 - // 3 –non dccm region access error => f000_1002 - assign mcause_fir_error_type[1:0] = {&lsu_fir_error[1:0], lsu_fir_error[1] & ~lsu_fir_error[0]}; + assign wr_mcause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCAUSE); + assign mcause_sel_nmi_store = exc_or_int_valid_r & take_nmi & nmi_lsu_store_type; + assign mcause_sel_nmi_load = exc_or_int_valid_r & take_nmi & nmi_lsu_load_type; + assign mcause_sel_nmi_ext = exc_or_int_valid_r & take_nmi & take_ext_int_start_d3 & |lsu_fir_error[1:0] & ~nmi_int_detected_f; + // FIR value decoder + // 0 –no error + // 1 –uncorrectable ecc => f000_1000 + // 2 –dccm region access error => f000_1001 + // 3 –non dccm region access error => f000_1002 + assign mcause_fir_error_type[1:0] = {&lsu_fir_error[1:0], lsu_fir_error[1] & ~lsu_fir_error[0]}; - assign mcause_ns[31:0] = ( ({32{mcause_sel_nmi_store}} & {32'hf000_0000}) | + assign mcause_ns[31:0] = ( ({32{mcause_sel_nmi_store}} & {32'hf000_0000}) | ({32{mcause_sel_nmi_load}} & {32'hf000_0001}) | 
({32{mcause_sel_nmi_ext}} & {28'hf000_100, 2'b0, mcause_fir_error_type[1:0]}) | ({32{exc_or_int_valid_r & ~take_nmi}} & {interrupt_valid_r, 26'b0, exc_cause_r[4:0]}) | ({32{wr_mcause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[31:0]) | ({32{~wr_mcause_r & ~exc_or_int_valid_r}} & mcause[31:0]) ); - rvdffe #(32) mcause_ff (.*, .en(exc_or_int_valid_r | wr_mcause_r), .din(mcause_ns[31:0]), .dout(mcause[31:0])); - // ---------------------------------------------------------------------- - // MSCAUSE (RW) - // [2:0] : Secondary exception Cause - localparam MSCAUSE = 12'h7ff; + rvdffe #(32) mcause_ff ( + .*, + .en (exc_or_int_valid_r | wr_mcause_r), + .din (mcause_ns[31:0]), + .dout(mcause[31:0]) + ); + // ---------------------------------------------------------------------- + // MSCAUSE (RW) + // [2:0] : Secondary exception Cause + localparam MSCAUSE = 12'h7ff; - assign wr_mscause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCAUSE); + assign wr_mscause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MSCAUSE); - assign ifu_mscause[3:0] = (dec_tlu_packet_r.icaf_type[1:0] == 2'b00) ? 4'b1001 : + assign ifu_mscause[3:0] = (dec_tlu_packet_r.icaf_type[1:0] == 2'b00) ? 
4'b1001 : {2'b00 , dec_tlu_packet_r.icaf_type[1:0]} ; - assign mscause_type[3:0] = ( ({4{lsu_i0_exc_r}} & lsu_error_pkt_r.mscause[3:0]) | + assign mscause_type[3:0] = ( ({4{lsu_i0_exc_r}} & lsu_error_pkt_r.mscause[3:0]) | ({4{i0_trigger_hit_r}} & 4'b0001) | ({4{ebreak_r}} & 4'b0010) | ({4{inst_acc_r}} & ifu_mscause[3:0]) ); - assign mscause_ns[3:0] = ( ({4{exc_or_int_valid_r}} & mscause_type[3:0]) | + assign mscause_ns[3:0] = ( ({4{exc_or_int_valid_r}} & mscause_type[3:0]) | ({4{ wr_mscause_r & ~exc_or_int_valid_r}} & dec_csr_wrdata_r[3:0]) | ({4{~wr_mscause_r & ~exc_or_int_valid_r}} & mscause[3:0]) ); - rvdff #(4) mscause_ff (.*, .clk(e4e5_int_clk), .din(mscause_ns[3:0]), .dout(mscause[3:0])); - // ---------------------------------------------------------------------- - // MTVAL (RW) - // [31:0] : Exception address if relevant - localparam MTVAL = 12'h343; + rvdff #(4) mscause_ff ( + .*, + .clk (e4e5_int_clk), + .din (mscause_ns[3:0]), + .dout(mscause[3:0]) + ); + // ---------------------------------------------------------------------- + // MTVAL (RW) + // [31:0] : Exception address if relevant + localparam MTVAL = 12'h343; - assign wr_mtval_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVAL); - assign mtval_capture_pc_r = exc_or_int_valid_r & (ebreak_r | (inst_acc_r & ~inst_acc_second_r) | mepc_trigger_hit_sel_pc_r) & ~take_nmi; - assign mtval_capture_pc_plus2_r = exc_or_int_valid_r & (inst_acc_r & inst_acc_second_r) & ~take_nmi; - assign mtval_capture_inst_r = exc_or_int_valid_r & illegal_r & ~take_nmi; - assign mtval_capture_lsu_r = exc_or_int_valid_r & lsu_exc_valid_r & ~take_nmi; - assign mtval_clear_r = exc_or_int_valid_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_capture_lsu_r & ~mepc_trigger_hit_sel_pc_r; + assign wr_mtval_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTVAL); + assign mtval_capture_pc_r = exc_or_int_valid_r & (ebreak_r | (inst_acc_r & ~inst_acc_second_r) | mepc_trigger_hit_sel_pc_r) & ~take_nmi; + assign 
mtval_capture_pc_plus2_r = exc_or_int_valid_r & (inst_acc_r & inst_acc_second_r) & ~take_nmi; + assign mtval_capture_inst_r = exc_or_int_valid_r & illegal_r & ~take_nmi; + assign mtval_capture_lsu_r = exc_or_int_valid_r & lsu_exc_valid_r & ~take_nmi; + assign mtval_clear_r = exc_or_int_valid_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_capture_lsu_r & ~mepc_trigger_hit_sel_pc_r; - assign mtval_ns[31:0] = (({32{mtval_capture_pc_r}} & {pc_r[31:1], 1'b0}) | + assign mtval_ns[31:0] = (({32{mtval_capture_pc_r}} & {pc_r[31:1], 1'b0}) | ({32{mtval_capture_pc_plus2_r}} & {pc_r[31:1] + 31'b1, 1'b0}) | ({32{mtval_capture_inst_r}} & dec_illegal_inst[31:0]) | ({32{mtval_capture_lsu_r}} & lsu_error_pkt_addr_r[31:0]) | @@ -1394,703 +1824,902 @@ end ({32{~take_nmi & ~wr_mtval_r & ~mtval_capture_pc_r & ~mtval_capture_inst_r & ~mtval_clear_r & ~mtval_capture_lsu_r}} & mtval[31:0]) ); - rvdffe #(32) mtval_ff (.*, .en(tlu_flush_lower_r | wr_mtval_r), .din(mtval_ns[31:0]), .dout(mtval[31:0])); - - // ---------------------------------------------------------------------- - // MCGC (RW) Clock gating control - // [31:10]: Reserved, reads 0x0 - // [9] : picio_clk_override - // [7] : dec_clk_override - // [6] : Unused - // [5] : ifu_clk_override - // [4] : lsu_clk_override - // [3] : bus_clk_override - // [2] : pic_clk_override - // [1] : dccm_clk_override - // [0] : icm_clk_override - // - localparam MCGC = 12'h7f8; - assign wr_mcgc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCGC); - - assign mcgc_ns[9:0] = wr_mcgc_r ? 
{~dec_csr_wrdata_r[9], dec_csr_wrdata_r[8:0]} : mcgc_int[9:0]; - rvdffe #(10) mcgc_ff (.*, .en(wr_mcgc_r), .din(mcgc_ns[9:0]), .dout(mcgc_int[9:0])); - - assign mcgc[9:0] = {~mcgc_int[9], mcgc_int[8:0]}; - - assign dec_tlu_picio_clk_override= mcgc[9]; - assign dec_tlu_misc_clk_override = mcgc[8]; - assign dec_tlu_dec_clk_override = mcgc[7]; - //sign dec_tlu_exu_clk_override = mcgc[6]; - assign dec_tlu_ifu_clk_override = mcgc[5]; - assign dec_tlu_lsu_clk_override = mcgc[4]; - assign dec_tlu_bus_clk_override = mcgc[3]; - assign dec_tlu_pic_clk_override = mcgc[2]; - assign dec_tlu_dccm_clk_override = mcgc[1]; - assign dec_tlu_icm_clk_override = mcgc[0]; - - // ---------------------------------------------------------------------- - // MFDC (RW) Feature Disable Control - // [31:19] : Reserved, reads 0x0 - // [18:16] : DMA QoS Prty - // [15:13] : Reserved, reads 0x0 - // [12] : Disable trace - // [11] : Disable external load forwarding - // [10] : Disable dual issue - // [9] : Disable pic multiple ints - // [8] : Disable core ecc - // [7] : Disable secondary alu?s - // [6] : Unused, 0x0 - // [5] : Disable non-blocking loads/divides - // [4] : Disable fast divide - // [3] : Disable branch prediction and return stack - // [2] : Disable write buffer coalescing - // [1] : Disable load misses that bypass the write buffer - // [0] : Disable pipelining - Enable single instruction execution - // - localparam MFDC = 12'h7f9; - - assign wr_mfdc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDC); - - rvdffe #(16) mfdc_ff (.*, .en(wr_mfdc_r), .din({mfdc_ns[15:0]}), .dout(mfdc_int[15:0])); - - // flip poweron value of bit 6 for AXI build - if(pt.BUILD_AXI4==1) begin : axi4 - // flip poweron valid of bit 12 - assign mfdc_ns[15:0] = {~dec_csr_wrdata_r[18:16], dec_csr_wrdata_r[12], dec_csr_wrdata_r[11:7], ~dec_csr_wrdata_r[6], dec_csr_wrdata_r[5:0]}; - assign mfdc[18:0] = {~mfdc_int[15:13], 3'b0, mfdc_int[12], mfdc_int[11:7], ~mfdc_int[6], mfdc_int[5:0]}; - end - else begin - // 
flip poweron valid of bit 12 - assign mfdc_ns[15:0] = {~dec_csr_wrdata_r[18:16],dec_csr_wrdata_r[12:0]}; - assign mfdc[18:0] = {~mfdc_int[15:13], 3'b0, mfdc_int[12:0]}; - end - - - assign dec_tlu_dma_qos_prty[2:0] = mfdc[18:16]; - assign dec_tlu_trace_disable = mfdc[12]; - assign dec_tlu_external_ldfwd_disable = mfdc[11]; - assign dec_tlu_core_ecc_disable = mfdc[8]; - assign dec_tlu_sideeffect_posted_disable = mfdc[6]; - assign dec_tlu_bpred_disable = mfdc[3]; - assign dec_tlu_wb_coalescing_disable = mfdc[2]; - assign dec_tlu_pipelining_disable = mfdc[0]; - - // ---------------------------------------------------------------------- - // MCPC (RW) Pause counter - // [31:0] : Reads 0x0, decs in the wb register in decode_ctl - - assign dec_tlu_wr_pause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCPC) & ~interrupt_valid_r & ~take_ext_int_start; - - // ---------------------------------------------------------------------- - // MRAC (RW) - // [31:0] : Region Access Control Register, 16 regions, {side_effect, cachable} pairs - localparam MRAC = 12'h7c0; - - assign wr_mrac_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MRAC); - - // prevent pairs of 0x11, side_effect and cacheable - assign mrac_in[31:0] = {dec_csr_wrdata_r[31], dec_csr_wrdata_r[30] & ~dec_csr_wrdata_r[31], - dec_csr_wrdata_r[29], dec_csr_wrdata_r[28] & ~dec_csr_wrdata_r[29], - dec_csr_wrdata_r[27], dec_csr_wrdata_r[26] & ~dec_csr_wrdata_r[27], - dec_csr_wrdata_r[25], dec_csr_wrdata_r[24] & ~dec_csr_wrdata_r[25], - dec_csr_wrdata_r[23], dec_csr_wrdata_r[22] & ~dec_csr_wrdata_r[23], - dec_csr_wrdata_r[21], dec_csr_wrdata_r[20] & ~dec_csr_wrdata_r[21], - dec_csr_wrdata_r[19], dec_csr_wrdata_r[18] & ~dec_csr_wrdata_r[19], - dec_csr_wrdata_r[17], dec_csr_wrdata_r[16] & ~dec_csr_wrdata_r[17], - dec_csr_wrdata_r[15], dec_csr_wrdata_r[14] & ~dec_csr_wrdata_r[15], - dec_csr_wrdata_r[13], dec_csr_wrdata_r[12] & ~dec_csr_wrdata_r[13], - dec_csr_wrdata_r[11], dec_csr_wrdata_r[10] & ~dec_csr_wrdata_r[11], - 
dec_csr_wrdata_r[9], dec_csr_wrdata_r[8] & ~dec_csr_wrdata_r[9], - dec_csr_wrdata_r[7], dec_csr_wrdata_r[6] & ~dec_csr_wrdata_r[7], - dec_csr_wrdata_r[5], dec_csr_wrdata_r[4] & ~dec_csr_wrdata_r[5], - dec_csr_wrdata_r[3], dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[3], - dec_csr_wrdata_r[1], dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[1]}; - - rvdffe #(32) mrac_ff (.*, .en(wr_mrac_r), .din(mrac_in[31:0]), .dout(mrac[31:0])); - - // drive to LSU/IFU - assign dec_tlu_mrac_ff[31:0] = mrac[31:0]; - - // ---------------------------------------------------------------------- - // MDEAU (WAR0) - // [31:0] : Dbus Error Address Unlock register - // - localparam MDEAU = 12'hbc0; - - assign wr_mdeau_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDEAU); - - - // ---------------------------------------------------------------------- - // MDSEAC (R) - // [31:0] : Dbus Store Error Address Capture register - // - localparam MDSEAC = 12'hfc0; - - // only capture error bus if the MDSEAC reg is not locked - assign mdseac_locked_ns = mdseac_en | (mdseac_locked_f & ~wr_mdeau_r); - - assign mdseac_en = (lsu_imprecise_error_store_any | lsu_imprecise_error_load_any) & ~nmi_int_detected_f & ~mdseac_locked_f; - - rvdffe #(32) mdseac_ff (.*, .en(mdseac_en), .din(lsu_imprecise_error_addr_any[31:0]), .dout(mdseac[31:0])); - - // ---------------------------------------------------------------------- - // MPMC (R0W1) - // [0] : FW halt - // [1] : Set MSTATUS[MIE] on halt - - localparam MPMC = 12'h7c6; - - assign wr_mpmc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MPMC); - - // allow the cycle of the dbg halt flush that contains the wr_mpmc_r to - // set the mstatus bit potentially, use delayed version of internal dbg halt. - assign fw_halt_req = wr_mpmc_r & dec_csr_wrdata_r[0] & ~internal_dbg_halt_mode_f2 & ~ext_int_freeze_d1; - - assign fw_halted_ns = (fw_halt_req | fw_halted) & ~set_mie_pmu_fw_halt; - assign mpmc_b_ns[1] = wr_mpmc_r ? 
~dec_csr_wrdata_r[1] : ~mpmc[1]; - rvdff #(1) mpmc_ff (.*, .clk(csr_wr_clk), .din(mpmc_b_ns[1]), .dout(mpmc_b[1])); - assign mpmc[1] = ~mpmc_b[1]; - - // ---------------------------------------------------------------------- - // MICECT (I-Cache error counter/threshold) - // [31:27] : Icache parity error threshold - // [26:0] : Icache parity error count - localparam MICECT = 12'h7f0; - - assign csr_sat[31:27] = (dec_csr_wrdata_r[31:27] > 5'd26) ? 5'd26 : dec_csr_wrdata_r[31:27]; - - assign wr_micect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICECT); - assign micect_inc[26:0] = micect[26:0] + {26'b0, ic_perr_r}; - assign micect_ns = wr_micect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {micect[31:27], micect_inc[26:0]}; - - rvdffe #(32) micect_ff (.*, .en(wr_micect_r | ic_perr_r), .din(micect_ns[31:0]), .dout(micect[31:0])); - - assign mice_ce_req = |({32'hffffffff << micect[31:27]} & {5'b0, micect[26:0]}); - - // ---------------------------------------------------------------------- - // MICCMECT (ICCM error counter/threshold) - // [31:27] : ICCM parity error threshold - // [26:0] : ICCM parity error count - localparam MICCMECT = 12'h7f1; - - assign wr_miccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICCMECT); - assign miccmect_inc[26:0] = miccmect[26:0] + {26'b0, iccm_sbecc_r | iccm_dma_sb_error}; - assign miccmect_ns = wr_miccmect_r ? 
{csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {miccmect[31:27], miccmect_inc[26:0]}; - - rvdffe #(32) miccmect_ff (.*, .clk(free_l2clk), .en(wr_miccmect_r | iccm_sbecc_r | iccm_dma_sb_error), .din(miccmect_ns[31:0]), .dout(miccmect[31:0])); - - assign miccme_ce_req = |({32'hffffffff << miccmect[31:27]} & {5'b0, miccmect[26:0]}); - - // ---------------------------------------------------------------------- - // MDCCMECT (DCCM error counter/threshold) - // [31:27] : DCCM parity error threshold - // [26:0] : DCCM parity error count - localparam MDCCMECT = 12'h7f2; - - assign wr_mdccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDCCMECT); - assign mdccmect_inc[26:0] = mdccmect[26:0] + {26'b0, lsu_single_ecc_error_r_d1}; - assign mdccmect_ns = wr_mdccmect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {mdccmect[31:27], mdccmect_inc[26:0]}; - - rvdffe #(32) mdccmect_ff (.*, .clk(free_l2clk), .en(wr_mdccmect_r | lsu_single_ecc_error_r_d1), .din(mdccmect_ns[31:0]), .dout(mdccmect[31:0])); - - assign mdccme_ce_req = |({32'hffffffff << mdccmect[31:27]} & {5'b0, mdccmect[26:0]}); - - - // ---------------------------------------------------------------------- - // MFDHT (Force Debug Halt Threshold) - // [5:1] : Halt timeout threshold (power of 2) - // [0] : Halt timeout enabled - localparam MFDHT = 12'h7ce; - - assign wr_mfdht_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHT); - - assign mfdht_ns[5:0] = wr_mfdht_r ? dec_csr_wrdata_r[5:0] : mfdht[5:0]; - - rvdffs #(6) mfdht_ff (.*, .clk(csr_wr_clk), .en(wr_mfdht_r), .din(mfdht_ns[5:0]), .dout(mfdht[5:0])); - - // ---------------------------------------------------------------------- - // MFDHS(RW) - // [1] : LSU operation pending when debug halt threshold reached - // [0] : IFU operation pending when debug halt threshold reached - - localparam MFDHS = 12'h7cf; - - assign wr_mfdhs_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHS); - - assign mfdhs_ns[1:0] = wr_mfdhs_r ? 
dec_csr_wrdata_r[1:0] : ((dbg_tlu_halted & ~dbg_tlu_halted_f) ? {~lsu_idle_any_f, ~ifu_miss_state_idle_f} : mfdhs[1:0]); - - rvdffs #(2) mfdhs_ff (.*, .clk(free_clk), .en(wr_mfdhs_r | dbg_tlu_halted), .din(mfdhs_ns[1:0]), .dout(mfdhs[1:0])); - - assign force_halt_ctr[31:0] = debug_halt_req_f ? (force_halt_ctr_f[31:0] + 32'b1) : (dbg_tlu_halted_f ? 32'b0 : force_halt_ctr_f[31:0]); - - rvdffe #(32) forcehaltctr_ff (.*, .en(mfdht[0]), .din(force_halt_ctr[31:0]), .dout(force_halt_ctr_f[31:0])); - - assign force_halt = mfdht[0] & |(force_halt_ctr_f[31:0] & (32'hffffffff << mfdht[5:1])); - - - // ---------------------------------------------------------------------- - // MEIVT (External Interrupt Vector Table (R/W)) - // [31:10]: Base address (R/W) - // [9:0] : Reserved, reads 0x0 - localparam MEIVT = 12'hbc8; - - assign wr_meivt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIVT); - - rvdffe #(22) meivt_ff (.*, .en(wr_meivt_r), .din(dec_csr_wrdata_r[31:10]), .dout(meivt[31:10])); - - - // ---------------------------------------------------------------------- - // MEIHAP (External Interrupt Handler Access Pointer (R)) - // [31:10]: Base address (R/W) - // [9:2] : ClaimID (R) - // [1:0] : Reserved, 0x0 - localparam MEIHAP = 12'hfc8; - - assign wr_meihap_r = wr_meicpct_r; - - rvdffe #(8) meihap_ff (.*, .en(wr_meihap_r), .din(pic_claimid[7:0]), .dout(meihap[9:2])); - - assign dec_tlu_meihap[31:2] = {meivt[31:10], meihap[9:2]}; - // ---------------------------------------------------------------------- - // MEICURPL (R/W) - // [31:4] : Reserved (read 0x0) - // [3:0] : CURRPRI - Priority level of current interrupt service routine (R/W) - localparam MEICURPL = 12'hbcc; - - assign wr_meicurpl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICURPL); - assign meicurpl_ns[3:0] = wr_meicurpl_r ? 
dec_csr_wrdata_r[3:0] : meicurpl[3:0]; - - rvdff #(4) meicurpl_ff (.*, .clk(csr_wr_clk), .din(meicurpl_ns[3:0]), .dout(meicurpl[3:0])); - - // PIC needs this reg - assign dec_tlu_meicurpl[3:0] = meicurpl[3:0]; - - - // ---------------------------------------------------------------------- - // MEICIDPL (R/W) - // [31:4] : Reserved (read 0x0) - // [3:0] : External Interrupt Claim ID's Priority Level Register - localparam MEICIDPL = 12'hbcb; - - assign wr_meicidpl_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICIDPL)) | take_ext_int_start; - - assign meicidpl_ns[3:0] = wr_meicpct_r ? pic_pl[3:0] : (wr_meicidpl_r ? dec_csr_wrdata_r[3:0] : meicidpl[3:0]); - - - // ---------------------------------------------------------------------- - // MEICPCT (Capture CLAIMID in MEIHAP and PL in MEICIDPL - // [31:1] : Reserved (read 0x0) - // [0] : Capture (W1, Read 0) - localparam MEICPCT = 12'hbca; - - assign wr_meicpct_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICPCT)) | take_ext_int_start; - - // ---------------------------------------------------------------------- - // MEIPT (External Interrupt Priority Threshold) - // [31:4] : Reserved (read 0x0) - // [3:0] : PRITHRESH - localparam MEIPT = 12'hbc9; - - assign wr_meipt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIPT); - assign meipt_ns[3:0] = wr_meipt_r ? 
dec_csr_wrdata_r[3:0] : meipt[3:0]; - - rvdff #(4) meipt_ff (.*, .clk(csr_wr_clk), .din(meipt_ns[3:0]), .dout(meipt[3:0])); - - // to PIC - assign dec_tlu_meipt[3:0] = meipt[3:0]; - // ---------------------------------------------------------------------- - // DCSR (R/W) (Only accessible in debug mode) - // [31:28] : xdebugver (hard coded to 0x4) RO - // [27:16] : 0x0, reserved - // [15] : ebreakm - // [14] : 0x0, reserved - // [13] : ebreaks (0x0 for this core) - // [12] : ebreaku (0x0 for this core) - // [11] : stepie - // [10] : stopcount - // [9] : 0x0 //stoptime - // [8:6] : cause (RO) - // [5:4] : 0x0, reserved - // [3] : nmip - // [2] : step - // [1:0] : prv (0x3 for this core) - // - localparam DCSR = 12'h7b0; - - // RV has clarified that 'priority 4' in the spec means top priority. - // 4. single step. 3. Debugger request. 2. Ebreak. 1. Trigger. - - // RV debug spec indicates a cause priority change for trigger hits during single step. - assign trigger_hit_for_dscr_cause_r_d1 = trigger_hit_dmode_r_d1 | (trigger_hit_r_d1 & dcsr_single_step_done_f); - - assign dcsr_cause[8:6] = ( ({3{dcsr_single_step_done_f & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1 & ~debug_halt_req}} & 3'b100) | + rvdffe #(32) mtval_ff ( + .*, + .en (tlu_flush_lower_r | wr_mtval_r), + .din (mtval_ns[31:0]), + .dout(mtval[31:0]) + ); + + // ---------------------------------------------------------------------- + // MCGC (RW) Clock gating control + // [31:10]: Reserved, reads 0x0 + // [9] : picio_clk_override + // [7] : dec_clk_override + // [6] : Unused + // [5] : ifu_clk_override + // [4] : lsu_clk_override + // [3] : bus_clk_override + // [2] : pic_clk_override + // [1] : dccm_clk_override + // [0] : icm_clk_override + // + localparam MCGC = 12'h7f8; + assign wr_mcgc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCGC); + + assign mcgc_ns[9:0] = wr_mcgc_r ? 
{~dec_csr_wrdata_r[9], dec_csr_wrdata_r[8:0]} : mcgc_int[9:0]; + rvdffe #(10) mcgc_ff ( + .*, + .en (wr_mcgc_r), + .din (mcgc_ns[9:0]), + .dout(mcgc_int[9:0]) + ); + + assign mcgc[9:0] = {~mcgc_int[9], mcgc_int[8:0]}; + + assign dec_tlu_picio_clk_override = mcgc[9]; + assign dec_tlu_misc_clk_override = mcgc[8]; + assign dec_tlu_dec_clk_override = mcgc[7]; + //sign dec_tlu_exu_clk_override = mcgc[6]; + assign dec_tlu_ifu_clk_override = mcgc[5]; + assign dec_tlu_lsu_clk_override = mcgc[4]; + assign dec_tlu_bus_clk_override = mcgc[3]; + assign dec_tlu_pic_clk_override = mcgc[2]; + assign dec_tlu_dccm_clk_override = mcgc[1]; + assign dec_tlu_icm_clk_override = mcgc[0]; + + // ---------------------------------------------------------------------- + // MFDC (RW) Feature Disable Control + // [31:19] : Reserved, reads 0x0 + // [18:16] : DMA QoS Prty + // [15:13] : Reserved, reads 0x0 + // [12] : Disable trace + // [11] : Disable external load forwarding + // [10] : Disable dual issue + // [9] : Disable pic multiple ints + // [8] : Disable core ecc + // [7] : Disable secondary alu?s + // [6] : Unused, 0x0 + // [5] : Disable non-blocking loads/divides + // [4] : Disable fast divide + // [3] : Disable branch prediction and return stack + // [2] : Disable write buffer coalescing + // [1] : Disable load misses that bypass the write buffer + // [0] : Disable pipelining - Enable single instruction execution + // + localparam MFDC = 12'h7f9; + + assign wr_mfdc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDC); + + rvdffe #(16) mfdc_ff ( + .*, + .en (wr_mfdc_r), + .din ({mfdc_ns[15:0]}), + .dout(mfdc_int[15:0]) + ); + + // flip poweron value of bit 6 for AXI build + if (pt.BUILD_AXI4 == 1) begin : axi4 + // flip poweron valid of bit 12 + assign mfdc_ns[15:0] = { + ~dec_csr_wrdata_r[18:16], + dec_csr_wrdata_r[12], + dec_csr_wrdata_r[11:7], + ~dec_csr_wrdata_r[6], + dec_csr_wrdata_r[5:0] + }; + assign mfdc[18:0] = { + ~mfdc_int[15:13], 3'b0, mfdc_int[12], mfdc_int[11:7], 
~mfdc_int[6], mfdc_int[5:0] + }; + end else begin + // flip poweron valid of bit 12 + assign mfdc_ns[15:0] = {~dec_csr_wrdata_r[18:16], dec_csr_wrdata_r[12:0]}; + assign mfdc[18:0] = {~mfdc_int[15:13], 3'b0, mfdc_int[12:0]}; + end + + + assign dec_tlu_dma_qos_prty[2:0] = mfdc[18:16]; + assign dec_tlu_trace_disable = mfdc[12]; + assign dec_tlu_external_ldfwd_disable = mfdc[11]; + assign dec_tlu_core_ecc_disable = mfdc[8]; + assign dec_tlu_sideeffect_posted_disable = mfdc[6]; + assign dec_tlu_bpred_disable = mfdc[3]; + assign dec_tlu_wb_coalescing_disable = mfdc[2]; + assign dec_tlu_pipelining_disable = mfdc[0]; + + // ---------------------------------------------------------------------- + // MCPC (RW) Pause counter + // [31:0] : Reads 0x0, decs in the wb register in decode_ctl + + assign dec_tlu_wr_pause_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCPC) & ~interrupt_valid_r & ~take_ext_int_start; + + // ---------------------------------------------------------------------- + // MRAC (RW) + // [31:0] : Region Access Control Register, 16 regions, {side_effect, cachable} pairs + localparam MRAC = 12'h7c0; + + assign wr_mrac_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MRAC); + + // prevent pairs of 0x11, side_effect and cacheable + assign mrac_in[31:0] = { + dec_csr_wrdata_r[31], + dec_csr_wrdata_r[30] & ~dec_csr_wrdata_r[31], + dec_csr_wrdata_r[29], + dec_csr_wrdata_r[28] & ~dec_csr_wrdata_r[29], + dec_csr_wrdata_r[27], + dec_csr_wrdata_r[26] & ~dec_csr_wrdata_r[27], + dec_csr_wrdata_r[25], + dec_csr_wrdata_r[24] & ~dec_csr_wrdata_r[25], + dec_csr_wrdata_r[23], + dec_csr_wrdata_r[22] & ~dec_csr_wrdata_r[23], + dec_csr_wrdata_r[21], + dec_csr_wrdata_r[20] & ~dec_csr_wrdata_r[21], + dec_csr_wrdata_r[19], + dec_csr_wrdata_r[18] & ~dec_csr_wrdata_r[19], + dec_csr_wrdata_r[17], + dec_csr_wrdata_r[16] & ~dec_csr_wrdata_r[17], + dec_csr_wrdata_r[15], + dec_csr_wrdata_r[14] & ~dec_csr_wrdata_r[15], + dec_csr_wrdata_r[13], + dec_csr_wrdata_r[12] & 
~dec_csr_wrdata_r[13], + dec_csr_wrdata_r[11], + dec_csr_wrdata_r[10] & ~dec_csr_wrdata_r[11], + dec_csr_wrdata_r[9], + dec_csr_wrdata_r[8] & ~dec_csr_wrdata_r[9], + dec_csr_wrdata_r[7], + dec_csr_wrdata_r[6] & ~dec_csr_wrdata_r[7], + dec_csr_wrdata_r[5], + dec_csr_wrdata_r[4] & ~dec_csr_wrdata_r[5], + dec_csr_wrdata_r[3], + dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[3], + dec_csr_wrdata_r[1], + dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[1] + }; + + rvdffe #(32) mrac_ff ( + .*, + .en (wr_mrac_r), + .din (mrac_in[31:0]), + .dout(mrac[31:0]) + ); + + // drive to LSU/IFU + assign dec_tlu_mrac_ff[31:0] = mrac[31:0]; + + // ---------------------------------------------------------------------- + // MDEAU (WAR0) + // [31:0] : Dbus Error Address Unlock register + // + localparam MDEAU = 12'hbc0; + + assign wr_mdeau_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDEAU); + + + // ---------------------------------------------------------------------- + // MDSEAC (R) + // [31:0] : Dbus Store Error Address Capture register + // + localparam MDSEAC = 12'hfc0; + + // only capture error bus if the MDSEAC reg is not locked + assign mdseac_locked_ns = mdseac_en | (mdseac_locked_f & ~wr_mdeau_r); + + assign mdseac_en = (lsu_imprecise_error_store_any | lsu_imprecise_error_load_any) & ~nmi_int_detected_f & ~mdseac_locked_f; + + rvdffe #(32) mdseac_ff ( + .*, + .en (mdseac_en), + .din (lsu_imprecise_error_addr_any[31:0]), + .dout(mdseac[31:0]) + ); + + // ---------------------------------------------------------------------- + // MPMC (R0W1) + // [0] : FW halt + // [1] : Set MSTATUS[MIE] on halt + + localparam MPMC = 12'h7c6; + + assign wr_mpmc_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MPMC); + + // allow the cycle of the dbg halt flush that contains the wr_mpmc_r to + // set the mstatus bit potentially, use delayed version of internal dbg halt. 
+ assign fw_halt_req = wr_mpmc_r & dec_csr_wrdata_r[0] & ~internal_dbg_halt_mode_f2 & ~ext_int_freeze_d1; + + assign fw_halted_ns = (fw_halt_req | fw_halted) & ~set_mie_pmu_fw_halt; + assign mpmc_b_ns[1] = wr_mpmc_r ? ~dec_csr_wrdata_r[1] : ~mpmc[1]; + rvdff #(1) mpmc_ff ( + .*, + .clk (csr_wr_clk), + .din (mpmc_b_ns[1]), + .dout(mpmc_b[1]) + ); + assign mpmc[1] = ~mpmc_b[1]; + + // ---------------------------------------------------------------------- + // MICECT (I-Cache error counter/threshold) + // [31:27] : Icache parity error threshold + // [26:0] : Icache parity error count + localparam MICECT = 12'h7f0; + + assign csr_sat[31:27] = (dec_csr_wrdata_r[31:27] > 5'd26) ? 5'd26 : dec_csr_wrdata_r[31:27]; + + assign wr_micect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICECT); + assign micect_inc[26:0] = micect[26:0] + {26'b0, ic_perr_r}; + assign micect_ns = wr_micect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {micect[31:27], micect_inc[26:0]}; + + rvdffe #(32) micect_ff ( + .*, + .en (wr_micect_r | ic_perr_r), + .din (micect_ns[31:0]), + .dout(micect[31:0]) + ); + + assign mice_ce_req = |({32'hffffffff << micect[31:27]} &{5'b0, micect[26:0]}); + + // ---------------------------------------------------------------------- + // MICCMECT (ICCM error counter/threshold) + // [31:27] : ICCM parity error threshold + // [26:0] : ICCM parity error count + localparam MICCMECT = 12'h7f1; + + assign wr_miccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MICCMECT); + assign miccmect_inc[26:0] = miccmect[26:0] + {26'b0, iccm_sbecc_r | iccm_dma_sb_error}; + assign miccmect_ns = wr_miccmect_r ? 
{csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {miccmect[31:27], miccmect_inc[26:0]}; + + rvdffe #(32) miccmect_ff ( + .*, + .clk (free_l2clk), + .en (wr_miccmect_r | iccm_sbecc_r | iccm_dma_sb_error), + .din (miccmect_ns[31:0]), + .dout(miccmect[31:0]) + ); + + assign miccme_ce_req = |({32'hffffffff << miccmect[31:27]} &{5'b0, miccmect[26:0]}); + + // ---------------------------------------------------------------------- + // MDCCMECT (DCCM error counter/threshold) + // [31:27] : DCCM parity error threshold + // [26:0] : DCCM parity error count + localparam MDCCMECT = 12'h7f2; + + assign wr_mdccmect_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MDCCMECT); + assign mdccmect_inc[26:0] = mdccmect[26:0] + {26'b0, lsu_single_ecc_error_r_d1}; + assign mdccmect_ns = wr_mdccmect_r ? {csr_sat[31:27], dec_csr_wrdata_r[26:0]} : {mdccmect[31:27], mdccmect_inc[26:0]}; + + rvdffe #(32) mdccmect_ff ( + .*, + .clk (free_l2clk), + .en (wr_mdccmect_r | lsu_single_ecc_error_r_d1), + .din (mdccmect_ns[31:0]), + .dout(mdccmect[31:0]) + ); + + assign mdccme_ce_req = |({32'hffffffff << mdccmect[31:27]} &{5'b0, mdccmect[26:0]}); + + + // ---------------------------------------------------------------------- + // MFDHT (Force Debug Halt Threshold) + // [5:1] : Halt timeout threshold (power of 2) + // [0] : Halt timeout enabled + localparam MFDHT = 12'h7ce; + + assign wr_mfdht_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHT); + + assign mfdht_ns[5:0] = wr_mfdht_r ? 
dec_csr_wrdata_r[5:0] : mfdht[5:0]; + + rvdffs #(6) mfdht_ff ( + .*, + .clk (csr_wr_clk), + .en (wr_mfdht_r), + .din (mfdht_ns[5:0]), + .dout(mfdht[5:0]) + ); + + // ---------------------------------------------------------------------- + // MFDHS(RW) + // [1] : LSU operation pending when debug halt threshold reached + // [0] : IFU operation pending when debug halt threshold reached + + localparam MFDHS = 12'h7cf; + + assign wr_mfdhs_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MFDHS); + + assign mfdhs_ns[1:0] = wr_mfdhs_r ? dec_csr_wrdata_r[1:0] : ((dbg_tlu_halted & ~dbg_tlu_halted_f) ? {~lsu_idle_any_f, ~ifu_miss_state_idle_f} : mfdhs[1:0]); + + rvdffs #(2) mfdhs_ff ( + .*, + .clk (free_clk), + .en (wr_mfdhs_r | dbg_tlu_halted), + .din (mfdhs_ns[1:0]), + .dout(mfdhs[1:0]) + ); + + assign force_halt_ctr[31:0] = debug_halt_req_f ? (force_halt_ctr_f[31:0] + 32'b1) : (dbg_tlu_halted_f ? 32'b0 : force_halt_ctr_f[31:0]); + + rvdffe #(32) forcehaltctr_ff ( + .*, + .en (mfdht[0]), + .din (force_halt_ctr[31:0]), + .dout(force_halt_ctr_f[31:0]) + ); + + assign force_halt = mfdht[0] & |(force_halt_ctr_f[31:0] & (32'hffffffff << mfdht[5:1])); + + + // ---------------------------------------------------------------------- + // MEIVT (External Interrupt Vector Table (R/W)) + // [31:10]: Base address (R/W) + // [9:0] : Reserved, reads 0x0 + localparam MEIVT = 12'hbc8; + + assign wr_meivt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIVT); + + rvdffe #(22) meivt_ff ( + .*, + .en (wr_meivt_r), + .din (dec_csr_wrdata_r[31:10]), + .dout(meivt[31:10]) + ); + + + // ---------------------------------------------------------------------- + // MEIHAP (External Interrupt Handler Access Pointer (R)) + // [31:10]: Base address (R/W) + // [9:2] : ClaimID (R) + // [1:0] : Reserved, 0x0 + localparam MEIHAP = 12'hfc8; + + assign wr_meihap_r = wr_meicpct_r; + + rvdffe #(8) meihap_ff ( + .*, + .en (wr_meihap_r), + .din (pic_claimid[7:0]), + .dout(meihap[9:2]) + ); + + assign 
dec_tlu_meihap[31:2] = {meivt[31:10], meihap[9:2]}; + // ---------------------------------------------------------------------- + // MEICURPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : CURRPRI - Priority level of current interrupt service routine (R/W) + localparam MEICURPL = 12'hbcc; + + assign wr_meicurpl_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICURPL); + assign meicurpl_ns[3:0] = wr_meicurpl_r ? dec_csr_wrdata_r[3:0] : meicurpl[3:0]; + + rvdff #(4) meicurpl_ff ( + .*, + .clk (csr_wr_clk), + .din (meicurpl_ns[3:0]), + .dout(meicurpl[3:0]) + ); + + // PIC needs this reg + assign dec_tlu_meicurpl[3:0] = meicurpl[3:0]; + + + // ---------------------------------------------------------------------- + // MEICIDPL (R/W) + // [31:4] : Reserved (read 0x0) + // [3:0] : External Interrupt Claim ID's Priority Level Register + localparam MEICIDPL = 12'hbcb; + + assign wr_meicidpl_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICIDPL)) | take_ext_int_start; + + assign meicidpl_ns[3:0] = wr_meicpct_r ? pic_pl[3:0] : (wr_meicidpl_r ? dec_csr_wrdata_r[3:0] : meicidpl[3:0]); + + + // ---------------------------------------------------------------------- + // MEICPCT (Capture CLAIMID in MEIHAP and PL in MEICIDPL) + // [31:1] : Reserved (read 0x0) + // [0] : Capture (W1, Read 0) + localparam MEICPCT = 12'hbca; + + assign wr_meicpct_r = (dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEICPCT)) | take_ext_int_start; + + // ---------------------------------------------------------------------- + // MEIPT (External Interrupt Priority Threshold) + // [31:4] : Reserved (read 0x0) + // [3:0] : PRITHRESH + localparam MEIPT = 12'hbc9; + + assign wr_meipt_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MEIPT); + assign meipt_ns[3:0] = wr_meipt_r ?
dec_csr_wrdata_r[3:0] : meipt[3:0]; + + rvdff #(4) meipt_ff ( + .*, + .clk (csr_wr_clk), + .din (meipt_ns[3:0]), + .dout(meipt[3:0]) + ); + + // to PIC + assign dec_tlu_meipt[3:0] = meipt[3:0]; + // ---------------------------------------------------------------------- + // DCSR (R/W) (Only accessible in debug mode) + // [31:28] : xdebugver (hard coded to 0x4) RO + // [27:16] : 0x0, reserved + // [15] : ebreakm + // [14] : 0x0, reserved + // [13] : ebreaks (0x0 for this core) + // [12] : ebreaku (0x0 for this core) + // [11] : stepie + // [10] : stopcount + // [9] : 0x0 //stoptime + // [8:6] : cause (RO) + // [5:4] : 0x0, reserved + // [3] : nmip + // [2] : step + // [1:0] : prv (0x3 for this core) + // + localparam DCSR = 12'h7b0; + + // RV has clarified that 'priority 4' in the spec means top priority. + // 4. single step. 3. Debugger request. 2. Ebreak. 1. Trigger. + + // RV debug spec indicates a cause priority change for trigger hits during single step. + assign trigger_hit_for_dscr_cause_r_d1 = trigger_hit_dmode_r_d1 | (trigger_hit_r_d1 & dcsr_single_step_done_f); + + assign dcsr_cause[8:6] = ( ({3{dcsr_single_step_done_f & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1 & ~debug_halt_req}} & 3'b100) | ({3{debug_halt_req & ~ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} & 3'b011) | ({3{ebreak_to_debug_mode_r_d1 & ~trigger_hit_for_dscr_cause_r_d1}} & 3'b001) | ({3{trigger_hit_for_dscr_cause_r_d1}} & 3'b010)); - assign wr_dcsr_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DCSR); + assign wr_dcsr_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DCSR); // Multiple halt enter requests can happen before we are halted. // We have to continue to upgrade based on dcsr_cause priority but we can't downgrade. 
- assign dcsr_cause_upgradeable = internal_dbg_halt_mode_f & (dcsr[8:6] == 3'b011); - assign enter_debug_halt_req_le = enter_debug_halt_req & (~dbg_tlu_halted | dcsr_cause_upgradeable); + assign dcsr_cause_upgradeable = internal_dbg_halt_mode_f & (dcsr[8:6] == 3'b011); + assign enter_debug_halt_req_le = enter_debug_halt_req & (~dbg_tlu_halted | dcsr_cause_upgradeable); - assign nmi_in_debug_mode = nmi_int_detected_f & internal_dbg_halt_mode_f; - assign dcsr_ns[15:2] = enter_debug_halt_req_le ? {dcsr[15:9], dcsr_cause[8:6], dcsr[5:2]} : + assign nmi_in_debug_mode = nmi_int_detected_f & internal_dbg_halt_mode_f; + assign dcsr_ns[15:2] = enter_debug_halt_req_le ? {dcsr[15:9], dcsr_cause[8:6], dcsr[5:2]} : (wr_dcsr_r ? {dec_csr_wrdata_r[15], 3'b0, dec_csr_wrdata_r[11:10], 1'b0, dcsr[8:6], 2'b00, nmi_in_debug_mode | dcsr[3], dec_csr_wrdata_r[2]} : {dcsr[15:4], nmi_in_debug_mode, dcsr[2]}); - rvdffe #(14) dcsr_ff (.*, .clk(free_l2clk), .en(enter_debug_halt_req_le | wr_dcsr_r | internal_dbg_halt_mode | take_nmi), .din(dcsr_ns[15:2]), .dout(dcsr[15:2])); + rvdffe #(14) dcsr_ff ( + .*, + .clk (free_l2clk), + .en (enter_debug_halt_req_le | wr_dcsr_r | internal_dbg_halt_mode | take_nmi), + .din (dcsr_ns[15:2]), + .dout(dcsr[15:2]) + ); - // ---------------------------------------------------------------------- - // DPC (R/W) (Only accessible in debug mode) - // [31:0] : Debug PC - localparam DPC = 12'h7b1; + // ---------------------------------------------------------------------- + // DPC (R/W) (Only accessible in debug mode) + // [31:0] : Debug PC + localparam DPC = 12'h7b1; - assign wr_dpc_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DPC); - assign dpc_capture_npc = dbg_tlu_halted & ~dbg_tlu_halted_f & ~request_debug_mode_done; - assign dpc_capture_pc = request_debug_mode_r; + assign wr_dpc_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DPC); + assign dpc_capture_npc = dbg_tlu_halted & ~dbg_tlu_halted_f & 
~request_debug_mode_done; + assign dpc_capture_pc = request_debug_mode_r; - assign dpc_ns[31:1] = ( ({31{~dpc_capture_pc & ~dpc_capture_npc & wr_dpc_r}} & dec_csr_wrdata_r[31:1]) | + assign dpc_ns[31:1] = ( ({31{~dpc_capture_pc & ~dpc_capture_npc & wr_dpc_r}} & dec_csr_wrdata_r[31:1]) | ({31{dpc_capture_pc}} & pc_r[31:1]) | ({31{~dpc_capture_pc & dpc_capture_npc}} & npc_r[31:1]) ); - rvdffe #(31) dpc_ff (.*, .en(wr_dpc_r | dpc_capture_pc | dpc_capture_npc), .din(dpc_ns[31:1]), .dout(dpc[31:1])); + rvdffe #(31) dpc_ff ( + .*, + .en (wr_dpc_r | dpc_capture_pc | dpc_capture_npc), + .din (dpc_ns[31:1]), + .dout(dpc[31:1]) + ); - // ---------------------------------------------------------------------- - // DICAWICS (R/W) (Only accessible in debug mode) - // [31:25] : Reserved - // [24] : Array select, 0 is data, 1 is tag - // [23:22] : Reserved - // [21:20] : Way select - // [19:17] : Reserved - // [16:3] : Index - // [2:0] : Reserved - localparam DICAWICS = 12'h7c8; + // ---------------------------------------------------------------------- + // DICAWICS (R/W) (Only accessible in debug mode) + // [31:25] : Reserved + // [24] : Array select, 0 is data, 1 is tag + // [23:22] : Reserved + // [21:20] : Way select + // [19:17] : Reserved + // [16:3] : Index + // [2:0] : Reserved + localparam DICAWICS = 12'h7c8; - assign dicawics_ns[16:0] = {dec_csr_wrdata_r[24], dec_csr_wrdata_r[21:20], dec_csr_wrdata_r[16:3]}; - assign wr_dicawics_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAWICS); + assign dicawics_ns[16:0] = { + dec_csr_wrdata_r[24], dec_csr_wrdata_r[21:20], dec_csr_wrdata_r[16:3] + }; + assign wr_dicawics_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAWICS); - rvdffe #(17) dicawics_ff (.*, .en(wr_dicawics_r), .din(dicawics_ns[16:0]), .dout(dicawics[16:0])); + rvdffe #(17) dicawics_ff ( + .*, + .en (wr_dicawics_r), + .din (dicawics_ns[16:0]), + .dout(dicawics[16:0]) + ); - // 
---------------------------------------------------------------------- - // DICAD0 (R/W) (Only accessible in debug mode) - // - // If dicawics[array] is 0 - // [31:0] : inst data - // - // If dicawics[array] is 1 - // [31:16] : Tag - // [15:7] : Reserved - // [6:4] : LRU - // [3:1] : Reserved - // [0] : Valid - localparam DICAD0 = 12'h7c9; + // ---------------------------------------------------------------------- + // DICAD0 (R/W) (Only accessible in debug mode) + // + // If dicawics[array] is 0 + // [31:0] : inst data + // + // If dicawics[array] is 1 + // [31:16] : Tag + // [15:7] : Reserved + // [6:4] : LRU + // [3:1] : Reserved + // [0] : Valid + localparam DICAD0 = 12'h7c9; - assign dicad0_ns[31:0] = wr_dicad0_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[31:0]; + assign dicad0_ns[31:0] = wr_dicad0_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[31:0]; - assign wr_dicad0_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD0); + assign wr_dicad0_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD0); - rvdffe #(32) dicad0_ff (.*, .en(wr_dicad0_r | ifu_ic_debug_rd_data_valid), .din(dicad0_ns[31:0]), .dout(dicad0[31:0])); + rvdffe #(32) dicad0_ff ( + .*, + .en (wr_dicad0_r | ifu_ic_debug_rd_data_valid), + .din (dicad0_ns[31:0]), + .dout(dicad0[31:0]) + ); - // ---------------------------------------------------------------------- - // DICAD0H (R/W) (Only accessible in debug mode) - // - // If dicawics[array] is 0 - // [63:32] : inst data - // - localparam DICAD0H = 12'h7cc; + // ---------------------------------------------------------------------- + // DICAD0H (R/W) (Only accessible in debug mode) + // + // If dicawics[array] is 0 + // [63:32] : inst data + // + localparam DICAD0H = 12'h7cc; - assign dicad0h_ns[31:0] = wr_dicad0h_r ? dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[63:32]; + assign dicad0h_ns[31:0] = wr_dicad0h_r ? 
dec_csr_wrdata_r[31:0] : ifu_ic_debug_rd_data[63:32]; - assign wr_dicad0h_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD0H); + assign wr_dicad0h_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD0H); - rvdffe #(32) dicad0h_ff (.*, .en(wr_dicad0h_r | ifu_ic_debug_rd_data_valid), .din(dicad0h_ns[31:0]), .dout(dicad0h[31:0])); + rvdffe #(32) dicad0h_ff ( + .*, + .en (wr_dicad0h_r | ifu_ic_debug_rd_data_valid), + .din (dicad0h_ns[31:0]), + .dout(dicad0h[31:0]) + ); -if (pt.ICACHE_ECC == 1) begin - // ---------------------------------------------------------------------- - // DICAD1 (R/W) (Only accessible in debug mode) - // [6:0] : ECC - localparam DICAD1 = 12'h7ca; + if (pt.ICACHE_ECC == 1) begin + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [6:0] : ECC + localparam DICAD1 = 12'h7ca; - assign dicad1_ns[6:0] = wr_dicad1_r ? dec_csr_wrdata_r[6:0] : ifu_ic_debug_rd_data[70:64]; + assign dicad1_ns[6:0] = wr_dicad1_r ? 
dec_csr_wrdata_r[6:0] : ifu_ic_debug_rd_data[70:64]; - assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1); + assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1); - rvdffe #(.WIDTH(7), .OVERRIDE(1)) dicad1_ff (.*, .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[6:0]), .dout(dicad1_raw[6:0])); + rvdffe #( + .WIDTH(7), + .OVERRIDE(1) + ) dicad1_ff ( + .*, + .en (wr_dicad1_r | ifu_ic_debug_rd_data_valid), + .din (dicad1_ns[6:0]), + .dout(dicad1_raw[6:0]) + ); - assign dicad1[31:0] = {25'b0, dicad1_raw[6:0]}; + assign dicad1[31:0] = {25'b0, dicad1_raw[6:0]}; -end -else begin - // ---------------------------------------------------------------------- - // DICAD1 (R/W) (Only accessible in debug mode) - // [3:0] : Parity - localparam DICAD1 = 12'h7ca; + end else begin + // ---------------------------------------------------------------------- + // DICAD1 (R/W) (Only accessible in debug mode) + // [3:0] : Parity + localparam DICAD1 = 12'h7ca; - assign dicad1_ns[3:0] = wr_dicad1_r ? dec_csr_wrdata_r[3:0] : ifu_ic_debug_rd_data[67:64]; + assign dicad1_ns[3:0] = wr_dicad1_r ? 
dec_csr_wrdata_r[3:0] : ifu_ic_debug_rd_data[67:64]; - assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1); + assign wr_dicad1_r = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAD1); - rvdffs #(4) dicad1_ff (.*, .clk(free_clk), .en(wr_dicad1_r | ifu_ic_debug_rd_data_valid), .din(dicad1_ns[3:0]), .dout(dicad1_raw[3:0])); + rvdffs #(4) dicad1_ff ( + .*, + .clk (free_clk), + .en (wr_dicad1_r | ifu_ic_debug_rd_data_valid), + .din (dicad1_ns[3:0]), + .dout(dicad1_raw[3:0]) + ); - assign dicad1[31:0] = {28'b0, dicad1_raw[3:0]}; -end - // ---------------------------------------------------------------------- - // DICAGO (R/W) (Only accessible in debug mode) - // [0] : Go - localparam DICAGO = 12'h7cb; + assign dicad1[31:0] = {28'b0, dicad1_raw[3:0]}; + end + // ---------------------------------------------------------------------- + // DICAGO (R/W) (Only accessible in debug mode) + // [0] : Go + localparam DICAGO = 12'h7cb; -if (pt.ICACHE_ECC == 1) - assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = { dicad1[6:0], dicad0h[31:0], dicad0[31:0]}; -else - assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = {3'b0, dicad1[3:0], dicad0h[31:0], dicad0[31:0]}; + if (pt.ICACHE_ECC == 1) + assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = {dicad1[6:0], dicad0h[31:0], dicad0[31:0]}; + else + assign dec_tlu_ic_diag_pkt.icache_wrdata[70:0] = { + 3'b0, dicad1[3:0], dicad0h[31:0], dicad0[31:0] + }; - assign dec_tlu_ic_diag_pkt.icache_dicawics[16:0] = dicawics[16:0]; + assign dec_tlu_ic_diag_pkt.icache_dicawics[16:0] = dicawics[16:0]; - assign icache_rd_valid = allow_dbg_halt_csr_write & dec_csr_any_unq_d & dec_i0_decode_d & ~dec_csr_wen_unq_d & (dec_csr_rdaddr_d[11:0] == DICAGO); - assign icache_wr_valid = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAGO); + assign icache_rd_valid = allow_dbg_halt_csr_write & dec_csr_any_unq_d & dec_i0_decode_d & ~dec_csr_wen_unq_d & 
(dec_csr_rdaddr_d[11:0] == DICAGO); + assign icache_wr_valid = allow_dbg_halt_csr_write & dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == DICAGO); - assign dec_tlu_ic_diag_pkt.icache_rd_valid = icache_rd_valid_f; - assign dec_tlu_ic_diag_pkt.icache_wr_valid = icache_wr_valid_f; + assign dec_tlu_ic_diag_pkt.icache_rd_valid = icache_rd_valid_f; + assign dec_tlu_ic_diag_pkt.icache_wr_valid = icache_wr_valid_f; - // ---------------------------------------------------------------------- - // MTSEL (R/W) - // [1:0] : Trigger select : 00, 01, 10 are data/address triggers. 11 is inst count - localparam MTSEL = 12'h7a0; + // ---------------------------------------------------------------------- + // MTSEL (R/W) + // [1:0] : Trigger select : 00, 01, 10 are data/address triggers. 11 is inst count + localparam MTSEL = 12'h7a0; - assign wr_mtsel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTSEL); - assign mtsel_ns[1:0] = wr_mtsel_r ? {dec_csr_wrdata_r[1:0]} : mtsel[1:0]; + assign wr_mtsel_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTSEL); + assign mtsel_ns[1:0] = wr_mtsel_r ? 
{dec_csr_wrdata_r[1:0]} : mtsel[1:0]; - rvdff #(2) mtsel_ff (.*, .clk(csr_wr_clk), .din(mtsel_ns[1:0]), .dout(mtsel[1:0])); + rvdff #(2) mtsel_ff ( + .*, + .clk (csr_wr_clk), + .din (mtsel_ns[1:0]), + .dout(mtsel[1:0]) + ); - // ---------------------------------------------------------------------- - // MTDATA1 (R/W) - // [31:0] : Trigger Data 1 - localparam MTDATA1 = 12'h7a1; + // ---------------------------------------------------------------------- + // MTDATA1 (R/W) + // [31:0] : Trigger Data 1 + localparam MTDATA1 = 12'h7a1; - // for triggers 0, 1, 2 and 3 aka Match Control - // [31:28] : type, hard coded to 0x2 - // [27] : dmode - // [26:21] : hard coded to 0x1f - // [20] : hit - // [19] : select (0 - address, 1 - data) - // [18] : timing, always 'before', reads 0x0 - // [17:12] : action, bits [17:13] not implemented and reads 0x0 - // [11] : chain - // [10:7] : match, bits [10:8] not implemented and reads 0x0 - // [6] : M - // [5:3] : not implemented, reads 0x0 - // [2] : execute - // [1] : store - // [0] : load - // - // decoder ring - // [27] : => 9 - // [20] : => 8 - // [19] : => 7 - // [12] : => 6 - // [11] : => 5 - // [7] : => 4 - // [6] : => 3 - // [2] : => 2 - // [1] : => 1 - // [0] : => 0 + // for triggers 0, 1, 2 and 3 aka Match Control + // [31:28] : type, hard coded to 0x2 + // [27] : dmode + // [26:21] : hard coded to 0x1f + // [20] : hit + // [19] : select (0 - address, 1 - data) + // [18] : timing, always 'before', reads 0x0 + // [17:12] : action, bits [17:13] not implemented and reads 0x0 + // [11] : chain + // [10:7] : match, bits [10:8] not implemented and reads 0x0 + // [6] : M + // [5:3] : not implemented, reads 0x0 + // [2] : execute + // [1] : store + // [0] : load + // + // decoder ring + // [27] : => 9 + // [20] : => 8 + // [19] : => 7 + // [12] : => 6 + // [11] : => 5 + // [7] : => 4 + // [6] : => 3 + // [2] : => 2 + // [1] : => 1 + // [0] : => 0 - // don't allow setting load-data. 
- assign tdata_load = dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[19]; - // don't allow setting execute-data. - assign tdata_opcode = dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[19]; - // don't allow clearing DMODE and action=1 - assign tdata_action = (dec_csr_wrdata_r[27] & dbg_tlu_halted_f) & dec_csr_wrdata_r[12]; + // don't allow setting load-data. + assign tdata_load = dec_csr_wrdata_r[0] & ~dec_csr_wrdata_r[19]; + // don't allow setting execute-data. + assign tdata_opcode = dec_csr_wrdata_r[2] & ~dec_csr_wrdata_r[19]; + // don't allow clearing DMODE and action=1 + assign tdata_action = (dec_csr_wrdata_r[27] & dbg_tlu_halted_f) & dec_csr_wrdata_r[12]; - // Chain bit has conditions: WARL for triggers without chains. Force to zero if dmode is 0 but next trigger dmode is 1. - assign tdata_chain = mtsel[0] ? 1'b0 : // triggers 1 and 3 chain bit is always zero - mtsel[1] ? dec_csr_wrdata_r[11] & ~(mtdata1_t3[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]) : // trigger 2 - dec_csr_wrdata_r[11] & ~(mtdata1_t1[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]); // trigger 0 + // Chain bit has conditions: WARL for triggers without chains. Force to zero if dmode is 0 but next trigger dmode is 1. + assign tdata_chain = mtsel[0] ? 1'b0 : // triggers 1 and 3 chain bit is always zero + mtsel[1] ? dec_csr_wrdata_r[11] & ~(mtdata1_t3[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]) : // trigger 2 + dec_csr_wrdata_r[11] & ~(mtdata1_t1[MTDATA1_DMODE] & ~dec_csr_wrdata_r[27]); // trigger 0 - // Kill mtdata1 write if dmode=1 but prior trigger has dmode=0/chain=1. Only applies to T1 and T3 - assign tdata_kill_write = mtsel[1] ? dec_csr_wrdata_r[27] & (~mtdata1_t2[MTDATA1_DMODE] & mtdata1_t2[MTDATA1_CHAIN]) : // trigger 3 + // Kill mtdata1 write if dmode=1 but prior trigger has dmode=0/chain=1. Only applies to T1 and T3 + assign tdata_kill_write = mtsel[1] ? 
dec_csr_wrdata_r[27] & (~mtdata1_t2[MTDATA1_DMODE] & mtdata1_t2[MTDATA1_CHAIN]) : // trigger 3 dec_csr_wrdata_r[27] & (~mtdata1_t0[MTDATA1_DMODE] & mtdata1_t0[MTDATA1_CHAIN]) ; // trigger 1 - assign tdata_wrdata_r[9:0] = {dec_csr_wrdata_r[27] & dbg_tlu_halted_f, - dec_csr_wrdata_r[20:19], - tdata_action, - tdata_chain, - dec_csr_wrdata_r[7:6], - tdata_opcode, - dec_csr_wrdata_r[1], - tdata_load}; + assign tdata_wrdata_r[9:0] = { + dec_csr_wrdata_r[27] & dbg_tlu_halted_f, + dec_csr_wrdata_r[20:19], + tdata_action, + tdata_chain, + dec_csr_wrdata_r[7:6], + tdata_opcode, + dec_csr_wrdata_r[1], + tdata_load + }; - // If the DMODE bit is set, tdata1 can only be updated in debug_mode - assign wr_mtdata1_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[MTDATA1_DMODE] | dbg_tlu_halted_f); - assign mtdata1_t0_ns[9:0] = wr_mtdata1_t0_r ? tdata_wrdata_r[9:0] : + // If the DMODE bit is set, tdata1 can only be updated in debug_mode + assign wr_mtdata1_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t0_ns[9:0] = wr_mtdata1_t0_r ? tdata_wrdata_r[9:0] : {mtdata1_t0[9], update_hit_bit_r[0] | mtdata1_t0[8], mtdata1_t0[7:0]}; - assign wr_mtdata1_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write; - assign mtdata1_t1_ns[9:0] = wr_mtdata1_t1_r ? tdata_wrdata_r[9:0] : + assign wr_mtdata1_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write; + assign mtdata1_t1_ns[9:0] = wr_mtdata1_t1_r ? 
tdata_wrdata_r[9:0] : {mtdata1_t1[9], update_hit_bit_r[1] | mtdata1_t1[8], mtdata1_t1[7:0]}; - assign wr_mtdata1_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[MTDATA1_DMODE] | dbg_tlu_halted_f); - assign mtdata1_t2_ns[9:0] = wr_mtdata1_t2_r ? tdata_wrdata_r[9:0] : + assign wr_mtdata1_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[MTDATA1_DMODE] | dbg_tlu_halted_f); + assign mtdata1_t2_ns[9:0] = wr_mtdata1_t2_r ? tdata_wrdata_r[9:0] : {mtdata1_t2[9], update_hit_bit_r[2] | mtdata1_t2[8], mtdata1_t2[7:0]}; - assign wr_mtdata1_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write; - assign mtdata1_t3_ns[9:0] = wr_mtdata1_t3_r ? tdata_wrdata_r[9:0] : + assign wr_mtdata1_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA1) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[MTDATA1_DMODE] | dbg_tlu_halted_f) & ~tdata_kill_write; + assign mtdata1_t3_ns[9:0] = wr_mtdata1_t3_r ? 
tdata_wrdata_r[9:0] : {mtdata1_t3[9], update_hit_bit_r[3] | mtdata1_t3[8], mtdata1_t3[7:0]}; - rvdffe #(10) mtdata1_t0_ff (.*, .en(trigger_enabled[0] | wr_mtdata1_t0_r), .din(mtdata1_t0_ns[9:0]), .dout(mtdata1_t0[9:0])); - rvdffe #(10) mtdata1_t1_ff (.*, .en(trigger_enabled[1] | wr_mtdata1_t1_r), .din(mtdata1_t1_ns[9:0]), .dout(mtdata1_t1[9:0])); - rvdffe #(10) mtdata1_t2_ff (.*, .en(trigger_enabled[2] | wr_mtdata1_t2_r), .din(mtdata1_t2_ns[9:0]), .dout(mtdata1_t2[9:0])); - rvdffe #(10) mtdata1_t3_ff (.*, .en(trigger_enabled[3] | wr_mtdata1_t3_r), .din(mtdata1_t3_ns[9:0]), .dout(mtdata1_t3[9:0])); + rvdffe #(10) mtdata1_t0_ff ( + .*, + .en (trigger_enabled[0] | wr_mtdata1_t0_r), + .din (mtdata1_t0_ns[9:0]), + .dout(mtdata1_t0[9:0]) + ); + rvdffe #(10) mtdata1_t1_ff ( + .*, + .en (trigger_enabled[1] | wr_mtdata1_t1_r), + .din (mtdata1_t1_ns[9:0]), + .dout(mtdata1_t1[9:0]) + ); + rvdffe #(10) mtdata1_t2_ff ( + .*, + .en (trigger_enabled[2] | wr_mtdata1_t2_r), + .din (mtdata1_t2_ns[9:0]), + .dout(mtdata1_t2[9:0]) + ); + rvdffe #(10) mtdata1_t3_ff ( + .*, + .en (trigger_enabled[3] | wr_mtdata1_t3_r), + .din (mtdata1_t3_ns[9:0]), + .dout(mtdata1_t3[9:0]) + ); - assign mtdata1_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & {4'h2, mtdata1_t0[9], 6'b011111, mtdata1_t0[8:7], 6'b0, mtdata1_t0[6:5], 3'b0, mtdata1_t0[4:3], 3'b0, mtdata1_t0[2:0]}) | + assign mtdata1_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & {4'h2, mtdata1_t0[9], 6'b011111, mtdata1_t0[8:7], 6'b0, mtdata1_t0[6:5], 3'b0, mtdata1_t0[4:3], 3'b0, mtdata1_t0[2:0]}) | ({32{(mtsel[1:0] == 2'b01)}} & {4'h2, mtdata1_t1[9], 6'b011111, mtdata1_t1[8:7], 6'b0, mtdata1_t1[6:5], 3'b0, mtdata1_t1[4:3], 3'b0, mtdata1_t1[2:0]}) | ({32{(mtsel[1:0] == 2'b10)}} & {4'h2, mtdata1_t2[9], 6'b011111, mtdata1_t2[8:7], 6'b0, mtdata1_t2[6:5], 3'b0, mtdata1_t2[4:3], 3'b0, mtdata1_t2[2:0]}) | ({32{(mtsel[1:0] == 2'b11)}} & {4'h2, mtdata1_t3[9], 6'b011111, mtdata1_t3[8:7], 6'b0, mtdata1_t3[6:5], 3'b0, mtdata1_t3[4:3], 3'b0, 
mtdata1_t3[2:0]})); - assign trigger_pkt_any[0].select = mtdata1_t0[MTDATA1_SEL]; - assign trigger_pkt_any[0].match = mtdata1_t0[MTDATA1_MATCH]; - assign trigger_pkt_any[0].store = mtdata1_t0[MTDATA1_ST]; - assign trigger_pkt_any[0].load = mtdata1_t0[MTDATA1_LD]; - assign trigger_pkt_any[0].execute = mtdata1_t0[MTDATA1_EXE]; - assign trigger_pkt_any[0].m = mtdata1_t0[MTDATA1_M_ENABLED]; + assign trigger_pkt_any[0].select = mtdata1_t0[MTDATA1_SEL]; + assign trigger_pkt_any[0].match = mtdata1_t0[MTDATA1_MATCH]; + assign trigger_pkt_any[0].store = mtdata1_t0[MTDATA1_ST]; + assign trigger_pkt_any[0].load = mtdata1_t0[MTDATA1_LD]; + assign trigger_pkt_any[0].execute = mtdata1_t0[MTDATA1_EXE]; + assign trigger_pkt_any[0].m = mtdata1_t0[MTDATA1_M_ENABLED]; - assign trigger_pkt_any[1].select = mtdata1_t1[MTDATA1_SEL]; - assign trigger_pkt_any[1].match = mtdata1_t1[MTDATA1_MATCH]; - assign trigger_pkt_any[1].store = mtdata1_t1[MTDATA1_ST]; - assign trigger_pkt_any[1].load = mtdata1_t1[MTDATA1_LD]; - assign trigger_pkt_any[1].execute = mtdata1_t1[MTDATA1_EXE]; - assign trigger_pkt_any[1].m = mtdata1_t1[MTDATA1_M_ENABLED]; + assign trigger_pkt_any[1].select = mtdata1_t1[MTDATA1_SEL]; + assign trigger_pkt_any[1].match = mtdata1_t1[MTDATA1_MATCH]; + assign trigger_pkt_any[1].store = mtdata1_t1[MTDATA1_ST]; + assign trigger_pkt_any[1].load = mtdata1_t1[MTDATA1_LD]; + assign trigger_pkt_any[1].execute = mtdata1_t1[MTDATA1_EXE]; + assign trigger_pkt_any[1].m = mtdata1_t1[MTDATA1_M_ENABLED]; - assign trigger_pkt_any[2].select = mtdata1_t2[MTDATA1_SEL]; - assign trigger_pkt_any[2].match = mtdata1_t2[MTDATA1_MATCH]; - assign trigger_pkt_any[2].store = mtdata1_t2[MTDATA1_ST]; - assign trigger_pkt_any[2].load = mtdata1_t2[MTDATA1_LD]; - assign trigger_pkt_any[2].execute = mtdata1_t2[MTDATA1_EXE]; - assign trigger_pkt_any[2].m = mtdata1_t2[MTDATA1_M_ENABLED]; + assign trigger_pkt_any[2].select = mtdata1_t2[MTDATA1_SEL]; + assign trigger_pkt_any[2].match = mtdata1_t2[MTDATA1_MATCH]; + 
assign trigger_pkt_any[2].store = mtdata1_t2[MTDATA1_ST]; + assign trigger_pkt_any[2].load = mtdata1_t2[MTDATA1_LD]; + assign trigger_pkt_any[2].execute = mtdata1_t2[MTDATA1_EXE]; + assign trigger_pkt_any[2].m = mtdata1_t2[MTDATA1_M_ENABLED]; - assign trigger_pkt_any[3].select = mtdata1_t3[MTDATA1_SEL]; - assign trigger_pkt_any[3].match = mtdata1_t3[MTDATA1_MATCH]; - assign trigger_pkt_any[3].store = mtdata1_t3[MTDATA1_ST]; - assign trigger_pkt_any[3].load = mtdata1_t3[MTDATA1_LD]; - assign trigger_pkt_any[3].execute = mtdata1_t3[MTDATA1_EXE]; - assign trigger_pkt_any[3].m = mtdata1_t3[MTDATA1_M_ENABLED]; + assign trigger_pkt_any[3].select = mtdata1_t3[MTDATA1_SEL]; + assign trigger_pkt_any[3].match = mtdata1_t3[MTDATA1_MATCH]; + assign trigger_pkt_any[3].store = mtdata1_t3[MTDATA1_ST]; + assign trigger_pkt_any[3].load = mtdata1_t3[MTDATA1_LD]; + assign trigger_pkt_any[3].execute = mtdata1_t3[MTDATA1_EXE]; + assign trigger_pkt_any[3].m = mtdata1_t3[MTDATA1_M_ENABLED]; - // ---------------------------------------------------------------------- - // MTDATA2 (R/W) - // [31:0] : Trigger Data 2 - localparam MTDATA2 = 12'h7a2; + // ---------------------------------------------------------------------- + // MTDATA2 (R/W) + // [31:0] : Trigger Data 2 + localparam MTDATA2 = 12'h7a2; - // If the DMODE bit is set, tdata2 can only be updated in debug_mode - assign wr_mtdata2_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[MTDATA1_DMODE] | dbg_tlu_halted_f); - assign wr_mtdata2_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[MTDATA1_DMODE] | dbg_tlu_halted_f); - assign wr_mtdata2_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[MTDATA1_DMODE] | dbg_tlu_halted_f); - assign wr_mtdata2_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[MTDATA1_DMODE] | dbg_tlu_halted_f); + 
// If the DMODE bit is set, tdata2 can only be updated in debug_mode + assign wr_mtdata2_t0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b0) & (~mtdata1_t0[MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b01) & (~mtdata1_t1[MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t2_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b10) & (~mtdata1_t2[MTDATA1_DMODE] | dbg_tlu_halted_f); + assign wr_mtdata2_t3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MTDATA2) & (mtsel[1:0] == 2'b11) & (~mtdata1_t3[MTDATA1_DMODE] | dbg_tlu_halted_f); - rvdffe #(32) mtdata2_t0_ff (.*, .en(wr_mtdata2_t0_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t0[31:0])); - rvdffe #(32) mtdata2_t1_ff (.*, .en(wr_mtdata2_t1_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t1[31:0])); - rvdffe #(32) mtdata2_t2_ff (.*, .en(wr_mtdata2_t2_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t2[31:0])); - rvdffe #(32) mtdata2_t3_ff (.*, .en(wr_mtdata2_t3_r), .din(dec_csr_wrdata_r[31:0]), .dout(mtdata2_t3[31:0])); + rvdffe #(32) mtdata2_t0_ff ( + .*, + .en (wr_mtdata2_t0_r), + .din (dec_csr_wrdata_r[31:0]), + .dout(mtdata2_t0[31:0]) + ); + rvdffe #(32) mtdata2_t1_ff ( + .*, + .en (wr_mtdata2_t1_r), + .din (dec_csr_wrdata_r[31:0]), + .dout(mtdata2_t1[31:0]) + ); + rvdffe #(32) mtdata2_t2_ff ( + .*, + .en (wr_mtdata2_t2_r), + .din (dec_csr_wrdata_r[31:0]), + .dout(mtdata2_t2[31:0]) + ); + rvdffe #(32) mtdata2_t3_ff ( + .*, + .en (wr_mtdata2_t3_r), + .din (dec_csr_wrdata_r[31:0]), + .dout(mtdata2_t3[31:0]) + ); - assign mtdata2_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & mtdata2_t0[31:0]) | + assign mtdata2_tsel_out[31:0] = ( ({32{(mtsel[1:0] == 2'b00)}} & mtdata2_t0[31:0]) | ({32{(mtsel[1:0] == 2'b01)}} & mtdata2_t1[31:0]) | ({32{(mtsel[1:0] == 2'b10)}} & mtdata2_t2[31:0]) | ({32{(mtsel[1:0] == 2'b11)}} & mtdata2_t3[31:0])); - assign 
trigger_pkt_any[0].tdata2[31:0] = mtdata2_t0[31:0]; - assign trigger_pkt_any[1].tdata2[31:0] = mtdata2_t1[31:0]; - assign trigger_pkt_any[2].tdata2[31:0] = mtdata2_t2[31:0]; - assign trigger_pkt_any[3].tdata2[31:0] = mtdata2_t3[31:0]; + assign trigger_pkt_any[0].tdata2[31:0] = mtdata2_t0[31:0]; + assign trigger_pkt_any[1].tdata2[31:0] = mtdata2_t1[31:0]; + assign trigger_pkt_any[2].tdata2[31:0] = mtdata2_t2[31:0]; + assign trigger_pkt_any[3].tdata2[31:0] = mtdata2_t3[31:0]; - //---------------------------------------------------------------------- - // Performance Monitor Counters section starts - //---------------------------------------------------------------------- - localparam MHPME_NOEVENT = 10'd0; - localparam MHPME_CLK_ACTIVE = 10'd1; // OOP - out of pipe - localparam MHPME_ICACHE_HIT = 10'd2; // OOP - localparam MHPME_ICACHE_MISS = 10'd3; // OOP - localparam MHPME_INST_COMMIT = 10'd4; - localparam MHPME_INST_COMMIT_16B = 10'd5; - localparam MHPME_INST_COMMIT_32B = 10'd6; - localparam MHPME_INST_ALIGNED = 10'd7; // OOP - localparam MHPME_INST_DECODED = 10'd8; // OOP - localparam MHPME_INST_MUL = 10'd9; - localparam MHPME_INST_DIV = 10'd10; - localparam MHPME_INST_LOAD = 10'd11; - localparam MHPME_INST_STORE = 10'd12; - localparam MHPME_INST_MALOAD = 10'd13; - localparam MHPME_INST_MASTORE = 10'd14; - localparam MHPME_INST_ALU = 10'd15; - localparam MHPME_INST_CSRREAD = 10'd16; - localparam MHPME_INST_CSRRW = 10'd17; - localparam MHPME_INST_CSRWRITE = 10'd18; - localparam MHPME_INST_EBREAK = 10'd19; - localparam MHPME_INST_ECALL = 10'd20; - localparam MHPME_INST_FENCE = 10'd21; - localparam MHPME_INST_FENCEI = 10'd22; - localparam MHPME_INST_MRET = 10'd23; - localparam MHPME_INST_BRANCH = 10'd24; - localparam MHPME_BRANCH_MP = 10'd25; - localparam MHPME_BRANCH_TAKEN = 10'd26; - localparam MHPME_BRANCH_NOTP = 10'd27; - localparam MHPME_FETCH_STALL = 10'd28; // OOP - localparam MHPME_DECODE_STALL = 10'd30; // OOP - localparam MHPME_POSTSYNC_STALL = 10'd31; // 
OOP - localparam MHPME_PRESYNC_STALL = 10'd32; // OOP - localparam MHPME_LSU_SB_WB_STALL = 10'd34; // OOP - localparam MHPME_DMA_DCCM_STALL = 10'd35; // OOP - localparam MHPME_DMA_ICCM_STALL = 10'd36; // OOP - localparam MHPME_EXC_TAKEN = 10'd37; - localparam MHPME_TIMER_INT_TAKEN = 10'd38; - localparam MHPME_EXT_INT_TAKEN = 10'd39; - localparam MHPME_FLUSH_LOWER = 10'd40; - localparam MHPME_BR_ERROR = 10'd41; - localparam MHPME_IBUS_TRANS = 10'd42; // OOP - localparam MHPME_DBUS_TRANS = 10'd43; // OOP - localparam MHPME_DBUS_MA_TRANS = 10'd44; // OOP - localparam MHPME_IBUS_ERROR = 10'd45; // OOP - localparam MHPME_DBUS_ERROR = 10'd46; // OOP - localparam MHPME_IBUS_STALL = 10'd47; // OOP - localparam MHPME_DBUS_STALL = 10'd48; // OOP - localparam MHPME_INT_DISABLED = 10'd49; // OOP - localparam MHPME_INT_STALLED = 10'd50; // OOP - localparam MHPME_INST_BITMANIP = 10'd54; - localparam MHPME_DBUS_LOAD = 10'd55; - localparam MHPME_DBUS_STORE = 10'd56; - // Counts even during sleep state - localparam MHPME_SLEEP_CYC = 10'd512; // OOP - localparam MHPME_DMA_READ_ALL = 10'd513; // OOP - localparam MHPME_DMA_WRITE_ALL = 10'd514; // OOP - localparam MHPME_DMA_READ_DCCM = 10'd515; // OOP - localparam MHPME_DMA_WRITE_DCCM = 10'd516; // OOP + //---------------------------------------------------------------------- + // Performance Monitor Counters section starts + //---------------------------------------------------------------------- + localparam MHPME_NOEVENT = 10'd0; + localparam MHPME_CLK_ACTIVE = 10'd1; // OOP - out of pipe + localparam MHPME_ICACHE_HIT = 10'd2; // OOP + localparam MHPME_ICACHE_MISS = 10'd3; // OOP + localparam MHPME_INST_COMMIT = 10'd4; + localparam MHPME_INST_COMMIT_16B = 10'd5; + localparam MHPME_INST_COMMIT_32B = 10'd6; + localparam MHPME_INST_ALIGNED = 10'd7; // OOP + localparam MHPME_INST_DECODED = 10'd8; // OOP + localparam MHPME_INST_MUL = 10'd9; + localparam MHPME_INST_DIV = 10'd10; + localparam MHPME_INST_LOAD = 10'd11; + localparam 
MHPME_INST_STORE = 10'd12; + localparam MHPME_INST_MALOAD = 10'd13; + localparam MHPME_INST_MASTORE = 10'd14; + localparam MHPME_INST_ALU = 10'd15; + localparam MHPME_INST_CSRREAD = 10'd16; + localparam MHPME_INST_CSRRW = 10'd17; + localparam MHPME_INST_CSRWRITE = 10'd18; + localparam MHPME_INST_EBREAK = 10'd19; + localparam MHPME_INST_ECALL = 10'd20; + localparam MHPME_INST_FENCE = 10'd21; + localparam MHPME_INST_FENCEI = 10'd22; + localparam MHPME_INST_MRET = 10'd23; + localparam MHPME_INST_BRANCH = 10'd24; + localparam MHPME_BRANCH_MP = 10'd25; + localparam MHPME_BRANCH_TAKEN = 10'd26; + localparam MHPME_BRANCH_NOTP = 10'd27; + localparam MHPME_FETCH_STALL = 10'd28; // OOP + localparam MHPME_DECODE_STALL = 10'd30; // OOP + localparam MHPME_POSTSYNC_STALL = 10'd31; // OOP + localparam MHPME_PRESYNC_STALL = 10'd32; // OOP + localparam MHPME_LSU_SB_WB_STALL = 10'd34; // OOP + localparam MHPME_DMA_DCCM_STALL = 10'd35; // OOP + localparam MHPME_DMA_ICCM_STALL = 10'd36; // OOP + localparam MHPME_EXC_TAKEN = 10'd37; + localparam MHPME_TIMER_INT_TAKEN = 10'd38; + localparam MHPME_EXT_INT_TAKEN = 10'd39; + localparam MHPME_FLUSH_LOWER = 10'd40; + localparam MHPME_BR_ERROR = 10'd41; + localparam MHPME_IBUS_TRANS = 10'd42; // OOP + localparam MHPME_DBUS_TRANS = 10'd43; // OOP + localparam MHPME_DBUS_MA_TRANS = 10'd44; // OOP + localparam MHPME_IBUS_ERROR = 10'd45; // OOP + localparam MHPME_DBUS_ERROR = 10'd46; // OOP + localparam MHPME_IBUS_STALL = 10'd47; // OOP + localparam MHPME_DBUS_STALL = 10'd48; // OOP + localparam MHPME_INT_DISABLED = 10'd49; // OOP + localparam MHPME_INT_STALLED = 10'd50; // OOP + localparam MHPME_INST_BITMANIP = 10'd54; + localparam MHPME_DBUS_LOAD = 10'd55; + localparam MHPME_DBUS_STORE = 10'd56; + // Counts even during sleep state + localparam MHPME_SLEEP_CYC = 10'd512; // OOP + localparam MHPME_DMA_READ_ALL = 10'd513; // OOP + localparam MHPME_DMA_WRITE_ALL = 10'd514; // OOP + localparam MHPME_DMA_READ_DCCM = 10'd515; // OOP + localparam 
MHPME_DMA_WRITE_DCCM = 10'd516; // OOP - // Pack the event selects into a vector for genvar - assign mhpme_vec[0][9:0] = mhpme3[9:0]; - assign mhpme_vec[1][9:0] = mhpme4[9:0]; - assign mhpme_vec[2][9:0] = mhpme5[9:0]; - assign mhpme_vec[3][9:0] = mhpme6[9:0]; + // Pack the event selects into a vector for genvar + assign mhpme_vec[0][9:0] = mhpme3[9:0]; + assign mhpme_vec[1][9:0] = mhpme4[9:0]; + assign mhpme_vec[2][9:0] = mhpme5[9:0]; + assign mhpme_vec[3][9:0] = mhpme6[9:0]; - // only consider committed itypes - //logic [3:0] pmu_i0_itype_qual; - assign pmu_i0_itype_qual[3:0] = dec_tlu_packet_r.pmu_i0_itype[3:0] & {4{tlu_i0_commit_cmt}}; + // only consider committed itypes + //logic [3:0] pmu_i0_itype_qual; + assign pmu_i0_itype_qual[3:0] = dec_tlu_packet_r.pmu_i0_itype[3:0] & {4{tlu_i0_commit_cmt}}; - // Generate the muxed incs for all counters based on event type - for (genvar i=0 ; i < 4; i++) begin - assign mhpmc_inc_r[i] = {{~mcountinhibit[i+3]}} & + // Generate the muxed incs for all counters based on event type + for (genvar i = 0; i < 4; i++) begin + assign mhpmc_inc_r[i] = {{~mcountinhibit[i+3]}} & ( ({1{(mhpme_vec[i][9:0] == MHPME_CLK_ACTIVE )}} & 1'b1) | ({1{(mhpme_vec[i][9:0] == MHPME_ICACHE_HIT )}} & {ifu_pmu_ic_hit}) | @@ -2147,131 +2776,239 @@ else ({1{(mhpme_vec[i][9:0] == MHPME_INST_BITMANIP )}} & {(pmu_i0_itype_qual == BITMANIPU)}) | ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_LOAD )}} & {tlu_i0_commit_cmt & lsu_pmu_load_external_r & ~illegal_r}) | ({1{(mhpme_vec[i][9:0] == MHPME_DBUS_STORE )}} & {tlu_i0_commit_cmt & lsu_pmu_store_external_r & ~illegal_r}) | - // These count even during sleep - ({1{(mhpme_vec[i][9:0] == MHPME_SLEEP_CYC )}} & {dec_tlu_pmu_fw_halted}) | + // These count even during sleep + ({1{(mhpme_vec[i][9:0] == MHPME_SLEEP_CYC )}} & {dec_tlu_pmu_fw_halted}) | ({1{(mhpme_vec[i][9:0] == MHPME_DMA_READ_ALL )}} & {dma_pmu_any_read}) | ({1{(mhpme_vec[i][9:0] == MHPME_DMA_WRITE_ALL )}} & {dma_pmu_any_write}) | ({1{(mhpme_vec[i][9:0] == 
MHPME_DMA_READ_DCCM )}} & {dma_pmu_dccm_read}) | ({1{(mhpme_vec[i][9:0] == MHPME_DMA_WRITE_DCCM )}} & {dma_pmu_dccm_write}) ); - end + end - if(pt.FAST_INTERRUPT_REDIRECT) - rvdffie #(31) mstatus_ff (.*, .clk(free_l2clk), - .din({mdseac_locked_ns, lsu_single_ecc_error_r, lsu_exc_valid_r, lsu_i0_exc_r, - take_ext_int_start, take_ext_int_start_d1, take_ext_int_start_d2, ext_int_freeze, - mip_ns[5:0], mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in, - minstret_enable, minstretl_cout_ns, fw_halted_ns, - meicidpl_ns[3:0], icache_rd_valid, icache_wr_valid, mhpmc_inc_r[3:0], perfcnt_halted, - mstatus_ns[1:0]}), - .dout({mdseac_locked_f, lsu_single_ecc_error_r_d1, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1, - take_ext_int_start_d1, take_ext_int_start_d2, take_ext_int_start_d3, ext_int_freeze_d1, - mip[5:0], mcyclel_cout_f, minstret_enable_f, minstretl_cout_f, - fw_halted, meicidpl[3:0], icache_rd_valid_f, icache_wr_valid_f, - mhpmc_inc_r_d1[3:0], perfcnt_halted_d1, - mstatus[1:0]})); + if (pt.FAST_INTERRUPT_REDIRECT) + rvdffie #(31) mstatus_ff ( + .*, + .clk(free_l2clk), + .din({ + mdseac_locked_ns, + lsu_single_ecc_error_r, + lsu_exc_valid_r, + lsu_i0_exc_r, + take_ext_int_start, + take_ext_int_start_d1, + take_ext_int_start_d2, + ext_int_freeze, + mip_ns[5:0], + mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in, + minstret_enable, + minstretl_cout_ns, + fw_halted_ns, + meicidpl_ns[3:0], + icache_rd_valid, + icache_wr_valid, + mhpmc_inc_r[3:0], + perfcnt_halted, + mstatus_ns[1:0] + }), + .dout({ + mdseac_locked_f, + lsu_single_ecc_error_r_d1, + lsu_exc_valid_r_d1, + lsu_i0_exc_r_d1, + take_ext_int_start_d1, + take_ext_int_start_d2, + take_ext_int_start_d3, + ext_int_freeze_d1, + mip[5:0], + mcyclel_cout_f, + minstret_enable_f, + minstretl_cout_f, + fw_halted, + meicidpl[3:0], + icache_rd_valid_f, + icache_wr_valid_f, + mhpmc_inc_r_d1[3:0], + perfcnt_halted_d1, + mstatus[1:0] + }) + ); - else - rvdffie #(27) mstatus_ff (.*, .clk(free_l2clk), - .din({mdseac_locked_ns, 
lsu_single_ecc_error_r, lsu_exc_valid_r, lsu_i0_exc_r, - mip_ns[5:0], mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in, - minstret_enable, minstretl_cout_ns, fw_halted_ns, - meicidpl_ns[3:0], icache_rd_valid, icache_wr_valid, mhpmc_inc_r[3:0], perfcnt_halted, - mstatus_ns[1:0]}), - .dout({mdseac_locked_f, lsu_single_ecc_error_r_d1, lsu_exc_valid_r_d1, lsu_i0_exc_r_d1, - mip[5:0], mcyclel_cout_f, minstret_enable_f, minstretl_cout_f, - fw_halted, meicidpl[3:0], icache_rd_valid_f, icache_wr_valid_f, - mhpmc_inc_r_d1[3:0], perfcnt_halted_d1, - mstatus[1:0]})); + else + rvdffie #(27) mstatus_ff ( + .*, + .clk(free_l2clk), + .din({ + mdseac_locked_ns, + lsu_single_ecc_error_r, + lsu_exc_valid_r, + lsu_i0_exc_r, + mip_ns[5:0], + mcyclel_cout & ~wr_mcycleh_r & mcyclel_cout_in, + minstret_enable, + minstretl_cout_ns, + fw_halted_ns, + meicidpl_ns[3:0], + icache_rd_valid, + icache_wr_valid, + mhpmc_inc_r[3:0], + perfcnt_halted, + mstatus_ns[1:0] + }), + .dout({ + mdseac_locked_f, + lsu_single_ecc_error_r_d1, + lsu_exc_valid_r_d1, + lsu_i0_exc_r_d1, + mip[5:0], + mcyclel_cout_f, + minstret_enable_f, + minstretl_cout_f, + fw_halted, + meicidpl[3:0], + icache_rd_valid_f, + icache_wr_valid_f, + mhpmc_inc_r_d1[3:0], + perfcnt_halted_d1, + mstatus[1:0] + }) + ); - assign perfcnt_halted = ((dec_tlu_dbg_halted & dcsr[DCSR_STOPC]) | dec_tlu_pmu_fw_halted); - assign perfcnt_during_sleep[3:0] = {4{~(dec_tlu_dbg_halted & dcsr[DCSR_STOPC])}} & {mhpme_vec[3][9],mhpme_vec[2][9],mhpme_vec[1][9],mhpme_vec[0][9]}; + assign perfcnt_halted = ((dec_tlu_dbg_halted & dcsr[DCSR_STOPC]) | dec_tlu_pmu_fw_halted); + assign perfcnt_during_sleep[3:0] = {4{~(dec_tlu_dbg_halted & dcsr[DCSR_STOPC])}} & {mhpme_vec[3][9],mhpme_vec[2][9],mhpme_vec[1][9],mhpme_vec[0][9]}; - assign dec_tlu_perfcnt0 = mhpmc_inc_r_d1[0] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[0]); - assign dec_tlu_perfcnt1 = mhpmc_inc_r_d1[1] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[1]); - assign dec_tlu_perfcnt2 = mhpmc_inc_r_d1[2] & 
~(perfcnt_halted_d1 & ~perfcnt_during_sleep[2]); - assign dec_tlu_perfcnt3 = mhpmc_inc_r_d1[3] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[3]); + assign dec_tlu_perfcnt0 = mhpmc_inc_r_d1[0] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[0]); + assign dec_tlu_perfcnt1 = mhpmc_inc_r_d1[1] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[1]); + assign dec_tlu_perfcnt2 = mhpmc_inc_r_d1[2] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[2]); + assign dec_tlu_perfcnt3 = mhpmc_inc_r_d1[3] & ~(perfcnt_halted_d1 & ~perfcnt_during_sleep[3]); - // ---------------------------------------------------------------------- - // MHPMC3H(RW), MHPMC3(RW) - // [63:32][31:0] : Hardware Performance Monitor Counter 3 - localparam MHPMC3 = 12'hB03; - localparam MHPMC3H = 12'hB83; + // ---------------------------------------------------------------------- + // MHPMC3H(RW), MHPMC3(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 3 + localparam MHPMC3 = 12'hB03; + localparam MHPMC3H = 12'hB83; - assign mhpmc3_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC3); - assign mhpmc3_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[0]) & (|(mhpmc_inc_r[0])); - assign mhpmc3_wr_en = mhpmc3_wr_en0 | mhpmc3_wr_en1; - assign mhpmc3_incr[63:0] = {mhpmc3h[31:0],mhpmc3[31:0]} + {63'b0, 1'b1}; - assign mhpmc3_ns[31:0] = mhpmc3_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[31:0]; - rvdffe #(32) mhpmc3_ff (.*, .clk(free_l2clk), .en(mhpmc3_wr_en), .din(mhpmc3_ns[31:0]), .dout(mhpmc3[31:0])); + assign mhpmc3_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC3); + assign mhpmc3_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[0]) & (|(mhpmc_inc_r[0])); + assign mhpmc3_wr_en = mhpmc3_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3_incr[63:0] = {mhpmc3h[31:0], mhpmc3[31:0]} + {63'b0, 1'b1}; + assign mhpmc3_ns[31:0] = mhpmc3_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc3_incr[31:0]; + rvdffe #(32) mhpmc3_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc3_wr_en), + .din (mhpmc3_ns[31:0]), + .dout(mhpmc3[31:0]) + ); - assign mhpmc3h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC3H); - assign mhpmc3h_wr_en = mhpmc3h_wr_en0 | mhpmc3_wr_en1; - assign mhpmc3h_ns[31:0] = mhpmc3h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[63:32]; - rvdffe #(32) mhpmc3h_ff (.*, .clk(free_l2clk), .en(mhpmc3h_wr_en), .din(mhpmc3h_ns[31:0]), .dout(mhpmc3h[31:0])); + assign mhpmc3h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC3H); + assign mhpmc3h_wr_en = mhpmc3h_wr_en0 | mhpmc3_wr_en1; + assign mhpmc3h_ns[31:0] = mhpmc3h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc3_incr[63:32]; + rvdffe #(32) mhpmc3h_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc3h_wr_en), + .din (mhpmc3h_ns[31:0]), + .dout(mhpmc3h[31:0]) + ); - // ---------------------------------------------------------------------- - // MHPMC4H(RW), MHPMC4(RW) - // [63:32][31:0] : Hardware Performance Monitor Counter 4 - localparam MHPMC4 = 12'hB04; - localparam MHPMC4H = 12'hB84; + // ---------------------------------------------------------------------- + // MHPMC4H(RW), MHPMC4(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 4 + localparam MHPMC4 = 12'hB04; + localparam MHPMC4H = 12'hB84; - assign mhpmc4_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC4); - assign mhpmc4_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[1]) & (|(mhpmc_inc_r[1])); - assign mhpmc4_wr_en = mhpmc4_wr_en0 | mhpmc4_wr_en1; - assign mhpmc4_incr[63:0] = {mhpmc4h[31:0],mhpmc4[31:0]} + {63'b0,1'b1}; - assign mhpmc4_ns[31:0] = mhpmc4_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc4_incr[31:0]; - rvdffe #(32) mhpmc4_ff (.*, .clk(free_l2clk), .en(mhpmc4_wr_en), .din(mhpmc4_ns[31:0]), .dout(mhpmc4[31:0])); + assign mhpmc4_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC4); + assign mhpmc4_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[1]) & (|(mhpmc_inc_r[1])); + assign mhpmc4_wr_en = mhpmc4_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4_incr[63:0] = {mhpmc4h[31:0], mhpmc4[31:0]} + {63'b0, 1'b1}; + assign mhpmc4_ns[31:0] = mhpmc4_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc4_incr[31:0]; + rvdffe #(32) mhpmc4_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc4_wr_en), + .din (mhpmc4_ns[31:0]), + .dout(mhpmc4[31:0]) + ); - assign mhpmc4h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC4H); - assign mhpmc4h_wr_en = mhpmc4h_wr_en0 | mhpmc4_wr_en1; - assign mhpmc4h_ns[31:0] = mhpmc4h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc4_incr[63:32]; - rvdffe #(32) mhpmc4h_ff (.*, .clk(free_l2clk), .en(mhpmc4h_wr_en), .din(mhpmc4h_ns[31:0]), .dout(mhpmc4h[31:0])); + assign mhpmc4h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC4H); + assign mhpmc4h_wr_en = mhpmc4h_wr_en0 | mhpmc4_wr_en1; + assign mhpmc4h_ns[31:0] = mhpmc4h_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc4_incr[63:32]; + rvdffe #(32) mhpmc4h_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc4h_wr_en), + .din (mhpmc4h_ns[31:0]), + .dout(mhpmc4h[31:0]) + ); - // ---------------------------------------------------------------------- - // MHPMC5H(RW), MHPMC5(RW) - // [63:32][31:0] : Hardware Performance Monitor Counter 5 - localparam MHPMC5 = 12'hB05; - localparam MHPMC5H = 12'hB85; + // ---------------------------------------------------------------------- + // MHPMC5H(RW), MHPMC5(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 5 + localparam MHPMC5 = 12'hB05; + localparam MHPMC5H = 12'hB85; - assign mhpmc5_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC5); - assign mhpmc5_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[2]) & (|(mhpmc_inc_r[2])); - assign mhpmc5_wr_en = mhpmc5_wr_en0 | mhpmc5_wr_en1; - assign mhpmc5_incr[63:0] = {mhpmc5h[31:0],mhpmc5[31:0]} + {63'b0,1'b1}; - assign mhpmc5_ns[31:0] = mhpmc5_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[31:0]; - rvdffe #(32) mhpmc5_ff (.*, .clk(free_l2clk), .en(mhpmc5_wr_en), .din(mhpmc5_ns[31:0]), .dout(mhpmc5[31:0])); + assign mhpmc5_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC5); + assign mhpmc5_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[2]) & (|(mhpmc_inc_r[2])); + assign mhpmc5_wr_en = mhpmc5_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5_incr[63:0] = {mhpmc5h[31:0], mhpmc5[31:0]} + {63'b0, 1'b1}; + assign mhpmc5_ns[31:0] = mhpmc5_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[31:0]; + rvdffe #(32) mhpmc5_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc5_wr_en), + .din (mhpmc5_ns[31:0]), + .dout(mhpmc5[31:0]) + ); - assign mhpmc5h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC5H); - assign mhpmc5h_wr_en = mhpmc5h_wr_en0 | mhpmc5_wr_en1; - assign mhpmc5h_ns[31:0] = mhpmc5h_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc5_incr[63:32]; - rvdffe #(32) mhpmc5h_ff (.*, .clk(free_l2clk), .en(mhpmc5h_wr_en), .din(mhpmc5h_ns[31:0]), .dout(mhpmc5h[31:0])); + assign mhpmc5h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC5H); + assign mhpmc5h_wr_en = mhpmc5h_wr_en0 | mhpmc5_wr_en1; + assign mhpmc5h_ns[31:0] = mhpmc5h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc5_incr[63:32]; + rvdffe #(32) mhpmc5h_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc5h_wr_en), + .din (mhpmc5h_ns[31:0]), + .dout(mhpmc5h[31:0]) + ); - // ---------------------------------------------------------------------- - // MHPMC6H(RW), MHPMC6(RW) - // [63:32][31:0] : Hardware Performance Monitor Counter 6 - localparam MHPMC6 = 12'hB06; - localparam MHPMC6H = 12'hB86; + // ---------------------------------------------------------------------- + // MHPMC6H(RW), MHPMC6(RW) + // [63:32][31:0] : Hardware Performance Monitor Counter 6 + localparam MHPMC6 = 12'hB06; + localparam MHPMC6H = 12'hB86; - assign mhpmc6_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC6); - assign mhpmc6_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[3]) & (|(mhpmc_inc_r[3])); - assign mhpmc6_wr_en = mhpmc6_wr_en0 | mhpmc6_wr_en1; - assign mhpmc6_incr[63:0] = {mhpmc6h[31:0],mhpmc6[31:0]} + {63'b0,1'b1}; - assign mhpmc6_ns[31:0] = mhpmc6_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[31:0]; - rvdffe #(32) mhpmc6_ff (.*, .clk(free_l2clk), .en(mhpmc6_wr_en), .din(mhpmc6_ns[31:0]), .dout(mhpmc6[31:0])); + assign mhpmc6_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC6); + assign mhpmc6_wr_en1 = (~perfcnt_halted | perfcnt_during_sleep[3]) & (|(mhpmc_inc_r[3])); + assign mhpmc6_wr_en = mhpmc6_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6_incr[63:0] = {mhpmc6h[31:0], mhpmc6[31:0]} + {63'b0, 1'b1}; + assign mhpmc6_ns[31:0] = mhpmc6_wr_en0 ? 
dec_csr_wrdata_r[31:0] : mhpmc6_incr[31:0]; + rvdffe #(32) mhpmc6_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc6_wr_en), + .din (mhpmc6_ns[31:0]), + .dout(mhpmc6[31:0]) + ); - assign mhpmc6h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC6H); - assign mhpmc6h_wr_en = mhpmc6h_wr_en0 | mhpmc6_wr_en1; - assign mhpmc6h_ns[31:0] = mhpmc6h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[63:32]; - rvdffe #(32) mhpmc6h_ff (.*, .clk(free_l2clk), .en(mhpmc6h_wr_en), .din(mhpmc6h_ns[31:0]), .dout(mhpmc6h[31:0])); + assign mhpmc6h_wr_en0 = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPMC6H); + assign mhpmc6h_wr_en = mhpmc6h_wr_en0 | mhpmc6_wr_en1; + assign mhpmc6h_ns[31:0] = mhpmc6h_wr_en0 ? dec_csr_wrdata_r[31:0] : mhpmc6_incr[63:32]; + rvdffe #(32) mhpmc6h_ff ( + .*, + .clk (free_l2clk), + .en (mhpmc6h_wr_en), + .din (mhpmc6h_ns[31:0]), + .dout(mhpmc6h[31:0]) + ); - // ---------------------------------------------------------------------- - // MHPME3(RW) - // [9:0] : Hardware Performance Monitor Event 3 - localparam MHPME3 = 12'h323; + // ---------------------------------------------------------------------- + // MHPME3(RW) + // [9:0] : Hardware Performance Monitor Event 3 + localparam MHPME3 = 12'h323; - // we only have events 0-56 with holes, 512-516, HPME* are WARL so zero otherwise. - assign zero_event_r = ( (dec_csr_wrdata_r[9:0] > 10'd516) | + // we only have events 0-56 with holes, 512-516, HPME* are WARL so zero otherwise. + assign zero_event_r = ( (dec_csr_wrdata_r[9:0] > 10'd516) | (|dec_csr_wrdata_r[31:10]) | ((dec_csr_wrdata_r[9:0] < 10'd512) & (dec_csr_wrdata_r[9:0] > 10'd56)) | ((dec_csr_wrdata_r[9:0] < 10'd54) & (dec_csr_wrdata_r[9:0] > 10'd50)) | @@ -2279,60 +3016,86 @@ else (dec_csr_wrdata_r[9:0] == 10'd33) ); - assign event_r[9:0] = zero_event_r ? '0 : dec_csr_wrdata_r[9:0]; + assign event_r[9:0] = zero_event_r ? 
'0 : dec_csr_wrdata_r[9:0]; - assign wr_mhpme3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME3); - rvdffe #(10) mhpme3_ff (.*, .en(wr_mhpme3_r), .din(event_r[9:0]), .dout(mhpme3[9:0])); - // ---------------------------------------------------------------------- - // MHPME4(RW) - // [9:0] : Hardware Performance Monitor Event 4 - localparam MHPME4 = 12'h324; + assign wr_mhpme3_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME3); + rvdffe #(10) mhpme3_ff ( + .*, + .en (wr_mhpme3_r), + .din (event_r[9:0]), + .dout(mhpme3[9:0]) + ); + // ---------------------------------------------------------------------- + // MHPME4(RW) + // [9:0] : Hardware Performance Monitor Event 4 + localparam MHPME4 = 12'h324; - assign wr_mhpme4_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME4); - rvdffe #(10) mhpme4_ff (.*, .en(wr_mhpme4_r), .din(event_r[9:0]), .dout(mhpme4[9:0])); - // ---------------------------------------------------------------------- - // MHPME5(RW) - // [9:0] : Hardware Performance Monitor Event 5 - localparam MHPME5 = 12'h325; + assign wr_mhpme4_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME4); + rvdffe #(10) mhpme4_ff ( + .*, + .en (wr_mhpme4_r), + .din (event_r[9:0]), + .dout(mhpme4[9:0]) + ); + // ---------------------------------------------------------------------- + // MHPME5(RW) + // [9:0] : Hardware Performance Monitor Event 5 + localparam MHPME5 = 12'h325; - assign wr_mhpme5_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME5); - rvdffe #(10) mhpme5_ff (.*, .en(wr_mhpme5_r), .din(event_r[9:0]), .dout(mhpme5[9:0])); - // ---------------------------------------------------------------------- - // MHPME6(RW) - // [9:0] : Hardware Performance Monitor Event 6 - localparam MHPME6 = 12'h326; + assign wr_mhpme5_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME5); + rvdffe #(10) mhpme5_ff ( + .*, + .en (wr_mhpme5_r), + .din (event_r[9:0]), + .dout(mhpme5[9:0]) + ); + // 
---------------------------------------------------------------------- + // MHPME6(RW) + // [9:0] : Hardware Performance Monitor Event 6 + localparam MHPME6 = 12'h326; - assign wr_mhpme6_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME6); - rvdffe #(10) mhpme6_ff (.*, .en(wr_mhpme6_r), .din(event_r[9:0]), .dout(mhpme6[9:0])); + assign wr_mhpme6_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MHPME6); + rvdffe #(10) mhpme6_ff ( + .*, + .en (wr_mhpme6_r), + .din (event_r[9:0]), + .dout(mhpme6[9:0]) + ); - //---------------------------------------------------------------------- - // Performance Monitor Counters section ends - //---------------------------------------------------------------------- - // ---------------------------------------------------------------------- + //---------------------------------------------------------------------- + // Performance Monitor Counters section ends + //---------------------------------------------------------------------- + // ---------------------------------------------------------------------- - // MCOUNTINHIBIT(RW) - // [31:7] : Reserved, read 0x0 - // [6] : HPM6 disable - // [5] : HPM5 disable - // [4] : HPM4 disable - // [3] : HPM3 disable - // [2] : MINSTRET disable - // [1] : reserved, read 0x0 - // [0] : MCYCLE disable + // MCOUNTINHIBIT(RW) + // [31:7] : Reserved, read 0x0 + // [6] : HPM6 disable + // [5] : HPM5 disable + // [4] : HPM4 disable + // [3] : HPM3 disable + // [2] : MINSTRET disable + // [1] : reserved, read 0x0 + // [0] : MCYCLE disable - localparam MCOUNTINHIBIT = 12'h320; + localparam MCOUNTINHIBIT = 12'h320; - assign wr_mcountinhibit_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MCOUNTINHIBIT); - rvdffs #(6) mcountinhibit_ff (.*, .clk(csr_wr_clk), .en(wr_mcountinhibit_r), .din({dec_csr_wrdata_r[6:2], dec_csr_wrdata_r[0]}), .dout({mcountinhibit[6:2], mcountinhibit[0]})); - assign mcountinhibit[1] = 1'b0; + assign wr_mcountinhibit_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == 
MCOUNTINHIBIT); + rvdffs #(6) mcountinhibit_ff ( + .*, + .clk (csr_wr_clk), + .en (wr_mcountinhibit_r), + .din ({dec_csr_wrdata_r[6:2], dec_csr_wrdata_r[0]}), + .dout({mcountinhibit[6:2], mcountinhibit[0]}) + ); + assign mcountinhibit[1] = 1'b0; - //-------------------------------------------------------------------------------- - // trace - //-------------------------------------------------------------------------------- - logic [4:0] dec_tlu_exc_cause_wb1_raw, dec_tlu_exc_cause_wb2; - logic dec_tlu_int_valid_wb1_raw, dec_tlu_int_valid_wb2; + //-------------------------------------------------------------------------------- + // trace + //-------------------------------------------------------------------------------- + logic [4:0] dec_tlu_exc_cause_wb1_raw, dec_tlu_exc_cause_wb2; + logic dec_tlu_int_valid_wb1_raw, dec_tlu_int_valid_wb2; - assign {dec_tlu_i0_valid_wb1, + assign {dec_tlu_i0_valid_wb1, dec_tlu_i0_exc_valid_wb1, dec_tlu_exc_cause_wb1_raw[4:0], dec_tlu_int_valid_wb1_raw} = {8{~dec_tlu_trace_disable}} & {i0_valid_wb, @@ -2343,252 +3106,252 @@ else // skid buffer for ints, reduces trace port count by 1 - rvdffie #(.WIDTH(6), .OVERRIDE(1)) traceskidff (.*, .clk(clk), - .din ({dec_tlu_exc_cause_wb1_raw[4:0], - dec_tlu_int_valid_wb1_raw}), - .dout({dec_tlu_exc_cause_wb2[4:0], - dec_tlu_int_valid_wb2})); - //skid for ints - assign dec_tlu_exc_cause_wb1[4:0] = dec_tlu_int_valid_wb2 ? dec_tlu_exc_cause_wb2[4:0] : dec_tlu_exc_cause_wb1_raw[4:0]; - assign dec_tlu_int_valid_wb1 = dec_tlu_int_valid_wb2; + rvdffie #( + .WIDTH(6), + .OVERRIDE(1) + ) traceskidff ( + .*, + .clk (clk), + .din ({dec_tlu_exc_cause_wb1_raw[4:0], dec_tlu_int_valid_wb1_raw}), + .dout({dec_tlu_exc_cause_wb2[4:0], dec_tlu_int_valid_wb2}) + ); + //skid for ints + assign dec_tlu_exc_cause_wb1[4:0] = dec_tlu_int_valid_wb2 ? 
dec_tlu_exc_cause_wb2[4:0] : dec_tlu_exc_cause_wb1_raw[4:0]; + assign dec_tlu_int_valid_wb1 = dec_tlu_int_valid_wb2; - assign dec_tlu_mtval_wb1 = mtval[31:0]; + assign dec_tlu_mtval_wb1 = mtval[31:0]; - // end trace - //-------------------------------------------------------------------------------- + // end trace + //-------------------------------------------------------------------------------- - // ---------------------------------------------------------------------- - // CSR read mux - // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + // CSR read mux + // ---------------------------------------------------------------------- -// file "csrdecode" is human readable file that has all of the CSR decodes defined and is part of git repo -// modify this file as needed + // file "csrdecode" is human readable file that has all of the CSR decodes defined and is part of git repo + // modify this file as needed -// to generate all the equations below from "csrdecode" except legal equation: + // to generate all the equations below from "csrdecode" except legal equation: -// 1) coredecode -in csrdecode > corecsrdecode.e + // 1) coredecode -in csrdecode > corecsrdecode.e -// 2) espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations + // 2) espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations -// to generate the legal CSR equation below: + // to generate the legal CSR equation below: -// 1) coredecode -in csrdecode -legal > csrlegal.e + // 1) coredecode -in csrdecode -legal > csrlegal.e -// 2) espresso -Dso -oeqntott csrlegal.e | addassign > csrlegal_equation -// coredecode -in csrdecode > corecsrdecode.e; espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations; coredecode -in csrdecode -legal > csrlegal.e; espresso -Dso -oeqntott csrlegal.e | addassign > csrlegal_equation + // 2) espresso -Dso -oeqntott csrlegal.e | addassign > 
csrlegal_equation + // coredecode -in csrdecode > corecsrdecode.e; espresso -Dso -oeqntott corecsrdecode.e | addassign > csrequations; coredecode -in csrdecode -legal > csrlegal.e; espresso -Dso -oeqntott csrlegal.e | addassign > csrlegal_equation -assign csr_misa = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + assign csr_misa = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); -assign csr_mvendorid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + assign csr_mvendorid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_marchid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] + assign csr_marchid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mimpid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6] + assign csr_mimpid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_mhartid = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[7] - &dec_csr_rdaddr_d[2]); + assign csr_mhartid = (dec_csr_rdaddr_d[10] & !dec_csr_rdaddr_d[7] & dec_csr_rdaddr_d[2]); -assign csr_mstatus = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + assign csr_mstatus = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); -assign csr_mtvec = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] + assign csr_mtvec = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); -assign csr_mip = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2]); + assign csr_mip = (!dec_csr_rdaddr_d[7] & dec_csr_rdaddr_d[6] & dec_csr_rdaddr_d[2]); -assign csr_mie = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + assign csr_mie = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); -assign csr_mcyclel = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + assign 
csr_mcyclel = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &!dec_csr_rdaddr_d[1]); -assign csr_mcycleh = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + assign csr_mcycleh = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); -assign csr_minstretl = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + assign csr_minstretl = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_minstreth = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] + assign csr_minstreth = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mscratch = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + assign csr_mscratch = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mepc = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + assign csr_mepc = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mcause = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] + assign csr_mcause = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mscause = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] - &dec_csr_rdaddr_d[2]); + assign csr_mscause = (dec_csr_rdaddr_d[6] & dec_csr_rdaddr_d[5] & dec_csr_rdaddr_d[2]); -assign csr_mtval = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[1] + assign csr_mtval = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mrac = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + assign csr_mrac = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] 
&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); -assign csr_dmst = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + assign csr_dmst = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); -assign csr_mdseac = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] + assign csr_mdseac = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]); -assign csr_meihap = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10] - &dec_csr_rdaddr_d[3]); + assign csr_meihap = (dec_csr_rdaddr_d[11] & dec_csr_rdaddr_d[10] & dec_csr_rdaddr_d[3]); -assign csr_meivt = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6] + assign csr_meivt = (!dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] &!dec_csr_rdaddr_d[0]); -assign csr_meipt = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] + assign csr_meipt = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_meicurpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] - &dec_csr_rdaddr_d[2]); + assign csr_meicurpl = (dec_csr_rdaddr_d[11] & dec_csr_rdaddr_d[6] & dec_csr_rdaddr_d[2]); -assign csr_meicidpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + assign csr_meicidpl = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_dcsr = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + assign csr_dcsr = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[0]); -assign csr_mcgc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + assign csr_mcgc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[0]); -assign csr_mfdc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + assign csr_mfdc = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); 
-assign csr_dpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + assign csr_dpc = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[0]); -assign csr_mtsel = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + assign csr_mtsel = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mtdata1 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] + assign csr_mtdata1 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); -assign csr_mtdata2 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5] + assign csr_mtdata2 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[5] &!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[1]); -assign csr_mhpmc3 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + assign csr_mhpmc3 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[0]); -assign csr_mhpmc4 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + assign csr_mhpmc4 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mhpmc5 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] + assign csr_mhpmc5 = (dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[7] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mhpmc6 = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] + assign csr_mhpmc6 = (!dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mhpmc3h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + assign csr_mhpmc3h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mhpmc4h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + assign csr_mhpmc4h = 
(dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mhpmc5h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] + assign csr_mhpmc5h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mhpmc6h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] + assign csr_mhpmc6h = (dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[6] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mhpme3 = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] + assign csr_mhpme3 = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[0]); -assign csr_mhpme4 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + assign csr_mhpme4 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] &!dec_csr_rdaddr_d[0]); -assign csr_mhpme5 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + assign csr_mhpme5 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] &dec_csr_rdaddr_d[0]); -assign csr_mhpme6 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] + assign csr_mhpme6 = (dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1] &!dec_csr_rdaddr_d[0]); -assign csr_mcountinhibit = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] + assign csr_mcountinhibit = (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[5] &!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &!dec_csr_rdaddr_d[0]); -assign csr_mitctl0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + assign csr_mitctl0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mitctl1 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3] + assign csr_mitctl1 = 
(dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[3] &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_mitb0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4] + assign csr_mitb0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[0]); -assign csr_mitb1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2] + assign csr_mitb1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mitcnt0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] + assign csr_mitcnt0 = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]); -assign csr_mitcnt1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2] + assign csr_mitcnt1 = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[2] &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_mpmc = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] + assign csr_mpmc = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] &dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); -assign csr_meicpct = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] + assign csr_meicpct = (dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_micect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3] + assign csr_micect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_miccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + assign csr_miccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] &!dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[0]); -assign csr_mdccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] + assign csr_mdccmect = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mfdht = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] + assign csr_mfdht = (dec_csr_rdaddr_d[6]&dec_csr_rdaddr_d[3]&dec_csr_rdaddr_d[2] 
&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_mfdhs = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2] + assign csr_mfdhs = (dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[0]); -assign csr_dicawics = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] + assign csr_dicawics = (!dec_csr_rdaddr_d[11]&!dec_csr_rdaddr_d[5] &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1] &!dec_csr_rdaddr_d[0]); -assign csr_dicad0h = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + assign csr_dicad0h = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] &dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[1]); -assign csr_dicad0 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] + assign csr_dicad0 = (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] &dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign csr_dicad1 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + assign csr_dicad1 = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]); -assign csr_dicago = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] + assign csr_dicago = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]); -assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7] &dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[2]&!dec_csr_rdaddr_d[0]) | (dec_csr_rdaddr_d[5] @@ -2602,7 +3365,7 @@ assign presync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] dec_csr_rdaddr_d[7]&!dec_csr_rdaddr_d[5]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); -assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] + assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[1]&dec_csr_rdaddr_d[0]) | 
(!dec_csr_rdaddr_d[11] &!dec_csr_rdaddr_d[6]&!dec_csr_rdaddr_d[5]&dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] @@ -2615,7 +3378,7 @@ assign postsync = (dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[4]&dec_csr_rdaddr_d[3] &!dec_csr_rdaddr_d[1]) | (dec_csr_rdaddr_d[10]&!dec_csr_rdaddr_d[4] &!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2]&dec_csr_rdaddr_d[1]); -assign legal = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] + assign legal = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] &dec_csr_rdaddr_d[8]&dec_csr_rdaddr_d[7]&dec_csr_rdaddr_d[6] &dec_csr_rdaddr_d[4]&!dec_csr_rdaddr_d[3]&!dec_csr_rdaddr_d[2] &dec_csr_rdaddr_d[1]&!dec_csr_rdaddr_d[0]) | (!dec_csr_rdaddr_d[11] @@ -2708,21 +3471,20 @@ assign legal = (!dec_csr_rdaddr_d[11]&dec_csr_rdaddr_d[10]&dec_csr_rdaddr_d[9] -assign dec_tlu_presync_d = presync & dec_csr_any_unq_d & ~dec_csr_wen_unq_d; -assign dec_tlu_postsync_d = postsync & dec_csr_any_unq_d; + assign dec_tlu_presync_d = presync & dec_csr_any_unq_d & ~dec_csr_wen_unq_d; + assign dec_tlu_postsync_d = postsync & dec_csr_any_unq_d; - // allow individual configuration of these features -assign conditionally_illegal = ((csr_mitcnt0 | csr_mitcnt1 | csr_mitb0 | csr_mitb1 | csr_mitctl0 | csr_mitctl1) & !pt.TIMER_LEGAL_EN); + // allow individual configuration of these features + assign conditionally_illegal = ((csr_mitcnt0 | csr_mitcnt1 | csr_mitb0 | csr_mitb1 | csr_mitctl0 | csr_mitctl1) & !pt.TIMER_LEGAL_EN); -assign valid_csr = ( legal & (~(csr_dcsr | csr_dpc | csr_dmst | csr_dicawics | csr_dicad0 | csr_dicad0h | csr_dicad1 | csr_dicago) | dbg_tlu_halted_f) + assign valid_csr = ( legal & (~(csr_dcsr | csr_dpc | csr_dmst | csr_dicawics | csr_dicad0 | csr_dicad0h | csr_dicad1 | csr_dicago) | dbg_tlu_halted_f) & ~fast_int_meicpct & ~conditionally_illegal); -assign dec_csr_legal_d = ( dec_csr_any_unq_d & - valid_csr & // of a valid CSR - ~(dec_csr_wen_unq_d & (csr_mvendorid | csr_marchid | csr_mimpid | 
csr_mhartid | csr_mdseac | csr_meihap)) // that's not a write to a RO CSR - ); - // CSR read mux -assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | + assign dec_csr_legal_d = (dec_csr_any_unq_d & valid_csr & // of a valid CSR + ~(dec_csr_wen_unq_d & (csr_mvendorid | csr_marchid | csr_mimpid | csr_mhartid | csr_mdseac | csr_meihap)) // that's not a write to a RO CSR + ); + // CSR read mux + assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | ({32{csr_mvendorid}} & 32'h00000045) | ({32{csr_marchid}} & 32'h00000010) | ({32{csr_mimpid}} & 32'h4) | @@ -2782,159 +3544,214 @@ assign dec_csr_rddata_d[31:0] = ( ({32{csr_misa}} & 32'h40001104) | -endmodule // el2_dec_tlu_ctl +endmodule // el2_dec_tlu_ctl module el2_dec_timer_ctl #( -`include "el2_param.vh" - ) - ( - input logic clk, - input logic free_l2clk, - input logic csr_wr_clk, - input logic rst_l, - input logic dec_csr_wen_r_mod, // csr write enable at wb - input logic [11:0] dec_csr_wraddr_r, // write address for csr - input logic [31:0] dec_csr_wrdata_r, // csr write data at wb + `include "el2_param.vh" +) ( + input logic clk, + input logic free_l2clk, + input logic csr_wr_clk, + input logic rst_l, + input logic dec_csr_wen_r_mod, // csr write enable at wb + input logic [11:0] dec_csr_wraddr_r, // write address for csr + input logic [31:0] dec_csr_wrdata_r, // csr write data at wb - input logic csr_mitctl0, - input logic csr_mitctl1, - input logic csr_mitb0, - input logic csr_mitb1, - input logic csr_mitcnt0, - input logic csr_mitcnt1, + input logic csr_mitctl0, + input logic csr_mitctl1, + input logic csr_mitb0, + input logic csr_mitb1, + input logic csr_mitcnt0, + input logic csr_mitcnt1, - input logic dec_pause_state, // Paused - input logic dec_tlu_pmu_fw_halted, // pmu/fw halted - input logic internal_dbg_halt_timers, // debug halted + input logic dec_pause_state, // Paused + input logic dec_tlu_pmu_fw_halted, // pmu/fw halted + input logic internal_dbg_halt_timers, // debug 
halted - output logic [31:0] dec_timer_rddata_d, // timer CSR read data - output logic dec_timer_read_d, // timer CSR address match - output logic dec_timer_t0_pulse, // timer0 int - output logic dec_timer_t1_pulse, // timer1 int + output logic [31:0] dec_timer_rddata_d, // timer CSR read data + output logic dec_timer_read_d, // timer CSR address match + output logic dec_timer_t0_pulse, // timer0 int + output logic dec_timer_t1_pulse, // timer1 int - input logic scan_mode - ); - localparam MITCTL_ENABLE = 0; - localparam MITCTL_ENABLE_HALTED = 1; - localparam MITCTL_ENABLE_PAUSED = 2; + input logic scan_mode +); + localparam MITCTL_ENABLE = 0; + localparam MITCTL_ENABLE_HALTED = 1; + localparam MITCTL_ENABLE_PAUSED = 2; - logic [31:0] mitcnt0_ns, mitcnt0, mitcnt1_ns, mitcnt1, mitb0, mitb1, mitb0_b, mitb1_b, mitcnt0_inc, mitcnt1_inc; - logic [2:0] mitctl0_ns, mitctl0; - logic [3:0] mitctl1_ns, mitctl1; - logic wr_mitcnt0_r, wr_mitcnt1_r, wr_mitb0_r, wr_mitb1_r, wr_mitctl0_r, wr_mitctl1_r; - logic mitcnt0_inc_ok, mitcnt1_inc_ok; - logic mitcnt0_inc_cout, mitcnt1_inc_cout; - logic mit0_match_ns; - logic mit1_match_ns; - logic mitctl0_0_b_ns; - logic mitctl0_0_b; - logic mitctl1_0_b_ns; - logic mitctl1_0_b; + logic [31:0] + mitcnt0_ns, + mitcnt0, + mitcnt1_ns, + mitcnt1, + mitb0, + mitb1, + mitb0_b, + mitb1_b, + mitcnt0_inc, + mitcnt1_inc; + logic [2:0] mitctl0_ns, mitctl0; + logic [3:0] mitctl1_ns, mitctl1; + logic wr_mitcnt0_r, wr_mitcnt1_r, wr_mitb0_r, wr_mitb1_r, wr_mitctl0_r, wr_mitctl1_r; + logic mitcnt0_inc_ok, mitcnt1_inc_ok; + logic mitcnt0_inc_cout, mitcnt1_inc_cout; + logic mit0_match_ns; + logic mit1_match_ns; + logic mitctl0_0_b_ns; + logic mitctl0_0_b; + logic mitctl1_0_b_ns; + logic mitctl1_0_b; - assign mit0_match_ns = (mitcnt0[31:0] >= mitb0[31:0]); - assign mit1_match_ns = (mitcnt1[31:0] >= mitb1[31:0]); + assign mit0_match_ns = (mitcnt0[31:0] >= mitb0[31:0]); + assign mit1_match_ns = (mitcnt1[31:0] >= mitb1[31:0]); - assign dec_timer_t0_pulse = 
mit0_match_ns; - assign dec_timer_t1_pulse = mit1_match_ns; - // ---------------------------------------------------------------------- - // MITCNT0 (RW) - // [31:0] : Internal Timer Counter 0 + assign dec_timer_t0_pulse = mit0_match_ns; + assign dec_timer_t1_pulse = mit1_match_ns; + // ---------------------------------------------------------------------- + // MITCNT0 (RW) + // [31:0] : Internal Timer Counter 0 - localparam MITCNT0 = 12'h7d2; + localparam MITCNT0 = 12'h7d2; - assign wr_mitcnt0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT0); + assign wr_mitcnt0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT0); - assign mitcnt0_inc_ok = mitctl0[MITCTL_ENABLE] & (~dec_pause_state | mitctl0[MITCTL_ENABLE_PAUSED]) & (~dec_tlu_pmu_fw_halted | mitctl0[MITCTL_ENABLE_HALTED]) & ~internal_dbg_halt_timers; + assign mitcnt0_inc_ok = mitctl0[MITCTL_ENABLE] & (~dec_pause_state | mitctl0[MITCTL_ENABLE_PAUSED]) & (~dec_tlu_pmu_fw_halted | mitctl0[MITCTL_ENABLE_HALTED]) & ~internal_dbg_halt_timers; - assign {mitcnt0_inc_cout, mitcnt0_inc[7:0]} = mitcnt0[7:0] + {7'b0, 1'b1}; - assign mitcnt0_inc[31:8] = mitcnt0[31:8] + {23'b0, mitcnt0_inc_cout}; + assign {mitcnt0_inc_cout, mitcnt0_inc[7:0]} = mitcnt0[7:0] + {7'b0, 1'b1}; + assign mitcnt0_inc[31:8] = mitcnt0[31:8] + {23'b0, mitcnt0_inc_cout}; - assign mitcnt0_ns[31:0] = wr_mitcnt0_r ? dec_csr_wrdata_r[31:0] : mit0_match_ns ? 'b0 : mitcnt0_inc[31:0]; + assign mitcnt0_ns[31:0] = wr_mitcnt0_r ? dec_csr_wrdata_r[31:0] : mit0_match_ns ? 
'b0 : mitcnt0_inc[31:0]; - rvdffe #(24) mitcnt0_ffb (.*, .clk(free_l2clk), .en(wr_mitcnt0_r | (mitcnt0_inc_ok & mitcnt0_inc_cout) | mit0_match_ns), .din(mitcnt0_ns[31:8]), .dout(mitcnt0[31:8])); - rvdffe #(8) mitcnt0_ffa (.*, .clk(free_l2clk), .en(wr_mitcnt0_r | mitcnt0_inc_ok | mit0_match_ns), .din(mitcnt0_ns[7:0]), .dout(mitcnt0[7:0])); + rvdffe #(24) mitcnt0_ffb ( + .*, + .clk (free_l2clk), + .en (wr_mitcnt0_r | (mitcnt0_inc_ok & mitcnt0_inc_cout) | mit0_match_ns), + .din (mitcnt0_ns[31:8]), + .dout(mitcnt0[31:8]) + ); + rvdffe #(8) mitcnt0_ffa ( + .*, + .clk (free_l2clk), + .en (wr_mitcnt0_r | mitcnt0_inc_ok | mit0_match_ns), + .din (mitcnt0_ns[7:0]), + .dout(mitcnt0[7:0]) + ); - // ---------------------------------------------------------------------- - // MITCNT1 (RW) - // [31:0] : Internal Timer Counter 0 + // ---------------------------------------------------------------------- + // MITCNT1 (RW) + // [31:0] : Internal Timer Counter 0 - localparam MITCNT1 = 12'h7d5; + localparam MITCNT1 = 12'h7d5; - assign wr_mitcnt1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT1); + assign wr_mitcnt1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCNT1); - assign mitcnt1_inc_ok = mitctl1[MITCTL_ENABLE] & + assign mitcnt1_inc_ok = mitctl1[MITCTL_ENABLE] & (~dec_pause_state | mitctl1[MITCTL_ENABLE_PAUSED]) & (~dec_tlu_pmu_fw_halted | mitctl1[MITCTL_ENABLE_HALTED]) & ~internal_dbg_halt_timers & (~mitctl1[3] | mit0_match_ns); - // only inc MITCNT1 if not cascaded with 0, or if 0 overflows - assign {mitcnt1_inc_cout, mitcnt1_inc[7:0]} = mitcnt1[7:0] + {7'b0, 1'b1}; - assign mitcnt1_inc[31:8] = mitcnt1[31:8] + {23'b0, mitcnt1_inc_cout}; + // only inc MITCNT1 if not cascaded with 0, or if 0 overflows + assign {mitcnt1_inc_cout, mitcnt1_inc[7:0]} = mitcnt1[7:0] + {7'b0, 1'b1}; + assign mitcnt1_inc[31:8] = mitcnt1[31:8] + {23'b0, mitcnt1_inc_cout}; - assign mitcnt1_ns[31:0] = wr_mitcnt1_r ? dec_csr_wrdata_r[31:0] : mit1_match_ns ? 
'b0 : mitcnt1_inc[31:0]; + assign mitcnt1_ns[31:0] = wr_mitcnt1_r ? dec_csr_wrdata_r[31:0] : mit1_match_ns ? 'b0 : mitcnt1_inc[31:0]; - rvdffe #(24) mitcnt1_ffb (.*, .clk(free_l2clk), .en(wr_mitcnt1_r | (mitcnt1_inc_ok & mitcnt1_inc_cout) | mit1_match_ns), .din(mitcnt1_ns[31:8]), .dout(mitcnt1[31:8])); - rvdffe #(8) mitcnt1_ffa (.*, .clk(free_l2clk), .en(wr_mitcnt1_r | mitcnt1_inc_ok | mit1_match_ns), .din(mitcnt1_ns[7:0]), .dout(mitcnt1[7:0])); + rvdffe #(24) mitcnt1_ffb ( + .*, + .clk (free_l2clk), + .en (wr_mitcnt1_r | (mitcnt1_inc_ok & mitcnt1_inc_cout) | mit1_match_ns), + .din (mitcnt1_ns[31:8]), + .dout(mitcnt1[31:8]) + ); + rvdffe #(8) mitcnt1_ffa ( + .*, + .clk (free_l2clk), + .en (wr_mitcnt1_r | mitcnt1_inc_ok | mit1_match_ns), + .din (mitcnt1_ns[7:0]), + .dout(mitcnt1[7:0]) + ); - // ---------------------------------------------------------------------- - // MITB0 (RW) - // [31:0] : Internal Timer Bound 0 + // ---------------------------------------------------------------------- + // MITB0 (RW) + // [31:0] : Internal Timer Bound 0 - localparam MITB0 = 12'h7d3; + localparam MITB0 = 12'h7d3; - assign wr_mitb0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITB0); + assign wr_mitb0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITB0); - rvdffe #(32) mitb0_ff (.*, .en(wr_mitb0_r), .din(~dec_csr_wrdata_r[31:0]), .dout(mitb0_b[31:0])); - assign mitb0[31:0] = ~mitb0_b[31:0]; + rvdffe #(32) mitb0_ff ( + .*, + .en (wr_mitb0_r), + .din (~dec_csr_wrdata_r[31:0]), + .dout(mitb0_b[31:0]) + ); + assign mitb0[31:0] = ~mitb0_b[31:0]; - // ---------------------------------------------------------------------- - // MITB1 (RW) - // [31:0] : Internal Timer Bound 1 + // ---------------------------------------------------------------------- + // MITB1 (RW) + // [31:0] : Internal Timer Bound 1 - localparam MITB1 = 12'h7d6; + localparam MITB1 = 12'h7d6; - assign wr_mitb1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITB1); + assign wr_mitb1_r = dec_csr_wen_r_mod 
& (dec_csr_wraddr_r[11:0] == MITB1); - rvdffe #(32) mitb1_ff (.*, .en(wr_mitb1_r), .din(~dec_csr_wrdata_r[31:0]), .dout(mitb1_b[31:0])); - assign mitb1[31:0] = ~mitb1_b[31:0]; + rvdffe #(32) mitb1_ff ( + .*, + .en (wr_mitb1_r), + .din (~dec_csr_wrdata_r[31:0]), + .dout(mitb1_b[31:0]) + ); + assign mitb1[31:0] = ~mitb1_b[31:0]; - // ---------------------------------------------------------------------- - // MITCTL0 (RW) Internal Timer Ctl 0 - // [31:3] : Reserved, reads 0x0 - // [2] : Enable while PAUSEd - // [1] : Enable while HALTed - // [0] : Enable (resets to 0x1) + // ---------------------------------------------------------------------- + // MITCTL0 (RW) Internal Timer Ctl 0 + // [31:3] : Reserved, reads 0x0 + // [2] : Enable while PAUSEd + // [1] : Enable while HALTed + // [0] : Enable (resets to 0x1) - localparam MITCTL0 = 12'h7d4; + localparam MITCTL0 = 12'h7d4; - assign wr_mitctl0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL0); - assign mitctl0_ns[2:0] = wr_mitctl0_r ? {dec_csr_wrdata_r[2:0]} : {mitctl0[2:0]}; + assign wr_mitctl0_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL0); + assign mitctl0_ns[2:0] = wr_mitctl0_r ? 
{dec_csr_wrdata_r[2:0]} : {mitctl0[2:0]}; - assign mitctl0_0_b_ns = ~mitctl0_ns[0]; - rvdffs #(3) mitctl0_ff (.*, .clk(csr_wr_clk), .en(wr_mitctl0_r), .din({mitctl0_ns[2:1], mitctl0_0_b_ns}), .dout({mitctl0[2:1], mitctl0_0_b})); - assign mitctl0[0] = ~mitctl0_0_b; + assign mitctl0_0_b_ns = ~mitctl0_ns[0]; + rvdffs #(3) mitctl0_ff ( + .*, + .clk (csr_wr_clk), + .en (wr_mitctl0_r), + .din ({mitctl0_ns[2:1], mitctl0_0_b_ns}), + .dout({mitctl0[2:1], mitctl0_0_b}) + ); + assign mitctl0[0] = ~mitctl0_0_b; - // ---------------------------------------------------------------------- - // MITCTL1 (RW) Internal Timer Ctl 1 - // [31:4] : Reserved, reads 0x0 - // [3] : Cascade - // [2] : Enable while PAUSEd - // [1] : Enable while HALTed - // [0] : Enable (resets to 0x1) + // ---------------------------------------------------------------------- + // MITCTL1 (RW) Internal Timer Ctl 1 + // [31:4] : Reserved, reads 0x0 + // [3] : Cascade + // [2] : Enable while PAUSEd + // [1] : Enable while HALTed + // [0] : Enable (resets to 0x1) - localparam MITCTL1 = 12'h7d7; + localparam MITCTL1 = 12'h7d7; - assign wr_mitctl1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL1); - assign mitctl1_ns[3:0] = wr_mitctl1_r ? {dec_csr_wrdata_r[3:0]} : {mitctl1[3:0]}; + assign wr_mitctl1_r = dec_csr_wen_r_mod & (dec_csr_wraddr_r[11:0] == MITCTL1); + assign mitctl1_ns[3:0] = wr_mitctl1_r ? 
{dec_csr_wrdata_r[3:0]} : {mitctl1[3:0]}; - assign mitctl1_0_b_ns = ~mitctl1_ns[0]; - rvdffs #(4) mitctl1_ff (.*, .clk(csr_wr_clk), .en(wr_mitctl1_r), .din({mitctl1_ns[3:1], mitctl1_0_b_ns}), .dout({mitctl1[3:1], mitctl1_0_b})); - assign mitctl1[0] = ~mitctl1_0_b; - assign dec_timer_read_d = csr_mitcnt1 | csr_mitcnt0 | csr_mitb1 | csr_mitb0 | csr_mitctl0 | csr_mitctl1; - assign dec_timer_rddata_d[31:0] = ( ({32{csr_mitcnt0}} & mitcnt0[31:0]) | + assign mitctl1_0_b_ns = ~mitctl1_ns[0]; + rvdffs #(4) mitctl1_ff ( + .*, + .clk (csr_wr_clk), + .en (wr_mitctl1_r), + .din ({mitctl1_ns[3:1], mitctl1_0_b_ns}), + .dout({mitctl1[3:1], mitctl1_0_b}) + ); + assign mitctl1[0] = ~mitctl1_0_b; + assign dec_timer_read_d = csr_mitcnt1 | csr_mitcnt0 | csr_mitb1 | csr_mitb0 | csr_mitctl0 | csr_mitctl1; + assign dec_timer_rddata_d[31:0] = ( ({32{csr_mitcnt0}} & mitcnt0[31:0]) | ({32{csr_mitcnt1}} & mitcnt1[31:0]) | ({32{csr_mitb0}} & mitb0[31:0]) | ({32{csr_mitb1}} & mitb1[31:0]) | @@ -2943,4 +3760,4 @@ module el2_dec_timer_ctl #( ); -endmodule // dec_timer_ctl +endmodule // dec_timer_ctl diff --git a/Flow/design/dec/el2_dec_trigger.sv b/Flow/design/dec/el2_dec_trigger.sv index fce7298..25223ff 100644 --- a/Flow/design/dec/el2_dec_trigger.sv +++ b/Flow/design/dec/el2_dec_trigger.sv @@ -23,27 +23,32 @@ // //******************************************************************************** module el2_dec_trigger -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - )( + `include "el2_param.vh" +) ( - input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 'match'-1 do mask, 0: full match - input logic [31:1] dec_i0_pc_d, // i0 pc + input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Packet from tlu. 'select':0-pc,1-Opcode 'Execute' needs to be set for dec triggers to fire. 
'match'-1 do mask, 0: full match + input logic [31:1] dec_i0_pc_d, // i0 pc - output logic [3:0] dec_i0_trigger_match_d // Trigger match + output logic [3:0] dec_i0_trigger_match_d // Trigger match ); - logic [3:0][31:0] dec_i0_match_data; - logic [3:0] dec_i0_trigger_data_match; + logic [3:0][31:0] dec_i0_match_data; + logic [3:0] dec_i0_trigger_data_match; - for (genvar i=0; i<4; i++) begin - assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match + for (genvar i = 0; i < 4; i++) begin + assign dec_i0_match_data[i][31:0] = ({32{~trigger_pkt_any[i].select & trigger_pkt_any[i].execute}} & {dec_i0_pc_d[31:1], trigger_pkt_any[i].tdata2[0]}); // select=0; do a PC match - rvmaskandmatch trigger_i0_match (.mask(trigger_pkt_any[i].tdata2[31:0]), .data(dec_i0_match_data[i][31:0]), .masken(trigger_pkt_any[i].match), .match(dec_i0_trigger_data_match[i])); + rvmaskandmatch trigger_i0_match ( + .mask (trigger_pkt_any[i].tdata2[31:0]), + .data (dec_i0_match_data[i][31:0]), + .masken(trigger_pkt_any[i].match), + .match (dec_i0_trigger_data_match[i]) + ); - assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i]; - end + assign dec_i0_trigger_match_d[i] = trigger_pkt_any[i].execute & trigger_pkt_any[i].m & dec_i0_trigger_data_match[i]; + end -endmodule // el2_dec_trigger +endmodule // el2_dec_trigger diff --git a/Flow/design/dmi/dmi_jtag_to_core_sync.v b/Flow/design/dmi/dmi_jtag_to_core_sync.v index 562f815..f0d3e85 100644 --- a/Flow/design/dmi/dmi_jtag_to_core_sync.v +++ b/Flow/design/dmi/dmi_jtag_to_core_sync.v @@ -23,42 +23,41 @@ //------------------------------------------------------------------------------------- module dmi_jtag_to_core_sync ( -// JTAG signals -input rd_en, // 1 bit Read Enable from JTAG -input wr_en, // 1 bit Write enable from JTAG + // JTAG signals + input rd_en, // 1 bit 
Read Enable from JTAG + input wr_en, // 1 bit Write enable from JTAG -// Processor Signals -input rst_n, // Core reset -input clk, // Core clock + // Processor Signals + input rst_n, // Core reset + input clk, // Core clock -output reg_en, // 1 bit Write interface bit to Processor -output reg_wr_en // 1 bit Write enable to Processor + output reg_en, // 1 bit Write interface bit to Processor + output reg_wr_en // 1 bit Write enable to Processor ); - -wire c_rd_en; -wire c_wr_en; -reg [2:0] rden, wren; - -// Outputs -assign reg_en = c_wr_en | c_rd_en; -assign reg_wr_en = c_wr_en; + wire c_rd_en; + wire c_wr_en; + reg [2:0] rden, wren; -// synchronizers -always @ ( posedge clk or negedge rst_n) begin - if(!rst_n) begin - rden <= '0; - wren <= '0; + // Outputs + assign reg_en = c_wr_en | c_rd_en; + assign reg_wr_en = c_wr_en; + + + // synchronizers + always @(posedge clk or negedge rst_n) begin + if (!rst_n) begin + rden <= '0; + wren <= '0; + end else begin + rden <= {rden[1:0], rd_en}; + wren <= {wren[1:0], wr_en}; end - else begin - rden <= {rden[1:0], rd_en}; - wren <= {wren[1:0], wr_en}; - end -end + end + + assign c_rd_en = rden[1] & ~rden[2]; + assign c_wr_en = wren[1] & ~wren[2]; -assign c_rd_en = rden[1] & ~rden[2]; -assign c_wr_en = wren[1] & ~wren[2]; - endmodule diff --git a/Flow/design/dmi/dmi_wrapper.v b/Flow/design/dmi/dmi_wrapper.v index d9fd741..9031163 100644 --- a/Flow/design/dmi/dmi_wrapper.v +++ b/Flow/design/dmi/dmi_wrapper.v @@ -21,70 +21,70 @@ // //------------------------------------------------------------------------------------- -module dmi_wrapper( +module dmi_wrapper ( - // JTAG signals - input trst_n, // JTAG reset - input tck, // JTAG clock - input tms, // Test mode select - input tdi, // Test Data Input - output tdo, // Test Data Output - output tdoEnable, // Test Data Output enable + // JTAG signals + input trst_n, // JTAG reset + input tck, // JTAG clock + input tms, // Test mode select + input tdi, // Test Data Input + output tdo, // 
Test Data Output + output tdoEnable, // Test Data Output enable - // Processor Signals - input core_rst_n, // Core reset - input core_clk, // Core clock - input [31:1] jtag_id, // JTAG ID - input [31:0] rd_data, // 32 bit Read data from Processor - output [31:0] reg_wr_data, // 32 bit Write data to Processor - output [6:0] reg_wr_addr, // 7 bit reg address to Processor - output reg_en, // 1 bit Read enable to Processor - output reg_wr_en, // 1 bit Write enable to Processor - output dmi_hard_reset + // Processor Signals + input core_rst_n, // Core reset + input core_clk, // Core clock + input [31:1] jtag_id, // JTAG ID + input [31:0] rd_data, // 32 bit Read data from Processor + output [31:0] reg_wr_data, // 32 bit Write data to Processor + output [6:0] reg_wr_addr, // 7 bit reg address to Processor + output reg_en, // 1 bit Read enable to Processor + output reg_wr_en, // 1 bit Write enable to Processor + output dmi_hard_reset ); - + //Wire Declaration - wire rd_en; - wire wr_en; - wire dmireset; + wire rd_en; + wire wr_en; + wire dmireset; + - //jtag_tap instantiation - rvjtag_tap i_jtag_tap( - .trst(trst_n), // dedicated JTAG TRST (active low) pad signal or asynchronous active low power on reset - .tck(tck), // dedicated JTAG TCK pad signal - .tms(tms), // dedicated JTAG TMS pad signal - .tdi(tdi), // dedicated JTAG TDI pad signal - .tdo(tdo), // dedicated JTAG TDO pad signal - .tdoEnable(tdoEnable), // enable for TDO pad - .wr_data(reg_wr_data), // 32 bit Write data - .wr_addr(reg_wr_addr), // 7 bit Write address - .rd_en(rd_en), // 1 bit read enable - .wr_en(wr_en), // 1 bit Write enable - .rd_data(rd_data), // 32 bit Read data - .rd_status(2'b0), - .idle(3'h0), // no need to wait to sample data - .dmi_stat(2'b0), // no need to wait or error possible - .version(4'h1), // debug spec 0.13 compliant - .jtag_id(jtag_id), - .dmi_hard_reset(dmi_hard_reset), - .dmi_reset(dmireset) -); + rvjtag_tap i_jtag_tap ( + .trst(trst_n), // dedicated JTAG TRST (active low) pad 
signal or asynchronous active low power on reset + .tck(tck), // dedicated JTAG TCK pad signal + .tms(tms), // dedicated JTAG TMS pad signal + .tdi(tdi), // dedicated JTAG TDI pad signal + .tdo(tdo), // dedicated JTAG TDO pad signal + .tdoEnable(tdoEnable), // enable for TDO pad + .wr_data(reg_wr_data), // 32 bit Write data + .wr_addr(reg_wr_addr), // 7 bit Write address + .rd_en(rd_en), // 1 bit read enable + .wr_en(wr_en), // 1 bit Write enable + .rd_data(rd_data), // 32 bit Read data + .rd_status(2'b0), + .idle(3'h0), // no need to wait to sample data + .dmi_stat(2'b0), // no need to wait or error possible + .version(4'h1), // debug spec 0.13 compliant + .jtag_id(jtag_id), + .dmi_hard_reset(dmi_hard_reset), + .dmi_reset(dmireset) + ); // dmi_jtag_to_core_sync instantiation - dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync( - .wr_en(wr_en), // 1 bit Write enable - .rd_en(rd_en), // 1 bit Read enable + dmi_jtag_to_core_sync i_dmi_jtag_to_core_sync ( + .wr_en(wr_en), // 1 bit Write enable + .rd_en(rd_en), // 1 bit Read enable - .rst_n(core_rst_n), - .clk(core_clk), - .reg_en(reg_en), // 1 bit Write interface bit - .reg_wr_en(reg_wr_en) // 1 bit Write enable + .rst_n (core_rst_n), + .clk (core_clk), + .reg_en (reg_en), // 1 bit Write interface bit + .reg_wr_en(reg_wr_en) // 1 bit Write enable ); endmodule diff --git a/Flow/design/dmi/rvjtag_tap.v b/Flow/design/dmi/rvjtag_tap.v index 2553575..2886975 100644 --- a/Flow/design/dmi/rvjtag_tap.v +++ b/Flow/design/dmi/rvjtag_tap.v @@ -14,209 +14,202 @@ // limitations under the License module rvjtag_tap #( -parameter AWIDTH = 7 -) -( -input trst, -input tck, -input tms, -input tdi, -output reg tdo, -output tdoEnable, + parameter AWIDTH = 7 +) ( + input trst, + input tck, + input tms, + input tdi, + output reg tdo, + output tdoEnable, -output [31:0] wr_data, -output [AWIDTH-1:0] wr_addr, -output wr_en, -output rd_en, + output [ 31:0] wr_data, + output [AWIDTH-1:0] wr_addr, + output wr_en, + output rd_en, -input [31:0] 
rd_data, -input [1:0] rd_status, + input [31:0] rd_data, + input [ 1:0] rd_status, -output reg dmi_reset, -output reg dmi_hard_reset, + output reg dmi_reset, + output reg dmi_hard_reset, -input [2:0] idle, -input [1:0] dmi_stat, -/* + input [ 2:0] idle, + input [ 1:0] dmi_stat, + /* -- revisionCode : 4'h0; -- manufacturersIdCode : 11'h45; -- deviceIdCode : 16'h0001; -- order MSB .. LSB -> [4 bit version or revision] [16 bit part number] [11 bit manufacturer id] [value of 1'b1 in LSB] */ -input [31:1] jtag_id, -input [3:0] version + input [31:1] jtag_id, + input [ 3:0] version ); -localparam USER_DR_LENGTH = AWIDTH + 34; + localparam USER_DR_LENGTH = AWIDTH + 34; -reg [USER_DR_LENGTH-1:0] sr, nsr, dr; + reg [USER_DR_LENGTH-1:0] sr, nsr, dr; -/////////////////////////////////////////////////////// -// Tap controller -/////////////////////////////////////////////////////// -logic[3:0] state, nstate; -logic [4:0] ir; -wire jtag_reset; -wire shift_dr; -wire pause_dr; -wire update_dr; -wire capture_dr; -wire shift_ir; -wire pause_ir ; -wire update_ir ; -wire capture_ir; -wire[1:0] dr_en; -wire devid_sel; -wire [5:0] abits; + /////////////////////////////////////////////////////// + // Tap controller + /////////////////////////////////////////////////////// + logic [3:0] state, nstate; + logic [4:0] ir; + wire jtag_reset; + wire shift_dr; + wire pause_dr; + wire update_dr; + wire capture_dr; + wire shift_ir; + wire pause_ir; + wire update_ir; + wire capture_ir; + wire [1:0] dr_en; + wire devid_sel; + wire [5:0] abits; -assign abits = AWIDTH[5:0]; + assign abits = AWIDTH[5:0]; -localparam TEST_LOGIC_RESET_STATE = 0; -localparam RUN_TEST_IDLE_STATE = 1; -localparam SELECT_DR_SCAN_STATE = 2; -localparam CAPTURE_DR_STATE = 3; -localparam SHIFT_DR_STATE = 4; -localparam EXIT1_DR_STATE = 5; -localparam PAUSE_DR_STATE = 6; -localparam EXIT2_DR_STATE = 7; -localparam UPDATE_DR_STATE = 8; -localparam SELECT_IR_SCAN_STATE = 9; -localparam CAPTURE_IR_STATE = 10; -localparam 
SHIFT_IR_STATE = 11; -localparam EXIT1_IR_STATE = 12; -localparam PAUSE_IR_STATE = 13; -localparam EXIT2_IR_STATE = 14; -localparam UPDATE_IR_STATE = 15; + localparam TEST_LOGIC_RESET_STATE = 0; + localparam RUN_TEST_IDLE_STATE = 1; + localparam SELECT_DR_SCAN_STATE = 2; + localparam CAPTURE_DR_STATE = 3; + localparam SHIFT_DR_STATE = 4; + localparam EXIT1_DR_STATE = 5; + localparam PAUSE_DR_STATE = 6; + localparam EXIT2_DR_STATE = 7; + localparam UPDATE_DR_STATE = 8; + localparam SELECT_IR_SCAN_STATE = 9; + localparam CAPTURE_IR_STATE = 10; + localparam SHIFT_IR_STATE = 11; + localparam EXIT1_IR_STATE = 12; + localparam PAUSE_IR_STATE = 13; + localparam EXIT2_IR_STATE = 14; + localparam UPDATE_IR_STATE = 15; -always_comb begin + always_comb begin nstate = state; - case(state) - TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE; - RUN_TEST_IDLE_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; - SELECT_DR_SCAN_STATE: nstate = tms ? SELECT_IR_SCAN_STATE : CAPTURE_DR_STATE; - CAPTURE_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE; - SHIFT_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE; - EXIT1_DR_STATE: nstate = tms ? UPDATE_DR_STATE : PAUSE_DR_STATE; - PAUSE_DR_STATE: nstate = tms ? EXIT2_DR_STATE : PAUSE_DR_STATE; - EXIT2_DR_STATE: nstate = tms ? UPDATE_DR_STATE : SHIFT_DR_STATE; - UPDATE_DR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; - SELECT_IR_SCAN_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE; - CAPTURE_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE; - SHIFT_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE; - EXIT1_IR_STATE: nstate = tms ? UPDATE_IR_STATE : PAUSE_IR_STATE; - PAUSE_IR_STATE: nstate = tms ? EXIT2_IR_STATE : PAUSE_IR_STATE; - EXIT2_IR_STATE: nstate = tms ? UPDATE_IR_STATE : SHIFT_IR_STATE; - UPDATE_IR_STATE: nstate = tms ? 
SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; - default: nstate = TEST_LOGIC_RESET_STATE; + case (state) + TEST_LOGIC_RESET_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : RUN_TEST_IDLE_STATE; + RUN_TEST_IDLE_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; + SELECT_DR_SCAN_STATE: nstate = tms ? SELECT_IR_SCAN_STATE : CAPTURE_DR_STATE; + CAPTURE_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE; + SHIFT_DR_STATE: nstate = tms ? EXIT1_DR_STATE : SHIFT_DR_STATE; + EXIT1_DR_STATE: nstate = tms ? UPDATE_DR_STATE : PAUSE_DR_STATE; + PAUSE_DR_STATE: nstate = tms ? EXIT2_DR_STATE : PAUSE_DR_STATE; + EXIT2_DR_STATE: nstate = tms ? UPDATE_DR_STATE : SHIFT_DR_STATE; + UPDATE_DR_STATE: nstate = tms ? SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; + SELECT_IR_SCAN_STATE: nstate = tms ? TEST_LOGIC_RESET_STATE : CAPTURE_IR_STATE; + CAPTURE_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE; + SHIFT_IR_STATE: nstate = tms ? EXIT1_IR_STATE : SHIFT_IR_STATE; + EXIT1_IR_STATE: nstate = tms ? UPDATE_IR_STATE : PAUSE_IR_STATE; + PAUSE_IR_STATE: nstate = tms ? EXIT2_IR_STATE : PAUSE_IR_STATE; + EXIT2_IR_STATE: nstate = tms ? UPDATE_IR_STATE : SHIFT_IR_STATE; + UPDATE_IR_STATE: nstate = tms ? 
SELECT_DR_SCAN_STATE : RUN_TEST_IDLE_STATE; + default: nstate = TEST_LOGIC_RESET_STATE; endcase -end + end -always @ (posedge tck or negedge trst) begin - if(!trst) state <= TEST_LOGIC_RESET_STATE; + always @(posedge tck or negedge trst) begin + if (!trst) state <= TEST_LOGIC_RESET_STATE; else state <= nstate; -end + end -assign jtag_reset = state == TEST_LOGIC_RESET_STATE; -assign shift_dr = state == SHIFT_DR_STATE; -assign pause_dr = state == PAUSE_DR_STATE; -assign update_dr = state == UPDATE_DR_STATE; -assign capture_dr = state == CAPTURE_DR_STATE; -assign shift_ir = state == SHIFT_IR_STATE; -assign pause_ir = state == PAUSE_IR_STATE; -assign update_ir = state == UPDATE_IR_STATE; -assign capture_ir = state == CAPTURE_IR_STATE; + assign jtag_reset = state == TEST_LOGIC_RESET_STATE; + assign shift_dr = state == SHIFT_DR_STATE; + assign pause_dr = state == PAUSE_DR_STATE; + assign update_dr = state == UPDATE_DR_STATE; + assign capture_dr = state == CAPTURE_DR_STATE; + assign shift_ir = state == SHIFT_IR_STATE; + assign pause_ir = state == PAUSE_IR_STATE; + assign update_ir = state == UPDATE_IR_STATE; + assign capture_ir = state == CAPTURE_IR_STATE; -assign tdoEnable = shift_dr | shift_ir; + assign tdoEnable = shift_dr | shift_ir; -/////////////////////////////////////////////////////// -// IR register -/////////////////////////////////////////////////////// + /////////////////////////////////////////////////////// + // IR register + /////////////////////////////////////////////////////// -always @ (negedge tck or negedge trst) begin - if (!trst) ir <= 5'b1; - else begin + always @(negedge tck or negedge trst) begin + if (!trst) ir <= 5'b1; + else begin if (jtag_reset) ir <= 5'b1; - else if (update_ir) ir <= (sr[4:0] == '0) ? 
5'h1f :sr[4:0]; - end -end - - -assign devid_sel = ir == 5'b00001; -assign dr_en[0] = ir == 5'b10000; -assign dr_en[1] = ir == 5'b10001; - -/////////////////////////////////////////////////////// -// Shift register -/////////////////////////////////////////////////////// -always @ (posedge tck or negedge trst) begin - if(!trst)begin - sr <= '0; + else if (update_ir) ir <= (sr[4:0] == '0) ? 5'h1f : sr[4:0]; end - else begin - sr <= nsr; - end -end + end -// SR next value -always_comb begin + + assign devid_sel = ir == 5'b00001; + assign dr_en[0] = ir == 5'b10000; + assign dr_en[1] = ir == 5'b10001; + + /////////////////////////////////////////////////////// + // Shift register + /////////////////////////////////////////////////////// + always @(posedge tck or negedge trst) begin + if (!trst) begin + sr <= '0; + end else begin + sr <= nsr; + end + end + + // SR next value + always_comb begin nsr = sr; - case(1) - shift_dr: begin - case(1) - dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]}; + case (1) + shift_dr: begin + case (1) + dr_en[1]: nsr = {tdi, sr[USER_DR_LENGTH-1:1]}; - dr_en[0], + dr_en[0], devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}},tdi, sr[31:1]}; - default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass - endcase - end - capture_dr: begin - nsr[0] = 1'b0; - case(1) - dr_en[0]: nsr = {{USER_DR_LENGTH-15{1'b0}}, idle, dmi_stat, abits, version}; - dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status}; - devid_sel: nsr = {{USER_DR_LENGTH-32{1'b0}}, jtag_id, 1'b1}; - endcase - end - shift_ir: nsr = {{USER_DR_LENGTH-5{1'b0}},tdi, sr[4:1]}; - capture_ir: nsr = {{USER_DR_LENGTH-1{1'b0}},1'b1}; + default: nsr = {{USER_DR_LENGTH-1{1'b0}},tdi}; // bypass + endcase + end + capture_dr: begin + nsr[0] = 1'b0; + case (1) + dr_en[0]: nsr = {{USER_DR_LENGTH - 15{1'b0}}, idle, dmi_stat, abits, version}; + dr_en[1]: nsr = {{AWIDTH{1'b0}}, rd_data, rd_status}; + devid_sel: nsr = {{USER_DR_LENGTH - 32{1'b0}}, jtag_id, 1'b1}; + endcase + end + shift_ir: nsr = {{USER_DR_LENGTH - 
5{1'b0}}, tdi, sr[4:1]}; + capture_ir: nsr = {{USER_DR_LENGTH - 1{1'b0}}, 1'b1}; endcase -end + end -// TDO retiming -always @ (negedge tck ) tdo <= sr[0]; + // TDO retiming + always @(negedge tck) tdo <= sr[0]; -// DMI CS register -always @ (posedge tck or negedge trst) begin - if(!trst) begin - dmi_hard_reset <= 1'b0; - dmi_reset <= 1'b0; - end - else if (update_dr & dr_en[0]) begin - dmi_hard_reset <= sr[17]; - dmi_reset <= sr[16]; + // DMI CS register + always @(posedge tck or negedge trst) begin + if (!trst) begin + dmi_hard_reset <= 1'b0; + dmi_reset <= 1'b0; + end else if (update_dr & dr_en[0]) begin + dmi_hard_reset <= sr[17]; + dmi_reset <= sr[16]; + end else begin + dmi_hard_reset <= 1'b0; + dmi_reset <= 1'b0; end + end + + // DR register + always @(posedge tck or negedge trst) begin + if (!trst) dr <= '0; else begin - dmi_hard_reset <= 1'b0; - dmi_reset <= 1'b0; + if (update_dr & dr_en[1]) dr <= sr; + else dr <= {dr[USER_DR_LENGTH-1:2], 2'b0}; end -end + end -// DR register -always @ (posedge tck or negedge trst) begin - if(!trst) - dr <= '0; - else begin - if (update_dr & dr_en[1]) - dr <= sr; - else - dr <= {dr[USER_DR_LENGTH-1:2],2'b0}; - end -end - -assign {wr_addr, wr_data, wr_en, rd_en} = dr; + assign {wr_addr, wr_data, wr_en, rd_en} = dr; diff --git a/Flow/design/el2_dma_ctrl.sv b/Flow/design/el2_dma_ctrl.sv index 3b6c77e..d535cb4 100644 --- a/Flow/design/el2_dma_ctrl.sv +++ b/Flow/design/el2_dma_ctrl.sv @@ -22,470 +22,745 @@ //******************************************************************************** module el2_dma_ctrl #( -`include "el2_param.vh" - )( - input logic clk, - input logic free_clk, - input logic rst_l, - input logic dma_bus_clk_en, // slave bus clock enable - input logic clk_override, - input logic scan_mode, + `include "el2_param.vh" +) ( + input logic clk, + input logic free_clk, + input logic rst_l, + input logic dma_bus_clk_en, // slave bus clock enable + input logic clk_override, + input logic scan_mode, - // Debug signals 
- input logic [31:0] dbg_cmd_addr, - input logic [31:0] dbg_cmd_wrdata, - input logic dbg_cmd_valid, - input logic dbg_cmd_write, // 1: write command, 0: read_command - input logic [1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory - input logic [1:0] dbg_cmd_size, // size of the abstract mem access debug command + // Debug signals + input logic [31:0] dbg_cmd_addr, + input logic [31:0] dbg_cmd_wrdata, + input logic dbg_cmd_valid, + input logic dbg_cmd_write, // 1: write command, 0: read_command + input logic [ 1:0] dbg_cmd_type, // 0:gpr 1:csr 2: memory + input logic [ 1:0] dbg_cmd_size, // size of the abstract mem access debug command - input logic dbg_dma_bubble, // Debug needs a bubble to send a valid - output logic dma_dbg_ready, // DMA is ready to accept debug request + input logic dbg_dma_bubble, // Debug needs a bubble to send a valid + output logic dma_dbg_ready, // DMA is ready to accept debug request - output logic dma_dbg_cmd_done, - output logic dma_dbg_cmd_fail, - output logic [31:0] dma_dbg_rddata, + output logic dma_dbg_cmd_done, + output logic dma_dbg_cmd_fail, + output logic [31:0] dma_dbg_rddata, - // Core side signals - output logic dma_dccm_req, // DMA dccm request (only one of dccm/iccm will be set) - output logic dma_iccm_req, // DMA iccm request - output logic [2:0] dma_mem_tag, // DMA Buffer entry number - output logic [31:0] dma_mem_addr, // DMA request address - output logic [2:0] dma_mem_sz, // DMA request size - output logic dma_mem_write, // DMA write to dccm/iccm - output logic [63:0] dma_mem_wdata, // DMA write data + // Core side signals + output logic dma_dccm_req, // DMA dccm request (only one of dccm/iccm will be set) + output logic dma_iccm_req, // DMA iccm request + output logic [ 2:0] dma_mem_tag, // DMA Buffer entry number + output logic [31:0] dma_mem_addr, // DMA request address + output logic [ 2:0] dma_mem_sz, // DMA request size + output logic dma_mem_write, // DMA write to dccm/iccm + output logic [63:0] dma_mem_wdata, // DMA 
write data - input logic dccm_dma_rvalid, // dccm data valid for DMA read - input logic dccm_dma_ecc_error, // ECC error on DMA read - input logic [2:0] dccm_dma_rtag, // Tag of the DMA req - input logic [63:0] dccm_dma_rdata, // dccm data for DMA read - input logic iccm_dma_rvalid, // iccm data valid for DMA read - input logic iccm_dma_ecc_error, // ECC error on DMA read - input logic [2:0] iccm_dma_rtag, // Tag of the DMA req - input logic [63:0] iccm_dma_rdata, // iccm data for DMA read + input logic dccm_dma_rvalid, // dccm data valid for DMA read + input logic dccm_dma_ecc_error, // ECC error on DMA read + input logic [ 2:0] dccm_dma_rtag, // Tag of the DMA req + input logic [63:0] dccm_dma_rdata, // dccm data for DMA read + input logic iccm_dma_rvalid, // iccm data valid for DMA read + input logic iccm_dma_ecc_error, // ECC error on DMA read + input logic [ 2:0] iccm_dma_rtag, // Tag of the DMA req + input logic [63:0] iccm_dma_rdata, // iccm data for DMA read - output logic dma_active, // DMA is busy - output logic dma_dccm_stall_any, // stall dccm pipe (bubble) so that DMA can proceed - output logic dma_iccm_stall_any, // stall iccm pipe (bubble) so that DMA can proceed - input logic dccm_ready, // dccm ready to accept DMA request - input logic iccm_ready, // iccm ready to accept DMA request - input logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:15] + output logic dma_active, // DMA is busy + output logic dma_dccm_stall_any, // stall dccm pipe (bubble) so that DMA can proceed + output logic dma_iccm_stall_any, // stall iccm pipe (bubble) so that DMA can proceed + input logic dccm_ready, // dccm ready to accept DMA request + input logic iccm_ready, // iccm ready to accept DMA request + input logic [2:0] dec_tlu_dma_qos_prty, // DMA QoS priority coming from MFDC [18:15] - // PMU signals - output logic dma_pmu_dccm_read, - output logic dma_pmu_dccm_write, - output logic dma_pmu_any_read, - output logic dma_pmu_any_write, + // PMU 
signals + output logic dma_pmu_dccm_read, + output logic dma_pmu_dccm_write, + output logic dma_pmu_any_read, + output logic dma_pmu_any_write, - // AXI Write Channels - input logic dma_axi_awvalid, - output logic dma_axi_awready, - input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, - input logic [31:0] dma_axi_awaddr, - input logic [2:0] dma_axi_awsize, + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [ 31:0] dma_axi_awaddr, + input logic [ 2:0] dma_axi_awsize, - input logic dma_axi_wvalid, - output logic dma_axi_wready, - input logic [63:0] dma_axi_wdata, - input logic [7:0] dma_axi_wstrb, + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [ 7:0] dma_axi_wstrb, - output logic dma_axi_bvalid, - input logic dma_axi_bready, - output logic [1:0] dma_axi_bresp, - output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [ 1:0] dma_axi_bresp, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, - // AXI Read Channels - input logic dma_axi_arvalid, - output logic dma_axi_arready, - input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, - input logic [31:0] dma_axi_araddr, - input logic [2:0] dma_axi_arsize, + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [ 31:0] dma_axi_araddr, + input logic [ 2:0] dma_axi_arsize, - output logic dma_axi_rvalid, - input logic dma_axi_rready, - output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, - output logic [63:0] dma_axi_rdata, - output logic [1:0] dma_axi_rresp, - output logic dma_axi_rlast + output logic dma_axi_rvalid, + input logic dma_axi_rready, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [ 63:0] dma_axi_rdata, + output logic [ 1:0] dma_axi_rresp, + output logic dma_axi_rlast ); - localparam DEPTH = 
pt.DMA_BUF_DEPTH; - localparam DEPTH_PTR = $clog2(DEPTH); - localparam NACK_COUNT = 7; + localparam DEPTH = pt.DMA_BUF_DEPTH; + localparam DEPTH_PTR = $clog2(DEPTH); + localparam NACK_COUNT = 7; - logic [DEPTH-1:0] fifo_valid; - logic [DEPTH-1:0][1:0] fifo_error; - logic [DEPTH-1:0] fifo_error_bus; - logic [DEPTH-1:0] fifo_rpend; - logic [DEPTH-1:0] fifo_done; // DMA trxn is done in core - logic [DEPTH-1:0] fifo_done_bus; // DMA trxn is done in core but synced to bus clock - logic [DEPTH-1:0][31:0] fifo_addr; - logic [DEPTH-1:0][2:0] fifo_sz; - logic [DEPTH-1:0][7:0] fifo_byteen; - logic [DEPTH-1:0] fifo_write; - logic [DEPTH-1:0] fifo_posted_write; - logic [DEPTH-1:0] fifo_dbg; - logic [DEPTH-1:0][63:0] fifo_data; - logic [DEPTH-1:0][pt.DMA_BUS_TAG-1:0] fifo_tag; - logic [DEPTH-1:0][pt.DMA_BUS_ID-1:0] fifo_mid; - logic [DEPTH-1:0][pt.DMA_BUS_PRTY-1:0] fifo_prty; + logic [DEPTH-1:0] fifo_valid; + logic [DEPTH-1:0][1:0] fifo_error; + logic [DEPTH-1:0] fifo_error_bus; + logic [DEPTH-1:0] fifo_rpend; + logic [DEPTH-1:0] fifo_done; // DMA trxn is done in core + logic [DEPTH-1:0] fifo_done_bus; // DMA trxn is done in core but synced to bus clock + logic [DEPTH-1:0][31:0] fifo_addr; + logic [DEPTH-1:0][2:0] fifo_sz; + logic [DEPTH-1:0][7:0] fifo_byteen; + logic [DEPTH-1:0] fifo_write; + logic [DEPTH-1:0] fifo_posted_write; + logic [DEPTH-1:0] fifo_dbg; + logic [DEPTH-1:0][63:0] fifo_data; + logic [DEPTH-1:0][pt.DMA_BUS_TAG-1:0] fifo_tag; + logic [DEPTH-1:0][pt.DMA_BUS_ID-1:0] fifo_mid; + logic [DEPTH-1:0][pt.DMA_BUS_PRTY-1:0] fifo_prty; - logic [DEPTH-1:0] fifo_cmd_en; - logic [DEPTH-1:0] fifo_data_en; - logic [DEPTH-1:0] fifo_pend_en; - logic [DEPTH-1:0] fifo_done_en; - logic [DEPTH-1:0] fifo_done_bus_en; - logic [DEPTH-1:0] fifo_error_en; - logic [DEPTH-1:0] fifo_error_bus_en; - logic [DEPTH-1:0] fifo_reset; - logic [DEPTH-1:0][1:0] fifo_error_in; - logic [DEPTH-1:0][63:0] fifo_data_in; + logic [DEPTH-1:0] fifo_cmd_en; + logic [DEPTH-1:0] fifo_data_en; + logic 
[DEPTH-1:0] fifo_pend_en; + logic [DEPTH-1:0] fifo_done_en; + logic [DEPTH-1:0] fifo_done_bus_en; + logic [DEPTH-1:0] fifo_error_en; + logic [DEPTH-1:0] fifo_error_bus_en; + logic [DEPTH-1:0] fifo_reset; + logic [DEPTH-1:0][1:0] fifo_error_in; + logic [DEPTH-1:0][63:0] fifo_data_in; - logic fifo_write_in; - logic fifo_posted_write_in; - logic fifo_dbg_in; - logic [31:0] fifo_addr_in; - logic [2:0] fifo_sz_in; - logic [7:0] fifo_byteen_in; + logic fifo_write_in; + logic fifo_posted_write_in; + logic fifo_dbg_in; + logic [31:0] fifo_addr_in; + logic [2:0] fifo_sz_in; + logic [7:0] fifo_byteen_in; - logic [DEPTH_PTR-1:0] RspPtr, NxtRspPtr; - logic [DEPTH_PTR-1:0] WrPtr, NxtWrPtr; - logic [DEPTH_PTR-1:0] RdPtr, NxtRdPtr; - logic WrPtrEn, RdPtrEn, RspPtrEn; + logic [DEPTH_PTR-1:0] RspPtr, NxtRspPtr; + logic [DEPTH_PTR-1:0] WrPtr, NxtWrPtr; + logic [DEPTH_PTR-1:0] RdPtr, NxtRdPtr; + logic WrPtrEn, RdPtrEn, RspPtrEn; - logic [1:0] dma_dbg_sz; - logic [1:0] dma_dbg_addr; - logic [31:0] dma_dbg_mem_rddata; - logic [31:0] dma_dbg_mem_wrdata; - logic dma_dbg_cmd_error; - logic dma_dbg_cmd_done_q; + logic [ 1:0] dma_dbg_sz; + logic [ 1:0] dma_dbg_addr; + logic [31:0] dma_dbg_mem_rddata; + logic [31:0] dma_dbg_mem_wrdata; + logic dma_dbg_cmd_error; + logic dma_dbg_cmd_done_q; - logic fifo_full, fifo_full_spec, fifo_empty; - logic dma_address_error, dma_alignment_error; - logic [3:0] num_fifo_vld; - logic dma_mem_req; - logic [31:0] dma_mem_addr_int; - logic [2:0] dma_mem_sz_int; - logic [7:0] dma_mem_byteen; - logic dma_mem_addr_in_dccm; - logic dma_mem_addr_in_iccm; - logic dma_mem_addr_in_pic; - logic dma_mem_addr_in_pic_region_nc; - logic dma_mem_addr_in_dccm_region_nc; - logic dma_mem_addr_in_iccm_region_nc; + logic fifo_full, fifo_full_spec, fifo_empty; + logic dma_address_error, dma_alignment_error; + logic [ 3:0] num_fifo_vld; + logic dma_mem_req; + logic [31:0] dma_mem_addr_int; + logic [ 2:0] dma_mem_sz_int; + logic [ 7:0] dma_mem_byteen; + logic dma_mem_addr_in_dccm; 
+ logic dma_mem_addr_in_iccm; + logic dma_mem_addr_in_pic; + logic dma_mem_addr_in_pic_region_nc; + logic dma_mem_addr_in_dccm_region_nc; + logic dma_mem_addr_in_iccm_region_nc; - logic [2:0] dma_nack_count, dma_nack_count_d, dma_nack_count_csr; + logic [2:0] dma_nack_count, dma_nack_count_d, dma_nack_count_csr; - logic dma_buffer_c1_clken; - logic dma_free_clken; - logic dma_buffer_c1_clk; - logic dma_free_clk; - logic dma_bus_clk; + logic dma_buffer_c1_clken; + logic dma_free_clken; + logic dma_buffer_c1_clk; + logic dma_free_clk; + logic dma_bus_clk; - logic bus_rsp_valid, bus_rsp_sent; - logic bus_cmd_valid, bus_cmd_sent; - logic bus_cmd_write, bus_cmd_posted_write; - logic [7:0] bus_cmd_byteen; - logic [2:0] bus_cmd_sz; - logic [31:0] bus_cmd_addr; - logic [63:0] bus_cmd_wdata; - logic [pt.DMA_BUS_TAG-1:0] bus_cmd_tag; - logic [pt.DMA_BUS_ID-1:0] bus_cmd_mid; - logic [pt.DMA_BUS_PRTY-1:0] bus_cmd_prty; - logic bus_posted_write_done; + logic bus_rsp_valid, bus_rsp_sent; + logic bus_cmd_valid, bus_cmd_sent; + logic bus_cmd_write, bus_cmd_posted_write; + logic [ 7:0] bus_cmd_byteen; + logic [ 2:0] bus_cmd_sz; + logic [ 31:0] bus_cmd_addr; + logic [ 63:0] bus_cmd_wdata; + logic [ pt.DMA_BUS_TAG-1:0] bus_cmd_tag; + logic [ pt.DMA_BUS_ID-1:0] bus_cmd_mid; + logic [pt.DMA_BUS_PRTY-1:0] bus_cmd_prty; + logic bus_posted_write_done; - logic fifo_full_spec_bus; - logic dbg_dma_bubble_bus; - logic stall_dma_in; - logic dma_fifo_ready; + logic fifo_full_spec_bus; + logic dbg_dma_bubble_bus; + logic stall_dma_in; + logic dma_fifo_ready; - logic wrbuf_en, wrbuf_data_en; - logic wrbuf_cmd_sent, wrbuf_rst, wrbuf_data_rst; - logic wrbuf_vld, wrbuf_data_vld; - logic [pt.DMA_BUS_TAG-1:0] wrbuf_tag; - logic [2:0] wrbuf_sz; - logic [31:0] wrbuf_addr; - logic [63:0] wrbuf_data; - logic [7:0] wrbuf_byteen; + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst, wrbuf_data_rst; + logic wrbuf_vld, wrbuf_data_vld; + logic [pt.DMA_BUS_TAG-1:0] wrbuf_tag; + logic [ 2:0] 
wrbuf_sz; + logic [ 31:0] wrbuf_addr; + logic [ 63:0] wrbuf_data; + logic [ 7:0] wrbuf_byteen; - logic rdbuf_en; - logic rdbuf_cmd_sent, rdbuf_rst; - logic rdbuf_vld; - logic [pt.DMA_BUS_TAG-1:0] rdbuf_tag; - logic [2:0] rdbuf_sz; - logic [31:0] rdbuf_addr; + logic rdbuf_en; + logic rdbuf_cmd_sent, rdbuf_rst; + logic rdbuf_vld; + logic [pt.DMA_BUS_TAG-1:0] rdbuf_tag; + logic [ 2:0] rdbuf_sz; + logic [ 31:0] rdbuf_addr; - logic axi_mstr_prty_in, axi_mstr_prty_en; - logic axi_mstr_priority; - logic axi_mstr_sel; + logic axi_mstr_prty_in, axi_mstr_prty_en; + logic axi_mstr_priority; + logic axi_mstr_sel; - logic axi_rsp_valid, axi_rsp_sent; - logic axi_rsp_write; - logic [pt.DMA_BUS_TAG-1:0] axi_rsp_tag; - logic [1:0] axi_rsp_error; - logic [63:0] axi_rsp_rdata; + logic axi_rsp_valid, axi_rsp_sent; + logic axi_rsp_write; + logic [pt.DMA_BUS_TAG-1:0] axi_rsp_tag; + logic [ 1:0] axi_rsp_error; + logic [ 63:0] axi_rsp_rdata; - //------------------------LOGIC STARTS HERE--------------------------------- + //------------------------LOGIC STARTS HERE--------------------------------- - // FIFO inputs - assign fifo_addr_in[31:0] = dbg_cmd_valid ? dbg_cmd_addr[31:0] : bus_cmd_addr[31:0]; - assign fifo_byteen_in[7:0] = {8{~dbg_cmd_valid}} & bus_cmd_byteen[7:0]; // Byte enable is used only for bus requests - assign fifo_sz_in[2:0] = dbg_cmd_valid ? {1'b0,dbg_cmd_size[1:0]} : bus_cmd_sz[2:0]; - assign fifo_write_in = dbg_cmd_valid ? dbg_cmd_write : bus_cmd_write; - assign fifo_posted_write_in = ~dbg_cmd_valid & bus_cmd_posted_write; - assign fifo_dbg_in = dbg_cmd_valid; + // FIFO inputs + assign fifo_addr_in[31:0] = dbg_cmd_valid ? dbg_cmd_addr[31:0] : bus_cmd_addr[31:0]; + assign fifo_byteen_in[7:0] = {8{~dbg_cmd_valid}} & bus_cmd_byteen[7:0]; // Byte enable is used only for bus requests + assign fifo_sz_in[2:0] = dbg_cmd_valid ? {1'b0, dbg_cmd_size[1:0]} : bus_cmd_sz[2:0]; + assign fifo_write_in = dbg_cmd_valid ? 
dbg_cmd_write : bus_cmd_write; + assign fifo_posted_write_in = ~dbg_cmd_valid & bus_cmd_posted_write; + assign fifo_dbg_in = dbg_cmd_valid; - for (genvar i=0 ;i= DEPTH); + always_comb begin + num_fifo_vld[3:0] = {3'b0, bus_cmd_sent} - {3'b0, bus_rsp_sent}; + for (int i = 0; i < DEPTH; i++) begin + num_fifo_vld[3:0] += {3'b0, fifo_valid[i]}; + end + end + assign fifo_full_spec = (num_fifo_vld[3:0] >= DEPTH); - assign dma_fifo_ready = ~(fifo_full | dbg_dma_bubble_bus); + assign dma_fifo_ready = ~(fifo_full | dbg_dma_bubble_bus); - // Error logic - assign dma_address_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & (~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm)); // request not for ICCM or DCCM - assign dma_alignment_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & ~dma_address_error & + // Error logic + assign dma_address_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & (~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm)); // request not for ICCM or DCCM + assign dma_alignment_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & ~fifo_dbg[RdPtr] & ~dma_address_error & (((dma_mem_sz_int[2:0] == 3'h1) & dma_mem_addr_int[0]) | // HW size but unaligned - ((dma_mem_sz_int[2:0] == 3'h2) & (|dma_mem_addr_int[1:0])) | // W size but unaligned - ((dma_mem_sz_int[2:0] == 3'h3) & (|dma_mem_addr_int[2:0])) | // DW size but unaligned - (dma_mem_addr_in_iccm & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // ICCM access not word size - (dma_mem_addr_in_dccm & dma_mem_write & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // DCCM write not word size - (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h2) & (dma_mem_byteen[dma_mem_addr_int[2:0]+:4] != 4'hf)) | // Write byte enables not aligned for word store - (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h3) & ~((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0) | (dma_mem_byteen[7:0] == 8'hff)))); // Write byte enables not aligned for 
dword store + ((dma_mem_sz_int[2:0] == 3'h2) & (|dma_mem_addr_int[1:0])) | // W size but unaligned + ((dma_mem_sz_int[2:0] == 3'h3) & (|dma_mem_addr_int[2:0])) | // DW size but unaligned + (dma_mem_addr_in_iccm & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // ICCM access not word size + (dma_mem_addr_in_dccm & dma_mem_write & ~((dma_mem_sz_int[1:0] == 2'b10) | (dma_mem_sz_int[1:0] == 2'b11))) | // DCCM write not word size + (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h2) & (dma_mem_byteen[dma_mem_addr_int[2:0]+:4] != 4'hf)) | // Write byte enables not aligned for word store + (dma_mem_write & (dma_mem_sz_int[2:0] == 3'h3) & ~((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0) | (dma_mem_byteen[7:0] == 8'hff)))); // Write byte enables not aligned for dword store - //Dbg outputs - assign dma_dbg_ready = fifo_empty & dbg_dma_bubble; - assign dma_dbg_cmd_done = (fifo_valid[RspPtr] & fifo_dbg[RspPtr] & fifo_done[RspPtr]); - assign dma_dbg_cmd_fail = (|fifo_error[RspPtr] & dma_dbg_cmd_done) ; + //Dbg outputs + assign dma_dbg_ready = fifo_empty & dbg_dma_bubble; + assign dma_dbg_cmd_done = (fifo_valid[RspPtr] & fifo_dbg[RspPtr] & fifo_done[RspPtr]); + assign dma_dbg_cmd_fail = (|fifo_error[RspPtr] & dma_dbg_cmd_done); - assign dma_dbg_sz[1:0] = fifo_sz[RspPtr][1:0]; - assign dma_dbg_addr[1:0] = fifo_addr[RspPtr][1:0]; - assign dma_dbg_mem_rddata[31:0] = fifo_addr[RspPtr][2] ? fifo_data[RspPtr][63:32] : fifo_data[RspPtr][31:0]; - assign dma_dbg_rddata[31:0] = ({32{(dma_dbg_sz[1:0] == 2'h0)}} & ((dma_dbg_mem_rddata[31:0] >> 8*dma_dbg_addr[1:0]) & 32'hff)) | + assign dma_dbg_sz[1:0] = fifo_sz[RspPtr][1:0]; + assign dma_dbg_addr[1:0] = fifo_addr[RspPtr][1:0]; + assign dma_dbg_mem_rddata[31:0] = fifo_addr[RspPtr][2] ? 
fifo_data[RspPtr][63:32] : fifo_data[RspPtr][31:0]; + assign dma_dbg_rddata[31:0] = ({32{(dma_dbg_sz[1:0] == 2'h0)}} & ((dma_dbg_mem_rddata[31:0] >> 8*dma_dbg_addr[1:0]) & 32'hff)) | ({32{(dma_dbg_sz[1:0] == 2'h1)}} & ((dma_dbg_mem_rddata[31:0] >> 16*dma_dbg_addr[1]) & 32'hffff)) | ({32{(dma_dbg_sz[1:0] == 2'h2)}} & dma_dbg_mem_rddata[31:0]); - assign dma_dbg_cmd_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & fifo_dbg[RdPtr] & + assign dma_dbg_cmd_error = fifo_valid[RdPtr] & ~fifo_done[RdPtr] & fifo_dbg[RdPtr] & ((~(dma_mem_addr_in_dccm | dma_mem_addr_in_iccm | dma_mem_addr_in_pic)) | // Address outside of ICCM/DCCM/PIC ((dma_mem_addr_in_iccm | dma_mem_addr_in_pic) & (dma_mem_sz_int[1:0] != 2'b10))); // Only word accesses allowed for ICCM/PIC - assign dma_dbg_mem_wrdata[31:0] = ({32{dbg_cmd_size[1:0] == 2'h0}} & {4{dbg_cmd_wrdata[7:0]}}) | + assign dma_dbg_mem_wrdata[31:0] = ({32{dbg_cmd_size[1:0] == 2'h0}} & {4{dbg_cmd_wrdata[7:0]}}) | ({32{dbg_cmd_size[1:0] == 2'h1}} & {2{dbg_cmd_wrdata[15:0]}}) | ({32{dbg_cmd_size[1:0] == 2'h2}} & dbg_cmd_wrdata[31:0]); - // Block the decode if fifo full - assign dma_dccm_stall_any = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & (dma_nack_count >= dma_nack_count_csr); - assign dma_iccm_stall_any = dma_mem_req & dma_mem_addr_in_iccm & (dma_nack_count >= dma_nack_count_csr); + // Block the decode if fifo full + assign dma_dccm_stall_any = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & (dma_nack_count >= dma_nack_count_csr); + assign dma_iccm_stall_any = dma_mem_req & dma_mem_addr_in_iccm & (dma_nack_count >= dma_nack_count_csr); - // Used to indicate ready to debug - assign fifo_empty = ~((|(fifo_valid[DEPTH-1:0])) | bus_cmd_sent); + // Used to indicate ready to debug + assign fifo_empty = ~((|(fifo_valid[DEPTH-1:0])) | bus_cmd_sent); - // Nack counter, stall the lsu pipe if 7 nacks - assign dma_nack_count_csr[2:0] = dec_tlu_dma_qos_prty[2:0]; - assign dma_nack_count_d[2:0] = (dma_nack_count[2:0] >= 
dma_nack_count_csr[2:0]) ? ({3{~(dma_dccm_req | dma_iccm_req)}} & dma_nack_count[2:0]) : + // Nack counter, stall the lsu pipe if 7 nacks + assign dma_nack_count_csr[2:0] = dec_tlu_dma_qos_prty[2:0]; + assign dma_nack_count_d[2:0] = (dma_nack_count[2:0] >= dma_nack_count_csr[2:0]) ? ({3{~(dma_dccm_req | dma_iccm_req)}} & dma_nack_count[2:0]) : (dma_mem_req & ~(dma_dccm_req | dma_iccm_req)) ? (dma_nack_count[2:0] + 1'b1) : 3'b0; - rvdffs #(3) nack_count_dff(.din(dma_nack_count_d[2:0]), .dout(dma_nack_count[2:0]), .en(dma_mem_req), .clk(dma_free_clk), .*); + rvdffs #(3) nack_count_dff ( + .din (dma_nack_count_d[2:0]), + .dout(dma_nack_count[2:0]), + .en (dma_mem_req), + .clk (dma_free_clk), + .* + ); - // Core outputs - assign dma_mem_req = fifo_valid[RdPtr] & ~fifo_rpend[RdPtr] & ~fifo_done[RdPtr] & ~(dma_address_error | dma_alignment_error | dma_dbg_cmd_error); - assign dma_dccm_req = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & dccm_ready; - assign dma_iccm_req = dma_mem_req & dma_mem_addr_in_iccm & iccm_ready; - assign dma_mem_tag[2:0] = 3'(RdPtr); - assign dma_mem_addr_int[31:0] = fifo_addr[RdPtr]; - assign dma_mem_sz_int[2:0] = fifo_sz[RdPtr]; - assign dma_mem_addr[31:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & (dma_mem_byteen[7:0] == 8'hf0)) ? {dma_mem_addr_int[31:3],1'b1,dma_mem_addr_int[1:0]} : dma_mem_addr_int[31:0]; - assign dma_mem_sz[2:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & ((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0))) ? 
3'h2 : dma_mem_sz_int[2:0]; - assign dma_mem_byteen[7:0] = fifo_byteen[RdPtr]; - assign dma_mem_write = fifo_write[RdPtr]; - assign dma_mem_wdata[63:0] = fifo_data[RdPtr]; + // Core outputs + assign dma_mem_req = fifo_valid[RdPtr] & ~fifo_rpend[RdPtr] & ~fifo_done[RdPtr] & ~(dma_address_error | dma_alignment_error | dma_dbg_cmd_error); + assign dma_dccm_req = dma_mem_req & (dma_mem_addr_in_dccm | dma_mem_addr_in_pic) & dccm_ready; + assign dma_iccm_req = dma_mem_req & dma_mem_addr_in_iccm & iccm_ready; + assign dma_mem_tag[2:0] = 3'(RdPtr); + assign dma_mem_addr_int[31:0] = fifo_addr[RdPtr]; + assign dma_mem_sz_int[2:0] = fifo_sz[RdPtr]; + assign dma_mem_addr[31:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & (dma_mem_byteen[7:0] == 8'hf0)) ? {dma_mem_addr_int[31:3],1'b1,dma_mem_addr_int[1:0]} : dma_mem_addr_int[31:0]; + assign dma_mem_sz[2:0] = (dma_mem_write & ~fifo_dbg[RdPtr] & ((dma_mem_byteen[7:0] == 8'h0f) | (dma_mem_byteen[7:0] == 8'hf0))) ? 3'h2 : dma_mem_sz_int[2:0]; + assign dma_mem_byteen[7:0] = fifo_byteen[RdPtr]; + assign dma_mem_write = fifo_write[RdPtr]; + assign dma_mem_wdata[63:0] = fifo_data[RdPtr]; - // PMU outputs - assign dma_pmu_dccm_read = dma_dccm_req & ~dma_mem_write; - assign dma_pmu_dccm_write = dma_dccm_req & dma_mem_write; - assign dma_pmu_any_read = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write; - assign dma_pmu_any_write = (dma_dccm_req | dma_iccm_req) & dma_mem_write; + // PMU outputs + assign dma_pmu_dccm_read = dma_dccm_req & ~dma_mem_write; + assign dma_pmu_dccm_write = dma_dccm_req & dma_mem_write; + assign dma_pmu_any_read = (dma_dccm_req | dma_iccm_req) & ~dma_mem_write; + assign dma_pmu_any_write = (dma_dccm_req | dma_iccm_req) & dma_mem_write; - // Address check dccm - if (pt.DCCM_ENABLE) begin: Gen_dccm_enable - rvrangecheck #(.CCM_SADR(pt.DCCM_SADR), - .CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck ( - .addr(dma_mem_addr_int[31:0]), - .in_range(dma_mem_addr_in_dccm), - .in_region(dma_mem_addr_in_dccm_region_nc) - ); - end else 
begin: Gen_dccm_disable - assign dma_mem_addr_in_dccm = '0; - assign dma_mem_addr_in_dccm_region_nc = '0; - end // else: !if(pt.ICCM_ENABLE) + // Address check dccm + if (pt.DCCM_ENABLE) begin : Gen_dccm_enable + rvrangecheck #( + .CCM_SADR(pt.DCCM_SADR), + .CCM_SIZE(pt.DCCM_SIZE) + ) addr_dccm_rangecheck ( + .addr(dma_mem_addr_int[31:0]), + .in_range(dma_mem_addr_in_dccm), + .in_region(dma_mem_addr_in_dccm_region_nc) + ); + end else begin : Gen_dccm_disable + assign dma_mem_addr_in_dccm = '0; + assign dma_mem_addr_in_dccm_region_nc = '0; + end // else: !if(pt.ICCM_ENABLE) - // Address check iccm - if (pt.ICCM_ENABLE) begin: Gen_iccm_enable - rvrangecheck #(.CCM_SADR(pt.ICCM_SADR), - .CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck ( - .addr(dma_mem_addr_int[31:0]), - .in_range(dma_mem_addr_in_iccm), - .in_region(dma_mem_addr_in_iccm_region_nc) - ); - end else begin: Gen_iccm_disable - assign dma_mem_addr_in_iccm = '0; - assign dma_mem_addr_in_iccm_region_nc = '0; - end // else: !if(pt.ICCM_ENABLE) + // Address check iccm + if (pt.ICCM_ENABLE) begin : Gen_iccm_enable + rvrangecheck #( + .CCM_SADR(pt.ICCM_SADR), + .CCM_SIZE(pt.ICCM_SIZE) + ) addr_iccm_rangecheck ( + .addr(dma_mem_addr_int[31:0]), + .in_range(dma_mem_addr_in_iccm), + .in_region(dma_mem_addr_in_iccm_region_nc) + ); + end else begin : Gen_iccm_disable + assign dma_mem_addr_in_iccm = '0; + assign dma_mem_addr_in_iccm_region_nc = '0; + end // else: !if(pt.ICCM_ENABLE) - // PIC memory address check - rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR), - .CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck ( + // PIC memory address check + rvrangecheck #( + .CCM_SADR(pt.PIC_BASE_ADDR), + .CCM_SIZE(pt.PIC_SIZE) + ) addr_pic_rangecheck ( .addr(dma_mem_addr_int[31:0]), .in_range(dma_mem_addr_in_pic), .in_region(dma_mem_addr_in_pic_region_nc) - ); + ); - // Inputs - rvdff_fpga #(1) fifo_full_bus_ff (.din(fifo_full_spec), .dout(fifo_full_spec_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(1) 
dbg_dma_bubble_ff (.din(dbg_dma_bubble), .dout(dbg_dma_bubble_bus), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdff #(1) dma_dbg_cmd_doneff (.din(dma_dbg_cmd_done), .dout(dma_dbg_cmd_done_q), .clk(free_clk), .*); + // Inputs + rvdff_fpga #(1) fifo_full_bus_ff ( + .din(fifo_full_spec), + .dout(fifo_full_spec_bus), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #(1) dbg_dma_bubble_ff ( + .din(dbg_dma_bubble), + .dout(dbg_dma_bubble_bus), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdff #(1) dma_dbg_cmd_doneff ( + .din (dma_dbg_cmd_done), + .dout(dma_dbg_cmd_done_q), + .clk (free_clk), + .* + ); - // Clock Gating logic - assign dma_buffer_c1_clken = (bus_cmd_valid & dma_bus_clk_en) | dbg_cmd_valid | clk_override; - assign dma_free_clken = (bus_cmd_valid | bus_rsp_valid | dbg_cmd_valid | dma_dbg_cmd_done | dma_dbg_cmd_done_q | (|fifo_valid[DEPTH-1:0]) | clk_override); + // Clock Gating logic + assign dma_buffer_c1_clken = (bus_cmd_valid & dma_bus_clk_en) | dbg_cmd_valid | clk_override; + assign dma_free_clken = (bus_cmd_valid | bus_rsp_valid | dbg_cmd_valid | dma_dbg_cmd_done | dma_dbg_cmd_done_q | (|fifo_valid[DEPTH-1:0]) | clk_override); - rvoclkhdr dma_buffer_c1cgc ( .en(dma_buffer_c1_clken), .l1clk(dma_buffer_c1_clk), .* ); - rvoclkhdr dma_free_cgc (.en(dma_free_clken), .l1clk(dma_free_clk), .*); + rvoclkhdr dma_buffer_c1cgc ( + .en(dma_buffer_c1_clken), + .l1clk(dma_buffer_c1_clk), + .* + ); + rvoclkhdr dma_free_cgc ( + .en(dma_free_clken), + .l1clk(dma_free_clk), + .* + ); - rvclkhdr dma_bus_cgc (.en(dma_bus_clk_en), .l1clk(dma_bus_clk), .*); + rvclkhdr dma_bus_cgc ( + .en(dma_bus_clk_en), + .l1clk(dma_bus_clk), + .* + ); - // Write channel buffer - assign wrbuf_en = dma_axi_awvalid & dma_axi_awready; - assign wrbuf_data_en = dma_axi_wvalid & dma_axi_wready; - assign wrbuf_cmd_sent = bus_cmd_sent & bus_cmd_write; - assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; - assign 
wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en; + // Write channel buffer + assign wrbuf_en = dma_axi_awvalid & dma_axi_awready; + assign wrbuf_data_en = dma_axi_wvalid & dma_axi_wready; + assign wrbuf_cmd_sent = bus_cmd_sent & bus_cmd_write; + assign wrbuf_rst = wrbuf_cmd_sent & ~wrbuf_en; + assign wrbuf_data_rst = wrbuf_cmd_sent & ~wrbuf_data_en; - rvdffsc_fpga #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffsc_fpga #(.WIDTH(1)) wrbuf_data_vldff (.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_data_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(pt.DMA_BUS_TAG)) wrbuf_tagff (.din(dma_axi_awid[pt.DMA_BUS_TAG-1:0]), .dout(wrbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(3)) wrbuf_szff (.din(dma_axi_awsize[2:0]), .dout(wrbuf_sz[2:0]), .en(wrbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(dma_axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & dma_bus_clk_en), .*); - rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(dma_axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & dma_bus_clk_en), .*); - rvdffs_fpga #(.WIDTH(8)) wrbuf_byteenff (.din(dma_axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #( + .WIDTH(1) + ) wrbuf_vldff ( + .din(1'b1), + .dout(wrbuf_vld), + .en(wrbuf_en), + .clear(wrbuf_rst), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffsc_fpga #( + .WIDTH(1) + ) wrbuf_data_vldff ( + .din(1'b1), + .dout(wrbuf_data_vld), + .en(wrbuf_data_en), + .clear(wrbuf_data_rst), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(pt.DMA_BUS_TAG) + ) wrbuf_tagff ( + 
.din(dma_axi_awid[pt.DMA_BUS_TAG-1:0]), + .dout(wrbuf_tag[pt.DMA_BUS_TAG-1:0]), + .en(wrbuf_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(3) + ) wrbuf_szff ( + .din(dma_axi_awsize[2:0]), + .dout(wrbuf_sz[2:0]), + .en(wrbuf_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) wrbuf_addrff ( + .din (dma_axi_awaddr[31:0]), + .dout(wrbuf_addr[31:0]), + .en (wrbuf_en & dma_bus_clk_en), + .* + ); + rvdffe #( + .WIDTH(64) + ) wrbuf_dataff ( + .din (dma_axi_wdata[63:0]), + .dout(wrbuf_data[63:0]), + .en (wrbuf_data_en & dma_bus_clk_en), + .* + ); + rvdffs_fpga #( + .WIDTH(8) + ) wrbuf_byteenff ( + .din(dma_axi_wstrb[7:0]), + .dout(wrbuf_byteen[7:0]), + .en(wrbuf_data_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); - // Read channel buffer - assign rdbuf_en = dma_axi_arvalid & dma_axi_arready; - assign rdbuf_cmd_sent = bus_cmd_sent & ~bus_cmd_write; - assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en; + // Read channel buffer + assign rdbuf_en = dma_axi_arvalid & dma_axi_arready; + assign rdbuf_cmd_sent = bus_cmd_sent & ~bus_cmd_write; + assign rdbuf_rst = rdbuf_cmd_sent & ~rdbuf_en; - rvdffsc_fpga #(.WIDTH(1)) rdbuf_vldff (.din(1'b1), .dout(rdbuf_vld), .en(rdbuf_en), .clear(rdbuf_rst), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(pt.DMA_BUS_TAG)) rdbuf_tagff (.din(dma_axi_arid[pt.DMA_BUS_TAG-1:0]), .dout(rdbuf_tag[pt.DMA_BUS_TAG-1:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(3)) rdbuf_szff (.din(dma_axi_arsize[2:0]), .dout(rdbuf_sz[2:0]), .en(rdbuf_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) rdbuf_addrff (.din(dma_axi_araddr[31:0]), .dout(rdbuf_addr[31:0]), .en(rdbuf_en & dma_bus_clk_en), .*); + rvdffsc_fpga #( + .WIDTH(1) + ) rdbuf_vldff ( + .din(1'b1), + .dout(rdbuf_vld), + .en(rdbuf_en), + 
.clear(rdbuf_rst), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(pt.DMA_BUS_TAG) + ) rdbuf_tagff ( + .din(dma_axi_arid[pt.DMA_BUS_TAG-1:0]), + .dout(rdbuf_tag[pt.DMA_BUS_TAG-1:0]), + .en(rdbuf_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(3) + ) rdbuf_szff ( + .din(dma_axi_arsize[2:0]), + .dout(rdbuf_sz[2:0]), + .en(rdbuf_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) rdbuf_addrff ( + .din (dma_axi_araddr[31:0]), + .dout(rdbuf_addr[31:0]), + .en (rdbuf_en & dma_bus_clk_en), + .* + ); - assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent); - assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent); - assign dma_axi_arready = ~(rdbuf_vld & ~rdbuf_cmd_sent); + assign dma_axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent); + assign dma_axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent); + assign dma_axi_arready = ~(rdbuf_vld & ~rdbuf_cmd_sent); - //Generate a single request from read/write channel - assign bus_cmd_valid = (wrbuf_vld & wrbuf_data_vld) | rdbuf_vld; - assign bus_cmd_sent = bus_cmd_valid & dma_fifo_ready; - assign bus_cmd_write = axi_mstr_sel; - assign bus_cmd_posted_write = '0; - assign bus_cmd_addr[31:0] = axi_mstr_sel ? wrbuf_addr[31:0] : rdbuf_addr[31:0]; - assign bus_cmd_sz[2:0] = axi_mstr_sel ? wrbuf_sz[2:0] : rdbuf_sz[2:0]; - assign bus_cmd_wdata[63:0] = wrbuf_data[63:0]; - assign bus_cmd_byteen[7:0] = wrbuf_byteen[7:0]; - assign bus_cmd_tag[pt.DMA_BUS_TAG-1:0] = axi_mstr_sel ? 
wrbuf_tag[pt.DMA_BUS_TAG-1:0] : rdbuf_tag[pt.DMA_BUS_TAG-1:0]; - assign bus_cmd_mid[pt.DMA_BUS_ID-1:0] = '0; - assign bus_cmd_prty[pt.DMA_BUS_PRTY-1:0] = '0; + //Generate a single request from read/write channel + assign bus_cmd_valid = (wrbuf_vld & wrbuf_data_vld) | rdbuf_vld; + assign bus_cmd_sent = bus_cmd_valid & dma_fifo_ready; + assign bus_cmd_write = axi_mstr_sel; + assign bus_cmd_posted_write = '0; + assign bus_cmd_addr[31:0] = axi_mstr_sel ? wrbuf_addr[31:0] : rdbuf_addr[31:0]; + assign bus_cmd_sz[2:0] = axi_mstr_sel ? wrbuf_sz[2:0] : rdbuf_sz[2:0]; + assign bus_cmd_wdata[63:0] = wrbuf_data[63:0]; + assign bus_cmd_byteen[7:0] = wrbuf_byteen[7:0]; + assign bus_cmd_tag[pt.DMA_BUS_TAG-1:0] = axi_mstr_sel ? wrbuf_tag[pt.DMA_BUS_TAG-1:0] : rdbuf_tag[pt.DMA_BUS_TAG-1:0]; + assign bus_cmd_mid[pt.DMA_BUS_ID-1:0] = '0; + assign bus_cmd_prty[pt.DMA_BUS_PRTY-1:0] = '0; - // Sel=1 -> write has higher priority - assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld); - assign axi_mstr_prty_in = ~axi_mstr_priority; - assign axi_mstr_prty_en = bus_cmd_sent; - rvdffs_fpga #(.WIDTH(1)) mstr_prtyff(.din(axi_mstr_prty_in), .dout(axi_mstr_priority), .en(axi_mstr_prty_en), .clk(dma_bus_clk), .clken(dma_bus_clk_en), .rawclk(clk), .*); + // Sel=1 -> write has higher priority + assign axi_mstr_sel = (wrbuf_vld & wrbuf_data_vld & rdbuf_vld) ? axi_mstr_priority : (wrbuf_vld & wrbuf_data_vld); + assign axi_mstr_prty_in = ~axi_mstr_priority; + assign axi_mstr_prty_en = bus_cmd_sent; + rvdffs_fpga #( + .WIDTH(1) + ) mstr_prtyff ( + .din(axi_mstr_prty_in), + .dout(axi_mstr_priority), + .en(axi_mstr_prty_en), + .clk(dma_bus_clk), + .clken(dma_bus_clk_en), + .rawclk(clk), + .* + ); - assign axi_rsp_valid = fifo_valid[RspPtr] & ~fifo_dbg[RspPtr] & fifo_done_bus[RspPtr]; - assign axi_rsp_rdata[63:0] = fifo_data[RspPtr]; - assign axi_rsp_write = fifo_write[RspPtr]; - assign axi_rsp_error[1:0] = fifo_error[RspPtr][0] ? 
2'b10 : (fifo_error[RspPtr][1] ? 2'b11 : 2'b0); - assign axi_rsp_tag[pt.DMA_BUS_TAG-1:0] = fifo_tag[RspPtr]; + assign axi_rsp_valid = fifo_valid[RspPtr] & ~fifo_dbg[RspPtr] & fifo_done_bus[RspPtr]; + assign axi_rsp_rdata[63:0] = fifo_data[RspPtr]; + assign axi_rsp_write = fifo_write[RspPtr]; + assign axi_rsp_error[1:0] = fifo_error[RspPtr][0] ? 2'b10 : (fifo_error[RspPtr][1] ? 2'b11 : 2'b0); + assign axi_rsp_tag[pt.DMA_BUS_TAG-1:0] = fifo_tag[RspPtr]; - // AXI response channel signals - assign dma_axi_bvalid = axi_rsp_valid & axi_rsp_write; - assign dma_axi_bresp[1:0] = axi_rsp_error[1:0]; - assign dma_axi_bid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; + // AXI response channel signals + assign dma_axi_bvalid = axi_rsp_valid & axi_rsp_write; + assign dma_axi_bresp[1:0] = axi_rsp_error[1:0]; + assign dma_axi_bid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; - assign dma_axi_rvalid = axi_rsp_valid & ~axi_rsp_write; - assign dma_axi_rresp[1:0] = axi_rsp_error; - assign dma_axi_rdata[63:0] = axi_rsp_rdata[63:0]; - assign dma_axi_rlast = 1'b1; - assign dma_axi_rid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; + assign dma_axi_rvalid = axi_rsp_valid & ~axi_rsp_write; + assign dma_axi_rresp[1:0] = axi_rsp_error; + assign dma_axi_rdata[63:0] = axi_rsp_rdata[63:0]; + assign dma_axi_rlast = 1'b1; + assign dma_axi_rid[pt.DMA_BUS_TAG-1:0] = axi_rsp_tag[pt.DMA_BUS_TAG-1:0]; - assign bus_posted_write_done = 1'b0; - assign bus_rsp_valid = (dma_axi_bvalid | dma_axi_rvalid); - assign bus_rsp_sent = (dma_axi_bvalid & dma_axi_bready) | (dma_axi_rvalid & dma_axi_rready); + assign bus_posted_write_done = 1'b0; + assign bus_rsp_valid = (dma_axi_bvalid | dma_axi_rvalid); + assign bus_rsp_sent = (dma_axi_bvalid & dma_axi_bready) | (dma_axi_rvalid & dma_axi_rready); - assign dma_active = wrbuf_vld | rdbuf_vld | (|fifo_valid[DEPTH-1:0]); + assign dma_active = wrbuf_vld | rdbuf_vld | (|fifo_valid[DEPTH-1:0]); -endmodule // el2_dma_ctrl +endmodule // el2_dma_ctrl 
diff --git a/Flow/design/el2_mem.sv b/Flow/design/el2_mem.sv index 6711819..6ca81ae 100644 --- a/Flow/design/el2_mem.sv +++ b/Flow/design/el2_mem.sv @@ -16,124 +16,132 @@ //******************************************************************************** module el2_mem -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) -( - input logic clk, - input logic rst_l, - input logic dccm_clk_override, - input logic icm_clk_override, - input logic dec_tlu_core_ecc_disable, + `include "el2_param.vh" +) ( + input logic clk, + input logic rst_l, + input logic dccm_clk_override, + input logic icm_clk_override, + input logic dec_tlu_core_ecc_disable, - //DCCM ports - input logic dccm_wren, - input logic dccm_rden, - input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, - input logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, - input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, - input logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, + //DCCM ports + input logic dccm_wren, + input logic dccm_rden, + input logic [ pt.DCCM_BITS-1:0] dccm_wr_addr_lo, + input logic [ pt.DCCM_BITS-1:0] dccm_wr_addr_hi, + input logic [ pt.DCCM_BITS-1:0] dccm_rd_addr_lo, + input logic [ pt.DCCM_BITS-1:0] dccm_rd_addr_hi, + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, - output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, - output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, -//`ifdef pt.DCCM_ENABLE - input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt, + //`ifdef pt.DCCM_ENABLE + input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt, -//`endif + //`endif - //ICCM ports - input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, + //ICCM ports + input 
el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, - input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, - input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle - input logic iccm_correction_state, // ICCM is doing a single bit error correct cycle - input logic iccm_wren, - input logic iccm_rden, - input logic [2:0] iccm_wr_size, - input logic [77:0] iccm_wr_data, + input logic [pt.ICCM_BITS-1:1] iccm_rw_addr, + input logic iccm_buf_correct_ecc, // ICCM is doing a single bit error correct cycle + input logic iccm_correction_state, // ICCM is doing a single bit error correct cycle + input logic iccm_wren, + input logic iccm_rden, + input logic [2:0] iccm_wr_size, + input logic [77:0] iccm_wr_data, - output logic [63:0] iccm_rd_data, - output logic [77:0] iccm_rd_data_ecc, + output logic [63:0] iccm_rd_data, + output logic [77:0] iccm_rd_data_ecc, - // Icache and Itag Ports + // Icache and Itag Ports - input logic [31:1] ic_rw_addr, - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, - input logic ic_rd_en, - input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. - input logic ic_sel_premux_data, // Premux data sel - input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, - input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, + input logic [31:1] ic_rw_addr, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, + input logic ic_rd_en, + input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. 
+ input logic ic_sel_premux_data, // Premux data sel + input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, + input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, - input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC - input logic [70:0] ic_debug_wr_data, // Debug wr cache. - output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. - input logic ic_debug_rd_en, // Icache debug rd - input logic ic_debug_wr_en, // Icache debug wr - input logic ic_debug_tag_array, // Debug tag array - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. - output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - output logic [25:0] ictag_debug_rd_data,// Debug icache tag. + output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [25:0] ictag_debug_rd_data, // Debug icache tag. 
- output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank - output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, - output logic ic_tag_perr, // Icache Tag parity error + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // parity error per bank + output logic [ pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, + output logic ic_tag_perr, // Icache Tag parity error - input logic scan_mode + input logic scan_mode ); - logic active_clk; - rvoclkhdr active_cg ( .en(1'b1), .l1clk(active_clk), .* ); - - // DCCM Instantiation - if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable - el2_lsu_dccm_mem #(.pt(pt)) dccm ( - .clk_override(dccm_clk_override), - .* - ); - end else begin: Gen_dccm_disable - assign dccm_rd_data_lo = '0; - assign dccm_rd_data_hi = '0; - end - -if ( pt.ICACHE_ENABLE ) begin: icache - el2_ifu_ic_mem #(.pt(pt)) icm ( - .clk_override(icm_clk_override), + logic active_clk; + rvoclkhdr active_cg ( + .en(1'b1), + .l1clk(active_clk), .* - ); -end -else begin - assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0; - assign ic_tag_perr = '0 ; - assign ic_rd_data = '0 ; - assign ictag_debug_rd_data = '0 ; -end // else: !if( pt.ICACHE_ENABLE ) + ); + + // DCCM Instantiation + if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable + el2_lsu_dccm_mem #( + .pt(pt) + ) dccm ( + .clk_override(dccm_clk_override), + .* + ); + end else begin : Gen_dccm_disable + assign dccm_rd_data_lo = '0; + assign dccm_rd_data_hi = '0; + end + + if (pt.ICACHE_ENABLE) begin : icache + el2_ifu_ic_mem #( + .pt(pt) + ) icm ( + .clk_override(icm_clk_override), + .* + ); + end else begin + assign ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0] = '0; + assign ic_tag_perr = '0 ; + assign ic_rd_data = '0 ; + assign ictag_debug_rd_data = '0 ; + end // else: !if( pt.ICACHE_ENABLE ) -if (pt.ICCM_ENABLE) begin : iccm - el2_ifu_iccm_mem #(.pt(pt)) iccm (.*, - 
.clk_override(icm_clk_override), - .iccm_rw_addr(iccm_rw_addr[pt.ICCM_BITS-1:1]), - .iccm_rd_data(iccm_rd_data[63:0]) - ); -end -else begin - assign iccm_rd_data = '0 ; - assign iccm_rd_data_ecc = '0 ; -end + if (pt.ICCM_ENABLE) begin : iccm + el2_ifu_iccm_mem #( + .pt(pt) + ) iccm ( + .*, + .clk_override(icm_clk_override), + .iccm_rw_addr(iccm_rw_addr[pt.ICCM_BITS-1:1]), + .iccm_rd_data(iccm_rd_data[63:0]) + ); + end else begin + assign iccm_rd_data = '0 ; + assign iccm_rd_data_ecc = '0 ; + end endmodule diff --git a/Flow/design/el2_pic_ctrl.sv b/Flow/design/el2_pic_ctrl.sv index b35c50f..5d0bda0 100644 --- a/Flow/design/el2_pic_ctrl.sv +++ b/Flow/design/el2_pic_ctrl.sv @@ -21,190 +21,239 @@ //******************************************************************************** module el2_pic_ctrl #( -`include "el2_param.vh" - ) - ( + `include "el2_param.vh" +) ( - input logic clk, // Core clock - input logic free_clk, // free clock - input logic rst_l, // Reset for all flops - input logic clk_override, // Clock over-ride for gating - input logic io_clk_override, // PIC IO Clock over-ride for gating - input logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req, // Interrupt requests - input logic [31:0] picm_rdaddr, // Address of the register - input logic [31:0] picm_wraddr, // Address of the register - input logic [31:0] picm_wr_data, // Data to be written to the register - input logic picm_wren, // Write enable to the register - input logic picm_rden, // Read enable for the register - input logic picm_mken, // Read the Mask for the register - input logic [3:0] meicurpl, // Current Priority Level - input logic [3:0] meipt, // Current Priority Threshold + input logic clk, // Core clock + input logic free_clk, // free clock + input logic rst_l, // Reset for all flops + input logic clk_override, // Clock over-ride for gating + input logic io_clk_override, // PIC IO Clock over-ride for gating + input logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req, // Interrupt requests + 
input logic [ 31:0] picm_rdaddr, // Address of the register + input logic [ 31:0] picm_wraddr, // Address of the register + input logic [ 31:0] picm_wr_data, // Data to be written to the register + input logic picm_wren, // Write enable to the register + input logic picm_rden, // Read enable for the register + input logic picm_mken, // Read the Mask for the register + input logic [ 3:0] meicurpl, // Current Priority Level + input logic [ 3:0] meipt, // Current Priority Threshold - output logic mexintpend, // External Inerrupt request to the core - output logic [7:0] claimid, // Claim Id of the requested interrupt - output logic [3:0] pl, // Priority level of the requested interrupt - output logic [31:0] picm_rd_data, // Read data of the register - output logic mhwakeup, // Wake-up interrupt request - input logic scan_mode // scan mode + output logic mexintpend, // External Inerrupt request to the core + output logic [ 7:0] claimid, // Claim Id of the requested interrupt + output logic [ 3:0] pl, // Priority level of the requested interrupt + output logic [31:0] picm_rd_data, // Read data of the register + output logic mhwakeup, // Wake-up interrupt request + input logic scan_mode // scan mode ); -localparam NUM_LEVELS = $clog2(pt.PIC_TOTAL_INT_PLUS1); -localparam INTPRIORITY_BASE_ADDR = pt.PIC_BASE_ADDR ; -localparam INTPEND_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00001000 ; -localparam INTENABLE_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00002000 ; -localparam EXT_INTR_PIC_CONFIG = pt.PIC_BASE_ADDR + 32'h00003000 ; -localparam EXT_INTR_GW_CONFIG = pt.PIC_BASE_ADDR + 32'h00004000 ; -localparam EXT_INTR_GW_CLEAR = pt.PIC_BASE_ADDR + 32'h00005000 ; + localparam NUM_LEVELS = $clog2(pt.PIC_TOTAL_INT_PLUS1); + localparam INTPRIORITY_BASE_ADDR = pt.PIC_BASE_ADDR; + localparam INTPEND_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00001000; + localparam INTENABLE_BASE_ADDR = pt.PIC_BASE_ADDR + 32'h00002000; + localparam EXT_INTR_PIC_CONFIG = pt.PIC_BASE_ADDR + 32'h00003000; + localparam 
EXT_INTR_GW_CONFIG = pt.PIC_BASE_ADDR + 32'h00004000; + localparam EXT_INTR_GW_CLEAR = pt.PIC_BASE_ADDR + 32'h00005000; -localparam INTPEND_SIZE = (pt.PIC_TOTAL_INT_PLUS1 < 32) ? 32 : + localparam INTPEND_SIZE = (pt.PIC_TOTAL_INT_PLUS1 < 32) ? 32 : (pt.PIC_TOTAL_INT_PLUS1 < 64) ? 64 : (pt.PIC_TOTAL_INT_PLUS1 < 128) ? 128 : (pt.PIC_TOTAL_INT_PLUS1 < 256) ? 256 : (pt.PIC_TOTAL_INT_PLUS1 < 512) ? 512 : 1024 ; -localparam INT_GRPS = INTPEND_SIZE / 32 ; -localparam INTPRIORITY_BITS = 4 ; -localparam ID_BITS = 8 ; -localparam int GW_CONFIG[pt.PIC_TOTAL_INT_PLUS1-1:0] = '{default:0} ; + localparam INT_GRPS = INTPEND_SIZE / 32; + localparam INTPRIORITY_BITS = 4; + localparam ID_BITS = 8; + localparam int GW_CONFIG[pt.PIC_TOTAL_INT_PLUS1-1:0] = '{default: 0}; -localparam INT_ENABLE_GRPS = (pt.PIC_TOTAL_INT_PLUS1 - 1) / 4 ; + localparam INT_ENABLE_GRPS = (pt.PIC_TOTAL_INT_PLUS1 - 1) / 4; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_clk_enable ; -logic [INT_ENABLE_GRPS:0] intenable_clk_enable_grp ; -logic [INT_ENABLE_GRPS:0] gw_clk ; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_clk_enable; + logic [ INT_ENABLE_GRPS:0] intenable_clk_enable_grp; + logic [ INT_ENABLE_GRPS:0] gw_clk; -logic addr_intpend_base_match; + logic addr_intpend_base_match; -logic raddr_config_pic_match ; -logic raddr_intenable_base_match; -logic raddr_intpriority_base_match; -logic raddr_config_gw_base_match ; + logic raddr_config_pic_match; + logic raddr_intenable_base_match; + logic raddr_intpriority_base_match; + logic raddr_config_gw_base_match; -logic waddr_config_pic_match ; -logic waddr_intpriority_base_match; -logic waddr_intenable_base_match; -logic waddr_config_gw_base_match ; -logic addr_clear_gw_base_match ; + logic waddr_config_pic_match; + logic waddr_intpriority_base_match; + logic waddr_intenable_base_match; + logic waddr_config_gw_base_match; + logic addr_clear_gw_base_match; -logic mexintpend_in; -logic mhwakeup_in ; -logic intpend_reg_read ; + logic mexintpend_in; + logic 
mhwakeup_in; + logic intpend_reg_read; -logic [31:0] picm_rd_data_in, intpend_rd_out; -logic intenable_rd_out ; -logic [INTPRIORITY_BITS-1:0] intpriority_rd_out; -logic [1:0] gw_config_rd_out; + logic [31:0] picm_rd_data_in, intpend_rd_out; + logic intenable_rd_out; + logic [ INTPRIORITY_BITS-1:0] intpriority_rd_out; + logic [ 1:0] gw_config_rd_out; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpriority_reg_inv; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_we; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_re; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [1:0] gw_config_reg; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0][INTPRIORITY_BITS-1:0] intpriority_reg; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0][INTPRIORITY_BITS-1:0] intpriority_reg_inv; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_we; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intpriority_reg_re; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0][ 1:0] gw_config_reg; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_we; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_re; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_we; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_re; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_clear_reg_we; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_we; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] intenable_reg_re; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_we; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_config_reg_re; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] gw_clear_reg_we; -logic [INTPEND_SIZE-1:0] intpend_reg_extended; + logic [ INTPEND_SIZE-1:0] intpend_reg_extended; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [INTPRIORITY_BITS-1:0] intpend_w_prior_en; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] [ID_BITS-1:0] intpend_id; -logic [INTPRIORITY_BITS-1:0] maxint; -logic [INTPRIORITY_BITS-1:0] selected_int_priority; -logic 
[INT_GRPS-1:0] [31:0] intpend_rd_part_out ; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0][INTPRIORITY_BITS-1:0] intpend_w_prior_en; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0][ ID_BITS-1:0] intpend_id; + logic [ INTPRIORITY_BITS-1:0] maxint; + logic [ INTPRIORITY_BITS-1:0] selected_int_priority; + logic [ INT_GRPS-1:0][ 31:0] intpend_rd_part_out; -logic config_reg; -logic intpriord; -logic config_reg_we ; -logic config_reg_re ; -logic config_reg_in ; -logic prithresh_reg_write , prithresh_reg_read; -logic intpriority_reg_read ; -logic intenable_reg_read ; -logic gw_config_reg_read ; -logic picm_wren_ff , picm_rden_ff ; -logic [31:0] picm_raddr_ff; -logic [31:0] picm_waddr_ff; -logic [31:0] picm_wr_data_ff; -logic [3:0] mask; -logic picm_mken_ff; -logic [ID_BITS-1:0] claimid_in ; -logic [INTPRIORITY_BITS-1:0] pl_in ; -logic [INTPRIORITY_BITS-1:0] pl_in_q ; + logic config_reg; + logic intpriord; + logic config_reg_we; + logic config_reg_re; + logic config_reg_in; + logic prithresh_reg_write, prithresh_reg_read; + logic intpriority_reg_read; + logic intenable_reg_read; + logic gw_config_reg_read; + logic picm_wren_ff, picm_rden_ff; + logic [ 31:0] picm_raddr_ff; + logic [ 31:0] picm_waddr_ff; + logic [ 31:0] picm_wr_data_ff; + logic [ 3:0] mask; + logic picm_mken_ff; + logic [ ID_BITS-1:0] claimid_in; + logic [ INTPRIORITY_BITS-1:0] pl_in; + logic [ INTPRIORITY_BITS-1:0] pl_in_q; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_sync; -logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_gw; - logic picm_bypass_ff; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_sync; + logic [pt.PIC_TOTAL_INT_PLUS1-1:0] extintsrc_req_gw; + logic picm_bypass_ff; -// clkens - logic pic_raddr_c1_clken; - logic pic_waddr_c1_clken; - logic pic_data_c1_clken; - logic pic_pri_c1_clken; - logic pic_int_c1_clken; - logic gw_config_c1_clken; + // clkens + logic pic_raddr_c1_clken; + logic pic_waddr_c1_clken; + logic pic_data_c1_clken; + logic pic_pri_c1_clken; + logic pic_int_c1_clken; + logic 
gw_config_c1_clken; -// clocks - logic pic_raddr_c1_clk; - logic pic_data_c1_clk; - logic pic_pri_c1_clk; - logic pic_int_c1_clk; - logic gw_config_c1_clk; + // clocks + logic pic_raddr_c1_clk; + logic pic_data_c1_clk; + logic pic_pri_c1_clk; + logic pic_int_c1_clk; + logic gw_config_c1_clk; -// ---- Clock gating section ------ -// c1 clock enables - assign pic_raddr_c1_clken = picm_mken | picm_rden | clk_override; - assign pic_data_c1_clken = picm_wren | clk_override; - assign pic_pri_c1_clken = (waddr_intpriority_base_match & picm_wren_ff) | (raddr_intpriority_base_match & picm_rden_ff) | clk_override; - assign pic_int_c1_clken = (waddr_intenable_base_match & picm_wren_ff) | (raddr_intenable_base_match & picm_rden_ff) | clk_override; - assign gw_config_c1_clken = (waddr_config_gw_base_match & picm_wren_ff) | (raddr_config_gw_base_match & picm_rden_ff) | clk_override; + // ---- Clock gating section ------ + // c1 clock enables + assign pic_raddr_c1_clken = picm_mken | picm_rden | clk_override; + assign pic_data_c1_clken = picm_wren | clk_override; + assign pic_pri_c1_clken = (waddr_intpriority_base_match & picm_wren_ff) | (raddr_intpriority_base_match & picm_rden_ff) | clk_override; + assign pic_int_c1_clken = (waddr_intenable_base_match & picm_wren_ff) | (raddr_intenable_base_match & picm_rden_ff) | clk_override; + assign gw_config_c1_clken = (waddr_config_gw_base_match & picm_wren_ff) | (raddr_config_gw_base_match & picm_rden_ff) | clk_override; - // C1 - 1 clock pulse for data - rvoclkhdr pic_addr_c1_cgc ( .en(pic_raddr_c1_clken), .l1clk(pic_raddr_c1_clk), .* ); - rvoclkhdr pic_data_c1_cgc ( .en(pic_data_c1_clken), .l1clk(pic_data_c1_clk), .* ); - rvoclkhdr pic_pri_c1_cgc ( .en(pic_pri_c1_clken), .l1clk(pic_pri_c1_clk), .* ); - rvoclkhdr pic_int_c1_cgc ( .en(pic_int_c1_clken), .l1clk(pic_int_c1_clk), .* ); - rvoclkhdr gw_config_c1_cgc ( .en(gw_config_c1_clken), .l1clk(gw_config_c1_clk), .* ); + // C1 - 1 clock pulse for data + rvoclkhdr pic_addr_c1_cgc ( + 
.en(pic_raddr_c1_clken), + .l1clk(pic_raddr_c1_clk), + .* + ); + rvoclkhdr pic_data_c1_cgc ( + .en(pic_data_c1_clken), + .l1clk(pic_data_c1_clk), + .* + ); + rvoclkhdr pic_pri_c1_cgc ( + .en(pic_pri_c1_clken), + .l1clk(pic_pri_c1_clk), + .* + ); + rvoclkhdr pic_int_c1_cgc ( + .en(pic_int_c1_clken), + .l1clk(pic_int_c1_clk), + .* + ); + rvoclkhdr gw_config_c1_cgc ( + .en(gw_config_c1_clken), + .l1clk(gw_config_c1_clk), + .* + ); -// ------ end clock gating section ------------------------ + // ------ end clock gating section ------------------------ -assign raddr_intenable_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; -assign raddr_intpriority_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; -assign raddr_config_gw_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; -assign raddr_config_pic_match = (picm_raddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ; + assign raddr_intenable_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; + assign raddr_intpriority_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; + assign raddr_config_gw_base_match = (picm_raddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; + assign raddr_config_pic_match = (picm_raddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]); -assign addr_intpend_base_match = (picm_raddr_ff[31:6] == INTPEND_BASE_ADDR[31:6]) ; + assign addr_intpend_base_match = (picm_raddr_ff[31:6] == INTPEND_BASE_ADDR[31:6]); -assign waddr_config_pic_match = (picm_waddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]) ; -assign addr_clear_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CLEAR[31:NUM_LEVELS+2]) ; -assign waddr_intpriority_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; -assign waddr_intenable_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; 
-assign waddr_config_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; + assign waddr_config_pic_match = (picm_waddr_ff[31:0] == EXT_INTR_PIC_CONFIG[31:0]); + assign addr_clear_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CLEAR[31:NUM_LEVELS+2]) ; + assign waddr_intpriority_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTPRIORITY_BASE_ADDR[31:NUM_LEVELS+2]) ; + assign waddr_intenable_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == INTENABLE_BASE_ADDR[31:NUM_LEVELS+2]) ; + assign waddr_config_gw_base_match = (picm_waddr_ff[31:NUM_LEVELS+2] == EXT_INTR_GW_CONFIG[31:NUM_LEVELS+2]) ; - assign picm_bypass_ff = picm_rden_ff & picm_wren_ff & ( picm_raddr_ff[31:0] == picm_waddr_ff[31:0] ); // pic writes and reads to same address together + assign picm_bypass_ff = picm_rden_ff & picm_wren_ff & ( picm_raddr_ff[31:0] == picm_waddr_ff[31:0] ); // pic writes and reads to same address together -rvdff #(32) picm_radd_flop (.*, .din (picm_rdaddr), .dout(picm_raddr_ff), .clk(pic_raddr_c1_clk)); -rvdff #(32) picm_wadd_flop (.*, .din (picm_wraddr), .dout(picm_waddr_ff), .clk(pic_data_c1_clk)); -rvdff #(1) picm_wre_flop (.*, .din (picm_wren), .dout(picm_wren_ff), .clk(free_clk)); -rvdff #(1) picm_rde_flop (.*, .din (picm_rden), .dout(picm_rden_ff), .clk(free_clk)); -rvdff #(1) picm_mke_flop (.*, .din (picm_mken), .dout(picm_mken_ff), .clk(free_clk)); -rvdff #(32) picm_dat_flop (.*, .din (picm_wr_data[31:0]), .dout(picm_wr_data_ff[31:0]), .clk(pic_data_c1_clk)); + rvdff #(32) picm_radd_flop ( + .*, + .din (picm_rdaddr), + .dout(picm_raddr_ff), + .clk (pic_raddr_c1_clk) + ); + rvdff #(32) picm_wadd_flop ( + .*, + .din (picm_wraddr), + .dout(picm_waddr_ff), + .clk (pic_data_c1_clk) + ); + rvdff #(1) picm_wre_flop ( + .*, + .din (picm_wren), + .dout(picm_wren_ff), + .clk (free_clk) + ); + rvdff #(1) picm_rde_flop ( + .*, + .din (picm_rden), + .dout(picm_rden_ff), + .clk (free_clk) + ); + rvdff #(1) picm_mke_flop ( + .*, + .din 
(picm_mken), + .dout(picm_mken_ff), + .clk (free_clk) + ); + rvdff #(32) picm_dat_flop ( + .*, + .din (picm_wr_data[31:0]), + .dout(picm_wr_data_ff[31:0]), + .clk (pic_data_c1_clk) + ); -//rvsyncss #(pt.PIC_TOTAL_INT_PLUS1-1) sync_inst -//( -// .clk (free_clk), -// .dout(extintsrc_req_sync[pt.PIC_TOTAL_INT_PLUS1-1:1]), -// .din (extintsrc_req[pt.PIC_TOTAL_INT_PLUS1-1:1]), -// .*) ; -// -//assign extintsrc_req_sync[0] = extintsrc_req[0]; -/* + //rvsyncss #(pt.PIC_TOTAL_INT_PLUS1-1) sync_inst + //( + // .clk (free_clk), + // .dout(extintsrc_req_sync[pt.PIC_TOTAL_INT_PLUS1-1:1]), + // .din (extintsrc_req[pt.PIC_TOTAL_INT_PLUS1-1:1]), + // .*) ; + // + //assign extintsrc_req_sync[0] = extintsrc_req[0]; + /* genvar p ; for (p=0; p<=INT_ENABLE_GRPS ; p++) begin : IO_CLK_GRP if (p==INT_ENABLE_GRPS) begin : LAST_GRP @@ -219,29 +268,37 @@ end -genvar i ; -genvar p ; -for (p=0; p<=INT_ENABLE_GRPS ; p++) begin : IO_CLK_GRP -wire grp_clk, grp_clken; + genvar i; + genvar p; + for (p = 0; p <= INT_ENABLE_GRPS; p++) begin : IO_CLK_GRP + wire grp_clk, grp_clken; assign grp_clken = |intenable_clk_enable[(p==INT_ENABLE_GRPS?pt.PIC_TOTAL_INT_PLUS1-1:p*4+3) : p*4] | io_clk_override; - rvclkhdr intenable_c1_cgc( .en(grp_clken), .l1clk(grp_clk), .* ); + rvclkhdr intenable_c1_cgc ( + .en(grp_clken), + .l1clk(grp_clk), + .* + ); - for(genvar i= (p==0 ? 1: 0); i< (p==INT_ENABLE_GRPS ? pt.PIC_TOTAL_INT_PLUS1-p*4 :4); i++) begin : GW - el2_configurable_gw gw_inst( - .*, - .gw_clk(grp_clk), - .rawclk(clk), - .clken (grp_clken), - .extintsrc_req(extintsrc_req[i+p*4]) , - .meigwctrl_polarity(gw_config_reg[i+p*4][0]) , - .meigwctrl_type(gw_config_reg[i+p*4][1]) , - .meigwclr(gw_clear_reg_we[i+p*4]) , - .extintsrc_req_config(extintsrc_req_gw[i+p*4]) - ); + for ( + genvar i = (p == 0 ? 1 : 0); + i < (p == INT_ENABLE_GRPS ? 
pt.PIC_TOTAL_INT_PLUS1 - p * 4 : 4); + i++ + ) begin : GW + el2_configurable_gw gw_inst ( + .*, + .gw_clk(grp_clk), + .rawclk(clk), + .clken(grp_clken), + .extintsrc_req(extintsrc_req[i+p*4]), + .meigwctrl_polarity(gw_config_reg[i+p*4][0]), + .meigwctrl_type(gw_config_reg[i+p*4][1]), + .meigwclr(gw_clear_reg_we[i+p*4]), + .extintsrc_req_config(extintsrc_req_gw[i+p*4]) + ); end -end + end @@ -250,27 +307,45 @@ end -for (i=0; i 0 ) begin : NON_ZERO_INT - assign intpriority_reg_we[i] = waddr_intpriority_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; - assign intpriority_reg_re[i] = raddr_intpriority_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + if (i > 0) begin : NON_ZERO_INT + assign intpriority_reg_we[i] = waddr_intpriority_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign intpriority_reg_re[i] = raddr_intpriority_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; - assign intenable_reg_we[i] = waddr_intenable_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; - assign intenable_reg_re[i] = raddr_intenable_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + assign intenable_reg_we[i] = waddr_intenable_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign intenable_reg_re[i] = raddr_intenable_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; - assign gw_config_reg_we[i] = waddr_config_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; - assign gw_config_reg_re[i] = raddr_config_gw_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; + assign gw_config_reg_we[i] = waddr_config_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff; + assign gw_config_reg_re[i] = raddr_config_gw_base_match & (picm_raddr_ff[NUM_LEVELS+1:2] == i) & picm_rden_ff; - assign gw_clear_reg_we[i] = addr_clear_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff ; + assign gw_clear_reg_we[i] 
= addr_clear_gw_base_match & (picm_waddr_ff[NUM_LEVELS+1:2] == i) & picm_wren_ff ; - rvdffs #(INTPRIORITY_BITS) intpriority_ff (.*, .en( intpriority_reg_we[i]), .din (picm_wr_data_ff[INTPRIORITY_BITS-1:0]), .dout(intpriority_reg[i]), .clk(pic_pri_c1_clk)); - rvdffs #(1) intenable_ff (.*, .en( intenable_reg_we[i]), .din (picm_wr_data_ff[0]), .dout(intenable_reg[i]), .clk(pic_int_c1_clk)); - rvdffs #(2) gw_config_ff (.*, .en( gw_config_reg_we[i]), .din (picm_wr_data_ff[1:0]), .dout(gw_config_reg[i]), .clk(gw_config_c1_clk)); + rvdffs #(INTPRIORITY_BITS) intpriority_ff ( + .*, + .en (intpriority_reg_we[i]), + .din (picm_wr_data_ff[INTPRIORITY_BITS-1:0]), + .dout(intpriority_reg[i]), + .clk (pic_pri_c1_clk) + ); + rvdffs #(1) intenable_ff ( + .*, + .en (intenable_reg_we[i]), + .din (picm_wr_data_ff[0]), + .dout(intenable_reg[i]), + .clk (pic_int_c1_clk) + ); + rvdffs #(2) gw_config_ff ( + .*, + .en (gw_config_reg_we[i]), + .din (picm_wr_data_ff[1:0]), + .dout(gw_config_reg[i]), + .clk (gw_config_c1_clk) + ); - assign intenable_clk_enable[i] = gw_config_reg[i][1] | intenable_reg_we[i] | intenable_reg[i] | gw_clear_reg_we[i] ; + assign intenable_clk_enable[i] = gw_config_reg[i][1] | intenable_reg_we[i] | intenable_reg[i] | gw_clear_reg_we[i] ; -/* + /* rvsyncss_fpga #(1) sync_inst ( .gw_clk (gw_clk[i/4]), @@ -296,215 +371,273 @@ for (i=0; i meipt_inv[INTPRIORITY_BITS-1:0]) & + ( selected_int_priority[INTPRIORITY_BITS-1:0] > meicurpl_inv[INTPRIORITY_BITS-1:0]) ); + rvdff #(1) mexintpend_ff ( + .*, + .clk (free_clk), + .din (mexintpend_in), + .dout(mexintpend) + ); + + assign maxint[INTPRIORITY_BITS-1:0] = intpriord ? 0 : 15; + assign mhwakeup_in = (pl_in_q[INTPRIORITY_BITS-1:0] == maxint); + rvdff #(1) wake_up_ff ( + .*, + .clk (free_clk), + .din (mhwakeup_in), + .dout(mhwakeup) + ); + + + + + + ////////////////////////////////////////////////////////////////////////// + // Reads of register. 
+ // 1- intpending + ////////////////////////////////////////////////////////////////////////// + + assign intpend_reg_read = addr_intpend_base_match & picm_rden_ff; + assign intpriority_reg_read = raddr_intpriority_base_match & picm_rden_ff; + assign intenable_reg_read = raddr_intenable_base_match & picm_rden_ff; + assign gw_config_reg_read = raddr_config_gw_base_match & picm_rden_ff; + + assign intpend_reg_extended[INTPEND_SIZE-1:0] = { + {INTPEND_SIZE - pt.PIC_TOTAL_INT_PLUS1{1'b0}}, extintsrc_req_gw[pt.PIC_TOTAL_INT_PLUS1-1:0] + }; + + for (i = 0; i < (INT_GRPS); i++) begin + assign intpend_rd_part_out[i] = (({32{intpend_reg_read & picm_raddr_ff[5:2] == i}}) & intpend_reg_extended[((32*i)+31):(32*i)]) ; + end + + always_comb begin : INTPEND_RD + intpend_rd_out = '0; + for (int i = 0; i < INT_GRPS; i++) begin + intpend_rd_out |= intpend_rd_part_out[i]; end end - assign claimid_in[ID_BITS-1:0] = levelx_intpend_id[NUM_LEVELS][0] ; // This is the last level output - assign selected_int_priority[INTPRIORITY_BITS-1:0] = levelx_intpend_w_prior_en[NUM_LEVELS][0] ; -end -else begin : genblock - - logic [NUM_LEVELS:0] [pt.PIC_TOTAL_INT_PLUS1+1:0] [INTPRIORITY_BITS-1:0] level_intpend_w_prior_en; - logic [NUM_LEVELS:0] [pt.PIC_TOTAL_INT_PLUS1+1:0] [ID_BITS-1:0] level_intpend_id; - - assign level_intpend_w_prior_en[0][pt.PIC_TOTAL_INT_PLUS1+1:0] = {{2*INTPRIORITY_BITS{1'b0}},intpend_w_prior_en[pt.PIC_TOTAL_INT_PLUS1-1:0]} ; - assign level_intpend_id[0][pt.PIC_TOTAL_INT_PLUS1+1:0] = {{2*ID_BITS{1'b1}},intpend_id[pt.PIC_TOTAL_INT_PLUS1-1:0]} ; - -/// Do the prioritization of the interrupts here //////////// -// genvar l, m , j, k; already declared outside ifdef - for (l=0; l AHB Gasket for LSU - axi4_to_ahb #(.pt(pt), - .TAG(pt.LSU_BUS_TAG)) lsu_axi4_to_ahb ( - - .clk(free_l2clk), - .free_clk(free_clk), - .rst_l(core_rst_l), - .clk_override(dec_tlu_bus_clk_override), - .bus_clk_en(lsu_bus_clk_en), - .dec_tlu_force_halt(dec_tlu_force_halt), - - // AXI Write Channels - 
.axi_awvalid(lsu_axi_awvalid), - .axi_awready(lsu_axi_awready_ahb), - .axi_awid(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]), - .axi_awaddr(lsu_axi_awaddr[31:0]), - .axi_awsize(lsu_axi_awsize[2:0]), - .axi_awprot(lsu_axi_awprot[2:0]), - - .axi_wvalid(lsu_axi_wvalid), - .axi_wready(lsu_axi_wready_ahb), - .axi_wdata(lsu_axi_wdata[63:0]), - .axi_wstrb(lsu_axi_wstrb[7:0]), - .axi_wlast(lsu_axi_wlast), - - .axi_bvalid(lsu_axi_bvalid_ahb), - .axi_bready(lsu_axi_bready), - .axi_bresp(lsu_axi_bresp_ahb[1:0]), - .axi_bid(lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0]), - - // AXI Read Channels - .axi_arvalid(lsu_axi_arvalid), - .axi_arready(lsu_axi_arready_ahb), - .axi_arid(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]), - .axi_araddr(lsu_axi_araddr[31:0]), - .axi_arsize(lsu_axi_arsize[2:0]), - .axi_arprot(lsu_axi_arprot[2:0]), - - .axi_rvalid(lsu_axi_rvalid_ahb), - .axi_rready(lsu_axi_rready), - .axi_rid(lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0]), - .axi_rdata(lsu_axi_rdata_ahb[63:0]), - .axi_rresp(lsu_axi_rresp_ahb[1:0]), - .axi_rlast(lsu_axi_rlast_ahb), - - // AHB-LITE signals - .ahb_haddr(lsu_haddr[31:0]), - .ahb_hburst(lsu_hburst), - .ahb_hmastlock(lsu_hmastlock), - .ahb_hprot(lsu_hprot[3:0]), - .ahb_hsize(lsu_hsize[2:0]), - .ahb_htrans(lsu_htrans[1:0]), - .ahb_hwrite(lsu_hwrite), - .ahb_hwdata(lsu_hwdata[63:0]), - - .ahb_hrdata(lsu_hrdata[63:0]), - .ahb_hready(lsu_hready), - .ahb_hresp(lsu_hresp), - - .* - ); - - axi4_to_ahb #(.pt(pt), - .TAG(pt.IFU_BUS_TAG)) ifu_axi4_to_ahb ( - .clk(free_l2clk), - .free_clk(free_clk), - .rst_l(core_rst_l), - .clk_override(dec_tlu_bus_clk_override), - .bus_clk_en(ifu_bus_clk_en), - .dec_tlu_force_halt(dec_tlu_force_halt), - - // AHB-Lite signals - .ahb_haddr(haddr[31:0]), - .ahb_hburst(hburst), - .ahb_hmastlock(hmastlock), - .ahb_hprot(hprot[3:0]), - .ahb_hsize(hsize[2:0]), - .ahb_htrans(htrans[1:0]), - .ahb_hwrite(hwrite), - .ahb_hwdata(hwdata_nc[63:0]), - - .ahb_hrdata(hrdata[63:0]), - .ahb_hready(hready), - .ahb_hresp(hresp), - - // AXI Write Channels - 
.axi_awvalid(ifu_axi_awvalid), - .axi_awready(ifu_axi_awready_ahb), - .axi_awid(ifu_axi_awid[pt.IFU_BUS_TAG-1:0]), - .axi_awaddr(ifu_axi_awaddr[31:0]), - .axi_awsize(ifu_axi_awsize[2:0]), - .axi_awprot(ifu_axi_awprot[2:0]), - - .axi_wvalid(ifu_axi_wvalid), - .axi_wready(ifu_axi_wready_ahb), - .axi_wdata(ifu_axi_wdata[63:0]), - .axi_wstrb(ifu_axi_wstrb[7:0]), - .axi_wlast(ifu_axi_wlast), - - .axi_bvalid(ifu_axi_bvalid_ahb), - .axi_bready(1'b1), - .axi_bresp(ifu_axi_bresp_ahb[1:0]), - .axi_bid(ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0]), - - // AXI Read Channels - .axi_arvalid(ifu_axi_arvalid), - .axi_arready(ifu_axi_arready_ahb), - .axi_arid(ifu_axi_arid[pt.IFU_BUS_TAG-1:0]), - .axi_araddr(ifu_axi_araddr[31:0]), - .axi_arsize(ifu_axi_arsize[2:0]), - .axi_arprot(ifu_axi_arprot[2:0]), - - .axi_rvalid(ifu_axi_rvalid_ahb), - .axi_rready(ifu_axi_rready), - .axi_rid(ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0]), - .axi_rdata(ifu_axi_rdata_ahb[63:0]), - .axi_rresp(ifu_axi_rresp_ahb[1:0]), - .axi_rlast(ifu_axi_rlast_ahb), - .* - ); - - // AXI4 -> AHB Gasket for System Bus - axi4_to_ahb #(.pt(pt), - .TAG(pt.SB_BUS_TAG)) sb_axi4_to_ahb ( - .clk(free_l2clk), - .free_clk(free_clk), - .rst_l(dbg_rst_l), - .clk_override(dec_tlu_bus_clk_override), - .bus_clk_en(dbg_bus_clk_en), - .dec_tlu_force_halt(1'b0), - - // AXI Write Channels - .axi_awvalid(sb_axi_awvalid), - .axi_awready(sb_axi_awready_ahb), - .axi_awid(sb_axi_awid[pt.SB_BUS_TAG-1:0]), - .axi_awaddr(sb_axi_awaddr[31:0]), - .axi_awsize(sb_axi_awsize[2:0]), - .axi_awprot(sb_axi_awprot[2:0]), - - .axi_wvalid(sb_axi_wvalid), - .axi_wready(sb_axi_wready_ahb), - .axi_wdata(sb_axi_wdata[63:0]), - .axi_wstrb(sb_axi_wstrb[7:0]), - .axi_wlast(sb_axi_wlast), - - .axi_bvalid(sb_axi_bvalid_ahb), - .axi_bready(sb_axi_bready), - .axi_bresp(sb_axi_bresp_ahb[1:0]), - .axi_bid(sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0]), - - // AXI Read Channels - .axi_arvalid(sb_axi_arvalid), - .axi_arready(sb_axi_arready_ahb), - .axi_arid(sb_axi_arid[pt.SB_BUS_TAG-1:0]), - 
.axi_araddr(sb_axi_araddr[31:0]), - .axi_arsize(sb_axi_arsize[2:0]), - .axi_arprot(sb_axi_arprot[2:0]), - - .axi_rvalid(sb_axi_rvalid_ahb), - .axi_rready(sb_axi_rready), - .axi_rid(sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0]), - .axi_rdata(sb_axi_rdata_ahb[63:0]), - .axi_rresp(sb_axi_rresp_ahb[1:0]), - .axi_rlast(sb_axi_rlast_ahb), - // AHB-LITE signals - .ahb_haddr(sb_haddr[31:0]), - .ahb_hburst(sb_hburst), - .ahb_hmastlock(sb_hmastlock), - .ahb_hprot(sb_hprot[3:0]), - .ahb_hsize(sb_hsize[2:0]), - .ahb_htrans(sb_htrans[1:0]), - .ahb_hwrite(sb_hwrite), - .ahb_hwdata(sb_hwdata[63:0]), - - .ahb_hrdata(sb_hrdata[63:0]), - .ahb_hready(sb_hready), - .ahb_hresp(sb_hresp), - - .* - ); - - //AHB -> AXI4 Gasket for DMA - ahb_to_axi4 #(.pt(pt), - .TAG(pt.DMA_BUS_TAG)) dma_ahb_to_axi4 ( - .clk(free_l2clk), - .rst_l(core_rst_l), - .clk_override(dec_tlu_bus_clk_override), - .bus_clk_en(dma_bus_clk_en), - - // AXI Write Channels - .axi_awvalid(dma_axi_awvalid_ahb), - .axi_awready(dma_axi_awready), - .axi_awid(dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0]), - .axi_awaddr(dma_axi_awaddr_ahb[31:0]), - .axi_awsize(dma_axi_awsize_ahb[2:0]), - .axi_awprot(dma_axi_awprot_ahb[2:0]), - .axi_awlen(dma_axi_awlen_ahb[7:0]), - .axi_awburst(dma_axi_awburst_ahb[1:0]), - - .axi_wvalid(dma_axi_wvalid_ahb), - .axi_wready(dma_axi_wready), - .axi_wdata(dma_axi_wdata_ahb[63:0]), - .axi_wstrb(dma_axi_wstrb_ahb[7:0]), - .axi_wlast(dma_axi_wlast_ahb), - - .axi_bvalid(dma_axi_bvalid), - .axi_bready(dma_axi_bready_ahb), - .axi_bresp(dma_axi_bresp[1:0]), - .axi_bid(dma_axi_bid[pt.DMA_BUS_TAG-1:0]), - - // AXI Read Channels - .axi_arvalid(dma_axi_arvalid_ahb), - .axi_arready(dma_axi_arready), - .axi_arid(dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0]), - .axi_araddr(dma_axi_araddr_ahb[31:0]), - .axi_arsize(dma_axi_arsize_ahb[2:0]), - .axi_arprot(dma_axi_arprot_ahb[2:0]), - .axi_arlen(dma_axi_arlen_ahb[7:0]), - .axi_arburst(dma_axi_arburst_ahb[1:0]), - - .axi_rvalid(dma_axi_rvalid), - .axi_rready(dma_axi_rready_ahb), - 
.axi_rid(dma_axi_rid[pt.DMA_BUS_TAG-1:0]), - .axi_rdata(dma_axi_rdata[63:0]), - .axi_rresp(dma_axi_rresp[1:0]), - - // AHB signals - .ahb_haddr(dma_haddr[31:0]), - .ahb_hburst(dma_hburst), - .ahb_hmastlock(dma_hmastlock), - .ahb_hprot(dma_hprot[3:0]), - .ahb_hsize(dma_hsize[2:0]), - .ahb_htrans(dma_htrans[1:0]), - .ahb_hwrite(dma_hwrite), - .ahb_hwdata(dma_hwdata[63:0]), - - .ahb_hrdata(dma_hrdata[63:0]), - .ahb_hreadyout(dma_hreadyout), - .ahb_hresp(dma_hresp), - .ahb_hreadyin(dma_hreadyin), - .ahb_hsel(dma_hsel), - .* - ); - - end - - // Drive the final AXI inputs - assign lsu_axi_awready_int = pt.BUILD_AHB_LITE ? lsu_axi_awready_ahb : lsu_axi_awready; - assign lsu_axi_wready_int = pt.BUILD_AHB_LITE ? lsu_axi_wready_ahb : lsu_axi_wready; - assign lsu_axi_bvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_bvalid_ahb : lsu_axi_bvalid; - assign lsu_axi_bready_int = pt.BUILD_AHB_LITE ? lsu_axi_bready_ahb : lsu_axi_bready; - assign lsu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bresp_ahb[1:0] : lsu_axi_bresp[1:0]; - assign lsu_axi_bid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_bid[pt.LSU_BUS_TAG-1:0]; - assign lsu_axi_arready_int = pt.BUILD_AHB_LITE ? lsu_axi_arready_ahb : lsu_axi_arready; - assign lsu_axi_rvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_rvalid_ahb : lsu_axi_rvalid; - assign lsu_axi_rid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_rid[pt.LSU_BUS_TAG-1:0]; - assign lsu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? lsu_axi_rdata_ahb[63:0] : lsu_axi_rdata[63:0]; - assign lsu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rresp_ahb[1:0] : lsu_axi_rresp[1:0]; - assign lsu_axi_rlast_int = pt.BUILD_AHB_LITE ? lsu_axi_rlast_ahb : lsu_axi_rlast; - - assign ifu_axi_awready_int = pt.BUILD_AHB_LITE ? ifu_axi_awready_ahb : ifu_axi_awready; - assign ifu_axi_wready_int = pt.BUILD_AHB_LITE ? ifu_axi_wready_ahb : ifu_axi_wready; - assign ifu_axi_bvalid_int = pt.BUILD_AHB_LITE ? 
ifu_axi_bvalid_ahb : ifu_axi_bvalid; - assign ifu_axi_bready_int = pt.BUILD_AHB_LITE ? ifu_axi_bready_ahb : ifu_axi_bready; - assign ifu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bresp_ahb[1:0] : ifu_axi_bresp[1:0]; - assign ifu_axi_bid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_bid[pt.IFU_BUS_TAG-1:0]; - assign ifu_axi_arready_int = pt.BUILD_AHB_LITE ? ifu_axi_arready_ahb : ifu_axi_arready; - assign ifu_axi_rvalid_int = pt.BUILD_AHB_LITE ? ifu_axi_rvalid_ahb : ifu_axi_rvalid; - assign ifu_axi_rid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_rid[pt.IFU_BUS_TAG-1:0]; - assign ifu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? ifu_axi_rdata_ahb[63:0] : ifu_axi_rdata[63:0]; - assign ifu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_rresp_ahb[1:0] : ifu_axi_rresp[1:0]; - assign ifu_axi_rlast_int = pt.BUILD_AHB_LITE ? ifu_axi_rlast_ahb : ifu_axi_rlast; - - assign sb_axi_awready_int = pt.BUILD_AHB_LITE ? sb_axi_awready_ahb : sb_axi_awready; - assign sb_axi_wready_int = pt.BUILD_AHB_LITE ? sb_axi_wready_ahb : sb_axi_wready; - assign sb_axi_bvalid_int = pt.BUILD_AHB_LITE ? sb_axi_bvalid_ahb : sb_axi_bvalid; - assign sb_axi_bready_int = pt.BUILD_AHB_LITE ? sb_axi_bready_ahb : sb_axi_bready; - assign sb_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? sb_axi_bresp_ahb[1:0] : sb_axi_bresp[1:0]; - assign sb_axi_bid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_bid[pt.SB_BUS_TAG-1:0]; - assign sb_axi_arready_int = pt.BUILD_AHB_LITE ? sb_axi_arready_ahb : sb_axi_arready; - assign sb_axi_rvalid_int = pt.BUILD_AHB_LITE ? sb_axi_rvalid_ahb : sb_axi_rvalid; - assign sb_axi_rid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_rid[pt.SB_BUS_TAG-1:0]; - assign sb_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? sb_axi_rdata_ahb[63:0] : sb_axi_rdata[63:0]; - assign sb_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? 
sb_axi_rresp_ahb[1:0] : sb_axi_rresp[1:0]; - assign sb_axi_rlast_int = pt.BUILD_AHB_LITE ? sb_axi_rlast_ahb : sb_axi_rlast; - - assign dma_axi_awvalid_int = pt.BUILD_AHB_LITE ? dma_axi_awvalid_ahb : dma_axi_awvalid; - assign dma_axi_awid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_awid[pt.DMA_BUS_TAG-1:0]; - assign dma_axi_awaddr_int[31:0] = pt.BUILD_AHB_LITE ? dma_axi_awaddr_ahb[31:0] : dma_axi_awaddr[31:0]; - assign dma_axi_awsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_awsize_ahb[2:0] : dma_axi_awsize[2:0]; - assign dma_axi_awprot_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_awprot_ahb[2:0] : dma_axi_awprot[2:0]; - assign dma_axi_awlen_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_awlen_ahb[7:0] : dma_axi_awlen[7:0]; - assign dma_axi_awburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_awburst_ahb[1:0] : dma_axi_awburst[1:0]; - assign dma_axi_wvalid_int = pt.BUILD_AHB_LITE ? dma_axi_wvalid_ahb : dma_axi_wvalid; - assign dma_axi_wdata_int[63:0] = pt.BUILD_AHB_LITE ? dma_axi_wdata_ahb[63:0] : dma_axi_wdata; - assign dma_axi_wstrb_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_wstrb_ahb[7:0] : dma_axi_wstrb[7:0]; - assign dma_axi_wlast_int = pt.BUILD_AHB_LITE ? dma_axi_wlast_ahb : dma_axi_wlast; - assign dma_axi_bready_int = pt.BUILD_AHB_LITE ? dma_axi_bready_ahb : dma_axi_bready; - assign dma_axi_arvalid_int = pt.BUILD_AHB_LITE ? dma_axi_arvalid_ahb : dma_axi_arvalid; - assign dma_axi_arid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_arid[pt.DMA_BUS_TAG-1:0]; - assign dma_axi_araddr_int[31:0] = pt.BUILD_AHB_LITE ? dma_axi_araddr_ahb[31:0] : dma_axi_araddr[31:0]; - assign dma_axi_arsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arsize_ahb[2:0] : dma_axi_arsize[2:0]; - assign dma_axi_arprot_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arprot_ahb[2:0] : dma_axi_arprot[2:0]; - assign dma_axi_arlen_int[7:0] = pt.BUILD_AHB_LITE ? 
dma_axi_arlen_ahb[7:0] : dma_axi_arlen[7:0]; - assign dma_axi_arburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_arburst_ahb[1:0] : dma_axi_arburst[1:0]; - assign dma_axi_rready_int = pt.BUILD_AHB_LITE ? dma_axi_rready_ahb : dma_axi_rready; - - -if (pt.BUILD_AHB_LITE == 1) begin - - end // if (pt.BUILD_AHB_LITE == 1) - - - // unpack packet - // also need retires_p==3 - - assign trace_rv_i_insn_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_insn_ip[31:0]; - - assign trace_rv_i_address_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_address_ip[31:0]; - - assign trace_rv_i_valid_ip = trace_rv_trace_pkt.trace_rv_i_valid_ip; - - assign trace_rv_i_exception_ip = trace_rv_trace_pkt.trace_rv_i_exception_ip; - - assign trace_rv_i_ecause_ip[4:0] = trace_rv_trace_pkt.trace_rv_i_ecause_ip[4:0]; - - assign trace_rv_i_interrupt_ip = trace_rv_trace_pkt.trace_rv_i_interrupt_ip; - - assign trace_rv_i_tval_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_tval_ip[31:0]; - - - -endmodule // el2_swerv + ); + + // ----------------- DEBUG END ----------------------------- + + assign core_rst_l = rst_l & (dbg_core_rst_l | scan_mode); + // fetch + el2_ifu #( + .pt(pt) + ) ifu ( + .clk (active_l2clk), + .rst_l (core_rst_l), + .dec_tlu_flush_err_wb (dec_tlu_flush_err_r), + .dec_tlu_flush_noredir_wb (dec_tlu_flush_noredir_r), + .dec_tlu_fence_i_wb (dec_tlu_fence_i_r), + .dec_tlu_flush_leak_one_wb(dec_tlu_flush_leak_one_r), + .dec_tlu_flush_lower_wb (dec_tlu_flush_lower_r), + + // AXI signals + .ifu_axi_arready(ifu_axi_arready_int), + .ifu_axi_rvalid(ifu_axi_rvalid_int), + .ifu_axi_rid(ifu_axi_rid_int[pt.IFU_BUS_TAG-1:0]), + .ifu_axi_rdata(ifu_axi_rdata_int[63:0]), + .ifu_axi_rresp(ifu_axi_rresp_int[1:0]), + + .* + ); + + + el2_dec #( + .pt(pt) + ) dec ( + .clk(active_l2clk), + .dbg_cmd_wrdata(dbg_cmd_wrdata[1:0]), + .rst_l(core_rst_l), + .* + ); + + el2_exu #( + .pt(pt) + ) exu ( + .clk (active_l2clk), + .rst_l(core_rst_l), + .* + ); + + el2_lsu #( + .pt(pt) + ) lsu ( + .clk(active_l2clk), + .rst_l(core_rst_l), + 
.clk_override(dec_tlu_lsu_clk_override), + .dec_tlu_i0_kill_writeb_r(dec_tlu_i0_kill_writeb_r), + + // AXI signals + .lsu_axi_awready(lsu_axi_awready_int), + .lsu_axi_wready(lsu_axi_wready_int), + .lsu_axi_bvalid(lsu_axi_bvalid_int), + .lsu_axi_bid(lsu_axi_bid_int[pt.LSU_BUS_TAG-1:0]), + .lsu_axi_bresp(lsu_axi_bresp_int[1:0]), + + .lsu_axi_arready(lsu_axi_arready_int), + .lsu_axi_rvalid(lsu_axi_rvalid_int), + .lsu_axi_rid(lsu_axi_rid_int[pt.LSU_BUS_TAG-1:0]), + .lsu_axi_rdata(lsu_axi_rdata_int[63:0]), + .lsu_axi_rresp(lsu_axi_rresp_int[1:0]), + .lsu_axi_rlast(lsu_axi_rlast_int), + + .* + + ); + + + el2_pic_ctrl #( + .pt(pt) + ) pic_ctrl_inst ( + .clk(free_l2clk), + .clk_override(dec_tlu_pic_clk_override), + .io_clk_override(dec_tlu_picio_clk_override), + .picm_mken(picm_mken), + .extintsrc_req({extintsrc_req[pt.PIC_TOTAL_INT:1], 1'b0}), + .pl(pic_pl[3:0]), + .claimid(pic_claimid[7:0]), + .meicurpl(dec_tlu_meicurpl[3:0]), + .meipt(dec_tlu_meipt[3:0]), + .rst_l(core_rst_l), + .* + ); + + el2_dma_ctrl #( + .pt(pt) + ) dma_ctrl ( + .clk(free_l2clk), + .rst_l(core_rst_l), + .clk_override(dec_tlu_misc_clk_override), + + // AXI signals + .dma_axi_awvalid(dma_axi_awvalid_int), + .dma_axi_awid(dma_axi_awid_int[pt.DMA_BUS_TAG-1:0]), + .dma_axi_awaddr(dma_axi_awaddr_int[31:0]), + .dma_axi_awsize(dma_axi_awsize_int[2:0]), + .dma_axi_wvalid(dma_axi_wvalid_int), + .dma_axi_wdata(dma_axi_wdata_int[63:0]), + .dma_axi_wstrb(dma_axi_wstrb_int[7:0]), + .dma_axi_bready(dma_axi_bready_int), + + .dma_axi_arvalid(dma_axi_arvalid_int), + .dma_axi_arid(dma_axi_arid_int[pt.DMA_BUS_TAG-1:0]), + .dma_axi_araddr(dma_axi_araddr_int[31:0]), + .dma_axi_arsize(dma_axi_arsize_int[2:0]), + .dma_axi_rready(dma_axi_rready_int), + + .* + ); + + if (pt.BUILD_AHB_LITE == 1) begin : Gen_AXI_To_AHB + + // AXI4 -> AHB Gasket for LSU + axi4_to_ahb #( + .pt (pt), + .TAG(pt.LSU_BUS_TAG) + ) lsu_axi4_to_ahb ( + + .clk(free_l2clk), + .free_clk(free_clk), + .rst_l(core_rst_l), + 
.clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(lsu_bus_clk_en), + .dec_tlu_force_halt(dec_tlu_force_halt), + + // AXI Write Channels + .axi_awvalid(lsu_axi_awvalid), + .axi_awready(lsu_axi_awready_ahb), + .axi_awid(lsu_axi_awid[pt.LSU_BUS_TAG-1:0]), + .axi_awaddr(lsu_axi_awaddr[31:0]), + .axi_awsize(lsu_axi_awsize[2:0]), + .axi_awprot(lsu_axi_awprot[2:0]), + + .axi_wvalid(lsu_axi_wvalid), + .axi_wready(lsu_axi_wready_ahb), + .axi_wdata (lsu_axi_wdata[63:0]), + .axi_wstrb (lsu_axi_wstrb[7:0]), + .axi_wlast (lsu_axi_wlast), + + .axi_bvalid(lsu_axi_bvalid_ahb), + .axi_bready(lsu_axi_bready), + .axi_bresp(lsu_axi_bresp_ahb[1:0]), + .axi_bid(lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(lsu_axi_arvalid), + .axi_arready(lsu_axi_arready_ahb), + .axi_arid(lsu_axi_arid[pt.LSU_BUS_TAG-1:0]), + .axi_araddr(lsu_axi_araddr[31:0]), + .axi_arsize(lsu_axi_arsize[2:0]), + .axi_arprot(lsu_axi_arprot[2:0]), + + .axi_rvalid(lsu_axi_rvalid_ahb), + .axi_rready(lsu_axi_rready), + .axi_rid(lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0]), + .axi_rdata(lsu_axi_rdata_ahb[63:0]), + .axi_rresp(lsu_axi_rresp_ahb[1:0]), + .axi_rlast(lsu_axi_rlast_ahb), + + // AHB-LITE signals + .ahb_haddr(lsu_haddr[31:0]), + .ahb_hburst(lsu_hburst), + .ahb_hmastlock(lsu_hmastlock), + .ahb_hprot(lsu_hprot[3:0]), + .ahb_hsize(lsu_hsize[2:0]), + .ahb_htrans(lsu_htrans[1:0]), + .ahb_hwrite(lsu_hwrite), + .ahb_hwdata(lsu_hwdata[63:0]), + + .ahb_hrdata(lsu_hrdata[63:0]), + .ahb_hready(lsu_hready), + .ahb_hresp (lsu_hresp), + + .* + ); + + axi4_to_ahb #( + .pt (pt), + .TAG(pt.IFU_BUS_TAG) + ) ifu_axi4_to_ahb ( + .clk(free_l2clk), + .free_clk(free_clk), + .rst_l(core_rst_l), + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(ifu_bus_clk_en), + .dec_tlu_force_halt(dec_tlu_force_halt), + + // AHB-Lite signals + .ahb_haddr(haddr[31:0]), + .ahb_hburst(hburst), + .ahb_hmastlock(hmastlock), + .ahb_hprot(hprot[3:0]), + .ahb_hsize(hsize[2:0]), + .ahb_htrans(htrans[1:0]), + .ahb_hwrite(hwrite), 
+ .ahb_hwdata(hwdata_nc[63:0]), + + .ahb_hrdata(hrdata[63:0]), + .ahb_hready(hready), + .ahb_hresp (hresp), + + // AXI Write Channels + .axi_awvalid(ifu_axi_awvalid), + .axi_awready(ifu_axi_awready_ahb), + .axi_awid(ifu_axi_awid[pt.IFU_BUS_TAG-1:0]), + .axi_awaddr(ifu_axi_awaddr[31:0]), + .axi_awsize(ifu_axi_awsize[2:0]), + .axi_awprot(ifu_axi_awprot[2:0]), + + .axi_wvalid(ifu_axi_wvalid), + .axi_wready(ifu_axi_wready_ahb), + .axi_wdata (ifu_axi_wdata[63:0]), + .axi_wstrb (ifu_axi_wstrb[7:0]), + .axi_wlast (ifu_axi_wlast), + + .axi_bvalid(ifu_axi_bvalid_ahb), + .axi_bready(1'b1), + .axi_bresp(ifu_axi_bresp_ahb[1:0]), + .axi_bid(ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(ifu_axi_arvalid), + .axi_arready(ifu_axi_arready_ahb), + .axi_arid(ifu_axi_arid[pt.IFU_BUS_TAG-1:0]), + .axi_araddr(ifu_axi_araddr[31:0]), + .axi_arsize(ifu_axi_arsize[2:0]), + .axi_arprot(ifu_axi_arprot[2:0]), + + .axi_rvalid(ifu_axi_rvalid_ahb), + .axi_rready(ifu_axi_rready), + .axi_rid(ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0]), + .axi_rdata(ifu_axi_rdata_ahb[63:0]), + .axi_rresp(ifu_axi_rresp_ahb[1:0]), + .axi_rlast(ifu_axi_rlast_ahb), + .* + ); + + // AXI4 -> AHB Gasket for System Bus + axi4_to_ahb #( + .pt (pt), + .TAG(pt.SB_BUS_TAG) + ) sb_axi4_to_ahb ( + .clk(free_l2clk), + .free_clk(free_clk), + .rst_l(dbg_rst_l), + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dbg_bus_clk_en), + .dec_tlu_force_halt(1'b0), + + // AXI Write Channels + .axi_awvalid(sb_axi_awvalid), + .axi_awready(sb_axi_awready_ahb), + .axi_awid(sb_axi_awid[pt.SB_BUS_TAG-1:0]), + .axi_awaddr(sb_axi_awaddr[31:0]), + .axi_awsize(sb_axi_awsize[2:0]), + .axi_awprot(sb_axi_awprot[2:0]), + + .axi_wvalid(sb_axi_wvalid), + .axi_wready(sb_axi_wready_ahb), + .axi_wdata (sb_axi_wdata[63:0]), + .axi_wstrb (sb_axi_wstrb[7:0]), + .axi_wlast (sb_axi_wlast), + + .axi_bvalid(sb_axi_bvalid_ahb), + .axi_bready(sb_axi_bready), + .axi_bresp(sb_axi_bresp_ahb[1:0]), + 
.axi_bid(sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(sb_axi_arvalid), + .axi_arready(sb_axi_arready_ahb), + .axi_arid(sb_axi_arid[pt.SB_BUS_TAG-1:0]), + .axi_araddr(sb_axi_araddr[31:0]), + .axi_arsize(sb_axi_arsize[2:0]), + .axi_arprot(sb_axi_arprot[2:0]), + + .axi_rvalid(sb_axi_rvalid_ahb), + .axi_rready(sb_axi_rready), + .axi_rid(sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0]), + .axi_rdata(sb_axi_rdata_ahb[63:0]), + .axi_rresp(sb_axi_rresp_ahb[1:0]), + .axi_rlast(sb_axi_rlast_ahb), + // AHB-LITE signals + .ahb_haddr(sb_haddr[31:0]), + .ahb_hburst(sb_hburst), + .ahb_hmastlock(sb_hmastlock), + .ahb_hprot(sb_hprot[3:0]), + .ahb_hsize(sb_hsize[2:0]), + .ahb_htrans(sb_htrans[1:0]), + .ahb_hwrite(sb_hwrite), + .ahb_hwdata(sb_hwdata[63:0]), + + .ahb_hrdata(sb_hrdata[63:0]), + .ahb_hready(sb_hready), + .ahb_hresp (sb_hresp), + + .* + ); + + //AHB -> AXI4 Gasket for DMA + ahb_to_axi4 #( + .pt (pt), + .TAG(pt.DMA_BUS_TAG) + ) dma_ahb_to_axi4 ( + .clk(free_l2clk), + .rst_l(core_rst_l), + .clk_override(dec_tlu_bus_clk_override), + .bus_clk_en(dma_bus_clk_en), + + // AXI Write Channels + .axi_awvalid(dma_axi_awvalid_ahb), + .axi_awready(dma_axi_awready), + .axi_awid(dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0]), + .axi_awaddr(dma_axi_awaddr_ahb[31:0]), + .axi_awsize(dma_axi_awsize_ahb[2:0]), + .axi_awprot(dma_axi_awprot_ahb[2:0]), + .axi_awlen(dma_axi_awlen_ahb[7:0]), + .axi_awburst(dma_axi_awburst_ahb[1:0]), + + .axi_wvalid(dma_axi_wvalid_ahb), + .axi_wready(dma_axi_wready), + .axi_wdata (dma_axi_wdata_ahb[63:0]), + .axi_wstrb (dma_axi_wstrb_ahb[7:0]), + .axi_wlast (dma_axi_wlast_ahb), + + .axi_bvalid(dma_axi_bvalid), + .axi_bready(dma_axi_bready_ahb), + .axi_bresp(dma_axi_bresp[1:0]), + .axi_bid(dma_axi_bid[pt.DMA_BUS_TAG-1:0]), + + // AXI Read Channels + .axi_arvalid(dma_axi_arvalid_ahb), + .axi_arready(dma_axi_arready), + .axi_arid(dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0]), + .axi_araddr(dma_axi_araddr_ahb[31:0]), + .axi_arsize(dma_axi_arsize_ahb[2:0]), + 
.axi_arprot(dma_axi_arprot_ahb[2:0]), + .axi_arlen(dma_axi_arlen_ahb[7:0]), + .axi_arburst(dma_axi_arburst_ahb[1:0]), + + .axi_rvalid(dma_axi_rvalid), + .axi_rready(dma_axi_rready_ahb), + .axi_rid(dma_axi_rid[pt.DMA_BUS_TAG-1:0]), + .axi_rdata(dma_axi_rdata[63:0]), + .axi_rresp(dma_axi_rresp[1:0]), + + // AHB signals + .ahb_haddr(dma_haddr[31:0]), + .ahb_hburst(dma_hburst), + .ahb_hmastlock(dma_hmastlock), + .ahb_hprot(dma_hprot[3:0]), + .ahb_hsize(dma_hsize[2:0]), + .ahb_htrans(dma_htrans[1:0]), + .ahb_hwrite(dma_hwrite), + .ahb_hwdata(dma_hwdata[63:0]), + + .ahb_hrdata(dma_hrdata[63:0]), + .ahb_hreadyout(dma_hreadyout), + .ahb_hresp(dma_hresp), + .ahb_hreadyin(dma_hreadyin), + .ahb_hsel(dma_hsel), + .* + ); + + end + + // Drive the final AXI inputs + assign lsu_axi_awready_int = pt.BUILD_AHB_LITE ? lsu_axi_awready_ahb : lsu_axi_awready; + assign lsu_axi_wready_int = pt.BUILD_AHB_LITE ? lsu_axi_wready_ahb : lsu_axi_wready; + assign lsu_axi_bvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_bvalid_ahb : lsu_axi_bvalid; + assign lsu_axi_bready_int = pt.BUILD_AHB_LITE ? lsu_axi_bready_ahb : lsu_axi_bready; + assign lsu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bresp_ahb[1:0] : lsu_axi_bresp[1:0]; + assign lsu_axi_bid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_bid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_bid[pt.LSU_BUS_TAG-1:0]; + assign lsu_axi_arready_int = pt.BUILD_AHB_LITE ? lsu_axi_arready_ahb : lsu_axi_arready; + assign lsu_axi_rvalid_int = pt.BUILD_AHB_LITE ? lsu_axi_rvalid_ahb : lsu_axi_rvalid; + assign lsu_axi_rid_int[pt.LSU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rid_ahb[pt.LSU_BUS_TAG-1:0] : lsu_axi_rid[pt.LSU_BUS_TAG-1:0]; + assign lsu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? lsu_axi_rdata_ahb[63:0] : lsu_axi_rdata[63:0]; + assign lsu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? lsu_axi_rresp_ahb[1:0] : lsu_axi_rresp[1:0]; + assign lsu_axi_rlast_int = pt.BUILD_AHB_LITE ? 
lsu_axi_rlast_ahb : lsu_axi_rlast; + + assign ifu_axi_awready_int = pt.BUILD_AHB_LITE ? ifu_axi_awready_ahb : ifu_axi_awready; + assign ifu_axi_wready_int = pt.BUILD_AHB_LITE ? ifu_axi_wready_ahb : ifu_axi_wready; + assign ifu_axi_bvalid_int = pt.BUILD_AHB_LITE ? ifu_axi_bvalid_ahb : ifu_axi_bvalid; + assign ifu_axi_bready_int = pt.BUILD_AHB_LITE ? ifu_axi_bready_ahb : ifu_axi_bready; + assign ifu_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bresp_ahb[1:0] : ifu_axi_bresp[1:0]; + assign ifu_axi_bid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? ifu_axi_bid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_bid[pt.IFU_BUS_TAG-1:0]; + assign ifu_axi_arready_int = pt.BUILD_AHB_LITE ? ifu_axi_arready_ahb : ifu_axi_arready; + assign ifu_axi_rvalid_int = pt.BUILD_AHB_LITE ? ifu_axi_rvalid_ahb : ifu_axi_rvalid; + assign ifu_axi_rid_int[pt.IFU_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? ifu_axi_rid_ahb[pt.IFU_BUS_TAG-1:0] : ifu_axi_rid[pt.IFU_BUS_TAG-1:0]; + assign ifu_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? ifu_axi_rdata_ahb[63:0] : ifu_axi_rdata[63:0]; + assign ifu_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? ifu_axi_rresp_ahb[1:0] : ifu_axi_rresp[1:0]; + assign ifu_axi_rlast_int = pt.BUILD_AHB_LITE ? ifu_axi_rlast_ahb : ifu_axi_rlast; + + assign sb_axi_awready_int = pt.BUILD_AHB_LITE ? sb_axi_awready_ahb : sb_axi_awready; + assign sb_axi_wready_int = pt.BUILD_AHB_LITE ? sb_axi_wready_ahb : sb_axi_wready; + assign sb_axi_bvalid_int = pt.BUILD_AHB_LITE ? sb_axi_bvalid_ahb : sb_axi_bvalid; + assign sb_axi_bready_int = pt.BUILD_AHB_LITE ? sb_axi_bready_ahb : sb_axi_bready; + assign sb_axi_bresp_int[1:0] = pt.BUILD_AHB_LITE ? sb_axi_bresp_ahb[1:0] : sb_axi_bresp[1:0]; + assign sb_axi_bid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_bid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_bid[pt.SB_BUS_TAG-1:0]; + assign sb_axi_arready_int = pt.BUILD_AHB_LITE ? sb_axi_arready_ahb : sb_axi_arready; + assign sb_axi_rvalid_int = pt.BUILD_AHB_LITE ? 
sb_axi_rvalid_ahb : sb_axi_rvalid; + assign sb_axi_rid_int[pt.SB_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? sb_axi_rid_ahb[pt.SB_BUS_TAG-1:0] : sb_axi_rid[pt.SB_BUS_TAG-1:0]; + assign sb_axi_rdata_int[63:0] = pt.BUILD_AHB_LITE ? sb_axi_rdata_ahb[63:0] : sb_axi_rdata[63:0]; + assign sb_axi_rresp_int[1:0] = pt.BUILD_AHB_LITE ? sb_axi_rresp_ahb[1:0] : sb_axi_rresp[1:0]; + assign sb_axi_rlast_int = pt.BUILD_AHB_LITE ? sb_axi_rlast_ahb : sb_axi_rlast; + + assign dma_axi_awvalid_int = pt.BUILD_AHB_LITE ? dma_axi_awvalid_ahb : dma_axi_awvalid; + assign dma_axi_awid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_awid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_awid[pt.DMA_BUS_TAG-1:0]; + assign dma_axi_awaddr_int[31:0] = pt.BUILD_AHB_LITE ? dma_axi_awaddr_ahb[31:0] : dma_axi_awaddr[31:0]; + assign dma_axi_awsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_awsize_ahb[2:0] : dma_axi_awsize[2:0]; + assign dma_axi_awprot_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_awprot_ahb[2:0] : dma_axi_awprot[2:0]; + assign dma_axi_awlen_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_awlen_ahb[7:0] : dma_axi_awlen[7:0]; + assign dma_axi_awburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_awburst_ahb[1:0] : dma_axi_awburst[1:0]; + assign dma_axi_wvalid_int = pt.BUILD_AHB_LITE ? dma_axi_wvalid_ahb : dma_axi_wvalid; + assign dma_axi_wdata_int[63:0] = pt.BUILD_AHB_LITE ? dma_axi_wdata_ahb[63:0] : dma_axi_wdata; + assign dma_axi_wstrb_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_wstrb_ahb[7:0] : dma_axi_wstrb[7:0]; + assign dma_axi_wlast_int = pt.BUILD_AHB_LITE ? dma_axi_wlast_ahb : dma_axi_wlast; + assign dma_axi_bready_int = pt.BUILD_AHB_LITE ? dma_axi_bready_ahb : dma_axi_bready; + assign dma_axi_arvalid_int = pt.BUILD_AHB_LITE ? dma_axi_arvalid_ahb : dma_axi_arvalid; + assign dma_axi_arid_int[pt.DMA_BUS_TAG-1:0] = pt.BUILD_AHB_LITE ? dma_axi_arid_ahb[pt.DMA_BUS_TAG-1:0] : dma_axi_arid[pt.DMA_BUS_TAG-1:0]; + assign dma_axi_araddr_int[31:0] = pt.BUILD_AHB_LITE ? 
dma_axi_araddr_ahb[31:0] : dma_axi_araddr[31:0]; + assign dma_axi_arsize_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arsize_ahb[2:0] : dma_axi_arsize[2:0]; + assign dma_axi_arprot_int[2:0] = pt.BUILD_AHB_LITE ? dma_axi_arprot_ahb[2:0] : dma_axi_arprot[2:0]; + assign dma_axi_arlen_int[7:0] = pt.BUILD_AHB_LITE ? dma_axi_arlen_ahb[7:0] : dma_axi_arlen[7:0]; + assign dma_axi_arburst_int[1:0] = pt.BUILD_AHB_LITE ? dma_axi_arburst_ahb[1:0] : dma_axi_arburst[1:0]; + assign dma_axi_rready_int = pt.BUILD_AHB_LITE ? dma_axi_rready_ahb : dma_axi_rready; + + + if (pt.BUILD_AHB_LITE == 1) begin + + end // if (pt.BUILD_AHB_LITE == 1) + + + // unpack packet + // also need retires_p==3 + + assign trace_rv_i_insn_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_insn_ip[31:0]; + + assign trace_rv_i_address_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_address_ip[31:0]; + + assign trace_rv_i_valid_ip = trace_rv_trace_pkt.trace_rv_i_valid_ip; + + assign trace_rv_i_exception_ip = trace_rv_trace_pkt.trace_rv_i_exception_ip; + + assign trace_rv_i_ecause_ip[4:0] = trace_rv_trace_pkt.trace_rv_i_ecause_ip[4:0]; + + assign trace_rv_i_interrupt_ip = trace_rv_trace_pkt.trace_rv_i_interrupt_ip; + + assign trace_rv_i_tval_ip[31:0] = trace_rv_trace_pkt.trace_rv_i_tval_ip[31:0]; + + + +endmodule // el2_swerv diff --git a/Flow/design/el2_swerv_wrapper.sv b/Flow/design/el2_swerv_wrapper.sv index 34ca46f..26b4b17 100644 --- a/Flow/design/el2_swerv_wrapper.sv +++ b/Flow/design/el2_swerv_wrapper.sv @@ -21,702 +21,705 @@ // //******************************************************************************** module el2_swerv_wrapper -import el2_pkg::*; - #( -`include "el2_param.vh" -) -( - input logic clk, - input logic rst_l, - input logic dbg_rst_l, - input logic [31:1] rst_vec, - input logic nmi_int, - input logic [31:1] nmi_vec, - input logic [31:1] jtag_id, + import el2_pkg::*; +#( + `include "el2_param.vh" +) ( + input logic clk, + input logic rst_l, + input logic dbg_rst_l, + input logic [31:1] rst_vec, + input logic 
nmi_int, + input logic [31:1] nmi_vec, + input logic [31:1] jtag_id, - output logic [31:0] trace_rv_i_insn_ip, - output logic [31:0] trace_rv_i_address_ip, - output logic trace_rv_i_valid_ip, - output logic trace_rv_i_exception_ip, - output logic [4:0] trace_rv_i_ecause_ip, - output logic trace_rv_i_interrupt_ip, - output logic [31:0] trace_rv_i_tval_ip, + output logic [31:0] trace_rv_i_insn_ip, + output logic [31:0] trace_rv_i_address_ip, + output logic trace_rv_i_valid_ip, + output logic trace_rv_i_exception_ip, + output logic [ 4:0] trace_rv_i_ecause_ip, + output logic trace_rv_i_interrupt_ip, + output logic [31:0] trace_rv_i_tval_ip, - // Bus signals + // Bus signals `ifdef RV_BUILD_AXI4 - //-------------------------- LSU AXI signals-------------------------- - // AXI Write Channels - output logic lsu_axi_awvalid, - input logic lsu_axi_awready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, - output logic [31:0] lsu_axi_awaddr, - output logic [3:0] lsu_axi_awregion, - output logic [7:0] lsu_axi_awlen, - output logic [2:0] lsu_axi_awsize, - output logic [1:0] lsu_axi_awburst, - output logic lsu_axi_awlock, - output logic [3:0] lsu_axi_awcache, - output logic [2:0] lsu_axi_awprot, - output logic [3:0] lsu_axi_awqos, + //-------------------------- LSU AXI signals-------------------------- + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [ 31:0] lsu_axi_awaddr, + output logic [ 3:0] lsu_axi_awregion, + output logic [ 7:0] lsu_axi_awlen, + output logic [ 2:0] lsu_axi_awsize, + output logic [ 1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [ 3:0] lsu_axi_awcache, + output logic [ 2:0] lsu_axi_awprot, + output logic [ 3:0] lsu_axi_awqos, - output logic lsu_axi_wvalid, - input logic lsu_axi_wready, - output logic [63:0] lsu_axi_wdata, - output logic [7:0] lsu_axi_wstrb, - output logic lsu_axi_wlast, + output logic lsu_axi_wvalid, + input logic 
lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [ 7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, - input logic lsu_axi_bvalid, - output logic lsu_axi_bready, - input logic [1:0] lsu_axi_bresp, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [ 1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, - // AXI Read Channels - output logic lsu_axi_arvalid, - input logic lsu_axi_arready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, - output logic [31:0] lsu_axi_araddr, - output logic [3:0] lsu_axi_arregion, - output logic [7:0] lsu_axi_arlen, - output logic [2:0] lsu_axi_arsize, - output logic [1:0] lsu_axi_arburst, - output logic lsu_axi_arlock, - output logic [3:0] lsu_axi_arcache, - output logic [2:0] lsu_axi_arprot, - output logic [3:0] lsu_axi_arqos, + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [ 31:0] lsu_axi_araddr, + output logic [ 3:0] lsu_axi_arregion, + output logic [ 7:0] lsu_axi_arlen, + output logic [ 2:0] lsu_axi_arsize, + output logic [ 1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [ 3:0] lsu_axi_arcache, + output logic [ 2:0] lsu_axi_arprot, + output logic [ 3:0] lsu_axi_arqos, - input logic lsu_axi_rvalid, - output logic lsu_axi_rready, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, - input logic [63:0] lsu_axi_rdata, - input logic [1:0] lsu_axi_rresp, - input logic lsu_axi_rlast, + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [ 63:0] lsu_axi_rdata, + input logic [ 1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, - //-------------------------- IFU AXI signals-------------------------- - // AXI Write Channels - output logic ifu_axi_awvalid, - input logic ifu_axi_awready, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, - output logic 
[31:0] ifu_axi_awaddr, - output logic [3:0] ifu_axi_awregion, - output logic [7:0] ifu_axi_awlen, - output logic [2:0] ifu_axi_awsize, - output logic [1:0] ifu_axi_awburst, - output logic ifu_axi_awlock, - output logic [3:0] ifu_axi_awcache, - output logic [2:0] ifu_axi_awprot, - output logic [3:0] ifu_axi_awqos, + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + input logic ifu_axi_awready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [ 31:0] ifu_axi_awaddr, + output logic [ 3:0] ifu_axi_awregion, + output logic [ 7:0] ifu_axi_awlen, + output logic [ 2:0] ifu_axi_awsize, + output logic [ 1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [ 3:0] ifu_axi_awcache, + output logic [ 2:0] ifu_axi_awprot, + output logic [ 3:0] ifu_axi_awqos, - output logic ifu_axi_wvalid, - input logic ifu_axi_wready, - output logic [63:0] ifu_axi_wdata, - output logic [7:0] ifu_axi_wstrb, - output logic ifu_axi_wlast, + output logic ifu_axi_wvalid, + input logic ifu_axi_wready, + output logic [63:0] ifu_axi_wdata, + output logic [ 7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, - input logic ifu_axi_bvalid, - output logic ifu_axi_bready, - input logic [1:0] ifu_axi_bresp, - input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_bid, + input logic ifu_axi_bvalid, + output logic ifu_axi_bready, + input logic [ 1:0] ifu_axi_bresp, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_bid, - // AXI Read Channels - output logic ifu_axi_arvalid, - input logic ifu_axi_arready, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, - output logic [31:0] ifu_axi_araddr, - output logic [3:0] ifu_axi_arregion, - output logic [7:0] ifu_axi_arlen, - output logic [2:0] ifu_axi_arsize, - output logic [1:0] ifu_axi_arburst, - output logic ifu_axi_arlock, - output logic [3:0] ifu_axi_arcache, - output logic [2:0] ifu_axi_arprot, - output logic [3:0] ifu_axi_arqos, + // AXI Read Channels + output logic 
ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [ 31:0] ifu_axi_araddr, + output logic [ 3:0] ifu_axi_arregion, + output logic [ 7:0] ifu_axi_arlen, + output logic [ 2:0] ifu_axi_arsize, + output logic [ 1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [ 3:0] ifu_axi_arcache, + output logic [ 2:0] ifu_axi_arprot, + output logic [ 3:0] ifu_axi_arqos, - input logic ifu_axi_rvalid, - output logic ifu_axi_rready, - input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, - input logic [63:0] ifu_axi_rdata, - input logic [1:0] ifu_axi_rresp, - input logic ifu_axi_rlast, + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [ 63:0] ifu_axi_rdata, + input logic [ 1:0] ifu_axi_rresp, + input logic ifu_axi_rlast, - //-------------------------- SB AXI signals-------------------------- - // AXI Write Channels - output logic sb_axi_awvalid, - input logic sb_axi_awready, - output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, - output logic [31:0] sb_axi_awaddr, - output logic [3:0] sb_axi_awregion, - output logic [7:0] sb_axi_awlen, - output logic [2:0] sb_axi_awsize, - output logic [1:0] sb_axi_awburst, - output logic sb_axi_awlock, - output logic [3:0] sb_axi_awcache, - output logic [2:0] sb_axi_awprot, - output logic [3:0] sb_axi_awqos, + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + output logic sb_axi_awvalid, + input logic sb_axi_awready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_awid, + output logic [ 31:0] sb_axi_awaddr, + output logic [ 3:0] sb_axi_awregion, + output logic [ 7:0] sb_axi_awlen, + output logic [ 2:0] sb_axi_awsize, + output logic [ 1:0] sb_axi_awburst, + output logic sb_axi_awlock, + output logic [ 3:0] sb_axi_awcache, + output logic [ 2:0] sb_axi_awprot, + output logic [ 3:0] sb_axi_awqos, - output logic sb_axi_wvalid, - input logic sb_axi_wready, - output logic [63:0] 
sb_axi_wdata, - output logic [7:0] sb_axi_wstrb, - output logic sb_axi_wlast, + output logic sb_axi_wvalid, + input logic sb_axi_wready, + output logic [63:0] sb_axi_wdata, + output logic [ 7:0] sb_axi_wstrb, + output logic sb_axi_wlast, - input logic sb_axi_bvalid, - output logic sb_axi_bready, - input logic [1:0] sb_axi_bresp, - input logic [pt.SB_BUS_TAG-1:0] sb_axi_bid, + input logic sb_axi_bvalid, + output logic sb_axi_bready, + input logic [ 1:0] sb_axi_bresp, + input logic [pt.SB_BUS_TAG-1:0] sb_axi_bid, - // AXI Read Channels - output logic sb_axi_arvalid, - input logic sb_axi_arready, - output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, - output logic [31:0] sb_axi_araddr, - output logic [3:0] sb_axi_arregion, - output logic [7:0] sb_axi_arlen, - output logic [2:0] sb_axi_arsize, - output logic [1:0] sb_axi_arburst, - output logic sb_axi_arlock, - output logic [3:0] sb_axi_arcache, - output logic [2:0] sb_axi_arprot, - output logic [3:0] sb_axi_arqos, + // AXI Read Channels + output logic sb_axi_arvalid, + input logic sb_axi_arready, + output logic [pt.SB_BUS_TAG-1:0] sb_axi_arid, + output logic [ 31:0] sb_axi_araddr, + output logic [ 3:0] sb_axi_arregion, + output logic [ 7:0] sb_axi_arlen, + output logic [ 2:0] sb_axi_arsize, + output logic [ 1:0] sb_axi_arburst, + output logic sb_axi_arlock, + output logic [ 3:0] sb_axi_arcache, + output logic [ 2:0] sb_axi_arprot, + output logic [ 3:0] sb_axi_arqos, - input logic sb_axi_rvalid, - output logic sb_axi_rready, - input logic [pt.SB_BUS_TAG-1:0] sb_axi_rid, - input logic [63:0] sb_axi_rdata, - input logic [1:0] sb_axi_rresp, - input logic sb_axi_rlast, + input logic sb_axi_rvalid, + output logic sb_axi_rready, + input logic [pt.SB_BUS_TAG-1:0] sb_axi_rid, + input logic [ 63:0] sb_axi_rdata, + input logic [ 1:0] sb_axi_rresp, + input logic sb_axi_rlast, - //-------------------------- DMA AXI signals-------------------------- - // AXI Write Channels - input logic dma_axi_awvalid, - output logic dma_axi_awready, - 
input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, - input logic [31:0] dma_axi_awaddr, - input logic [2:0] dma_axi_awsize, - input logic [2:0] dma_axi_awprot, - input logic [7:0] dma_axi_awlen, - input logic [1:0] dma_axi_awburst, + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + input logic dma_axi_awvalid, + output logic dma_axi_awready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_awid, + input logic [ 31:0] dma_axi_awaddr, + input logic [ 2:0] dma_axi_awsize, + input logic [ 2:0] dma_axi_awprot, + input logic [ 7:0] dma_axi_awlen, + input logic [ 1:0] dma_axi_awburst, - input logic dma_axi_wvalid, - output logic dma_axi_wready, - input logic [63:0] dma_axi_wdata, - input logic [7:0] dma_axi_wstrb, - input logic dma_axi_wlast, + input logic dma_axi_wvalid, + output logic dma_axi_wready, + input logic [63:0] dma_axi_wdata, + input logic [ 7:0] dma_axi_wstrb, + input logic dma_axi_wlast, - output logic dma_axi_bvalid, - input logic dma_axi_bready, - output logic [1:0] dma_axi_bresp, - output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, + output logic dma_axi_bvalid, + input logic dma_axi_bready, + output logic [ 1:0] dma_axi_bresp, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_bid, - // AXI Read Channels - input logic dma_axi_arvalid, - output logic dma_axi_arready, - input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, - input logic [31:0] dma_axi_araddr, - input logic [2:0] dma_axi_arsize, - input logic [2:0] dma_axi_arprot, - input logic [7:0] dma_axi_arlen, - input logic [1:0] dma_axi_arburst, + // AXI Read Channels + input logic dma_axi_arvalid, + output logic dma_axi_arready, + input logic [pt.DMA_BUS_TAG-1:0] dma_axi_arid, + input logic [ 31:0] dma_axi_araddr, + input logic [ 2:0] dma_axi_arsize, + input logic [ 2:0] dma_axi_arprot, + input logic [ 7:0] dma_axi_arlen, + input logic [ 1:0] dma_axi_arburst, - output logic dma_axi_rvalid, - input logic dma_axi_rready, - output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, - output 
logic [63:0] dma_axi_rdata, - output logic [1:0] dma_axi_rresp, - output logic dma_axi_rlast, + output logic dma_axi_rvalid, + input logic dma_axi_rready, + output logic [pt.DMA_BUS_TAG-1:0] dma_axi_rid, + output logic [ 63:0] dma_axi_rdata, + output logic [ 1:0] dma_axi_rresp, + output logic dma_axi_rlast, `endif `ifdef RV_BUILD_AHB_LITE - //// AHB LITE BUS - output logic [31:0] haddr, - output logic [2:0] hburst, - output logic hmastlock, - output logic [3:0] hprot, - output logic [2:0] hsize, - output logic [1:0] htrans, - output logic hwrite, + //// AHB LITE BUS + output logic [31:0] haddr, + output logic [ 2:0] hburst, + output logic hmastlock, + output logic [ 3:0] hprot, + output logic [ 2:0] hsize, + output logic [ 1:0] htrans, + output logic hwrite, - input logic [63:0] hrdata, - input logic hready, - input logic hresp, + input logic [63:0] hrdata, + input logic hready, + input logic hresp, - // LSU AHB Master - output logic [31:0] lsu_haddr, - output logic [2:0] lsu_hburst, - output logic lsu_hmastlock, - output logic [3:0] lsu_hprot, - output logic [2:0] lsu_hsize, - output logic [1:0] lsu_htrans, - output logic lsu_hwrite, - output logic [63:0] lsu_hwdata, + // LSU AHB Master + output logic [31:0] lsu_haddr, + output logic [ 2:0] lsu_hburst, + output logic lsu_hmastlock, + output logic [ 3:0] lsu_hprot, + output logic [ 2:0] lsu_hsize, + output logic [ 1:0] lsu_htrans, + output logic lsu_hwrite, + output logic [63:0] lsu_hwdata, - input logic [63:0] lsu_hrdata, - input logic lsu_hready, - input logic lsu_hresp, - // Debug Syster Bus AHB - output logic [31:0] sb_haddr, - output logic [2:0] sb_hburst, - output logic sb_hmastlock, - output logic [3:0] sb_hprot, - output logic [2:0] sb_hsize, - output logic [1:0] sb_htrans, - output logic sb_hwrite, - output logic [63:0] sb_hwdata, + input logic [63:0] lsu_hrdata, + input logic lsu_hready, + input logic lsu_hresp, + // Debug System Bus AHB + output logic [31:0] sb_haddr, + output logic [ 2:0] sb_hburst, +
output logic sb_hmastlock, + output logic [ 3:0] sb_hprot, + output logic [ 2:0] sb_hsize, + output logic [ 1:0] sb_htrans, + output logic sb_hwrite, + output logic [63:0] sb_hwdata, - input logic [63:0] sb_hrdata, - input logic sb_hready, - input logic sb_hresp, + input logic [63:0] sb_hrdata, + input logic sb_hready, + input logic sb_hresp, - // DMA Slave - input logic dma_hsel, - input logic [31:0] dma_haddr, - input logic [2:0] dma_hburst, - input logic dma_hmastlock, - input logic [3:0] dma_hprot, - input logic [2:0] dma_hsize, - input logic [1:0] dma_htrans, - input logic dma_hwrite, - input logic [63:0] dma_hwdata, - input logic dma_hreadyin, + // DMA Slave + input logic dma_hsel, + input logic [31:0] dma_haddr, + input logic [ 2:0] dma_hburst, + input logic dma_hmastlock, + input logic [ 3:0] dma_hprot, + input logic [ 2:0] dma_hsize, + input logic [ 1:0] dma_htrans, + input logic dma_hwrite, + input logic [63:0] dma_hwdata, + input logic dma_hreadyin, - output logic [63:0] dma_hrdata, - output logic dma_hreadyout, - output logic dma_hresp, + output logic [63:0] dma_hrdata, + output logic dma_hreadyout, + output logic dma_hresp, `endif - // clk ratio signals - input logic lsu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface - input logic ifu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface - input logic dbg_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface - input logic dma_bus_clk_en, // Clock ratio b/w cpu core clk & AHB slave interface + // clk ratio signals + input logic lsu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic ifu_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic dbg_bus_clk_en, // Clock ratio b/w cpu core clk & AHB master interface + input logic dma_bus_clk_en, // Clock ratio b/w cpu core clk & AHB slave interface - // all of these test inputs are brought to top-level; must be tied off based on usage by physical design (ie. 
icache or not, iccm or not, dccm or not) + // all of these test inputs are brought to top-level; must be tied off based on usage by physical design (ie. icache or not, iccm or not, dccm or not) - input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt, - input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, - input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, - input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, + input el2_dccm_ext_in_pkt_t [pt.DCCM_NUM_BANKS-1:0] dccm_ext_in_pkt, + input el2_ccm_ext_in_pkt_t [pt.ICCM_NUM_BANKS-1:0] iccm_ext_in_pkt, + input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, + input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, - input logic timer_int, - input logic soft_int, - input logic [pt.PIC_TOTAL_INT:1] extintsrc_req, + input logic timer_int, + input logic soft_int, + input logic [pt.PIC_TOTAL_INT:1] extintsrc_req, - output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc - output logic dec_tlu_perfcnt1, - output logic dec_tlu_perfcnt2, - output logic dec_tlu_perfcnt3, + output logic dec_tlu_perfcnt0, // toggles when slot0 perf counter 0 has an event inc + output logic dec_tlu_perfcnt1, + output logic dec_tlu_perfcnt2, + output logic dec_tlu_perfcnt3, - // ports added by the soc team - input logic jtag_tck, // JTAG clk - input logic jtag_tms, // JTAG TMS - input logic jtag_tdi, // JTAG tdi - input logic jtag_trst_n, // JTAG Reset - output logic jtag_tdo, // JTAG TDO + // ports added by the soc team + input logic jtag_tck, // JTAG clk + input logic jtag_tms, // JTAG TMS + input logic jtag_tdi, // JTAG tdi + input logic jtag_trst_n, // JTAG Reset + output logic jtag_tdo, // JTAG TDO - input logic [31:4] core_id, + input logic [31:4] core_id, - // external MPC halt/run interface - input logic mpc_debug_halt_req, // Async halt request - 
input logic mpc_debug_run_req, // Async run request - input logic mpc_reset_run_req, // Run/halt after reset - output logic mpc_debug_halt_ack, // Halt ack - output logic mpc_debug_run_ack, // Run ack - output logic debug_brkpt_status, // debug breakpoint + // external MPC halt/run interface + input logic mpc_debug_halt_req, // Async halt request + input logic mpc_debug_run_req, // Async run request + input logic mpc_reset_run_req, // Run/halt after reset + output logic mpc_debug_halt_ack, // Halt ack + output logic mpc_debug_run_ack, // Run ack + output logic debug_brkpt_status, // debug breakpoint - input logic i_cpu_halt_req, // Async halt req to CPU - output logic o_cpu_halt_ack, // core response to halt - output logic o_cpu_halt_status, // 1'b1 indicates core is halted - output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. When core is in debug mode, the PMU should refrain from sendng a halt or run request - input logic i_cpu_run_req, // Async restart req to CPU - output logic o_cpu_run_ack, // Core response to run req - input logic scan_mode, // To enable scan mode - input logic mbist_mode // to enable mbist + input logic i_cpu_halt_req, // Async halt req to CPU + output logic o_cpu_halt_ack, // core response to halt + output logic o_cpu_halt_status, // 1'b1 indicates core is halted + output logic o_debug_mode_status, // Core to the PMU that core is in debug mode. 
When core is in debug mode, the PMU should refrain from sending a halt or run request + input logic i_cpu_run_req, // Async restart req to CPU + output logic o_cpu_run_ack, // Core response to run req + input logic scan_mode, // To enable scan mode + input logic mbist_mode // to enable mbist ); - logic active_l2clk; - logic free_l2clk; + logic active_l2clk; + logic free_l2clk; - // DCCM ports - logic dccm_wren; - logic dccm_rden; - logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo; - logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi; - logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo; - logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi; - logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo; - logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi; + // DCCM ports + logic dccm_wren; + logic dccm_rden; + logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo; + logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi; + logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo; + logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi; - logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo; - logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo; + logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi; - // PIC ports + // PIC ports - // Icache & Itag ports - logic [31:1] ic_rw_addr; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en ; // Which way to write - logic ic_rd_en ; + // Icache & Itag ports + logic [31:1] ic_rw_addr; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en; // Which way to write + logic ic_rd_en; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid; // Valid from the I$ tag valid outside (in flops). + logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid; // Valid from the I$ tag valid outside (in flops).
- logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit; // ic_rd_hit[3:0] - logic ic_tag_perr; // Ic tag parity error + logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit; // ic_rd_hit[3:0] + logic ic_tag_perr; // Ic tag parity error - logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr; // Read/Write addresss to the Icache. - logic ic_debug_rd_en; // Icache debug rd - logic ic_debug_wr_en; // Icache debug wr - logic ic_debug_tag_array; // Debug tag array - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way; // Debug way. Rd or Wr. + logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr; // Read/Write addresss to the Icache. + logic ic_debug_rd_en; // Icache debug rd + logic ic_debug_wr_en; // Icache debug wr + logic ic_debug_tag_array; // Debug tag array + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way; // Debug way. Rd or Wr. - logic [25:0] ictag_debug_rd_data; // Debug icache tag. - logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data; - logic [63:0] ic_rd_data; - logic [70:0] ic_debug_rd_data; // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - logic [70:0] ic_debug_wr_data; // Debug wr cache. + logic [25:0] ictag_debug_rd_data; // Debug icache tag. + logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data; + logic [63:0] ic_rd_data; + logic [70:0] ic_debug_rd_data; // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + logic [70:0] ic_debug_wr_data; // Debug wr cache. 
- logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr; // ecc error per bank - logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr; // parity error per bank + logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr; // ecc error per bank + logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr; // parity error per bank - logic [63:0] ic_premux_data; - logic ic_sel_premux_data; + logic [63:0] ic_premux_data; + logic ic_sel_premux_data; - // ICCM ports - logic [pt.ICCM_BITS-1:1] iccm_rw_addr; - logic iccm_wren; - logic iccm_rden; - logic [2:0] iccm_wr_size; - logic [77:0] iccm_wr_data; - logic iccm_buf_correct_ecc; - logic iccm_correction_state; + // ICCM ports + logic [pt.ICCM_BITS-1:1] iccm_rw_addr; + logic iccm_wren; + logic iccm_rden; + logic [2:0] iccm_wr_size; + logic [77:0] iccm_wr_data; + logic iccm_buf_correct_ecc; + logic iccm_correction_state; - logic [63:0] iccm_rd_data; - logic [77:0] iccm_rd_data_ecc; + logic [63:0] iccm_rd_data; + logic [77:0] iccm_rd_data_ecc; - logic core_rst_l; // Core reset including rst_l and dbg_rst_l - logic jtag_tdoEn; + logic core_rst_l; // Core reset including rst_l and dbg_rst_l + logic jtag_tdoEn; - logic dccm_clk_override; - logic icm_clk_override; - logic dec_tlu_core_ecc_disable; + logic dccm_clk_override; + logic icm_clk_override; + logic dec_tlu_core_ecc_disable; - // zero out the signals not presented at the wrapper instantiation level + // zero out the signals not presented at the wrapper instantiation level `ifdef RV_BUILD_AXI4 - //// AHB LITE BUS - logic [31:0] haddr; - logic [2:0] hburst; - logic hmastlock; - logic [3:0] hprot; - logic [2:0] hsize; - logic [1:0] htrans; - logic hwrite; + //// AHB LITE BUS + logic [31:0] haddr; + logic [ 2:0] hburst; + logic hmastlock; + logic [ 3:0] hprot; + logic [ 2:0] hsize; + logic [ 1:0] htrans; + logic hwrite; - logic [63:0] hrdata; - logic hready; - logic hresp; + logic [63:0] hrdata; + logic hready; + logic hresp; - // LSU AHB Master - logic [31:0] lsu_haddr; - logic [2:0] lsu_hburst; - logic lsu_hmastlock; - logic 
[3:0] lsu_hprot; - logic [2:0] lsu_hsize; - logic [1:0] lsu_htrans; - logic lsu_hwrite; - logic [63:0] lsu_hwdata; + // LSU AHB Master + logic [31:0] lsu_haddr; + logic [ 2:0] lsu_hburst; + logic lsu_hmastlock; + logic [ 3:0] lsu_hprot; + logic [ 2:0] lsu_hsize; + logic [ 1:0] lsu_htrans; + logic lsu_hwrite; + logic [63:0] lsu_hwdata; - logic [63:0] lsu_hrdata; - logic lsu_hready; - logic lsu_hresp; - // Debug Syster Bus AHB - logic [31:0] sb_haddr; - logic [2:0] sb_hburst; - logic sb_hmastlock; - logic [3:0] sb_hprot; - logic [2:0] sb_hsize; - logic [1:0] sb_htrans; - logic sb_hwrite; - logic [63:0] sb_hwdata; + logic [63:0] lsu_hrdata; + logic lsu_hready; + logic lsu_hresp; + // Debug Syster Bus AHB + logic [31:0] sb_haddr; + logic [ 2:0] sb_hburst; + logic sb_hmastlock; + logic [ 3:0] sb_hprot; + logic [ 2:0] sb_hsize; + logic [ 1:0] sb_htrans; + logic sb_hwrite; + logic [63:0] sb_hwdata; - logic [63:0] sb_hrdata; - logic sb_hready; - logic sb_hresp; + logic [63:0] sb_hrdata; + logic sb_hready; + logic sb_hresp; - // DMA Slave - logic dma_hsel; - logic [31:0] dma_haddr; - logic [2:0] dma_hburst; - logic dma_hmastlock; - logic [3:0] dma_hprot; - logic [2:0] dma_hsize; - logic [1:0] dma_htrans; - logic dma_hwrite; - logic [63:0] dma_hwdata; - logic dma_hreadyin; + // DMA Slave + logic dma_hsel; + logic [31:0] dma_haddr; + logic [ 2:0] dma_hburst; + logic dma_hmastlock; + logic [ 3:0] dma_hprot; + logic [ 2:0] dma_hsize; + logic [ 1:0] dma_htrans; + logic dma_hwrite; + logic [63:0] dma_hwdata; + logic dma_hreadyin; - logic [63:0] dma_hrdata; - logic dma_hreadyout; - logic dma_hresp; + logic [63:0] dma_hrdata; + logic dma_hreadyout; + logic dma_hresp; - // AHB - assign hrdata[63:0] = '0; - assign hready = '0; - assign hresp = '0; - // LSU - assign lsu_hrdata[63:0] = '0; - assign lsu_hready = '0; - assign lsu_hresp = '0; - // Debu - assign sb_hrdata[63:0] = '0; - assign sb_hready = '0; - assign sb_hresp = '0; + // AHB + assign hrdata[63:0] = '0; + assign hready = '0; 
+ assign hresp = '0; + // LSU + assign lsu_hrdata[63:0] = '0; + assign lsu_hready = '0; + assign lsu_hresp = '0; + // Debu + assign sb_hrdata[63:0] = '0; + assign sb_hready = '0; + assign sb_hresp = '0; - // DMA - assign dma_hsel = '0; - assign dma_haddr[31:0] = '0; - assign dma_hburst[2:0] = '0; - assign dma_hmastlock = '0; - assign dma_hprot[3:0] = '0; - assign dma_hsize[2:0] = '0; - assign dma_htrans[1:0] = '0; - assign dma_hwrite = '0; - assign dma_hwdata[63:0] = '0; - assign dma_hreadyin = '0; + // DMA + assign dma_hsel = '0; + assign dma_haddr[31:0] = '0; + assign dma_hburst[2:0] = '0; + assign dma_hmastlock = '0; + assign dma_hprot[3:0] = '0; + assign dma_hsize[2:0] = '0; + assign dma_htrans[1:0] = '0; + assign dma_hwrite = '0; + assign dma_hwdata[63:0] = '0; + assign dma_hreadyin = '0; -`endif // `ifdef RV_BUILD_AXI4 +`endif // `ifdef RV_BUILD_AXI4 `ifdef RV_BUILD_AHB_LITE - wire lsu_axi_awvalid; - wire lsu_axi_awready; - wire [pt.LSU_BUS_TAG-1:0] lsu_axi_awid; - wire [31:0] lsu_axi_awaddr; - wire [3:0] lsu_axi_awregion; - wire [7:0] lsu_axi_awlen; - wire [2:0] lsu_axi_awsize; - wire [1:0] lsu_axi_awburst; - wire lsu_axi_awlock; - wire [3:0] lsu_axi_awcache; - wire [2:0] lsu_axi_awprot; - wire [3:0] lsu_axi_awqos; + wire lsu_axi_awvalid; + wire lsu_axi_awready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_awid; + wire [ 31:0] lsu_axi_awaddr; + wire [ 3:0] lsu_axi_awregion; + wire [ 7:0] lsu_axi_awlen; + wire [ 2:0] lsu_axi_awsize; + wire [ 1:0] lsu_axi_awburst; + wire lsu_axi_awlock; + wire [ 3:0] lsu_axi_awcache; + wire [ 2:0] lsu_axi_awprot; + wire [ 3:0] lsu_axi_awqos; - wire lsu_axi_wvalid; - wire lsu_axi_wready; - wire [63:0] lsu_axi_wdata; - wire [7:0] lsu_axi_wstrb; - wire lsu_axi_wlast; + wire lsu_axi_wvalid; + wire lsu_axi_wready; + wire [ 63:0] lsu_axi_wdata; + wire [ 7:0] lsu_axi_wstrb; + wire lsu_axi_wlast; - wire lsu_axi_bvalid; - wire lsu_axi_bready; - wire [1:0] lsu_axi_bresp; - wire [pt.LSU_BUS_TAG-1:0] lsu_axi_bid; + wire lsu_axi_bvalid; + wire 
lsu_axi_bready; + wire [ 1:0] lsu_axi_bresp; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_bid; - // AXI Read Channels - wire lsu_axi_arvalid; - wire lsu_axi_arready; - wire [pt.LSU_BUS_TAG-1:0] lsu_axi_arid; - wire [31:0] lsu_axi_araddr; - wire [3:0] lsu_axi_arregion; - wire [7:0] lsu_axi_arlen; - wire [2:0] lsu_axi_arsize; - wire [1:0] lsu_axi_arburst; - wire lsu_axi_arlock; - wire [3:0] lsu_axi_arcache; - wire [2:0] lsu_axi_arprot; - wire [3:0] lsu_axi_arqos; + // AXI Read Channels + wire lsu_axi_arvalid; + wire lsu_axi_arready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_arid; + wire [ 31:0] lsu_axi_araddr; + wire [ 3:0] lsu_axi_arregion; + wire [ 7:0] lsu_axi_arlen; + wire [ 2:0] lsu_axi_arsize; + wire [ 1:0] lsu_axi_arburst; + wire lsu_axi_arlock; + wire [ 3:0] lsu_axi_arcache; + wire [ 2:0] lsu_axi_arprot; + wire [ 3:0] lsu_axi_arqos; - wire lsu_axi_rvalid; - wire lsu_axi_rready; - wire [pt.LSU_BUS_TAG-1:0] lsu_axi_rid; - wire [63:0] lsu_axi_rdata; - wire [1:0] lsu_axi_rresp; - wire lsu_axi_rlast; + wire lsu_axi_rvalid; + wire lsu_axi_rready; + wire [pt.LSU_BUS_TAG-1:0] lsu_axi_rid; + wire [ 63:0] lsu_axi_rdata; + wire [ 1:0] lsu_axi_rresp; + wire lsu_axi_rlast; - //-------------------------- IFU AXI signals-------------------------- - // AXI Write Channels - wire ifu_axi_awvalid; - wire ifu_axi_awready; - wire [pt.IFU_BUS_TAG-1:0] ifu_axi_awid; - wire [31:0] ifu_axi_awaddr; - wire [3:0] ifu_axi_awregion; - wire [7:0] ifu_axi_awlen; - wire [2:0] ifu_axi_awsize; - wire [1:0] ifu_axi_awburst; - wire ifu_axi_awlock; - wire [3:0] ifu_axi_awcache; - wire [2:0] ifu_axi_awprot; - wire [3:0] ifu_axi_awqos; + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + wire ifu_axi_awvalid; + wire ifu_axi_awready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_awid; + wire [ 31:0] ifu_axi_awaddr; + wire [ 3:0] ifu_axi_awregion; + wire [ 7:0] ifu_axi_awlen; + wire [ 2:0] ifu_axi_awsize; + wire [ 1:0] ifu_axi_awburst; + wire ifu_axi_awlock; + wire [ 3:0] 
ifu_axi_awcache; + wire [ 2:0] ifu_axi_awprot; + wire [ 3:0] ifu_axi_awqos; - wire ifu_axi_wvalid; - wire ifu_axi_wready; - wire [63:0] ifu_axi_wdata; - wire [7:0] ifu_axi_wstrb; - wire ifu_axi_wlast; + wire ifu_axi_wvalid; + wire ifu_axi_wready; + wire [ 63:0] ifu_axi_wdata; + wire [ 7:0] ifu_axi_wstrb; + wire ifu_axi_wlast; - wire ifu_axi_bvalid; - wire ifu_axi_bready; - wire [1:0] ifu_axi_bresp; - wire [pt.IFU_BUS_TAG-1:0] ifu_axi_bid; + wire ifu_axi_bvalid; + wire ifu_axi_bready; + wire [ 1:0] ifu_axi_bresp; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_bid; - // AXI Read Channels - wire ifu_axi_arvalid; - wire ifu_axi_arready; - wire [pt.IFU_BUS_TAG-1:0] ifu_axi_arid; - wire [31:0] ifu_axi_araddr; - wire [3:0] ifu_axi_arregion; - wire [7:0] ifu_axi_arlen; - wire [2:0] ifu_axi_arsize; - wire [1:0] ifu_axi_arburst; - wire ifu_axi_arlock; - wire [3:0] ifu_axi_arcache; - wire [2:0] ifu_axi_arprot; - wire [3:0] ifu_axi_arqos; + // AXI Read Channels + wire ifu_axi_arvalid; + wire ifu_axi_arready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_arid; + wire [ 31:0] ifu_axi_araddr; + wire [ 3:0] ifu_axi_arregion; + wire [ 7:0] ifu_axi_arlen; + wire [ 2:0] ifu_axi_arsize; + wire [ 1:0] ifu_axi_arburst; + wire ifu_axi_arlock; + wire [ 3:0] ifu_axi_arcache; + wire [ 2:0] ifu_axi_arprot; + wire [ 3:0] ifu_axi_arqos; - wire ifu_axi_rvalid; - wire ifu_axi_rready; - wire [pt.IFU_BUS_TAG-1:0] ifu_axi_rid; - wire [63:0] ifu_axi_rdata; - wire [1:0] ifu_axi_rresp; - wire ifu_axi_rlast; + wire ifu_axi_rvalid; + wire ifu_axi_rready; + wire [pt.IFU_BUS_TAG-1:0] ifu_axi_rid; + wire [ 63:0] ifu_axi_rdata; + wire [ 1:0] ifu_axi_rresp; + wire ifu_axi_rlast; - //-------------------------- SB AXI signals-------------------------- - // AXI Write Channels - wire sb_axi_awvalid; - wire sb_axi_awready; - wire [pt.SB_BUS_TAG-1:0] sb_axi_awid; - wire [31:0] sb_axi_awaddr; - wire [3:0] sb_axi_awregion; - wire [7:0] sb_axi_awlen; - wire [2:0] sb_axi_awsize; - wire [1:0] sb_axi_awburst; - wire sb_axi_awlock; - wire 
[3:0] sb_axi_awcache; - wire [2:0] sb_axi_awprot; - wire [3:0] sb_axi_awqos; + //-------------------------- SB AXI signals-------------------------- + // AXI Write Channels + wire sb_axi_awvalid; + wire sb_axi_awready; + wire [ pt.SB_BUS_TAG-1:0] sb_axi_awid; + wire [ 31:0] sb_axi_awaddr; + wire [ 3:0] sb_axi_awregion; + wire [ 7:0] sb_axi_awlen; + wire [ 2:0] sb_axi_awsize; + wire [ 1:0] sb_axi_awburst; + wire sb_axi_awlock; + wire [ 3:0] sb_axi_awcache; + wire [ 2:0] sb_axi_awprot; + wire [ 3:0] sb_axi_awqos; - wire sb_axi_wvalid; - wire sb_axi_wready; - wire [63:0] sb_axi_wdata; - wire [7:0] sb_axi_wstrb; - wire sb_axi_wlast; + wire sb_axi_wvalid; + wire sb_axi_wready; + wire [ 63:0] sb_axi_wdata; + wire [ 7:0] sb_axi_wstrb; + wire sb_axi_wlast; - wire sb_axi_bvalid; - wire sb_axi_bready; - wire [1:0] sb_axi_bresp; - wire [pt.SB_BUS_TAG-1:0] sb_axi_bid; + wire sb_axi_bvalid; + wire sb_axi_bready; + wire [ 1:0] sb_axi_bresp; + wire [ pt.SB_BUS_TAG-1:0] sb_axi_bid; - // AXI Read Channels - wire sb_axi_arvalid; - wire sb_axi_arready; - wire [pt.SB_BUS_TAG-1:0] sb_axi_arid; - wire [31:0] sb_axi_araddr; - wire [3:0] sb_axi_arregion; - wire [7:0] sb_axi_arlen; - wire [2:0] sb_axi_arsize; - wire [1:0] sb_axi_arburst; - wire sb_axi_arlock; - wire [3:0] sb_axi_arcache; - wire [2:0] sb_axi_arprot; - wire [3:0] sb_axi_arqos; + // AXI Read Channels + wire sb_axi_arvalid; + wire sb_axi_arready; + wire [ pt.SB_BUS_TAG-1:0] sb_axi_arid; + wire [ 31:0] sb_axi_araddr; + wire [ 3:0] sb_axi_arregion; + wire [ 7:0] sb_axi_arlen; + wire [ 2:0] sb_axi_arsize; + wire [ 1:0] sb_axi_arburst; + wire sb_axi_arlock; + wire [ 3:0] sb_axi_arcache; + wire [ 2:0] sb_axi_arprot; + wire [ 3:0] sb_axi_arqos; - wire sb_axi_rvalid; - wire sb_axi_rready; - wire [pt.SB_BUS_TAG-1:0] sb_axi_rid; - wire [63:0] sb_axi_rdata; - wire [1:0] sb_axi_rresp; - wire sb_axi_rlast; + wire sb_axi_rvalid; + wire sb_axi_rready; + wire [ pt.SB_BUS_TAG-1:0] sb_axi_rid; + wire [ 63:0] sb_axi_rdata; + wire [ 1:0] 
sb_axi_rresp; + wire sb_axi_rlast; - //-------------------------- DMA AXI signals-------------------------- - // AXI Write Channels - wire dma_axi_awvalid; - wire dma_axi_awready; - wire [pt.DMA_BUS_TAG-1:0] dma_axi_awid; - wire [31:0] dma_axi_awaddr; - wire [2:0] dma_axi_awsize; - wire [2:0] dma_axi_awprot; - wire [7:0] dma_axi_awlen; - wire [1:0] dma_axi_awburst; + //-------------------------- DMA AXI signals-------------------------- + // AXI Write Channels + wire dma_axi_awvalid; + wire dma_axi_awready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_awid; + wire [ 31:0] dma_axi_awaddr; + wire [ 2:0] dma_axi_awsize; + wire [ 2:0] dma_axi_awprot; + wire [ 7:0] dma_axi_awlen; + wire [ 1:0] dma_axi_awburst; - wire dma_axi_wvalid; - wire dma_axi_wready; - wire [63:0] dma_axi_wdata; - wire [7:0] dma_axi_wstrb; - wire dma_axi_wlast; + wire dma_axi_wvalid; + wire dma_axi_wready; + wire [ 63:0] dma_axi_wdata; + wire [ 7:0] dma_axi_wstrb; + wire dma_axi_wlast; - wire dma_axi_bvalid; - wire dma_axi_bready; - wire [1:0] dma_axi_bresp; - wire [pt.DMA_BUS_TAG-1:0] dma_axi_bid; + wire dma_axi_bvalid; + wire dma_axi_bready; + wire [ 1:0] dma_axi_bresp; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_bid; - // AXI Read Channels - wire dma_axi_arvalid; - wire dma_axi_arready; - wire [pt.DMA_BUS_TAG-1:0] dma_axi_arid; - wire [31:0] dma_axi_araddr; - wire [2:0] dma_axi_arsize; - wire [2:0] dma_axi_arprot; - wire [7:0] dma_axi_arlen; - wire [1:0] dma_axi_arburst; + // AXI Read Channels + wire dma_axi_arvalid; + wire dma_axi_arready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_arid; + wire [ 31:0] dma_axi_araddr; + wire [ 2:0] dma_axi_arsize; + wire [ 2:0] dma_axi_arprot; + wire [ 7:0] dma_axi_arlen; + wire [ 1:0] dma_axi_arburst; - wire dma_axi_rvalid; - wire dma_axi_rready; - wire [pt.DMA_BUS_TAG-1:0] dma_axi_rid; - wire [63:0] dma_axi_rdata; - wire [1:0] dma_axi_rresp; - wire dma_axi_rlast; + wire dma_axi_rvalid; + wire dma_axi_rready; + wire [pt.DMA_BUS_TAG-1:0] dma_axi_rid; + wire [ 63:0] dma_axi_rdata; + 
wire [ 1:0] dma_axi_rresp; + wire dma_axi_rlast; - // AXI - assign ifu_axi_awready = 1'b1; - assign ifu_axi_wready = 1'b1; - assign ifu_axi_bvalid = '0; - assign ifu_axi_bresp[1:0] = '0; - assign ifu_axi_bid[pt.IFU_BUS_TAG-1:0] = '0; + // AXI + assign ifu_axi_awready = 1'b1; + assign ifu_axi_wready = 1'b1; + assign ifu_axi_bvalid = '0; + assign ifu_axi_bresp[1:0] = '0; + assign ifu_axi_bid[pt.IFU_BUS_TAG-1:0] = '0; -`endif // `ifdef RV_BUILD_AHB_LITE +`endif // `ifdef RV_BUILD_AHB_LITE - logic dmi_reg_en; - logic [6:0] dmi_reg_addr; - logic dmi_reg_wr_en; - logic [31:0] dmi_reg_wdata; - logic [31:0] dmi_reg_rdata; + logic dmi_reg_en; + logic [ 6:0] dmi_reg_addr; + logic dmi_reg_wr_en; + logic [31:0] dmi_reg_wdata; + logic [31:0] dmi_reg_rdata; - // Instantiate the el2_swerv core - el2_swerv #(.pt(pt)) swerv ( - .clk(clk), - .* - ); + // Instantiate the el2_swerv core + el2_swerv #( + .pt(pt) + ) swerv ( + .clk(clk), + .* + ); - // Instantiate the mem - el2_mem #(.pt(pt)) mem ( - .clk(active_l2clk), - .rst_l(core_rst_l), - .* - ); + // Instantiate the mem + el2_mem #( + .pt(pt) + ) mem ( + .clk (active_l2clk), + .rst_l(core_rst_l), + .* + ); - // JTAG/DMI instance - dmi_wrapper dmi_wrapper ( - // JTAG signals - .trst_n (jtag_trst_n), // JTAG reset - .tck (jtag_tck), // JTAG clock - .tms (jtag_tms), // Test mode select - .tdi (jtag_tdi), // Test Data Input - .tdo (jtag_tdo), // Test Data Output - .tdoEnable (), - // Processor Signals - .core_rst_n (dbg_rst_l), // Debug reset, active low - .core_clk (clk), // Core clock - .jtag_id (jtag_id), // JTAG ID - .rd_data (dmi_reg_rdata), // Read data from Processor - .reg_wr_data (dmi_reg_wdata), // Write data to Processor - .reg_wr_addr (dmi_reg_addr), // Write address to Processor - .reg_en (dmi_reg_en), // Write interface bit to Processor - .reg_wr_en (dmi_reg_wr_en), // Write enable to Processor - .dmi_hard_reset () - ); + // JTAG/DMI instance + dmi_wrapper dmi_wrapper ( + // JTAG signals + .trst_n (jtag_trst_n), // JTAG 
reset + .tck (jtag_tck), // JTAG clock + .tms (jtag_tms), // Test mode select + .tdi (jtag_tdi), // Test Data Input + .tdo (jtag_tdo), // Test Data Output + .tdoEnable (), + // Processor Signals + .core_rst_n (dbg_rst_l), // Debug reset, active low + .core_clk (clk), // Core clock + .jtag_id (jtag_id), // JTAG ID + .rd_data (dmi_reg_rdata), // Read data from Processor + .reg_wr_data (dmi_reg_wdata), // Write data to Processor + .reg_wr_addr (dmi_reg_addr), // Write address to Processor + .reg_en (dmi_reg_en), // Write interface bit to Processor + .reg_wr_en (dmi_reg_wr_en), // Write enable to Processor + .dmi_hard_reset() + ); endmodule diff --git a/Flow/design/exu/el2_exu.sv b/Flow/design/exu/el2_exu.sv index 5f7319b..ea9d4a6 100644 --- a/Flow/design/exu/el2_exu.sv +++ b/Flow/design/exu/el2_exu.sv @@ -15,355 +15,437 @@ module el2_exu -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" -) - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan control + `include "el2_param.vh" +) ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan control - input logic [1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse - input logic [1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse - input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1 - input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes} + input logic [ 1:0] dec_data_en, // Clock enable {x,r}, one cycle pulse + input logic [ 1:0] dec_ctl_en, // Clock enable {x,r}, two cycle pulse + input logic [31:0] dbg_cmd_wrdata, // Debug data to primary I0 RS1 + input el2_alu_pkt_t i0_ap, // DEC alu {valid,predecodes} - input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1 + input logic dec_debug_wdata_rs1_d, // Debug select to primary I0 RS1 - input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet - input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC 
predict fghr - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index - input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag + input el2_predict_pkt_t dec_i0_predict_p_d, // DEC branch predict packet + input logic [pt.BHT_GHR_SIZE-1:0] i0_predict_fghr_d, // DEC predict fghr + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] i0_predict_index_d, // DEC predict index + input logic [pt.BTB_BTAG_SIZE-1:0] i0_predict_btag_d, // DEC predict branch tag - input logic [31:0] lsu_result_m, // Load result M-stage - input logic [31:0] lsu_nonblock_load_data, // nonblock load data - input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data - input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data - input logic [31:0] gpr_i0_rs1_d, // DEC data gpr - input logic [31:0] gpr_i0_rs2_d, // DEC data gpr - input logic [31:0] dec_i0_immed_d, // DEC data immediate - input logic [31:0] dec_i0_result_r, // DEC result in R-stage - input logic [12:1] dec_i0_br_immed_d, // Branch immediate - input logic dec_i0_alu_decode_d, // Valid to X-stage ALU - input logic dec_i0_branch_d, // Branch in D-stage - input logic dec_i0_select_pc_d, // PC select to RS1 - input logic [31:1] dec_i0_pc_d, // Instruction PC - input logic [3:0] dec_i0_rs1_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data - input logic [3:0] dec_i0_rs2_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data - input logic dec_csr_ren_d, // CSR read select - input logic [31:0] dec_csr_rddata_d, // CSR read data + input logic [31:0] lsu_result_m, // Load result M-stage + input logic [31:0] lsu_nonblock_load_data, // nonblock load data + input logic dec_i0_rs1_en_d, // Qualify GPR RS1 data + input logic dec_i0_rs2_en_d, // Qualify GPR RS2 data + input logic [31:0] gpr_i0_rs1_d, // DEC data gpr + input logic [31:0] gpr_i0_rs2_d, // DEC data gpr + input logic [31:0] dec_i0_immed_d, // DEC data immediate + input logic [31:0] dec_i0_result_r, // DEC result in R-stage + 
input logic [12:1] dec_i0_br_immed_d, // Branch immediate + input logic dec_i0_alu_decode_d, // Valid to X-stage ALU + input logic dec_i0_branch_d, // Branch in D-stage + input logic dec_i0_select_pc_d, // PC select to RS1 + input logic [31:1] dec_i0_pc_d, // Instruction PC + input logic [3:0] dec_i0_rs1_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data + input logic [3:0] dec_i0_rs2_bypass_en_d, // DEC bypass select 1 - X-stage, 0 - dec bypass data + input logic dec_csr_ren_d, // CSR read select + input logic [31:0] dec_csr_rddata_d, // CSR read data - input logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands - input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass} - input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem} - input logic dec_div_cancel, // Cancel the divide operation + input logic dec_qual_lsu_d, // LSU instruction at D. Use to quiet LSU operands + input el2_mul_pkt_t mul_p, // DEC {valid, operand signs, low, operand bypass} + input el2_div_pkt_t div_p, // DEC {valid, unsigned, rem} + input logic dec_div_cancel, // Cancel the divide operation - input logic [31:1] pred_correct_npc_x, // DEC NPC for correctly predicted branch + input logic [31:1] pred_correct_npc_x, // DEC NPC for correctly predicted branch - input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs - input logic [31:1] dec_tlu_flush_path_r, // Redirect target + input logic dec_tlu_flush_lower_r, // Flush divide and secondary ALUs + input logic [31:1] dec_tlu_flush_path_r, // Redirect target - input logic dec_extint_stall, // External stall mux select - input logic [31:2] dec_tlu_meihap, // External stall mux data + input logic dec_extint_stall, // External stall mux select + input logic [31:2] dec_tlu_meihap, // External stall mux data - output logic [31:0] exu_lsu_rs1_d, // LSU operand - output logic [31:0] exu_lsu_rs2_d, // LSU operand + output logic [31:0] exu_lsu_rs1_d, // LSU operand + output logic [31:0] 
exu_lsu_rs2_d, // LSU operand - output logic exu_flush_final, // Pipe is being flushed this cycle - output logic [31:1] exu_flush_path_final, // Target for the oldest flush source + output logic exu_flush_final, // Pipe is being flushed this cycle + output logic [31:1] exu_flush_path_final, // Target for the oldest flush source - output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC - output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC - output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction + output logic [31:0] exu_i0_result_x, // Primary ALU result to DEC + output logic [31:1] exu_i0_pc_x, // Primary PC result to DEC + output logic [31:0] exu_csr_rs1_x, // RS1 source for a CSR instruction - output logic [31:1] exu_npc_r, // Divide NPC - output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history - output logic exu_i0_br_error_r, // to DEC I0 branch error - output logic exu_i0_br_start_error_r, // to DEC I0 branch start error - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index - output logic exu_i0_br_valid_r, // to DEC I0 branch valid - output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict - output logic exu_i0_br_middle_r, // to DEC I0 branch middle - output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr - output logic exu_i0_br_way_r, // to DEC I0 branch way + output logic [31:1] exu_npc_r, // Divide NPC + output logic [1:0] exu_i0_br_hist_r, // to DEC I0 branch history + output logic exu_i0_br_error_r, // to DEC I0 branch error + output logic exu_i0_br_start_error_r, // to DEC I0 branch start error + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // to DEC I0 branch index + output logic exu_i0_br_valid_r, // to DEC I0 branch valid + output logic exu_i0_br_mp_r, // to DEC I0 branch mispredict + output logic exu_i0_br_middle_r, // to DEC I0 branch middle + output logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // to DEC I0 branch fghr + 
output logic exu_i0_br_way_r, // to DEC I0 branch way - output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet - output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history - output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index - output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag + output el2_predict_pkt_t exu_mp_pkt, // Mispredict branch packet + output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // Mispredict global history + output logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index + output logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag - output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict - output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken - output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC + output logic exu_pmu_i0_br_misp, // to PMU - I0 E4 branch mispredict + output logic exu_pmu_i0_br_ataken, // to PMU - I0 E4 taken + output logic exu_pmu_i0_pc4, // to PMU - I0 E4 PC - output logic [31:0] exu_div_result, // Divide result - output logic exu_div_wren // Divide write enable to GPR + output logic [31:0] exu_div_result, // Divide result + output logic exu_div_wren // Divide write enable to GPR +); + + + + + logic [31:0] i0_rs1_bypass_data_d; + logic [31:0] i0_rs2_bypass_data_d; + logic i0_rs1_bypass_en_d; + logic i0_rs2_bypass_en_d; + logic [31:0] i0_rs1_d, i0_rs2_d; + logic [31:0] muldiv_rs1_d; + logic [31:1] pred_correct_npc_r; + logic i0_pred_correct_upper_r; + logic [31:1] i0_flush_path_upper_r; + logic x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2; + logic x_ctl_en, r_ctl_en; + + logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d; + logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x; + logic i0_taken_d; + logic i0_taken_x; + logic i0_valid_d; + logic i0_valid_x; + logic [pt.BHT_GHR_SIZE-1:0] 
after_flush_eghr; + + el2_predict_pkt_t final_predict_mp; + el2_predict_pkt_t i0_predict_newp_d; + + logic flush_in_d; + logic [ 31:0] alu_result_x; + + logic mul_valid_x; + logic [ 31:0] mul_result_x; + + el2_predict_pkt_t i0_pp_r; + + logic i0_flush_upper_d; + logic [ 31:1] i0_flush_path_d; + el2_predict_pkt_t i0_predict_p_d; + logic i0_pred_correct_upper_d; + + logic i0_flush_upper_x; + logic [ 31:1] i0_flush_path_x; + el2_predict_pkt_t i0_predict_p_x; + logic i0_pred_correct_upper_x; + logic i0_branch_x; + + localparam PREDPIPESIZE = pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1+pt.BHT_GHR_SIZE+pt.BTB_BTAG_SIZE; + logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp; + + + + + rvdffpcie #(31) i_flush_path_x_ff ( + .*, + .clk (clk), + .en (x_data_en), + .din (i0_flush_path_d[31:1]), + .dout(i0_flush_path_x[31:1]) + ); + rvdffe #(32) i_csr_rs1_x_ff ( + .*, + .clk (clk), + .en (x_data_en_q1), + .din (i0_rs1_d[31:0]), + .dout(exu_csr_rs1_x[31:0]) + ); + rvdffppe #($bits( + el2_predict_pkt_t + )) i_predictpacket_x_ff ( + .*, + .clk (clk), + .en (x_data_en), + .din (i0_predict_p_d), + .dout(i0_predict_p_x) + ); + rvdffe #(PREDPIPESIZE) i_predpipe_x_ff ( + .*, + .clk (clk), + .en (x_data_en_q2), + .din (predpipe_d), + .dout(predpipe_x) + ); + rvdffe #(PREDPIPESIZE) i_predpipe_r_ff ( + .*, + .clk (clk), + .en (r_data_en_q2), + .din (predpipe_x), + .dout(predpipe_r) + ); + + rvdffe #(4 + pt.BHT_GHR_SIZE) i_x_ff ( + .*, + .clk(clk), + .en(x_ctl_en), + .din({ + i0_valid_d, + i0_taken_d, + i0_flush_upper_d, + i0_pred_correct_upper_d, + ghr_x_ns[pt.BHT_GHR_SIZE-1:0] + }), + .dout({ + i0_valid_x, + i0_taken_x, + i0_flush_upper_x, + i0_pred_correct_upper_x, + ghr_x[pt.BHT_GHR_SIZE-1:0] + }) + ); + + rvdffppe #($bits( + el2_predict_pkt_t + ) + 1) i_r_ff0 ( + .*, + .clk (clk), + .en (r_ctl_en), + .din ({i0_pred_correct_upper_x, i0_predict_p_x}), + .dout({i0_pred_correct_upper_r, i0_pp_r}) + ); + + rvdffpcie #(31) i_flush_r_ff ( + .*, + .clk (clk), + .en (r_data_en), 
+ .din (i0_flush_path_x[31:1]), + .dout(i0_flush_path_upper_r[31:1]) + ); + rvdffpcie #(31) i_npc_r_ff ( + .*, + .clk (clk), + .en (r_data_en), + .din (pred_correct_npc_x[31:1]), + .dout(pred_correct_npc_r[31:1]) + ); + + rvdffie #(pt.BHT_GHR_SIZE + 2, 1) i_misc_ff ( + .*, + .clk (clk), + .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}), + .dout({ghr_d[pt.BHT_GHR_SIZE-1:0], mul_valid_x, i0_branch_x}) ); - logic [31:0] i0_rs1_bypass_data_d; - logic [31:0] i0_rs2_bypass_data_d; - logic i0_rs1_bypass_en_d; - logic i0_rs2_bypass_en_d; - logic [31:0] i0_rs1_d, i0_rs2_d; - logic [31:0] muldiv_rs1_d; - logic [31:1] pred_correct_npc_r; - logic i0_pred_correct_upper_r; - logic [31:1] i0_flush_path_upper_r; - logic x_data_en, x_data_en_q1, x_data_en_q2, r_data_en, r_data_en_q2; - logic x_ctl_en, r_ctl_en; - logic [pt.BHT_GHR_SIZE-1:0] ghr_d_ns, ghr_d; - logic [pt.BHT_GHR_SIZE-1:0] ghr_x_ns, ghr_x; - logic i0_taken_d; - logic i0_taken_x; - logic i0_valid_d; - logic i0_valid_x; - logic [pt.BHT_GHR_SIZE-1:0] after_flush_eghr; - - el2_predict_pkt_t final_predict_mp; - el2_predict_pkt_t i0_predict_newp_d; - - logic flush_in_d; - logic [31:0] alu_result_x; - - logic mul_valid_x; - logic [31:0] mul_result_x; - - el2_predict_pkt_t i0_pp_r; - - logic i0_flush_upper_d; - logic [31:1] i0_flush_path_d; - el2_predict_pkt_t i0_predict_p_d; - logic i0_pred_correct_upper_d; - - logic i0_flush_upper_x; - logic [31:1] i0_flush_path_x; - el2_predict_pkt_t i0_predict_p_x; - logic i0_pred_correct_upper_x; - logic i0_branch_x; - - localparam PREDPIPESIZE = pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1+pt.BHT_GHR_SIZE+pt.BTB_BTAG_SIZE; - logic [PREDPIPESIZE-1:0] predpipe_d, predpipe_x, predpipe_r, final_predpipe_mp; + assign predpipe_d[PREDPIPESIZE-1:0] = {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d}; + assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3]; + assign i0_rs2_bypass_en_d = 
dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3]; - - rvdffpcie #(31) i_flush_path_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_flush_path_d[31:1] ), .dout( i0_flush_path_x[31:1] ) ); - rvdffe #(32) i_csr_rs1_x_ff (.*, .clk(clk), .en ( x_data_en_q1 ), .din ( i0_rs1_d[31:0] ), .dout( exu_csr_rs1_x[31:0] ) ); - rvdffppe #($bits(el2_predict_pkt_t)) i_predictpacket_x_ff (.*, .clk(clk), .en ( x_data_en ), .din ( i0_predict_p_d ), .dout( i0_predict_p_x ) ); - rvdffe #(PREDPIPESIZE) i_predpipe_x_ff (.*, .clk(clk), .en ( x_data_en_q2 ), .din ( predpipe_d ), .dout( predpipe_x ) ); - rvdffe #(PREDPIPESIZE) i_predpipe_r_ff (.*, .clk(clk), .en ( r_data_en_q2 ), .din ( predpipe_x ), .dout( predpipe_r ) ); - - rvdffe #(4+pt.BHT_GHR_SIZE) i_x_ff (.*, .clk(clk), .en ( x_ctl_en ), .din ({i0_valid_d,i0_taken_d,i0_flush_upper_d,i0_pred_correct_upper_d,ghr_x_ns[pt.BHT_GHR_SIZE-1:0]} ), - .dout({i0_valid_x,i0_taken_x,i0_flush_upper_x,i0_pred_correct_upper_x,ghr_x[pt.BHT_GHR_SIZE-1:0]} ) ); - - rvdffppe #($bits(el2_predict_pkt_t)+1) i_r_ff0 (.*, .clk(clk), .en ( r_ctl_en ), .din ({i0_pred_correct_upper_x, i0_predict_p_x}), - .dout({i0_pred_correct_upper_r, i0_pp_r }) ); - - rvdffpcie #(31) i_flush_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( i0_flush_path_x[31:1] ), .dout( i0_flush_path_upper_r[31:1]) ); - rvdffpcie #(31) i_npc_r_ff (.*, .clk(clk), .en ( r_data_en ), .din ( pred_correct_npc_x[31:1] ), .dout( pred_correct_npc_r[31:1] ) ); - - rvdffie #(pt.BHT_GHR_SIZE+2,1) i_misc_ff (.*, .clk(clk), .din ({ghr_d_ns[pt.BHT_GHR_SIZE-1:0], mul_p.valid, dec_i0_branch_d}), - .dout({ghr_d[pt.BHT_GHR_SIZE-1:0] , mul_valid_x, i0_branch_x}) ); - - - - - - assign predpipe_d[PREDPIPESIZE-1:0] - = {i0_predict_fghr_d, i0_predict_index_d, i0_predict_btag_d}; - - - assign i0_rs1_bypass_en_d = dec_i0_rs1_bypass_en_d[0] | dec_i0_rs1_bypass_en_d[1] | dec_i0_rs1_bypass_en_d[2] | dec_i0_rs1_bypass_en_d[3]; - assign i0_rs2_bypass_en_d = 
dec_i0_rs2_bypass_en_d[0] | dec_i0_rs2_bypass_en_d[1] | dec_i0_rs2_bypass_en_d[2] | dec_i0_rs2_bypass_en_d[3]; - - assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) | + assign i0_rs1_bypass_data_d[31:0]=({32{dec_i0_rs1_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) | ({32{dec_i0_rs1_bypass_en_d[1]}} & lsu_result_m[31:0] ) | ({32{dec_i0_rs1_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) | ({32{dec_i0_rs1_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]); - assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) | + assign i0_rs2_bypass_data_d[31:0]=({32{dec_i0_rs2_bypass_en_d[0]}} & dec_i0_result_r[31:0] ) | ({32{dec_i0_rs2_bypass_en_d[1]}} & lsu_result_m[31:0] ) | ({32{dec_i0_rs2_bypass_en_d[2]}} & exu_i0_result_x[31:0] ) | ({32{dec_i0_rs2_bypass_en_d[3]}} & lsu_nonblock_load_data[31:0]); - assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) | + assign i0_rs1_d[31:0] = ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]) | ({32{~i0_rs1_bypass_en_d & dec_i0_select_pc_d }} & {dec_i0_pc_d[31:1],1'b0} ) | // for jal's ({32{~i0_rs1_bypass_en_d & dec_debug_wdata_rs1_d }} & dbg_cmd_wrdata[31:0] ) | ({32{~i0_rs1_bypass_en_d & ~dec_debug_wdata_rs1_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ); - assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) | + assign i0_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & dec_i0_rs2_en_d}} & gpr_i0_rs2_d[31:0] ) | ({32{~i0_rs2_bypass_en_d }} & dec_i0_immed_d[31:0] ) | ({32{ i0_rs2_bypass_en_d }} & i0_rs2_bypass_data_d[31:0]); - assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0] ) | + assign exu_lsu_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & ~dec_extint_stall & dec_i0_rs1_en_d & dec_qual_lsu_d}} & gpr_i0_rs1_d[31:0] ) | ({32{ i0_rs1_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs1_bypass_data_d[31:0]) | ({32{ 
dec_extint_stall & dec_qual_lsu_d}} & {dec_tlu_meihap[31:2],2'b0}); - assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0] ) | + assign exu_lsu_rs2_d[31:0] = ({32{~i0_rs2_bypass_en_d & ~dec_extint_stall & dec_i0_rs2_en_d & dec_qual_lsu_d}} & gpr_i0_rs2_d[31:0] ) | ({32{ i0_rs2_bypass_en_d & ~dec_extint_stall & dec_qual_lsu_d}} & i0_rs2_bypass_data_d[31:0]); - assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) | + assign muldiv_rs1_d[31:0] = ({32{~i0_rs1_bypass_en_d & dec_i0_rs1_en_d}} & gpr_i0_rs1_d[31:0] ) | ({32{ i0_rs1_bypass_en_d }} & i0_rs1_bypass_data_d[31:0]); - assign x_data_en = dec_data_en[1]; - assign x_data_en_q1 = dec_data_en[1] & dec_csr_ren_d; - assign x_data_en_q2 = dec_data_en[1] & dec_i0_branch_d; - assign r_data_en = dec_data_en[0]; - assign r_data_en_q2 = dec_data_en[0] & i0_branch_x; - assign x_ctl_en = dec_ctl_en[1]; - assign r_ctl_en = dec_ctl_en[0]; + assign x_data_en = dec_data_en[1]; + assign x_data_en_q1 = dec_data_en[1] & dec_csr_ren_d; + assign x_data_en_q2 = dec_data_en[1] & dec_i0_branch_d; + assign r_data_en = dec_data_en[0]; + assign r_data_en_q2 = dec_data_en[0] & i0_branch_x; + assign x_ctl_en = dec_ctl_en[1]; + assign r_ctl_en = dec_ctl_en[0]; - el2_exu_alu_ctl #(.pt(pt)) i_alu (.*, - .enable ( x_data_en ), // I - .pp_in ( i0_predict_newp_d ), // I - .valid_in ( dec_i0_alu_decode_d ), // I - .flush_upper_x ( i0_flush_upper_x ), // I - .flush_lower_r ( dec_tlu_flush_lower_r ), // I - .a_in ( i0_rs1_d[31:0] ), // I - .b_in ( i0_rs2_d[31:0] ), // I - .pc_in ( dec_i0_pc_d[31:1] ), // I - .brimm_in ( dec_i0_br_immed_d[12:1] ), // I - .ap ( i0_ap ), // I - .csr_ren_in ( dec_csr_ren_d ), // I - .csr_rddata_in ( dec_csr_rddata_d[31:0] ), // I - .result_ff ( alu_result_x[31:0] ), // O - .flush_upper_out ( i0_flush_upper_d ), // O - .flush_final_out ( exu_flush_final ), // O - .flush_path_out ( i0_flush_path_d[31:1] 
), // O - .predict_p_out ( i0_predict_p_d ), // O - .pred_correct_out ( i0_pred_correct_upper_d ), // O - .pc_ff ( exu_i0_pc_x[31:1] )); // O + el2_exu_alu_ctl #( + .pt(pt) + ) i_alu ( + .*, + .enable (x_data_en), // I + .pp_in (i0_predict_newp_d), // I + .valid_in (dec_i0_alu_decode_d), // I + .flush_upper_x (i0_flush_upper_x), // I + .flush_lower_r (dec_tlu_flush_lower_r), // I + .a_in (i0_rs1_d[31:0]), // I + .b_in (i0_rs2_d[31:0]), // I + .pc_in (dec_i0_pc_d[31:1]), // I + .brimm_in (dec_i0_br_immed_d[12:1]), // I + .ap (i0_ap), // I + .csr_ren_in (dec_csr_ren_d), // I + .csr_rddata_in (dec_csr_rddata_d[31:0]), // I + .result_ff (alu_result_x[31:0]), // O + .flush_upper_out (i0_flush_upper_d), // O + .flush_final_out (exu_flush_final), // O + .flush_path_out (i0_flush_path_d[31:1]), // O + .predict_p_out (i0_predict_p_d), // O + .pred_correct_out(i0_pred_correct_upper_d), // O + .pc_ff (exu_i0_pc_x[31:1]) + ); // O - el2_exu_mul_ctl #(.pt(pt)) i_mul (.*, - .mul_p ( mul_p & {$bits(el2_mul_pkt_t){mul_p.valid}} ), // I - .rs1_in ( muldiv_rs1_d[31:0] & {32{mul_p.valid}} ), // I - .rs2_in ( i0_rs2_d[31:0] & {32{mul_p.valid}} ), // I - .result_x ( mul_result_x[31:0] )); // O + el2_exu_mul_ctl #( + .pt(pt) + ) i_mul ( + .*, + .mul_p (mul_p & {$bits(el2_mul_pkt_t) {mul_p.valid}}), // I + .rs1_in (muldiv_rs1_d[31:0] & {32{mul_p.valid}}), // I + .rs2_in (i0_rs2_d[31:0] & {32{mul_p.valid}}), // I + .result_x(mul_result_x[31:0]) + ); // O - el2_exu_div_ctl #(.pt(pt)) i_div (.*, - .cancel ( dec_div_cancel ), // I - .dp ( div_p ), // I - .dividend ( muldiv_rs1_d[31:0] ), // I - .divisor ( i0_rs2_d[31:0] ), // I - .finish_dly ( exu_div_wren ), // O - .out ( exu_div_result[31:0] )); // O + el2_exu_div_ctl #( + .pt(pt) + ) i_div ( + .*, + .cancel (dec_div_cancel), // I + .dp (div_p), // I + .dividend (muldiv_rs1_d[31:0]), // I + .divisor (i0_rs2_d[31:0]), // I + .finish_dly(exu_div_wren), // O + .out (exu_div_result[31:0]) + ); // O - assign exu_i0_result_x[31:0] = 
(mul_valid_x) ? mul_result_x[31:0] : alu_result_x[31:0]; + assign exu_i0_result_x[31:0] = (mul_valid_x) ? mul_result_x[31:0] : alu_result_x[31:0]; - always_comb begin - i0_predict_newp_d = dec_i0_predict_p_d; - i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst - end + always_comb begin + i0_predict_newp_d = dec_i0_predict_p_d; + i0_predict_newp_d.boffset = dec_i0_pc_d[1]; // from the start of inst + end - assign exu_pmu_i0_br_misp = i0_pp_r.misp; - assign exu_pmu_i0_br_ataken = i0_pp_r.ataken; - assign exu_pmu_i0_pc4 = i0_pp_r.pc4; + assign exu_pmu_i0_br_misp = i0_pp_r.misp; + assign exu_pmu_i0_br_ataken = i0_pp_r.ataken; + assign exu_pmu_i0_pc4 = i0_pp_r.pc4; - assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r; - assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d); + assign i0_valid_d = i0_predict_p_d.valid & dec_i0_alu_decode_d & ~dec_tlu_flush_lower_r; + assign i0_taken_d = (i0_predict_p_d.ataken & dec_i0_alu_decode_d); -if(pt.BTB_ENABLE==1) begin - // maintain GHR at D - assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0] + if (pt.BTB_ENABLE == 1) begin + // maintain GHR at D + assign ghr_d_ns[pt.BHT_GHR_SIZE-1:0] = ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & i0_valid_d}} & {ghr_d[pt.BHT_GHR_SIZE-2:0], i0_taken_d}) | ({pt.BHT_GHR_SIZE{~dec_tlu_flush_lower_r & ~i0_valid_d}} & ghr_d[pt.BHT_GHR_SIZE-1:0] ) | ({pt.BHT_GHR_SIZE{ dec_tlu_flush_lower_r }} & ghr_x[pt.BHT_GHR_SIZE-1:0] ); - // maintain GHR at X - assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0] + // maintain GHR at X + assign ghr_x_ns[pt.BHT_GHR_SIZE-1:0] = ({pt.BHT_GHR_SIZE{ i0_valid_x}} & {ghr_x[pt.BHT_GHR_SIZE-2:0], i0_taken_x}) | ({pt.BHT_GHR_SIZE{~i0_valid_x}} & ghr_x[pt.BHT_GHR_SIZE-1:0] ) ; - assign exu_i0_br_valid_r = i0_pp_r.valid; - assign exu_i0_br_mp_r = i0_pp_r.misp; - assign exu_i0_br_way_r = i0_pp_r.way; - assign exu_i0_br_hist_r[1:0] = {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0]; - assign exu_i0_br_error_r = i0_pp_r.br_error; - assign 
exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset; - assign exu_i0_br_start_error_r = i0_pp_r.br_start_error; + assign exu_i0_br_valid_r = i0_pp_r.valid; + assign exu_i0_br_mp_r = i0_pp_r.misp; + assign exu_i0_br_way_r = i0_pp_r.way; + assign exu_i0_br_hist_r[1:0] = {2{i0_pp_r.valid}} & i0_pp_r.hist[1:0]; + assign exu_i0_br_error_r = i0_pp_r.br_error; + assign exu_i0_br_middle_r = i0_pp_r.pc4 ^ i0_pp_r.boffset; + assign exu_i0_br_start_error_r = i0_pp_r.br_start_error; - assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0], + assign {exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0], exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]}= predpipe_r[PREDPIPESIZE-1:pt.BTB_BTAG_SIZE]; - assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0; + assign final_predict_mp = (i0_flush_upper_x) ? i0_predict_p_x : '0; - assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0; + assign final_predpipe_mp[PREDPIPESIZE-1:0] = (i0_flush_upper_x) ? predpipe_x : '0; - assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0]; + assign after_flush_eghr[pt.BHT_GHR_SIZE-1:0] = (i0_flush_upper_x & ~dec_tlu_flush_lower_r) ? 
ghr_d[pt.BHT_GHR_SIZE-1:0] : ghr_x[pt.BHT_GHR_SIZE-1:0]; - assign exu_mp_pkt.valid = final_predict_mp.valid; - assign exu_mp_pkt.way = final_predict_mp.way; - assign exu_mp_pkt.misp = final_predict_mp.misp; - assign exu_mp_pkt.pcall = final_predict_mp.pcall; - assign exu_mp_pkt.pja = final_predict_mp.pja; - assign exu_mp_pkt.pret = final_predict_mp.pret; - assign exu_mp_pkt.ataken = final_predict_mp.ataken; - assign exu_mp_pkt.boffset = final_predict_mp.boffset; - assign exu_mp_pkt.pc4 = final_predict_mp.pc4; - assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0]; - assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0]; + assign exu_mp_pkt.valid = final_predict_mp.valid; + assign exu_mp_pkt.way = final_predict_mp.way; + assign exu_mp_pkt.misp = final_predict_mp.misp; + assign exu_mp_pkt.pcall = final_predict_mp.pcall; + assign exu_mp_pkt.pja = final_predict_mp.pja; + assign exu_mp_pkt.pret = final_predict_mp.pret; + assign exu_mp_pkt.ataken = final_predict_mp.ataken; + assign exu_mp_pkt.boffset = final_predict_mp.boffset; + assign exu_mp_pkt.pc4 = final_predict_mp.pc4; + assign exu_mp_pkt.hist[1:0] = final_predict_mp.hist[1:0]; + assign exu_mp_pkt.toffset[11:0] = final_predict_mp.toffset[11:0]; - assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0]; + assign exu_mp_fghr[pt.BHT_GHR_SIZE-1:0] = after_flush_eghr[pt.BHT_GHR_SIZE-1:0]; - assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], + assign {exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]} = final_predpipe_mp[PREDPIPESIZE-pt.BHT_GHR_SIZE-1:0]; - assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write -end // if (pt.BTB_ENABLE==1) + assign exu_mp_eghr[pt.BHT_GHR_SIZE-1:0] = final_predpipe_mp[PREDPIPESIZE-1:pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+pt.BTB_BTAG_SIZE+1]; // mp ghr for bht write + end // if (pt.BTB_ENABLE==1) else begin - assign ghr_d_ns = '0; - 
assign ghr_x_ns = '0; - assign exu_mp_pkt = '0; - assign exu_mp_eghr = '0; - assign exu_mp_fghr = '0; - assign exu_mp_index = '0; - assign exu_mp_btag = '0; - assign exu_i0_br_hist_r = '0; - assign exu_i0_br_error_r = '0; - assign exu_i0_br_start_error_r = '0; - assign exu_i0_br_index_r = '0; - assign exu_i0_br_valid_r = '0; - assign exu_i0_br_mp_r = '0; - assign exu_i0_br_middle_r = '0; - assign exu_i0_br_fghr_r = '0; - assign exu_i0_br_way_r = '0; -end // else: !if(pt.BTB_ENABLE==1) + assign ghr_d_ns = '0; + assign ghr_x_ns = '0; + assign exu_mp_pkt = '0; + assign exu_mp_eghr = '0; + assign exu_mp_fghr = '0; + assign exu_mp_index = '0; + assign exu_mp_btag = '0; + assign exu_i0_br_hist_r = '0; + assign exu_i0_br_error_r = '0; + assign exu_i0_br_start_error_r = '0; + assign exu_i0_br_index_r = '0; + assign exu_i0_br_valid_r = '0; + assign exu_i0_br_mp_r = '0; + assign exu_i0_br_middle_r = '0; + assign exu_i0_br_fghr_r = '0; + assign exu_i0_br_way_r = '0; + end // else: !if(pt.BTB_ENABLE==1) - assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r }} & dec_tlu_flush_path_r[31:1] ) | + assign exu_flush_path_final[31:1] = ( {31{ dec_tlu_flush_lower_r }} & dec_tlu_flush_path_r[31:1] ) | ( {31{~dec_tlu_flush_lower_r & i0_flush_upper_d}} & i0_flush_path_d[31:1] ); - assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? pred_correct_npc_r[31:1] : i0_flush_path_upper_r[31:1]; + assign exu_npc_r[31:1] = (i0_pred_correct_upper_r) ? 
pred_correct_npc_r[31:1] : i0_flush_path_upper_r[31:1]; -endmodule // el2_exu +endmodule // el2_exu diff --git a/Flow/design/exu/el2_exu_alu_ctl.sv b/Flow/design/exu/el2_exu_alu_ctl.sv index 00234ef..6fef51d 100644 --- a/Flow/design/exu/el2_exu_alu_ctl.sv +++ b/Flow/design/exu/el2_exu_alu_ctl.sv @@ -15,255 +15,247 @@ module el2_exu_alu_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" -) - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan control + `include "el2_param.vh" +) ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan control - input logic flush_upper_x, // Branch flush from previous cycle - input logic flush_lower_r, // Master flush of entire pipeline - input logic enable, // Clock enable - input logic valid_in, // Valid - input el2_alu_pkt_t ap, // predecodes - input logic csr_ren_in, // CSR select - input logic [31:0] csr_rddata_in, // CSR data - input logic signed [31:0] a_in, // A operand - input logic [31:0] b_in, // B operand - input logic [31:1] pc_in, // for pc=pc+2,4 calculations - input el2_predict_pkt_t pp_in, // Predicted branch structure - input logic [12:1] brimm_in, // Branch offset + input logic flush_upper_x, // Branch flush from previous cycle + input logic flush_lower_r, // Master flush of entire pipeline + input logic enable, // Clock enable + input logic valid_in, // Valid + input el2_alu_pkt_t ap, // predecodes + input logic csr_ren_in, // CSR select + input logic [31:0] csr_rddata_in, // CSR data + input logic signed [31:0] a_in, // A operand + input logic [31:0] b_in, // B operand + input logic [31:1] pc_in, // for pc=pc+2,4 calculations + input el2_predict_pkt_t pp_in, // Predicted branch structure + input logic [12:1] brimm_in, // Branch offset - output logic [31:0] result_ff, // final result - output logic flush_upper_out, // Branch flush - output logic flush_final_out, // Branch flush or flush entire 
pipeline - output logic [31:1] flush_path_out, // Branch flush PC - output logic [31:1] pc_ff, // flopped PC - output logic pred_correct_out, // NPC control - output el2_predict_pkt_t predict_p_out // Predicted branch structure + output logic [31:0] result_ff, // final result + output logic flush_upper_out, // Branch flush + output logic flush_final_out, // Branch flush or flush entire pipeline + output logic [31:1] flush_path_out, // Branch flush PC + output logic [31:1] pc_ff, // flopped PC + output logic pred_correct_out, // NPC control + output el2_predict_pkt_t predict_p_out // Predicted branch structure +); + + + logic [31:0] zba_a_in; + logic [31:0] aout; + logic cout, ov, neg; + logic [31:0] lout; + logic [31:0] sout; + logic sel_shift; + logic sel_adder; + logic slt_one; + logic actual_taken; + logic [31:1] pcout; + logic cond_mispredict; + logic target_mispredict; + logic eq, ne, lt, ge; + logic any_jal; + logic [ 1:0] newhist; + logic sel_pc; + logic [31:0] csr_write_data; + logic [31:0] result; + + + + + // *** Start - BitManip *** + + // Zbb + logic ap_clz; + logic ap_ctz; + logic ap_cpop; + logic ap_sext_b; + logic ap_sext_h; + logic ap_min; + logic ap_max; + logic ap_rol; + logic ap_ror; + logic ap_rev8; + logic ap_orc_b; + logic ap_zbb; + + // Zbs + logic ap_bset; + logic ap_bclr; + logic ap_binv; + logic ap_bext; + + // Zbp + logic ap_pack; + logic ap_packu; + logic ap_packh; + + // Zba + logic ap_sh1add; + logic ap_sh2add; + logic ap_sh3add; + logic ap_zba; + + + + if (pt.BITMANIP_ZBB == 1) begin + assign ap_clz = ap.clz; + assign ap_ctz = ap.ctz; + assign ap_cpop = ap.cpop; + assign ap_sext_b = ap.sext_b; + assign ap_sext_h = ap.sext_h; + assign ap_min = ap.min; + assign ap_max = ap.max; + end else begin + assign ap_clz = 1'b0; + assign ap_ctz = 1'b0; + assign ap_cpop = 1'b0; + assign ap_sext_b = 1'b0; + assign ap_sext_h = 1'b0; + assign ap_min = 1'b0; + assign ap_max = 1'b0; + end + + + if ((pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1)) begin 
+ assign ap_rol = ap.rol; + assign ap_ror = ap.ror; + assign ap_rev8 = ap.grev & (b_in[4:0] == 5'b11000); + assign ap_orc_b = ap.gorc & (b_in[4:0] == 5'b00111); + assign ap_zbb = ap.zbb; + end else begin + assign ap_rol = 1'b0; + assign ap_ror = 1'b0; + assign ap_rev8 = 1'b0; + assign ap_orc_b = 1'b0; + assign ap_zbb = 1'b0; + end + + + if (pt.BITMANIP_ZBS == 1) begin + assign ap_bset = ap.bset; + assign ap_bclr = ap.bclr; + assign ap_binv = ap.binv; + assign ap_bext = ap.bext; + end else begin + assign ap_bset = 1'b0; + assign ap_bclr = 1'b0; + assign ap_binv = 1'b0; + assign ap_bext = 1'b0; + end + + + if (pt.BITMANIP_ZBP == 1) begin + assign ap_packu = ap.packu; + end else begin + assign ap_packu = 1'b0; + end + + + if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) | (pt.BITMANIP_ZBE == 1) | (pt.BITMANIP_ZBF == 1) ) + begin + assign ap_pack = ap.pack; + assign ap_packh = ap.packh; + end else begin + assign ap_pack = 1'b0; + assign ap_packh = 1'b0; + end + + + if (pt.BITMANIP_ZBA == 1) begin + assign ap_sh1add = ap.sh1add; + assign ap_sh2add = ap.sh2add; + assign ap_sh3add = ap.sh3add; + assign ap_zba = ap.zba; + end else begin + assign ap_sh1add = 1'b0; + assign ap_sh2add = 1'b0; + assign ap_sh3add = 1'b0; + assign ap_zba = 1'b0; + end + + + + + // *** End - BitManip *** + + + + + rvdffpcie #(31) i_pc_ff ( + .*, + .clk (clk), + .en (enable), + .din (pc_in[31:1]), + .dout(pc_ff[31:1]) + ); // any PC is run through here - doesn't have to be alu + rvdffe #(32) i_result_ff ( + .*, + .clk (clk), + .en (enable & valid_in), + .din (result[31:0]), + .dout(result_ff[31:0]) ); - logic [31:0] zba_a_in; - logic [31:0] aout; - logic cout,ov,neg; - logic [31:0] lout; - logic [31:0] sout; - logic sel_shift; - logic sel_adder; - logic slt_one; - logic actual_taken; - logic [31:1] pcout; - logic cond_mispredict; - logic target_mispredict; - logic eq, ne, lt, ge; - logic any_jal; - logic [1:0] newhist; - logic sel_pc; - logic [31:0] csr_write_data; - logic [31:0] result; + + 
// immediates are just muxed into rs2 + + // add => add=1; + // sub => add=1; sub=1; + + // and => lctl=3 + // or => lctl=2 + // xor => lctl=1 + + // sll => sctl=3 + // srl => sctl=2 + // sra => sctl=1 + + // slt => slt + + // lui => lctl=2; or x0, imm20 previously << 12 + // auipc => add; add pc, imm20 previously << 12 + + // beq => bctl=4; add; add x0, pc, sext(offset[12:1]) + // bne => bctl=3; add; add x0, pc, sext(offset[12:1]) + // blt => bctl=2; add; add x0, pc, sext(offset[12:1]) + // bge => bctl=1; add; add x0, pc, sext(offset[12:1]) + + // jal => rs1=pc {pc[31:1],1'b0}, rs2=sext(offset20:1]); rd=pc+[2,4] + // jalr => rs1=rs1, rs2=sext(offset20:1]); rd=pc+[2,4] - - // *** Start - BitManip *** - - // Zbb - logic ap_clz; - logic ap_ctz; - logic ap_cpop; - logic ap_sext_b; - logic ap_sext_h; - logic ap_min; - logic ap_max; - logic ap_rol; - logic ap_ror; - logic ap_rev8; - logic ap_orc_b; - logic ap_zbb; - - // Zbs - logic ap_bset; - logic ap_bclr; - logic ap_binv; - logic ap_bext; - - // Zbp - logic ap_pack; - logic ap_packu; - logic ap_packh; - - // Zba - logic ap_sh1add; - logic ap_sh2add; - logic ap_sh3add; - logic ap_zba; - - - - if (pt.BITMANIP_ZBB == 1) - begin - assign ap_clz = ap.clz; - assign ap_ctz = ap.ctz; - assign ap_cpop = ap.cpop; - assign ap_sext_b = ap.sext_b; - assign ap_sext_h = ap.sext_h; - assign ap_min = ap.min; - assign ap_max = ap.max; - end - else - begin - assign ap_clz = 1'b0; - assign ap_ctz = 1'b0; - assign ap_cpop = 1'b0; - assign ap_sext_b = 1'b0; - assign ap_sext_h = 1'b0; - assign ap_min = 1'b0; - assign ap_max = 1'b0; - end - - - if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) ) - begin - assign ap_rol = ap.rol; - assign ap_ror = ap.ror; - assign ap_rev8 = ap.grev & (b_in[4:0] == 5'b11000); - assign ap_orc_b = ap.gorc & (b_in[4:0] == 5'b00111); - assign ap_zbb = ap.zbb; - end - else - begin - assign ap_rol = 1'b0; - assign ap_ror = 1'b0; - assign ap_rev8 = 1'b0; - assign ap_orc_b = 1'b0; - assign ap_zbb = 1'b0; - end - - 
- if (pt.BITMANIP_ZBS == 1) - begin - assign ap_bset = ap.bset; - assign ap_bclr = ap.bclr; - assign ap_binv = ap.binv; - assign ap_bext = ap.bext; - end - else - begin - assign ap_bset = 1'b0; - assign ap_bclr = 1'b0; - assign ap_binv = 1'b0; - assign ap_bext = 1'b0; - end - - - if (pt.BITMANIP_ZBP == 1) - begin - assign ap_packu = ap.packu; - end - else - begin - assign ap_packu = 1'b0; - end - - - if ( (pt.BITMANIP_ZBB == 1) | (pt.BITMANIP_ZBP == 1) | (pt.BITMANIP_ZBE == 1) | (pt.BITMANIP_ZBF == 1) ) - begin - assign ap_pack = ap.pack; - assign ap_packh = ap.packh; - end - else - begin - assign ap_pack = 1'b0; - assign ap_packh = 1'b0; - end - - - if (pt.BITMANIP_ZBA == 1) - begin - assign ap_sh1add = ap.sh1add; - assign ap_sh2add = ap.sh2add; - assign ap_sh3add = ap.sh3add; - assign ap_zba = ap.zba; - end - else - begin - assign ap_sh1add = 1'b0; - assign ap_sh2add = 1'b0; - assign ap_sh3add = 1'b0; - assign ap_zba = 1'b0; - end - - - - - // *** End - BitManip *** - - - - - rvdffpcie #(31) i_pc_ff (.*, .clk(clk), .en(enable), .din(pc_in[31:1]), .dout(pc_ff[31:1])); // any PC is run through here - doesn't have to be alu - rvdffe #(32) i_result_ff (.*, .clk(clk), .en(enable & valid_in), .din(result[31:0]), .dout(result_ff[31:0])); - - - - // immediates are just muxed into rs2 - - // add => add=1; - // sub => add=1; sub=1; - - // and => lctl=3 - // or => lctl=2 - // xor => lctl=1 - - // sll => sctl=3 - // srl => sctl=2 - // sra => sctl=1 - - // slt => slt - - // lui => lctl=2; or x0, imm20 previously << 12 - // auipc => add; add pc, imm20 previously << 12 - - // beq => bctl=4; add; add x0, pc, sext(offset[12:1]) - // bne => bctl=3; add; add x0, pc, sext(offset[12:1]) - // blt => bctl=2; add; add x0, pc, sext(offset[12:1]) - // bge => bctl=1; add; add x0, pc, sext(offset[12:1]) - - // jal => rs1=pc {pc[31:1],1'b0}, rs2=sext(offset20:1]); rd=pc+[2,4] - // jalr => rs1=rs1, rs2=sext(offset20:1]); rd=pc+[2,4] - - - - assign zba_a_in[31:0] = ( {32{ ap_sh1add}} & 
{a_in[30:0],1'b0} ) | + assign zba_a_in[31:0] = ( {32{ ap_sh1add}} & {a_in[30:0],1'b0} ) | ( {32{ ap_sh2add}} & {a_in[29:0],2'b0} ) | ( {32{ ap_sh3add}} & {a_in[28:0],3'b0} ) | ( {32{~ap_zba }} & a_in[31:0] ); - logic [31:0] bm; + logic [31:0] bm; - assign bm[31:0] = ( ap.sub ) ? ~b_in[31:0] : b_in[31:0]; + assign bm[31:0] = (ap.sub) ? ~b_in[31:0] : b_in[31:0]; - assign {cout, aout[31:0]} = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub}; + assign {cout, aout[31:0]} = {1'b0, zba_a_in[31:0]} + {1'b0, bm[31:0]} + {32'b0, ap.sub}; - assign ov = (~a_in[31] & ~bm[31] & aout[31]) | - ( a_in[31] & bm[31] & ~aout[31] ); + assign ov = (~a_in[31] & ~bm[31] & aout[31]) | (a_in[31] & bm[31] & ~aout[31]); - assign lt = (~ap.unsign & (neg ^ ov)) | - ( ap.unsign & ~cout); + assign lt = (~ap.unsign & (neg ^ ov)) | (ap.unsign & ~cout); - assign eq = (a_in[31:0] == b_in[31:0]); - assign ne = ~eq; - assign neg = aout[31]; - assign ge = ~lt; + assign eq = (a_in[31:0] == b_in[31:0]); + assign ne = ~eq; + assign neg = aout[31]; + assign ge = ~lt; - assign lout[31:0] = ( {32{csr_ren_in }} & csr_rddata_in[31:0] ) | + assign lout[31:0] = ( {32{csr_ren_in }} & csr_rddata_in[31:0] ) | ( {32{ap.land & ~ap_zbb}} & a_in[31:0] & b_in[31:0] ) | ( {32{ap.lor & ~ap_zbb}} & (a_in[31:0] | b_in[31:0]) ) | ( {32{ap.lxor & ~ap_zbb}} & (a_in[31:0] ^ b_in[31:0]) ) | @@ -274,16 +266,16 @@ import el2_pkg::*; - // * * * * * * * * * * * * * * * * * * BitManip : ROL,ROR * * * * * * * * * * * * * * * * * * - // * * * * * * * * * * * * * * * * * * BitManip : ZBEXT * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : ROL,ROR * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : ZBEXT * * * * * * * * * * * * * * * * * * - logic [5:0] shift_amount; - logic [31:0] shift_mask; - logic [62:0] shift_extend; - logic [62:0] shift_long; + logic [ 5:0] shift_amount; + logic [31:0] shift_mask; + logic [62:0] shift_extend; + logic [62:0] 
shift_long; - assign shift_amount[5:0] = ( { 6{ap.sll}} & (6'd32 - {1'b0,b_in[4:0]}) ) | // [5] unused + assign shift_amount[5:0] = ( { 6{ap.sll}} & (6'd32 - {1'b0,b_in[4:0]}) ) | // [5] unused ( { 6{ap.srl}} & {1'b0,b_in[4:0]} ) | ( { 6{ap.sra}} & {1'b0,b_in[4:0]} ) | ( { 6{ap_rol}} & (6'd32 - {1'b0,b_in[4:0]}) ) | @@ -291,174 +283,199 @@ import el2_pkg::*; ( { 6{ap_bext}} & {1'b0,b_in[4:0]} ); - assign shift_mask[31:0] = ( 32'hffffffff << ({5{ap.sll}} & b_in[4:0]) ); + assign shift_mask[31:0] = (32'hffffffff << ({5{ap.sll}} & b_in[4:0])); - assign shift_extend[31:0] = a_in[31:0]; + assign shift_extend[31:0] = a_in[31:0]; - assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) | + assign shift_extend[62:32] = ( {31{ap.sra}} & {31{a_in[31]}} ) | ( {31{ap.sll}} & a_in[30:0] ) | ( {31{ap_rol}} & a_in[30:0] ) | ( {31{ap_ror}} & a_in[30:0] ); - assign shift_long[62:0] = ( shift_extend[62:0] >> shift_amount[4:0] ); // 62-32 unused + assign shift_long[62:0] = (shift_extend[62:0] >> shift_amount[4:0]); // 62-32 unused - assign sout[31:0] = shift_long[31:0] & shift_mask[31:0]; + assign sout[31:0] = shift_long[31:0] & shift_mask[31:0]; - // * * * * * * * * * * * * * * * * * * BitManip : CLZ,CTZ * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : CLZ,CTZ * * * * * * * * * * * * * * * * * * - logic bitmanip_clz_ctz_sel; - logic [31:0] bitmanip_a_reverse_ff; - logic [31:0] bitmanip_lzd_in; - logic [5:0] bitmanip_dw_lzd_enc; - logic [5:0] bitmanip_clz_ctz_result; + logic bitmanip_clz_ctz_sel; + logic [31:0] bitmanip_a_reverse_ff; + logic [31:0] bitmanip_lzd_in; + logic [ 5:0] bitmanip_dw_lzd_enc; + logic [ 5:0] bitmanip_clz_ctz_result; - assign bitmanip_clz_ctz_sel = ap_clz | ap_ctz; + assign bitmanip_clz_ctz_sel = ap_clz | ap_ctz; - assign bitmanip_a_reverse_ff[31:0] = {a_in[0], a_in[1], a_in[2], a_in[3], a_in[4], a_in[5], a_in[6], a_in[7], - a_in[8], a_in[9], a_in[10], a_in[11], a_in[12], a_in[13], a_in[14], a_in[15], - a_in[16], 
a_in[17], a_in[18], a_in[19], a_in[20], a_in[21], a_in[22], a_in[23], - a_in[24], a_in[25], a_in[26], a_in[27], a_in[28], a_in[29], a_in[30], a_in[31]}; + assign bitmanip_a_reverse_ff[31:0] = { + a_in[0], + a_in[1], + a_in[2], + a_in[3], + a_in[4], + a_in[5], + a_in[6], + a_in[7], + a_in[8], + a_in[9], + a_in[10], + a_in[11], + a_in[12], + a_in[13], + a_in[14], + a_in[15], + a_in[16], + a_in[17], + a_in[18], + a_in[19], + a_in[20], + a_in[21], + a_in[22], + a_in[23], + a_in[24], + a_in[25], + a_in[26], + a_in[27], + a_in[28], + a_in[29], + a_in[30], + a_in[31] + }; - assign bitmanip_lzd_in[31:0] = ( {32{ap_clz}} & a_in[31:0] ) | + assign bitmanip_lzd_in[31:0] = ( {32{ap_clz}} & a_in[31:0] ) | ( {32{ap_ctz}} & bitmanip_a_reverse_ff[31:0]); - logic [31:0] bitmanip_lzd_os; - integer i; - logic found; + logic [31:0] bitmanip_lzd_os; + integer i; + logic found; - always_comb - begin - bitmanip_lzd_os[31:0] = bitmanip_lzd_in[31:0]; - bitmanip_dw_lzd_enc[5:0]= 6'b0; - found = 1'b0; + always_comb begin + bitmanip_lzd_os[31:0] = bitmanip_lzd_in[31:0]; + bitmanip_dw_lzd_enc[5:0] = 6'b0; + found = 1'b0; - for (int i=0; i<32 && found==0; i++) begin - if (bitmanip_lzd_os[31] == 1'b0) begin - bitmanip_dw_lzd_enc[5:0]= bitmanip_dw_lzd_enc[5:0] + 6'b00_0001; - bitmanip_lzd_os[31:0] = bitmanip_lzd_os[31:0] << 1; - end - else - found=1'b1; - end - end + for (int i = 0; i < 32 && found == 0; i++) begin + if (bitmanip_lzd_os[31] == 1'b0) begin + bitmanip_dw_lzd_enc[5:0] = bitmanip_dw_lzd_enc[5:0] + 6'b00_0001; + bitmanip_lzd_os[31:0] = bitmanip_lzd_os[31:0] << 1; + end else found = 1'b1; + end + end - assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )}; + assign bitmanip_clz_ctz_result[5:0] = {6{bitmanip_clz_ctz_sel}} & {bitmanip_dw_lzd_enc[5],( {5{~bitmanip_dw_lzd_enc[5]}} & bitmanip_dw_lzd_enc[4:0] )}; - // * * * * * * * * * * * * * * * * * * BitManip : CPOP * * * * * * * * * * * * * * 
* * * * + // * * * * * * * * * * * * * * * * * * BitManip : CPOP * * * * * * * * * * * * * * * * * * - logic [5:0] bitmanip_cpop; - logic [5:0] bitmanip_cpop_result; + logic [5:0] bitmanip_cpop; + logic [5:0] bitmanip_cpop_result; - integer bitmanip_cpop_i; + integer bitmanip_cpop_i; - always_comb - begin - bitmanip_cpop[5:0] = 6'b0; + always_comb begin + bitmanip_cpop[5:0] = 6'b0; - for (bitmanip_cpop_i=0; bitmanip_cpop_i<32; bitmanip_cpop_i++) - begin - bitmanip_cpop[5:0] = bitmanip_cpop[5:0] + {5'b0,a_in[bitmanip_cpop_i]}; - end // FOR bitmanip_cpop_i - end // ALWAYS_COMB + for (bitmanip_cpop_i = 0; bitmanip_cpop_i < 32; bitmanip_cpop_i++) begin + bitmanip_cpop[5:0] = bitmanip_cpop[5:0] + {5'b0, a_in[bitmanip_cpop_i]}; + end // FOR bitmanip_cpop_i + end // ALWAYS_COMB - assign bitmanip_cpop_result[5:0] = {6{ap_cpop}} & bitmanip_cpop[5:0]; + assign bitmanip_cpop_result[5:0] = {6{ap_cpop}} & bitmanip_cpop[5:0]; - // * * * * * * * * * * * * * * * * * * BitManip : SEXT_B,SEXT_H * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : SEXT_B,SEXT_H * * * * * * * * * * * * * * * * * - logic [31:0] bitmanip_sext_result; + logic [31:0] bitmanip_sext_result; - assign bitmanip_sext_result[31:0] = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0] } ) | + assign bitmanip_sext_result[31:0] = ( {32{ap_sext_b}} & { {24{a_in[7]}} ,a_in[7:0] } ) | ( {32{ap_sext_h}} & { {16{a_in[15]}},a_in[15:0] } ); - // * * * * * * * * * * * * * * * * * * BitManip : MIN,MAX,MINU,MAXU * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : MIN,MAX,MINU,MAXU * * * * * * * * * * * * * * * - logic bitmanip_minmax_sel; - logic [31:0] bitmanip_minmax_result; + logic bitmanip_minmax_sel; + logic [31:0] bitmanip_minmax_result; - assign bitmanip_minmax_sel = ap_min | ap_max; + assign bitmanip_minmax_sel = ap_min | ap_max; - logic bitmanip_minmax_sel_a; + logic bitmanip_minmax_sel_a; - assign bitmanip_minmax_sel_a = ge ^ ap_min; + assign 
bitmanip_minmax_sel_a = ge ^ ap_min; - assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel & bitmanip_minmax_sel_a}} & a_in[31:0]) | + assign bitmanip_minmax_result[31:0] = ({32{bitmanip_minmax_sel & bitmanip_minmax_sel_a}} & a_in[31:0]) | ({32{bitmanip_minmax_sel & ~bitmanip_minmax_sel_a}} & b_in[31:0]); - // * * * * * * * * * * * * * * * * * * BitManip : PACK, PACKU, PACKH * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : PACK, PACKU, PACKH * * * * * * * * * * * * * * * - logic [31:0] bitmanip_pack_result; - logic [31:0] bitmanip_packu_result; - logic [31:0] bitmanip_packh_result; + logic [31:0] bitmanip_pack_result; + logic [31:0] bitmanip_packu_result; + logic [31:0] bitmanip_packh_result; - assign bitmanip_pack_result[31:0] = {32{ap_pack}} & {b_in[15:0], a_in[15:0]}; - assign bitmanip_packu_result[31:0] = {32{ap_packu}} & {b_in[31:16],a_in[31:16]}; - assign bitmanip_packh_result[31:0] = {32{ap_packh}} & {16'b0,b_in[7:0],a_in[7:0]}; + assign bitmanip_pack_result[31:0] = {32{ap_pack}} & {b_in[15:0], a_in[15:0]}; + assign bitmanip_packu_result[31:0] = {32{ap_packu}} & {b_in[31:16], a_in[31:16]}; + assign bitmanip_packh_result[31:0] = {32{ap_packh}} & {16'b0, b_in[7:0], a_in[7:0]}; - // * * * * * * * * * * * * * * * * * * BitManip : REV, ORC_B * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : REV, ORC_B * * * * * * * * * * * * * * * * * * - logic [31:0] bitmanip_rev8_result; - logic [31:0] bitmanip_orc_b_result; + logic [31:0] bitmanip_rev8_result; + logic [31:0] bitmanip_orc_b_result; - assign bitmanip_rev8_result[31:0] = {32{ap_rev8}} & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]}; + assign bitmanip_rev8_result[31:0] = {32{ap_rev8}} & {a_in[7:0],a_in[15:8],a_in[23:16],a_in[31:24]}; -// uint32_t gorc32(uint32_t rs1, uint32_t rs2) -// { -// uint32_t x = rs1; -// int shamt = rs2 & 31; ORC.B ORC16 -// if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); 1 0 -// if 
(shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); 1 0 -// if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); 1 0 -// if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); 0 0 -// if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); 0 1 -// return x; -// } + // uint32_t gorc32(uint32_t rs1, uint32_t rs2) + // { + // uint32_t x = rs1; + // int shamt = rs2 & 31; ORC.B ORC16 + // if (shamt & 1) x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); 1 0 + // if (shamt & 2) x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); 1 0 + // if (shamt & 4) x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); 1 0 + // if (shamt & 8) x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); 0 0 + // if (shamt & 16) x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); 0 1 + // return x; + // } -// BEFORE 31 , 30 , 29 , 28 , 27 , 26, 25, 24 -// shamt[0] b = a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24 -// shamt[1] c = b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24 -// shamt[2] d = c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24 -// -// Expand d31 = c31 | c27; -// = b31 | b29 | b27 | b25; -// = a31|a30 | a29|a28 | a27|a26 | a25|a24 + // BEFORE 31 , 30 , 29 , 28 , 27 , 26, 25, 24 + // shamt[0] b = a31|a30,a31|a30,a29|a28,a29|a28, a27|a26,a27|a26,a25|a24,a25|a24 + // shamt[1] c = b31|b29,b30|b28,b31|b29,b30|b28, b27|b25,b26|b24,b27|b25,b26|b24 + // shamt[2] d = c31|c27,c30|c26,c29|c25,c28|c24, c31|c27,c30|c26,c29|c25,c28|c24 + // + // Expand d31 = c31 | c27; + // = b31 | b29 | b27 | b25; + // = a31|a30 | a29|a28 | a27|a26 | a25|a24 - assign bitmanip_orc_b_result[31:0] = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} }; + assign bitmanip_orc_b_result[31:0] = {32{ap_orc_b}} & { {8{| a_in[31:24]}}, {8{| a_in[23:16]}}, {8{| a_in[15:8]}}, {8{| a_in[7:0]}} }; - // * * * * * * * * * * * * * * * * * * 
BitManip : ZBSET, ZBCLR, ZBINV * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : ZBSET, ZBCLR, ZBINV * * * * * * * * * * * * * * - logic [31:0] bitmanip_sb_1hot; - logic [31:0] bitmanip_sb_data; + logic [31:0] bitmanip_sb_1hot; + logic [31:0] bitmanip_sb_data; - assign bitmanip_sb_1hot[31:0] = ( 32'h00000001 << b_in[4:0] ); + assign bitmanip_sb_1hot[31:0] = (32'h00000001 << b_in[4:0]); - assign bitmanip_sb_data[31:0] = ( {32{ap_bset}} & ( a_in[31:0] | bitmanip_sb_1hot[31:0]) ) | + assign bitmanip_sb_data[31:0] = ( {32{ap_bset}} & ( a_in[31:0] | bitmanip_sb_1hot[31:0]) ) | ( {32{ap_bclr}} & ( a_in[31:0] & ~bitmanip_sb_1hot[31:0]) ) | ( {32{ap_binv}} & ( a_in[31:0] ^ bitmanip_sb_1hot[31:0]) ); @@ -467,16 +484,16 @@ import el2_pkg::*; - assign sel_shift = ap.sll | ap.srl | ap.sra | ap_rol | ap_ror; - assign sel_adder = (ap.add | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max; - assign sel_pc = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret; - assign csr_write_data[31:0]= (ap.csr_imm) ? b_in[31:0] : a_in[31:0]; + assign sel_shift = ap.sll | ap.srl | ap.sra | ap_rol | ap_ror; + assign sel_adder = (ap.add | ap.sub | ap_zba) & ~ap.slt & ~ap_min & ~ap_max; + assign sel_pc = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret; + assign csr_write_data[31:0] = (ap.csr_imm) ? 
b_in[31:0] : a_in[31:0]; - assign slt_one = ap.slt & lt; + assign slt_one = ap.slt & lt; - assign result[31:0] = lout[31:0] | + assign result[31:0] = lout[31:0] | ({32{sel_shift}} & sout[31:0] ) | ({32{sel_adder}} & aout[31:0] ) | ({32{sel_pc}} & {pcout[31:1],1'b0} ) | @@ -496,81 +513,74 @@ import el2_pkg::*; - // *** branch handling *** + // *** branch handling *** - assign any_jal = ap.jal | - pp_in.pcall | - pp_in.pja | - pp_in.pret; + assign any_jal = ap.jal | pp_in.pcall | pp_in.pja | pp_in.pret; - assign actual_taken = (ap.beq & eq) | - (ap.bne & ne) | - (ap.blt & lt) | - (ap.bge & ge) | - any_jal; + assign actual_taken = (ap.beq & eq) | (ap.bne & ne) | (ap.blt & lt) | (ap.bge & ge) | any_jal; - // for a conditional br pcout[] will be the opposite of the branch prediction - // for jal or pcall, it will be the link address pc+2 or pc+4 + // for a conditional br pcout[] will be the opposite of the branch prediction + // for jal or pcall, it will be the link address pc+2 or pc+4 - rvbradder ibradder ( - .pc ( pc_in[31:1] ), - .offset ( brimm_in[12:1] ), - .dout ( pcout[31:1] )); + rvbradder ibradder ( + .pc (pc_in[31:1]), + .offset(brimm_in[12:1]), + .dout (pcout[31:1]) + ); - // pred_correct is for the npc logic - // pred_correct indicates not to use the flush_path - // for any_jal pred_correct==0 + // pred_correct is for the npc logic + // pred_correct indicates not to use the flush_path + // for any_jal pred_correct==0 - assign pred_correct_out = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) | + assign pred_correct_out = (valid_in & ap.predict_nt & ~actual_taken & ~any_jal) | (valid_in & ap.predict_t & actual_taken & ~any_jal); - // for any_jal adder output is the flush path - assign flush_path_out[31:1]= (any_jal) ? aout[31:1] : pcout[31:1]; + // for any_jal adder output is the flush path + assign flush_path_out[31:1] = (any_jal) ? 
aout[31:1] : pcout[31:1]; - // pcall and pret are included here - assign cond_mispredict = (ap.predict_t & ~actual_taken) | - (ap.predict_nt & actual_taken); + // pcall and pret are included here + assign cond_mispredict = (ap.predict_t & ~actual_taken) | (ap.predict_nt & actual_taken); - // target mispredicts on ret's + // target mispredicts on ret's - assign target_mispredict = pp_in.pret & (pp_in.prett[31:1] != aout[31:1]); + assign target_mispredict = pp_in.pret & (pp_in.prett[31:1] != aout[31:1]); - assign flush_upper_out = (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x & ~flush_lower_r; - assign flush_final_out = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) | flush_lower_r; + assign flush_upper_out = (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x & ~flush_lower_r; + assign flush_final_out = ( (ap.jal | cond_mispredict | target_mispredict) & valid_in & ~flush_upper_x ) | flush_lower_r; - // .i 3 - // .o 2 - // .ilb hist[1] hist[0] taken - // .ob newhist[1] newhist[0] - // .type fd - // - // 00 0 01 - // 01 0 01 - // 10 0 00 - // 11 0 10 - // 00 1 10 - // 01 1 00 - // 10 1 11 - // 11 1 11 + // .i 3 + // .o 2 + // .ilb hist[1] hist[0] taken + // .ob newhist[1] newhist[0] + // .type fd + // + // 00 0 01 + // 01 0 01 + // 10 0 00 + // 11 0 10 + // 00 1 10 + // 01 1 00 + // 10 1 11 + // 11 1 11 - assign newhist[1] = ( pp_in.hist[1] & pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken); - assign newhist[0] = (~pp_in.hist[1] & ~actual_taken) | ( pp_in.hist[1] & actual_taken); + assign newhist[1] = (pp_in.hist[1] & pp_in.hist[0]) | (~pp_in.hist[0] & actual_taken); + assign newhist[0] = (~pp_in.hist[1] & ~actual_taken) | (pp_in.hist[1] & actual_taken); - always_comb begin - predict_p_out = pp_in; + always_comb begin + predict_p_out = pp_in; - predict_p_out.misp = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict); - predict_p_out.ataken = actual_taken; - 
predict_p_out.hist[1] = newhist[1]; - predict_p_out.hist[0] = newhist[0]; + predict_p_out.misp = ~flush_upper_x & ~flush_lower_r & (cond_mispredict | target_mispredict); + predict_p_out.ataken = actual_taken; + predict_p_out.hist[1] = newhist[1]; + predict_p_out.hist[0] = newhist[0]; - end + end -endmodule // el2_exu_alu_ctl +endmodule // el2_exu_alu_ctl diff --git a/Flow/design/exu/el2_exu_div_ctl.sv b/Flow/design/exu/el2_exu_div_ctl.sv index 43d1021..5f97c15 100644 --- a/Flow/design/exu/el2_exu_div_ctl.sv +++ b/Flow/design/exu/el2_exu_div_ctl.sv @@ -15,120 +15,119 @@ module el2_exu_div_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" -) - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode + `include "el2_param.vh" +) ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input el2_div_pkt_t dp, // valid, sign, rem - input logic [31:0] dividend, // Numerator - input logic [31:0] divisor, // Denominator + input el2_div_pkt_t dp, // valid, sign, rem + input logic [31:0] dividend, // Numerator + input logic [31:0] divisor, // Denominator - input logic cancel, // Cancel divide + input logic cancel, // Cancel divide - output logic finish_dly, // Finish to match data - output logic [31:0] out // Result - ); + output logic finish_dly, // Finish to match data + output logic [31:0] out // Result +); - logic [31:0] out_raw; + logic [31:0] out_raw; - assign out[31:0] = {32{finish_dly}} & out_raw[31:0]; // Qualification added to quiet result bus while divide is iterating + assign out[31:0] = {32{finish_dly}} & out_raw[31:0]; // Qualification added to quiet result bus while divide is iterating - if (pt.DIV_NEW == 0) - begin - el2_exu_div_existing_1bit_cheapshortq i_existing_1bit_div_cheapshortq ( - .clk ( clk ), // I - .rst_l ( rst_l ), // I - .scan_mode ( scan_mode ), // I - .cancel ( cancel ), // I - .valid_in ( dp.valid ), // I - .signed_in 
(~dp.unsign ), // I - .rem_in ( dp.rem ), // I - .dividend_in ( dividend[31:0] ), // I - .divisor_in ( divisor[31:0] ), // I - .valid_out ( finish_dly ), // O - .data_out ( out_raw[31:0] )); // O - end + if (pt.DIV_NEW == 0) begin + el2_exu_div_existing_1bit_cheapshortq i_existing_1bit_div_cheapshortq ( + .clk (clk), // I + .rst_l (rst_l), // I + .scan_mode (scan_mode), // I + .cancel (cancel), // I + .valid_in (dp.valid), // I + .signed_in (~dp.unsign), // I + .rem_in (dp.rem), // I + .dividend_in(dividend[31:0]), // I + .divisor_in (divisor[31:0]), // I + .valid_out (finish_dly), // O + .data_out (out_raw[31:0]) + ); // O + end - if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 1) ) - begin - el2_exu_div_new_1bit_fullshortq i_new_1bit_div_fullshortq ( - .clk ( clk ), // I - .rst_l ( rst_l ), // I - .scan_mode ( scan_mode ), // I - .cancel ( cancel ), // I - .valid_in ( dp.valid ), // I - .signed_in (~dp.unsign ), // I - .rem_in ( dp.rem ), // I - .dividend_in ( dividend[31:0] ), // I - .divisor_in ( divisor[31:0] ), // I - .valid_out ( finish_dly ), // O - .data_out ( out_raw[31:0] )); // O - end + if ((pt.DIV_NEW == 1) & (pt.DIV_BIT == 1)) begin + el2_exu_div_new_1bit_fullshortq i_new_1bit_div_fullshortq ( + .clk (clk), // I + .rst_l (rst_l), // I + .scan_mode (scan_mode), // I + .cancel (cancel), // I + .valid_in (dp.valid), // I + .signed_in (~dp.unsign), // I + .rem_in (dp.rem), // I + .dividend_in(dividend[31:0]), // I + .divisor_in (divisor[31:0]), // I + .valid_out (finish_dly), // O + .data_out (out_raw[31:0]) + ); // O + end - if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 2) ) - begin - el2_exu_div_new_2bit_fullshortq i_new_2bit_div_fullshortq ( - .clk ( clk ), // I - .rst_l ( rst_l ), // I - .scan_mode ( scan_mode ), // I - .cancel ( cancel ), // I - .valid_in ( dp.valid ), // I - .signed_in (~dp.unsign ), // I - .rem_in ( dp.rem ), // I - .dividend_in ( dividend[31:0] ), // I - .divisor_in ( divisor[31:0] ), // I - .valid_out ( finish_dly ), // O - .data_out ( 
out_raw[31:0] )); // O - end + if ((pt.DIV_NEW == 1) & (pt.DIV_BIT == 2)) begin + el2_exu_div_new_2bit_fullshortq i_new_2bit_div_fullshortq ( + .clk (clk), // I + .rst_l (rst_l), // I + .scan_mode (scan_mode), // I + .cancel (cancel), // I + .valid_in (dp.valid), // I + .signed_in (~dp.unsign), // I + .rem_in (dp.rem), // I + .dividend_in(dividend[31:0]), // I + .divisor_in (divisor[31:0]), // I + .valid_out (finish_dly), // O + .data_out (out_raw[31:0]) + ); // O + end - if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 3) ) - begin - el2_exu_div_new_3bit_fullshortq i_new_3bit_div_fullshortq ( - .clk ( clk ), // I - .rst_l ( rst_l ), // I - .scan_mode ( scan_mode ), // I - .cancel ( cancel ), // I - .valid_in ( dp.valid ), // I - .signed_in (~dp.unsign ), // I - .rem_in ( dp.rem ), // I - .dividend_in ( dividend[31:0] ), // I - .divisor_in ( divisor[31:0] ), // I - .valid_out ( finish_dly ), // O - .data_out ( out_raw[31:0] )); // O - end + if ((pt.DIV_NEW == 1) & (pt.DIV_BIT == 3)) begin + el2_exu_div_new_3bit_fullshortq i_new_3bit_div_fullshortq ( + .clk (clk), // I + .rst_l (rst_l), // I + .scan_mode (scan_mode), // I + .cancel (cancel), // I + .valid_in (dp.valid), // I + .signed_in (~dp.unsign), // I + .rem_in (dp.rem), // I + .dividend_in(dividend[31:0]), // I + .divisor_in (divisor[31:0]), // I + .valid_out (finish_dly), // O + .data_out (out_raw[31:0]) + ); // O + end - if ( (pt.DIV_NEW == 1) & (pt.DIV_BIT == 4) ) - begin - el2_exu_div_new_4bit_fullshortq i_new_4bit_div_fullshortq ( - .clk ( clk ), // I - .rst_l ( rst_l ), // I - .scan_mode ( scan_mode ), // I - .cancel ( cancel ), // I - .valid_in ( dp.valid ), // I - .signed_in (~dp.unsign ), // I - .rem_in ( dp.rem ), // I - .dividend_in ( dividend[31:0] ), // I - .divisor_in ( divisor[31:0] ), // I - .valid_out ( finish_dly ), // O - .data_out ( out_raw[31:0] )); // O - end + if ((pt.DIV_NEW == 1) & (pt.DIV_BIT == 4)) begin + el2_exu_div_new_4bit_fullshortq i_new_4bit_div_fullshortq ( + .clk (clk), // I + .rst_l 
(rst_l), // I + .scan_mode (scan_mode), // I + .cancel (cancel), // I + .valid_in (dp.valid), // I + .signed_in (~dp.unsign), // I + .rem_in (dp.rem), // I + .dividend_in(dividend[31:0]), // I + .divisor_in (divisor[31:0]), // I + .valid_out (finish_dly), // O + .data_out (out_raw[31:0]) + ); // O + end -endmodule // el2_exu_div_ctl +endmodule // el2_exu_div_ctl @@ -136,129 +135,164 @@ endmodule // el2_exu_div_ctl // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_existing_1bit_cheapshortq - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode +module el2_exu_div_existing_1bit_cheapshortq ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input logic cancel, // Flush pipeline - input logic valid_in, - input logic signed_in, - input logic rem_in, - input logic [31:0] dividend_in, - input logic [31:0] divisor_in, + input logic cancel, // Flush pipeline + input logic valid_in, + input logic signed_in, + input logic rem_in, + input logic [31:0] dividend_in, + input logic [31:0] divisor_in, - output logic valid_out, - output logic [31:0] data_out + output logic valid_out, + output logic [31:0] data_out +); + + + logic div_clken; + logic run_in, run_state; + logic [5:0] count_in, count; + logic [32:0] m_ff; + logic qff_enable; + logic aff_enable; + logic [32:0] q_in, q_ff; + logic [32:0] a_in, a_ff; + logic [32:0] m_eff; + logic [32:0] a_shift; + logic dividend_neg_ff, divisor_neg_ff; + logic [31:0] dividend_comp; + logic [31:0] dividend_eff; + logic [31:0] q_ff_comp; + logic [31:0] q_ff_eff; + logic [31:0] a_ff_comp; + logic [31:0] a_ff_eff; + logic sign_ff, sign_eff; + logic rem_ff; + logic add; + logic [32:0] a_eff; + logic [64:0] a_eff_shift; + logic rem_correct; + logic valid_ff_x; + logic valid_x; + logic finish; + logic finish_ff; + + logic smallnum_case, smallnum_case_ff; + logic [3:0] smallnum, 
smallnum_ff; + logic m_already_comp; + + logic [ 4:0] a_cls; + logic [ 4:0] b_cls; + logic [ 5:0] shortq_shift; + logic [ 5:0] shortq_shift_ff; + logic [ 5:0] shortq; + logic shortq_enable; + logic shortq_enable_ff; + logic [32:0] short_dividend; + logic [ 3:0] shortq_raw; + logic [ 3:0] shortq_shift_xx; + + + + rvdffe #(23) i_misc_ff ( + .*, + .clk(clk), + .en(div_clken), + .din({ + valid_in & ~cancel, + finish & ~cancel, + run_in, + count_in[5:0], + (valid_in & dividend_in[31]) | (~valid_in & dividend_neg_ff), + (valid_in & divisor_in[31]) | (~valid_in & divisor_neg_ff), + (valid_in & sign_eff) | (~valid_in & sign_ff), + (valid_in & rem_in) | (~valid_in & rem_ff), + smallnum_case, + smallnum[3:0], + shortq_enable, + shortq_shift[3:0] + }), + + .dout({ + valid_ff_x, + finish_ff, + run_state, + count[5:0], + dividend_neg_ff, + divisor_neg_ff, + sign_ff, + rem_ff, + smallnum_case_ff, + smallnum_ff[3:0], + shortq_enable_ff, + shortq_shift_xx[3:0] + }) ); - logic div_clken; - logic run_in, run_state; - logic [5:0] count_in, count; - logic [32:0] m_ff; - logic qff_enable; - logic aff_enable; - logic [32:0] q_in, q_ff; - logic [32:0] a_in, a_ff; - logic [32:0] m_eff; - logic [32:0] a_shift; - logic dividend_neg_ff, divisor_neg_ff; - logic [31:0] dividend_comp; - logic [31:0] dividend_eff; - logic [31:0] q_ff_comp; - logic [31:0] q_ff_eff; - logic [31:0] a_ff_comp; - logic [31:0] a_ff_eff; - logic sign_ff, sign_eff; - logic rem_ff; - logic add; - logic [32:0] a_eff; - logic [64:0] a_eff_shift; - logic rem_correct; - logic valid_ff_x; - logic valid_x; - logic finish; - logic finish_ff; + rvdffe #(33) mff ( + .*, + .clk (clk), + .en (valid_in), + .din ({signed_in & divisor_in[31], divisor_in[31:0]}), + .dout(m_ff[32:0]) + ); + rvdffe #(33) qff ( + .*, + .clk (clk), + .en (qff_enable), + .din (q_in[32:0]), + .dout(q_ff[32:0]) + ); + rvdffe #(33) aff ( + .*, + .clk (clk), + .en (aff_enable), + .din (a_in[32:0]), + .dout(a_ff[32:0]) + ); - logic smallnum_case, 
smallnum_case_ff; - logic [3:0] smallnum, smallnum_ff; - logic m_already_comp; - - logic [4:0] a_cls; - logic [4:0] b_cls; - logic [5:0] shortq_shift; - logic [5:0] shortq_shift_ff; - logic [5:0] shortq; - logic shortq_enable; - logic shortq_enable_ff; - logic [32:0] short_dividend; - logic [3:0] shortq_raw; - logic [3:0] shortq_shift_xx; + rvtwoscomp #(32) i_dividend_comp ( + .din (q_ff[31:0]), + .dout(dividend_comp[31:0]) + ); + rvtwoscomp #(32) i_q_ff_comp ( + .din (q_ff[31:0]), + .dout(q_ff_comp[31:0]) + ); + rvtwoscomp #(32) i_a_ff_comp ( + .din (a_ff[31:0]), + .dout(a_ff_comp[31:0]) + ); - - rvdffe #(23) i_misc_ff (.*, .clk(clk), .en(div_clken), .din ({valid_in & ~cancel, - finish & ~cancel, - run_in, - count_in[5:0], - (valid_in & dividend_in[31]) | (~valid_in & dividend_neg_ff), - (valid_in & divisor_in[31] ) | (~valid_in & divisor_neg_ff ), - (valid_in & sign_eff ) | (~valid_in & sign_ff ), - (valid_in & rem_in ) | (~valid_in & rem_ff ), - smallnum_case, - smallnum[3:0], - shortq_enable, - shortq_shift[3:0]}), - - .dout({valid_ff_x, - finish_ff, - run_state, - count[5:0], - dividend_neg_ff, - divisor_neg_ff, - sign_ff, - rem_ff, - smallnum_case_ff, - smallnum_ff[3:0], - shortq_enable_ff, - shortq_shift_xx[3:0]})); + assign valid_x = valid_ff_x & ~cancel; - rvdffe #(33) mff (.*, .clk(clk), .en(valid_in), .din({signed_in & divisor_in[31], divisor_in[31:0]}), .dout(m_ff[32:0])); - rvdffe #(33) qff (.*, .clk(clk), .en(qff_enable), .din(q_in[32:0]), .dout(q_ff[32:0])); - rvdffe #(33) aff (.*, .clk(clk), .en(aff_enable), .din(a_in[32:0]), .dout(a_ff[32:0])); + // START - short circuit logic for small numbers {{ - rvtwoscomp #(32) i_dividend_comp (.din(q_ff[31:0]), .dout(dividend_comp[31:0])); - rvtwoscomp #(32) i_q_ff_comp (.din(q_ff[31:0]), .dout(q_ff_comp[31:0])); - rvtwoscomp #(32) i_a_ff_comp (.din(a_ff[31:0]), .dout(a_ff_comp[31:0])); + // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0) + // to generate espresso equations: + // 1. 
smalldiv > smalldiv.e + // 2. espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv - - assign valid_x = valid_ff_x & ~cancel; - - - // START - short circuit logic for small numbers {{ - - // small number divides - any 4b / 4b is done in 1 cycle (divisor != 0) - // to generate espresso equations: - // 1. smalldiv > smalldiv.e - // 2. espresso -Dso -oeqntott smalldiv.e | addassign > smalldiv - - // smallnum case does not cover divide by 0 - assign smallnum_case = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x) | + // smallnum case does not cover divide by 0 + assign smallnum_case = ((q_ff[31:4] == 28'b0) & (m_ff[31:4] == 28'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x) | ((q_ff[31:0] == 32'b0) & (m_ff[31:0] != 32'b0) & ~rem_ff & valid_x); - assign smallnum[3] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ); + assign smallnum[3] = (q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1]); - assign smallnum[2] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | + assign smallnum[2] = ( q_ff[3] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & q_ff[2] & ~m_ff[3] & ~m_ff[2] ); - assign smallnum[1] = ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | + assign smallnum[1] = ( q_ff[2] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[1] ) | ( q_ff[3] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[3] & ~q_ff[2] & ~m_ff[3] & ~m_ff[2] & m_ff[1] & m_ff[0]) | @@ -269,7 +303,7 @@ module el2_exu_div_existing_1bit_cheapshortq ( q_ff[3] & q_ff[2] & q_ff[1] & ~m_ff[3] & m_ff[2] ); - assign smallnum[0] = ( q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) | + assign smallnum[0] = ( q_ff[2] & q_ff[1] & q_ff[0] & ~m_ff[3] & ~m_ff[1] ) | ( q_ff[3] & ~q_ff[2] & q_ff[0] & ~m_ff[3] & m_ff[1] & m_ff[0]) | ( q_ff[2] & ~m_ff[3] & ~m_ff[1] & ~m_ff[0]) | ( q_ff[1] & ~m_ff[3] & ~m_ff[2] & ~m_ff[0]) | @@ -297,146 +331,146 @@ module el2_exu_div_existing_1bit_cheapshortq ( q_ff[3] & q_ff[2] & q_ff[1] & 
q_ff[0] & m_ff[3] ); - // END - short circuit logic for small numbers }} + // END - short circuit logic for small numbers }} - // *** Start Short Q *** {{ + // *** Start Short Q *** {{ - assign short_dividend[31:0] = q_ff[31:0]; - assign short_dividend[32] = sign_ff & q_ff[31]; + assign short_dividend[31:0] = q_ff[31:0]; + assign short_dividend[32] = sign_ff & q_ff[31]; - // A B - // 210 210 SH - // --- --- -- - // 1xx 000 0 - // 1xx 001 8 - // 1xx 01x 16 - // 1xx 1xx 24 - // 01x 000 8 - // 01x 001 16 - // 01x 01x 24 - // 01x 1xx 32 - // 001 000 16 - // 001 001 24 - // 001 01x 32 - // 001 1xx 32 - // 000 000 24 - // 000 001 32 - // 000 01x 32 - // 000 1xx 32 + // A B + // 210 210 SH + // --- --- -- + // 1xx 000 0 + // 1xx 001 8 + // 1xx 01x 16 + // 1xx 1xx 24 + // 01x 000 8 + // 01x 001 16 + // 01x 01x 24 + // 01x 1xx 32 + // 001 000 16 + // 001 001 24 + // 001 01x 32 + // 001 1xx 32 + // 000 000 24 + // 000 001 32 + // 000 01x 32 + // 000 1xx 32 - assign a_cls[4:3] = 2'b0; - assign a_cls[2] = (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}})); - assign a_cls[1] = (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}})); - assign a_cls[0] = (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}})); + assign a_cls[4:3] = 2'b0; + assign a_cls[2] = (~short_dividend[32] & (short_dividend[31:24] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[31:23] != {9{1'b1}})); + assign a_cls[1] = (~short_dividend[32] & (short_dividend[23:16] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[22:15] != {8{1'b1}})); + assign a_cls[0] = (~short_dividend[32] & (short_dividend[15:08] != {8{1'b0}})) | ( short_dividend[32] & (short_dividend[14:07] != {8{1'b1}})); - assign b_cls[4:3] = 2'b0; - assign b_cls[2] = (~m_ff[32] & ( m_ff[31:24] != {8{1'b0}})) | ( m_ff[32] & ( 
m_ff[31:24] != {8{1'b1}})); - assign b_cls[1] = (~m_ff[32] & ( m_ff[23:16] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[23:16] != {8{1'b1}})); - assign b_cls[0] = (~m_ff[32] & ( m_ff[15:08] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[15:08] != {8{1'b1}})); + assign b_cls[4:3] = 2'b0; + assign b_cls[2] = (~m_ff[32] & ( m_ff[31:24] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[31:24] != {8{1'b1}})); + assign b_cls[1] = (~m_ff[32] & ( m_ff[23:16] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[23:16] != {8{1'b1}})); + assign b_cls[0] = (~m_ff[32] & ( m_ff[15:08] != {8{1'b0}})) | ( m_ff[32] & ( m_ff[15:08] != {8{1'b1}})); - assign shortq_raw[3] = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 32 + assign shortq_raw[3] = ( (a_cls[2:1] == 2'b01 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 32 ( (a_cls[2:0] == 3'b001) & (b_cls[2] == 1'b1 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2] == 1'b1 ) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b001) ); - assign shortq_raw[2] = ( (a_cls[2] == 1'b1 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 24 + assign shortq_raw[2] = ( (a_cls[2] == 1'b1 ) & (b_cls[2] == 1'b1 ) ) | // Shift by 24 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:1] == 2'b01 ) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b001) ) | ( (a_cls[2:0] == 3'b000) & (b_cls[2:0] == 3'b000) ); - assign shortq_raw[1] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:1] == 2'b01 ) ) | // Shift by 16 + assign shortq_raw[1] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:1] == 2'b01 ) ) | // Shift by 16 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b001) ) | ( (a_cls[2:0] == 3'b001) & (b_cls[2:0] == 3'b000) ); - assign shortq_raw[0] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:0] == 3'b001) ) | // Shift by 8 + assign shortq_raw[0] = ( (a_cls[2] == 1'b1 ) & (b_cls[2:0] == 3'b001) ) | // Shift by 8 ( (a_cls[2:1] == 2'b01 ) & (b_cls[2:0] == 3'b000) ); - assign shortq_enable = valid_ff_x & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0); + assign 
shortq_enable = valid_ff_x & (m_ff[31:0] != 32'b0) & (shortq_raw[3:0] != 4'b0); - assign shortq_shift[3:0] = ({4{shortq_enable}} & shortq_raw[3:0]); + assign shortq_shift[3:0] = ({4{shortq_enable}} & shortq_raw[3:0]); - assign shortq[5:0] = 6'b0; - assign shortq_shift[5:4] = 2'b0; - assign shortq_shift_ff[5] = 1'b0; + assign shortq[5:0] = 6'b0; + assign shortq_shift[5:4] = 2'b0; + assign shortq_shift_ff[5] = 1'b0; - assign shortq_shift_ff[4:0] = ({5{shortq_shift_xx[3]}} & 5'b1_1111) | // 31 - ({5{shortq_shift_xx[2]}} & 5'b1_1000) | // 24 - ({5{shortq_shift_xx[1]}} & 5'b1_0000) | // 16 - ({5{shortq_shift_xx[0]}} & 5'b0_1000); // 8 + assign shortq_shift_ff[4:0] = ({5{shortq_shift_xx[3]}} & 5'b1_1111) | // 31 + ({5{shortq_shift_xx[2]}} & 5'b1_1000) | // 24 + ({5{shortq_shift_xx[1]}} & 5'b1_0000) | // 16 + ({5{shortq_shift_xx[0]}} & 5'b0_1000); // 8 - // *** End Short *** }} + // *** End Short *** }} - assign div_clken = valid_in | run_state | finish | finish_ff; + assign div_clken = valid_in | run_state | finish | finish_ff; - assign run_in = (valid_in | run_state) & ~finish & ~cancel; + assign run_in = (valid_in | run_state) & ~finish & ~cancel; - assign count_in[5:0] = {6{run_state & ~finish & ~cancel & ~shortq_enable}} & (count[5:0] + {1'b0,shortq_shift_ff[4:0]} + 6'd1); + assign count_in[5:0] = {6{run_state & ~finish & ~cancel & ~shortq_enable}} & (count[5:0] + {1'b0,shortq_shift_ff[4:0]} + 6'd1); - assign finish = (smallnum_case | ((~rem_ff) ? (count[5:0] == 6'd32) : (count[5:0] == 6'd33))); + assign finish = (smallnum_case | ((~rem_ff) ? 
(count[5:0] == 6'd32) : (count[5:0] == 6'd33))); - assign valid_out = finish_ff & ~cancel; + assign valid_out = finish_ff & ~cancel; - assign sign_eff = signed_in & (divisor_in[31:0] != 32'b0); + assign sign_eff = signed_in & (divisor_in[31:0] != 32'b0); - assign q_in[32:0] = ({33{~run_state }} & {1'b0,dividend_in[31:0]}) | + assign q_in[32:0] = ({33{~run_state }} & {1'b0,dividend_in[31:0]}) | ({33{ run_state & (valid_ff_x | shortq_enable_ff)}} & ({dividend_eff[31:0], ~a_in[32]} << shortq_shift_ff[4:0])) | ({33{ run_state & ~(valid_ff_x | shortq_enable_ff)}} & {q_ff[31:0], ~a_in[32]}); - assign qff_enable = valid_in | (run_state & ~shortq_enable); + assign qff_enable = valid_in | (run_state & ~shortq_enable); - assign dividend_eff[31:0] = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0]; + assign dividend_eff[31:0] = (sign_ff & dividend_neg_ff) ? dividend_comp[31:0] : q_ff[31:0]; - assign m_eff[32:0] = ( add ) ? m_ff[32:0] : ~m_ff[32:0]; + assign m_eff[32:0] = (add) ? m_ff[32:0] : ~m_ff[32:0]; - assign a_eff_shift[64:0] = {33'b0, dividend_eff[31:0]} << shortq_shift_ff[4:0]; + assign a_eff_shift[64:0] = {33'b0, dividend_eff[31:0]} << shortq_shift_ff[4:0]; - assign a_eff[32:0] = ({33{ rem_correct }} & a_ff[32:0] ) | + assign a_eff[32:0] = ({33{ rem_correct }} & a_ff[32:0] ) | ({33{~rem_correct & ~shortq_enable_ff}} & {a_ff[31:0], q_ff[32]} ) | ({33{~rem_correct & shortq_enable_ff}} & a_eff_shift[64:32] ); - assign a_shift[32:0] = {33{run_state}} & a_eff[32:0]; + assign a_shift[32:0] = {33{run_state}} & a_eff[32:0]; - assign a_in[32:0] = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0,~add}); + assign a_in[32:0] = {33{run_state}} & (a_shift[32:0] + m_eff[32:0] + {32'b0, ~add}); - assign aff_enable = valid_in | (run_state & ~shortq_enable & (count[5:0]!=6'd33)) | rem_correct; + assign aff_enable = valid_in | (run_state & ~shortq_enable & (count[5:0] != 6'd33)) | rem_correct; - assign m_already_comp = (divisor_neg_ff & sign_ff); + assign 
m_already_comp = (divisor_neg_ff & sign_ff); - // if m already complemented, then invert operation add->sub, sub->add - assign add = (a_ff[32] | rem_correct) ^ m_already_comp; + // if m already complemented, then invert operation add->sub, sub->add + assign add = (a_ff[32] | rem_correct) ^ m_already_comp; - assign rem_correct = (count[5:0] == 6'd33) & rem_ff & a_ff[32]; + assign rem_correct = (count[5:0] == 6'd33) & rem_ff & a_ff[32]; - assign q_ff_eff[31:0] = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0]; + assign q_ff_eff[31:0] = (sign_ff & (dividend_neg_ff ^ divisor_neg_ff)) ? q_ff_comp[31:0] : q_ff[31:0]; - assign a_ff_eff[31:0] = (sign_ff & dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0]; + assign a_ff_eff[31:0] = (sign_ff & dividend_neg_ff) ? a_ff_comp[31:0] : a_ff[31:0]; - assign data_out[31:0] = ({32{ smallnum_case_ff }} & {28'b0, smallnum_ff[3:0]}) | + assign data_out[31:0] = ({32{ smallnum_case_ff }} & {28'b0, smallnum_ff[3:0]}) | ({32{ rem_ff}} & a_ff_eff[31:0] ) | ({32{~smallnum_case_ff & ~rem_ff}} & q_ff_eff[31:0] ); -endmodule // el2_exu_div_existing_1bit_cheapshortq +endmodule // el2_exu_div_existing_1bit_cheapshortq @@ -444,177 +478,222 @@ endmodule // el2_exu_div_existing_1bit_cheapshortq // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_new_1bit_fullshortq - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode +module el2_exu_div_new_1bit_fullshortq ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input logic cancel, // Flush pipeline - input logic valid_in, - input logic signed_in, - input logic rem_in, - input logic [31:0] dividend_in, - input logic [31:0] divisor_in, + input logic cancel, // Flush pipeline + input logic valid_in, + input logic signed_in, + input logic rem_in, + input logic [31:0] dividend_in, + input logic [31:0] divisor_in, - output 
logic valid_out, - output logic [31:0] data_out + output logic valid_out, + output logic [31:0] data_out +); + + + logic valid_ff_in, valid_ff; + logic finish_raw, finish, finish_ff; + logic running_state; + logic misc_enable; + logic [2:0] control_in, control_ff; + logic dividend_sign_ff, divisor_sign_ff, rem_ff; + logic count_enable; + logic [6:0] count_in, count_ff; + + logic smallnum_case; + logic [3:0] smallnum; + + logic a_enable, a_shift; + logic [31:0] a_in, a_ff; + + logic b_enable, b_twos_comp; + logic [32:0] b_in, b_ff; + + logic [31:0] q_in, q_ff; + + logic rq_enable, r_sign_sel, r_restore_sel, r_adder_sel; + logic [31:0] r_in, r_ff; + + logic twos_comp_q_sel, twos_comp_b_sel; + logic [31:0] twos_comp_in, twos_comp_out; + + logic quotient_set; + logic [32:0] adder_out; + + logic [63:0] ar_shifted; + logic [ 5:0] shortq; + logic [ 4:0] shortq_shift; + logic [ 4:0] shortq_shift_ff; + logic shortq_enable; + logic shortq_enable_ff; + logic [32:0] shortq_dividend; + + logic by_zero_case; + logic by_zero_case_ff; + + + + rvdffe #(19) i_misc_ff ( + .*, + .clk(clk), + .en(misc_enable), + .din({ + valid_ff_in, + control_in[2:0], + by_zero_case, + shortq_enable, + shortq_shift[4:0], + finish, + count_in[6:0] + }), + .dout({ + valid_ff, + control_ff[2:0], + by_zero_case_ff, + shortq_enable_ff, + shortq_shift_ff[4:0], + finish_ff, + count_ff[6:0] + }) + ); + + rvdffe #(32) i_a_ff ( + .*, + .clk (clk), + .en (a_enable), + .din (a_in[31:0]), + .dout(a_ff[31:0]) + ); + rvdffe #(33) i_b_ff ( + .*, + .clk (clk), + .en (b_enable), + .din (b_in[32:0]), + .dout(b_ff[32:0]) + ); + rvdffe #(32) i_r_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (r_in[31:0]), + .dout(r_ff[31:0]) + ); + rvdffe #(32) i_q_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (q_in[31:0]), + .dout(q_ff[31:0]) ); - logic valid_ff_in, valid_ff; - logic finish_raw, finish, finish_ff; - logic running_state; - logic misc_enable; - logic [2:0] control_in, control_ff; - logic dividend_sign_ff, 
divisor_sign_ff, rem_ff; - logic count_enable; - logic [6:0] count_in, count_ff; - - logic smallnum_case; - logic [3:0] smallnum; - - logic a_enable, a_shift; - logic [31:0] a_in, a_ff; - - logic b_enable, b_twos_comp; - logic [32:0] b_in, b_ff; - - logic [31:0] q_in, q_ff; - - logic rq_enable, r_sign_sel, r_restore_sel, r_adder_sel; - logic [31:0] r_in, r_ff; - - logic twos_comp_q_sel, twos_comp_b_sel; - logic [31:0] twos_comp_in, twos_comp_out; - - logic quotient_set; - logic [32:0] adder_out; - - logic [63:0] ar_shifted; - logic [5:0] shortq; - logic [4:0] shortq_shift; - logic [4:0] shortq_shift_ff; - logic shortq_enable; - logic shortq_enable_ff; - logic [32:0] shortq_dividend; - - logic by_zero_case; - logic by_zero_case_ff; + assign valid_ff_in = valid_in & ~cancel; - rvdffe #(19) i_misc_ff (.*, .clk(clk), .en(misc_enable), .din ({valid_ff_in, control_in[2:0], by_zero_case, shortq_enable, shortq_shift[4:0], finish, count_in[6:0]}), - .dout({valid_ff, control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]})); + assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); + assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); + assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - rvdffe #(32) i_a_ff (.*, .clk(clk), .en(a_enable), .din(a_in[31:0]), .dout(a_ff[31:0])); - rvdffe #(33) i_b_ff (.*, .clk(clk), .en(b_enable), .din(b_in[32:0]), .dout(b_ff[32:0])); - rvdffe #(32) i_r_ff (.*, .clk(clk), .en(rq_enable), .din(r_in[31:0]), .dout(r_ff[31:0])); - rvdffe #(32) i_q_ff (.*, .clk(clk), .en(rq_enable), .din(q_in[31:0]), .dout(q_ff[31:0])); + assign dividend_sign_ff = control_ff[2]; + assign divisor_sign_ff = control_ff[1]; + assign rem_ff = control_ff[0]; + assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + + assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; + assign running_state = 
(|count_ff[6:0]) | shortq_enable_ff; + assign finish_raw = smallnum_case | by_zero_case | (count_ff[6:0] == 7'd32); - assign valid_ff_in = valid_in & ~cancel; - - assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); - assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); - assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - - assign dividend_sign_ff = control_ff[2]; - assign divisor_sign_ff = control_ff[1]; - assign rem_ff = control_ff[0]; + assign finish = finish_raw & ~cancel; + assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; + assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {6'b0,1'b1} + {2'b0,shortq_shift_ff[4:0]}); - assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + assign a_enable = valid_in | running_state; + assign a_shift = running_state & ~shortq_enable_ff; - assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; - assign running_state = (| count_ff[6:0]) | shortq_enable_ff; - assign finish_raw = smallnum_case | - by_zero_case | - (count_ff[6:0] == 7'd32); + assign ar_shifted[63:0] = {{32{dividend_sign_ff}}, a_ff[31:0]} << shortq_shift_ff[4:0]; - - assign finish = finish_raw & ~cancel; - assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; - assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {6'b0,1'b1} + {2'b0,shortq_shift_ff[4:0]}); - - - assign a_enable = valid_in | running_state; - assign a_shift = running_state & ~shortq_enable_ff; - - assign ar_shifted[63:0] = { {32{dividend_sign_ff}} , a_ff[31:0]} << shortq_shift_ff[4:0]; - - assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | + assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | ( {32{ a_shift }} & {a_ff[30:0],1'b0} ) | ( {32{ shortq_enable_ff}} & ar_shifted[31:0] ); - assign b_enable = 
valid_in | b_twos_comp; - assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign b_enable = valid_in | b_twos_comp; + assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | + assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } ); - assign rq_enable = valid_in | valid_ff | running_state; - assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; - assign r_restore_sel = running_state & ~quotient_set & ~shortq_enable_ff; - assign r_adder_sel = running_state & quotient_set & ~shortq_enable_ff; + assign rq_enable = valid_in | valid_ff | running_state; + assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; + assign r_restore_sel = running_state & ~quotient_set & ~shortq_enable_ff; + assign r_adder_sel = running_state & quotient_set & ~shortq_enable_ff; - assign r_in[31:0] = ( {32{r_sign_sel }} & 32'hffffffff ) | + assign r_in[31:0] = ( {32{r_sign_sel }} & 32'hffffffff ) | ( {32{r_restore_sel }} & {r_ff[30:0] ,a_ff[31]} ) | ( {32{r_adder_sel }} & adder_out[31:0] ) | ( {32{shortq_enable_ff}} & ar_shifted[63:32] ) | ( {32{by_zero_case }} & a_ff[31:0] ); - assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[30:0], quotient_set} ) | + assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[30:0], quotient_set} ) | ( {32{ smallnum_case }} & {28'b0 , smallnum[3:0]} ) | ( {32{ by_zero_case }} & {32{1'b1}} ); - assign adder_out[32:0] = {r_ff[31:0],a_ff[31]} + {b_ff[32:0] }; + assign adder_out[32:0] = {r_ff[31:0], a_ff[31]} + {b_ff[32:0]}; - assign quotient_set = (~adder_out[32] ^ dividend_sign_ff) | ( (a_ff[30:0] == 31'b0) & (adder_out[32:0] == 33'b0) ); + assign quotient_set = (~adder_out[32] ^ dividend_sign_ff) | ( (a_ff[30:0] == 31'b0) & (adder_out[32:0] == 33'b0) ); - assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ 
divisor_sign_ff); - assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; + assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; - assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | + assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{twos_comp_b_sel}} & b_ff[31:0] ); - rvtwoscomp #(32) i_twos_comp (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0])); + rvtwoscomp #(32) i_twos_comp ( + .din (twos_comp_in[31:0]), + .dout(twos_comp_out[31:0]) + ); - assign valid_out = finish_ff & ~cancel; + assign valid_out = finish_ff & ~cancel; - assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | + assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{ rem_ff }} & r_ff[31:0] ) | ( {32{ twos_comp_q_sel}} & twos_comp_out[31:0] ); - // *** *** *** START : SMALLNUM {{ + // *** *** *** START : SMALLNUM {{ - assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | + assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | ( (a_ff[31:0] == 32'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel); - assign smallnum[3] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ); + assign smallnum[3] = (a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]); - assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & ~b_ff[3] & ~b_ff[2] ); - assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[3] & 
~a_ff[2] & ~b_ff[3] & ~b_ff[2] & b_ff[1] & b_ff[0]) | @@ -624,7 +703,7 @@ module el2_exu_div_new_1bit_fullshortq ( a_ff[3] & a_ff[1] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & ~b_ff[3] & b_ff[2] ); - assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | + assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & ~a_ff[2] & a_ff[0] & ~b_ff[3] & b_ff[1] & b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | @@ -651,50 +730,52 @@ module el2_exu_div_new_1bit_fullshortq ( a_ff[3] & a_ff[1] & a_ff[0] & ~b_ff[2] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & a_ff[0] & b_ff[3] ); - // *** *** *** END : SMALLNUM }} + // *** *** *** END : SMALLNUM }} - // *** *** *** Start : Short Q {{ + // *** *** *** Start : Short Q {{ - assign shortq_dividend[32:0] = {dividend_sign_ff,a_ff[31:0]}; + assign shortq_dividend[32:0] = {dividend_sign_ff, a_ff[31:0]}; - logic [5:0] dw_a_enc; - logic [5:0] dw_b_enc; - logic [6:0] dw_shortq_raw; + logic [5:0] dw_a_enc; + logic [5:0] dw_b_enc; + logic [6:0] dw_shortq_raw; - el2_exu_div_cls i_a_cls ( - .operand ( shortq_dividend[32:0] ), - .cls ( dw_a_enc[4:0] )); + el2_exu_div_cls i_a_cls ( + .operand(shortq_dividend[32:0]), + .cls (dw_a_enc[4:0]) + ); - el2_exu_div_cls i_b_cls ( - .operand ( b_ff[32:0] ), - .cls ( dw_b_enc[4:0] )); + el2_exu_div_cls i_b_cls ( + .operand(b_ff[32:0]), + .cls (dw_b_enc[4:0]) + ); - assign dw_a_enc[5] = 1'b0; - assign dw_b_enc[5] = 1'b0; + assign dw_a_enc[5] = 1'b0; + assign dw_b_enc[5] = 1'b0; - assign dw_shortq_raw[6:0] = {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1; - assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; + assign dw_shortq_raw[6:0] = {1'b0, dw_b_enc[5:0]} - {1'b0, dw_a_enc[5:0]} + 7'd1; + assign shortq[5:0] = dw_shortq_raw[6] ? 
6'd0 : dw_shortq_raw[5:0]; - assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:1] == 4'b1111) & ~cancel; + assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:1] == 4'b1111) & ~cancel; - assign shortq_shift[4:0] = ~shortq_enable ? 5'd0 : (5'b11111 - shortq[4:0]); + assign shortq_shift[4:0] = ~shortq_enable ? 5'd0 : (5'b11111 - shortq[4:0]); - // *** *** *** End : Short Q }} + // *** *** *** End : Short Q }} -endmodule // el2_exu_div_new_1bit_fullshortq +endmodule // el2_exu_div_new_1bit_fullshortq @@ -702,133 +783,175 @@ endmodule // el2_exu_div_new_1bit_fullshortq // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_new_2bit_fullshortq - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode +module el2_exu_div_new_2bit_fullshortq ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input logic cancel, // Flush pipeline - input logic valid_in, - input logic signed_in, - input logic rem_in, - input logic [31:0] dividend_in, - input logic [31:0] divisor_in, + input logic cancel, // Flush pipeline + input logic valid_in, + input logic signed_in, + input logic rem_in, + input logic [31:0] dividend_in, + input logic [31:0] divisor_in, - output logic valid_out, - output logic [31:0] data_out + output logic valid_out, + output logic [31:0] data_out +); + + + logic valid_ff_in, valid_ff; + logic finish_raw, finish, finish_ff; + logic running_state; + logic misc_enable; + logic [2:0] control_in, control_ff; + logic dividend_sign_ff, divisor_sign_ff, rem_ff; + logic count_enable; + logic [6:0] count_in, count_ff; + + logic smallnum_case; + logic [3:0] smallnum; + + logic a_enable, a_shift; + logic [31:0] a_in, a_ff; + + logic b_enable, b_twos_comp; + logic [32:0] b_in; + logic [34:0] b_ff; + + logic [31:0] q_in, q_ff; + + logic rq_enable, r_sign_sel, r_restore_sel, r_adder1_sel, r_adder2_sel, r_adder3_sel; 
+ logic [31:0] r_in, r_ff; + + logic twos_comp_q_sel, twos_comp_b_sel; + logic [31:0] twos_comp_in, twos_comp_out; + + logic [ 3:1] quotient_raw; + logic [ 1:0] quotient_new; + logic [32:0] adder1_out; + logic [33:0] adder2_out; + logic [34:0] adder3_out; + + logic [63:0] ar_shifted; + logic [ 5:0] shortq; + logic [ 4:0] shortq_shift; + logic [ 4:1] shortq_shift_ff; + logic shortq_enable; + logic shortq_enable_ff; + logic [32:0] shortq_dividend; + + logic by_zero_case; + logic by_zero_case_ff; + + + + rvdffe #(18) i_misc_ff ( + .*, + .clk(clk), + .en(misc_enable), + .din({ + valid_ff_in, + control_in[2:0], + by_zero_case, + shortq_enable, + shortq_shift[4:1], + finish, + count_in[6:0] + }), + .dout({ + valid_ff, + control_ff[2:0], + by_zero_case_ff, + shortq_enable_ff, + shortq_shift_ff[4:1], + finish_ff, + count_ff[6:0] + }) + ); + + rvdffe #(32) i_a_ff ( + .*, + .clk (clk), + .en (a_enable), + .din (a_in[31:0]), + .dout(a_ff[31:0]) + ); + rvdffe #(33) i_b_ff ( + .*, + .clk (clk), + .en (b_enable), + .din (b_in[32:0]), + .dout(b_ff[32:0]) + ); + rvdffe #(32) i_r_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (r_in[31:0]), + .dout(r_ff[31:0]) + ); + rvdffe #(32) i_q_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (q_in[31:0]), + .dout(q_ff[31:0]) ); - logic valid_ff_in, valid_ff; - logic finish_raw, finish, finish_ff; - logic running_state; - logic misc_enable; - logic [2:0] control_in, control_ff; - logic dividend_sign_ff, divisor_sign_ff, rem_ff; - logic count_enable; - logic [6:0] count_in, count_ff; - - logic smallnum_case; - logic [3:0] smallnum; - - logic a_enable, a_shift; - logic [31:0] a_in, a_ff; - - logic b_enable, b_twos_comp; - logic [32:0] b_in; - logic [34:0] b_ff; - - logic [31:0] q_in, q_ff; - - logic rq_enable, r_sign_sel, r_restore_sel, r_adder1_sel, r_adder2_sel, r_adder3_sel; - logic [31:0] r_in, r_ff; - - logic twos_comp_q_sel, twos_comp_b_sel; - logic [31:0] twos_comp_in, twos_comp_out; - - logic [3:1] quotient_raw; - logic [1:0] 
quotient_new; - logic [32:0] adder1_out; - logic [33:0] adder2_out; - logic [34:0] adder3_out; - - logic [63:0] ar_shifted; - logic [5:0] shortq; - logic [4:0] shortq_shift; - logic [4:1] shortq_shift_ff; - logic shortq_enable; - logic shortq_enable_ff; - logic [32:0] shortq_dividend; - - logic by_zero_case; - logic by_zero_case_ff; + assign valid_ff_in = valid_in & ~cancel; - rvdffe #(18) i_misc_ff (.*, .clk(clk), .en(misc_enable), .din ({valid_ff_in, control_in[2:0], by_zero_case, shortq_enable, shortq_shift[4:1], finish, count_in[6:0]}), - .dout({valid_ff, control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:1], finish_ff, count_ff[6:0]})); + assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); + assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); + assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - rvdffe #(32) i_a_ff (.*, .clk(clk), .en(a_enable), .din(a_in[31:0]), .dout(a_ff[31:0])); - rvdffe #(33) i_b_ff (.*, .clk(clk), .en(b_enable), .din(b_in[32:0]), .dout(b_ff[32:0])); - rvdffe #(32) i_r_ff (.*, .clk(clk), .en(rq_enable), .din(r_in[31:0]), .dout(r_ff[31:0])); - rvdffe #(32) i_q_ff (.*, .clk(clk), .en(rq_enable), .din(q_in[31:0]), .dout(q_ff[31:0])); + assign dividend_sign_ff = control_ff[2]; + assign divisor_sign_ff = control_ff[1]; + assign rem_ff = control_ff[0]; + assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + + assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; + assign running_state = (|count_ff[6:0]) | shortq_enable_ff; + assign finish_raw = smallnum_case | by_zero_case | (count_ff[6:0] == 7'd32); - assign valid_ff_in = valid_in & ~cancel; - - assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); - assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); - assign control_in[0] = (~valid_in & control_ff[0]) | 
(valid_in & rem_in); - - assign dividend_sign_ff = control_ff[2]; - assign divisor_sign_ff = control_ff[1]; - assign rem_ff = control_ff[0]; + assign finish = finish_raw & ~cancel; + assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; + assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b10} + {2'b0,shortq_shift_ff[4:1],1'b0}); - assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + assign a_enable = valid_in | running_state; + assign a_shift = running_state & ~shortq_enable_ff; - assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; - assign running_state = (| count_ff[6:0]) | shortq_enable_ff; - assign finish_raw = smallnum_case | - by_zero_case | - (count_ff[6:0] == 7'd32); + assign ar_shifted[63:0] = {{32{dividend_sign_ff}}, a_ff[31:0]} << {shortq_shift_ff[4:1], 1'b0}; - - assign finish = finish_raw & ~cancel; - assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; - assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b10} + {2'b0,shortq_shift_ff[4:1],1'b0}); - - - assign a_enable = valid_in | running_state; - assign a_shift = running_state & ~shortq_enable_ff; - - assign ar_shifted[63:0] = { {32{dividend_sign_ff}} , a_ff[31:0]} << {shortq_shift_ff[4:1],1'b0}; - - assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | + assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | ( {32{ a_shift }} & {a_ff[29:0],2'b0} ) | ( {32{ shortq_enable_ff}} & ar_shifted[31:0] ); - assign b_enable = valid_in | b_twos_comp; - assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign b_enable = valid_in | b_twos_comp; + assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | + assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & 
divisor_in[31]),divisor_in[31:0] } ) | ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } ); - assign rq_enable = valid_in | valid_ff | running_state; - assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; - assign r_restore_sel = running_state & (quotient_new[1:0] == 2'b00) & ~shortq_enable_ff; - assign r_adder1_sel = running_state & (quotient_new[1:0] == 2'b01) & ~shortq_enable_ff; - assign r_adder2_sel = running_state & (quotient_new[1:0] == 2'b10) & ~shortq_enable_ff; - assign r_adder3_sel = running_state & (quotient_new[1:0] == 2'b11) & ~shortq_enable_ff; + assign rq_enable = valid_in | valid_ff | running_state; + assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; + assign r_restore_sel = running_state & (quotient_new[1:0] == 2'b00) & ~shortq_enable_ff; + assign r_adder1_sel = running_state & (quotient_new[1:0] == 2'b01) & ~shortq_enable_ff; + assign r_adder2_sel = running_state & (quotient_new[1:0] == 2'b10) & ~shortq_enable_ff; + assign r_adder3_sel = running_state & (quotient_new[1:0] == 2'b11) & ~shortq_enable_ff; - assign r_in[31:0] = ( {32{r_sign_sel }} & 32'hffffffff ) | + assign r_in[31:0] = ( {32{r_sign_sel }} & 32'hffffffff ) | ( {32{r_restore_sel }} & {r_ff[29:0] ,a_ff[31:30]} ) | ( {32{r_adder1_sel }} & adder1_out[31:0] ) | ( {32{r_adder2_sel }} & adder2_out[31:0] ) | @@ -837,58 +960,61 @@ module el2_exu_div_new_2bit_fullshortq ( {32{by_zero_case }} & a_ff[31:0] ); - assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[29:0], quotient_new[1:0]} ) | + assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[29:0], quotient_new[1:0]} ) | ( {32{ smallnum_case }} & {28'b0 , smallnum[3:0]} ) | ( {32{ by_zero_case }} & {32{1'b1}} ); - assign b_ff[34:33] = {b_ff[32],b_ff[32]}; + assign b_ff[34:33] = {b_ff[32], b_ff[32]}; - assign adder1_out[32:0] = { r_ff[30:0],a_ff[31:30]} + b_ff[32:0]; - assign adder2_out[33:0] = { r_ff[31:0],a_ff[31:30]} + {b_ff[32:0],1'b0}; - assign adder3_out[34:0] = {r_ff[31],r_ff[31:0],a_ff[31:30]} + 
{b_ff[33:0],1'b0} + b_ff[34:0]; + assign adder1_out[32:0] = {r_ff[30:0], a_ff[31:30]} + b_ff[32:0]; + assign adder2_out[33:0] = {r_ff[31:0], a_ff[31:30]} + {b_ff[32:0], 1'b0}; + assign adder3_out[34:0] = {r_ff[31], r_ff[31:0], a_ff[31:30]} + {b_ff[33:0], 1'b0} + b_ff[34:0]; - assign quotient_raw[1] = (~adder1_out[32] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[32:0] == 33'b0) ); - assign quotient_raw[2] = (~adder2_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[33:0] == 34'b0) ); - assign quotient_raw[3] = (~adder3_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[34:0] == 35'b0) ); + assign quotient_raw[1] = (~adder1_out[32] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[32:0] == 33'b0) ); + assign quotient_raw[2] = (~adder2_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[33:0] == 34'b0) ); + assign quotient_raw[3] = (~adder3_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[34:0] == 35'b0) ); - assign quotient_new[1] = quotient_raw[3] | quotient_raw[2]; - assign quotient_new[0] = quotient_raw[3] |(~quotient_raw[2] & quotient_raw[1]); + assign quotient_new[1] = quotient_raw[3] | quotient_raw[2]; + assign quotient_new[0] = quotient_raw[3] | (~quotient_raw[2] & quotient_raw[1]); - assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; + assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; - assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | + assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{twos_comp_b_sel}} & b_ff[31:0] ); - rvtwoscomp #(32) i_twos_comp (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0])); + rvtwoscomp #(32) i_twos_comp ( + .din (twos_comp_in[31:0]), 
+ .dout(twos_comp_out[31:0]) + ); - assign valid_out = finish_ff & ~cancel; + assign valid_out = finish_ff & ~cancel; - assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | + assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{ rem_ff }} & r_ff[31:0] ) | ( {32{ twos_comp_q_sel}} & twos_comp_out[31:0] ); - // *** *** *** START : SMALLNUM {{ + // *** *** *** START : SMALLNUM {{ - assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | + assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | ( (a_ff[31:0] == 32'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel); - assign smallnum[3] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ); + assign smallnum[3] = (a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]); - assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & ~b_ff[3] & ~b_ff[2] ); - assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[3] & ~a_ff[2] & ~b_ff[3] & ~b_ff[2] & b_ff[1] & b_ff[0]) | @@ -898,7 +1024,7 @@ module el2_exu_div_new_2bit_fullshortq ( a_ff[3] & a_ff[1] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & ~b_ff[3] & b_ff[2] ); - assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | + assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & ~a_ff[2] & a_ff[0] & ~b_ff[3] & b_ff[1] & b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | @@ -925,50 +1051,52 @@ module el2_exu_div_new_2bit_fullshortq ( a_ff[3] & a_ff[1] & a_ff[0] & ~b_ff[2] ) | ( 
a_ff[3] & a_ff[2] & a_ff[1] & a_ff[0] & b_ff[3] ); - // *** *** *** END : SMALLNUM }} + // *** *** *** END : SMALLNUM }} - // *** *** *** Start : Short Q {{ + // *** *** *** Start : Short Q {{ - assign shortq_dividend[32:0] = {dividend_sign_ff,a_ff[31:0]}; + assign shortq_dividend[32:0] = {dividend_sign_ff, a_ff[31:0]}; - logic [5:0] dw_a_enc; - logic [5:0] dw_b_enc; - logic [6:0] dw_shortq_raw; + logic [5:0] dw_a_enc; + logic [5:0] dw_b_enc; + logic [6:0] dw_shortq_raw; - el2_exu_div_cls i_a_cls ( - .operand ( shortq_dividend[32:0] ), - .cls ( dw_a_enc[4:0] )); + el2_exu_div_cls i_a_cls ( + .operand(shortq_dividend[32:0]), + .cls (dw_a_enc[4:0]) + ); - el2_exu_div_cls i_b_cls ( - .operand ( b_ff[32:0] ), - .cls ( dw_b_enc[4:0] )); + el2_exu_div_cls i_b_cls ( + .operand(b_ff[32:0]), + .cls (dw_b_enc[4:0]) + ); - assign dw_a_enc[5] = 1'b0; - assign dw_b_enc[5] = 1'b0; + assign dw_a_enc[5] = 1'b0; + assign dw_b_enc[5] = 1'b0; - assign dw_shortq_raw[6:0] = {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1; - assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; + assign dw_shortq_raw[6:0] = {1'b0, dw_b_enc[5:0]} - {1'b0, dw_a_enc[5:0]} + 7'd1; + assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; - assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:1] == 4'b1111) & ~cancel; + assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:1] == 4'b1111) & ~cancel; - assign shortq_shift[4:0] = ~shortq_enable ? 5'd0 : (5'b11111 - shortq[4:0]); // [0] is unused + assign shortq_shift[4:0] = ~shortq_enable ? 
5'd0 : (5'b11111 - shortq[4:0]); // [0] is unused - // *** *** *** End : Short Q }} + // *** *** *** End : Short Q }} -endmodule // el2_exu_div_new_2bit_fullshortq +endmodule // el2_exu_div_new_2bit_fullshortq @@ -976,145 +1104,194 @@ endmodule // el2_exu_div_new_2bit_fullshortq // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_new_3bit_fullshortq - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode +module el2_exu_div_new_3bit_fullshortq ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input logic cancel, // Flush pipeline - input logic valid_in, - input logic signed_in, - input logic rem_in, - input logic [31:0] dividend_in, - input logic [31:0] divisor_in, + input logic cancel, // Flush pipeline + input logic valid_in, + input logic signed_in, + input logic rem_in, + input logic [31:0] dividend_in, + input logic [31:0] divisor_in, - output logic valid_out, - output logic [31:0] data_out + output logic valid_out, + output logic [31:0] data_out +); + + + logic valid_ff_in, valid_ff; + logic finish_raw, finish, finish_ff; + logic running_state; + logic misc_enable; + logic [2:0] control_in, control_ff; + logic dividend_sign_ff, divisor_sign_ff, rem_ff; + logic count_enable; + logic [6:0] count_in, count_ff; + + logic smallnum_case; + logic [3:0] smallnum; + + logic a_enable, a_shift; + logic [32:0] a_in, a_ff; + + logic b_enable, b_twos_comp; + logic [32:0] b_in; + logic [36:0] b_ff; + + logic [31:0] q_in, q_ff; + + logic rq_enable; + logic r_sign_sel; + logic r_restore_sel; + logic + r_adder1_sel, + r_adder2_sel, + r_adder3_sel, + r_adder4_sel, + r_adder5_sel, + r_adder6_sel, + r_adder7_sel; + logic [32:0] r_in, r_ff; + + logic twos_comp_q_sel, twos_comp_b_sel; + logic [31:0] twos_comp_in, twos_comp_out; + + logic [ 7:1] quotient_raw; + logic [ 2:0] quotient_new; + logic [33:0] adder1_out; + logic 
[34:0] adder2_out; + logic [35:0] adder3_out; + logic [36:0] adder4_out; + logic [36:0] adder5_out; + logic [36:0] adder6_out; + logic [36:0] adder7_out; + + logic [65:0] ar_shifted; + logic [ 5:0] shortq; + logic [ 4:0] shortq_shift; + logic [ 4:0] shortq_decode; + logic [ 4:0] shortq_shift_ff; + logic shortq_enable; + logic shortq_enable_ff; + logic [32:0] shortq_dividend; + + logic by_zero_case; + logic by_zero_case_ff; + + + + rvdffe #(19) i_misc_ff ( + .*, + .clk(clk), + .en(misc_enable), + .din({ + valid_ff_in, + control_in[2:0], + by_zero_case, + shortq_enable, + shortq_shift[4:0], + finish, + count_in[6:0] + }), + .dout({ + valid_ff, + control_ff[2:0], + by_zero_case_ff, + shortq_enable_ff, + shortq_shift_ff[4:0], + finish_ff, + count_ff[6:0] + }) + ); + + rvdffe #(33) i_a_ff ( + .*, + .clk (clk), + .en (a_enable), + .din (a_in[32:0]), + .dout(a_ff[32:0]) + ); + rvdffe #(33) i_b_ff ( + .*, + .clk (clk), + .en (b_enable), + .din (b_in[32:0]), + .dout(b_ff[32:0]) + ); + rvdffe #(33) i_r_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (r_in[32:0]), + .dout(r_ff[32:0]) + ); + rvdffe #(32) i_q_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (q_in[31:0]), + .dout(q_ff[31:0]) ); - logic valid_ff_in, valid_ff; - logic finish_raw, finish, finish_ff; - logic running_state; - logic misc_enable; - logic [2:0] control_in, control_ff; - logic dividend_sign_ff, divisor_sign_ff, rem_ff; - logic count_enable; - logic [6:0] count_in, count_ff; - - logic smallnum_case; - logic [3:0] smallnum; - - logic a_enable, a_shift; - logic [32:0] a_in, a_ff; - - logic b_enable, b_twos_comp; - logic [32:0] b_in; - logic [36:0] b_ff; - - logic [31:0] q_in, q_ff; - - logic rq_enable; - logic r_sign_sel; - logic r_restore_sel; - logic r_adder1_sel, r_adder2_sel, r_adder3_sel, r_adder4_sel, r_adder5_sel, r_adder6_sel, r_adder7_sel; - logic [32:0] r_in, r_ff; - - logic twos_comp_q_sel, twos_comp_b_sel; - logic [31:0] twos_comp_in, twos_comp_out; - - logic [7:1] quotient_raw; - logic 
[2:0] quotient_new; - logic [33:0] adder1_out; - logic [34:0] adder2_out; - logic [35:0] adder3_out; - logic [36:0] adder4_out; - logic [36:0] adder5_out; - logic [36:0] adder6_out; - logic [36:0] adder7_out; - - logic [65:0] ar_shifted; - logic [5:0] shortq; - logic [4:0] shortq_shift; - logic [4:0] shortq_decode; - logic [4:0] shortq_shift_ff; - logic shortq_enable; - logic shortq_enable_ff; - logic [32:0] shortq_dividend; - - logic by_zero_case; - logic by_zero_case_ff; + assign valid_ff_in = valid_in & ~cancel; - rvdffe #(19) i_misc_ff (.*, .clk(clk), .en(misc_enable), .din ({valid_ff_in, control_in[2:0], by_zero_case, shortq_enable, shortq_shift[4:0], finish, count_in[6:0]}), - .dout({valid_ff, control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]})); + assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); + assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); + assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - rvdffe #(33) i_a_ff (.*, .clk(clk), .en(a_enable), .din(a_in[32:0]), .dout(a_ff[32:0])); - rvdffe #(33) i_b_ff (.*, .clk(clk), .en(b_enable), .din(b_in[32:0]), .dout(b_ff[32:0])); - rvdffe #(33) i_r_ff (.*, .clk(clk), .en(rq_enable), .din(r_in[32:0]), .dout(r_ff[32:0])); - rvdffe #(32) i_q_ff (.*, .clk(clk), .en(rq_enable), .din(q_in[31:0]), .dout(q_ff[31:0])); + assign dividend_sign_ff = control_ff[2]; + assign divisor_sign_ff = control_ff[1]; + assign rem_ff = control_ff[0]; + assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + + assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; + assign running_state = (|count_ff[6:0]) | shortq_enable_ff; + assign finish_raw = smallnum_case | by_zero_case | (count_ff[6:0] == 7'd33); - assign valid_ff_in = valid_in & ~cancel; - - assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); - assign 
control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); - assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - - assign dividend_sign_ff = control_ff[2]; - assign divisor_sign_ff = control_ff[1]; - assign rem_ff = control_ff[0]; + assign finish = finish_raw & ~cancel; + assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; + assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b11} + {2'b0,shortq_shift_ff[4:0]}); - assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + assign a_enable = valid_in | running_state; + assign a_shift = running_state & ~shortq_enable_ff; - assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; - assign running_state = (| count_ff[6:0]) | shortq_enable_ff; - assign finish_raw = smallnum_case | - by_zero_case | - (count_ff[6:0] == 7'd33); + assign ar_shifted[65:0] = {{33{dividend_sign_ff}}, a_ff[32:0]} << {shortq_shift_ff[4:0]}; - - assign finish = finish_raw & ~cancel; - assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; - assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + {5'b0,2'b11} + {2'b0,shortq_shift_ff[4:0]}); - - - assign a_enable = valid_in | running_state; - assign a_shift = running_state & ~shortq_enable_ff; - - assign ar_shifted[65:0] = { {33{dividend_sign_ff}} , a_ff[32:0]} << {shortq_shift_ff[4:0]}; - - assign a_in[32:0] = ( {33{~a_shift & ~shortq_enable_ff}} & {signed_in & dividend_in[31],dividend_in[31:0]} ) | + assign a_in[32:0] = ( {33{~a_shift & ~shortq_enable_ff}} & {signed_in & dividend_in[31],dividend_in[31:0]} ) | ( {33{ a_shift }} & {a_ff[29:0],3'b0} ) | ( {33{ shortq_enable_ff}} & ar_shifted[32:0] ); - assign b_enable = valid_in | b_twos_comp; - assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign b_enable = valid_in | b_twos_comp; + assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ 
divisor_sign_ff); - assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | + assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } ); - assign rq_enable = valid_in | valid_ff | running_state; - assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; - assign r_restore_sel = running_state & (quotient_new[2:0] == 3'b000) & ~shortq_enable_ff; - assign r_adder1_sel = running_state & (quotient_new[2:0] == 3'b001) & ~shortq_enable_ff; - assign r_adder2_sel = running_state & (quotient_new[2:0] == 3'b010) & ~shortq_enable_ff; - assign r_adder3_sel = running_state & (quotient_new[2:0] == 3'b011) & ~shortq_enable_ff; - assign r_adder4_sel = running_state & (quotient_new[2:0] == 3'b100) & ~shortq_enable_ff; - assign r_adder5_sel = running_state & (quotient_new[2:0] == 3'b101) & ~shortq_enable_ff; - assign r_adder6_sel = running_state & (quotient_new[2:0] == 3'b110) & ~shortq_enable_ff; - assign r_adder7_sel = running_state & (quotient_new[2:0] == 3'b111) & ~shortq_enable_ff; + assign rq_enable = valid_in | valid_ff | running_state; + assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; + assign r_restore_sel = running_state & (quotient_new[2:0] == 3'b000) & ~shortq_enable_ff; + assign r_adder1_sel = running_state & (quotient_new[2:0] == 3'b001) & ~shortq_enable_ff; + assign r_adder2_sel = running_state & (quotient_new[2:0] == 3'b010) & ~shortq_enable_ff; + assign r_adder3_sel = running_state & (quotient_new[2:0] == 3'b011) & ~shortq_enable_ff; + assign r_adder4_sel = running_state & (quotient_new[2:0] == 3'b100) & ~shortq_enable_ff; + assign r_adder5_sel = running_state & (quotient_new[2:0] == 3'b101) & ~shortq_enable_ff; + assign r_adder6_sel = running_state & (quotient_new[2:0] == 3'b110) & ~shortq_enable_ff; + assign r_adder7_sel = running_state & (quotient_new[2:0] == 3'b111) & ~shortq_enable_ff; - assign 
r_in[32:0] = ( {33{r_sign_sel }} & {33{1'b1}} ) | + assign r_in[32:0] = ( {33{r_sign_sel }} & {33{1'b1}} ) | ( {33{r_restore_sel }} & {r_ff[29:0] ,a_ff[32:30]} ) | ( {33{r_adder1_sel }} & adder1_out[32:0] ) | ( {33{r_adder2_sel }} & adder2_out[32:0] ) | @@ -1127,66 +1304,69 @@ module el2_exu_div_new_3bit_fullshortq ( {33{by_zero_case }} & {1'b0,a_ff[31:0]} ); - assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[28:0], quotient_new[2:0]} ) | + assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[28:0], quotient_new[2:0]} ) | ( {32{ smallnum_case}} & {28'b0 , smallnum[3:0]} ) | ( {32{ by_zero_case }} & {32{1'b1}} ); - assign b_ff[36:33] = {b_ff[32],b_ff[32],b_ff[32],b_ff[32]}; + assign b_ff[36:33] = {b_ff[32], b_ff[32], b_ff[32], b_ff[32]}; - assign adder1_out[33:0] = { r_ff[30:0],a_ff[32:30]} + b_ff[33:0]; - assign adder2_out[34:0] = { r_ff[31:0],a_ff[32:30]} + {b_ff[33:0],1'b0}; - assign adder3_out[35:0] = { r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],1'b0} + b_ff[35:0]; - assign adder4_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0}; - assign adder5_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0} + b_ff[36:0]; - assign adder6_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0} + {b_ff[35:0],1'b0}; - assign adder7_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0} + {b_ff[35:0],1'b0} + b_ff[36:0]; + assign adder1_out[33:0] = {r_ff[30:0], a_ff[32:30]} + b_ff[33:0]; + assign adder2_out[34:0] = {r_ff[31:0], a_ff[32:30]} + {b_ff[33:0], 1'b0}; + assign adder3_out[35:0] = {r_ff[32:0], a_ff[32:30]} + {b_ff[34:0], 1'b0} + b_ff[35:0]; + assign adder4_out[36:0] = {r_ff[32], r_ff[32:0], a_ff[32:30]} + {b_ff[34:0], 2'b0}; + assign adder5_out[36:0] = {r_ff[32], r_ff[32:0], a_ff[32:30]} + {b_ff[34:0], 2'b0} + b_ff[36:0]; + assign adder6_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0} + {b_ff[35:0],1'b0}; + assign adder7_out[36:0] = {r_ff[32],r_ff[32:0],a_ff[32:30]} + {b_ff[34:0],2'b0} + {b_ff[35:0],1'b0} + 
b_ff[36:0]; - assign quotient_raw[1] = (~adder1_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[33:0] == 34'b0) ); - assign quotient_raw[2] = (~adder2_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[34:0] == 35'b0) ); - assign quotient_raw[3] = (~adder3_out[35] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[35:0] == 36'b0) ); - assign quotient_raw[4] = (~adder4_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder4_out[36:0] == 37'b0) ); - assign quotient_raw[5] = (~adder5_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder5_out[36:0] == 37'b0) ); - assign quotient_raw[6] = (~adder6_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder6_out[36:0] == 37'b0) ); - assign quotient_raw[7] = (~adder7_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder7_out[36:0] == 37'b0) ); + assign quotient_raw[1] = (~adder1_out[33] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder1_out[33:0] == 34'b0) ); + assign quotient_raw[2] = (~adder2_out[34] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder2_out[34:0] == 35'b0) ); + assign quotient_raw[3] = (~adder3_out[35] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder3_out[35:0] == 36'b0) ); + assign quotient_raw[4] = (~adder4_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder4_out[36:0] == 37'b0) ); + assign quotient_raw[5] = (~adder5_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder5_out[36:0] == 37'b0) ); + assign quotient_raw[6] = (~adder6_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder6_out[36:0] == 37'b0) ); + assign quotient_raw[7] = (~adder7_out[36] ^ dividend_sign_ff) | ( (a_ff[29:0] == 30'b0) & (adder7_out[36:0] == 37'b0) ); - assign quotient_new[2] = quotient_raw[7] | quotient_raw[6] | quotient_raw[5] | quotient_raw[4]; - assign quotient_new[1] = quotient_raw[7] | quotient_raw[6] | (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[3] & quotient_raw[2]); - assign quotient_new[0] = 
quotient_raw[7] | (~quotient_raw[6] & quotient_raw[5]) | (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[2] & quotient_raw[1]); + assign quotient_new[2] = quotient_raw[7] | quotient_raw[6] | quotient_raw[5] | quotient_raw[4]; + assign quotient_new[1] = quotient_raw[7] | quotient_raw[6] | (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[3] & quotient_raw[2]); + assign quotient_new[0] = quotient_raw[7] | (~quotient_raw[6] & quotient_raw[5]) | (~quotient_raw[4] & quotient_raw[3]) | (~quotient_raw[2] & quotient_raw[1]); - assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; + assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; - assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | + assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{twos_comp_b_sel}} & b_ff[31:0] ); - rvtwoscomp #(32) i_twos_comp (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0])); + rvtwoscomp #(32) i_twos_comp ( + .din (twos_comp_in[31:0]), + .dout(twos_comp_out[31:0]) + ); - assign valid_out = finish_ff & ~cancel; + assign valid_out = finish_ff & ~cancel; - assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | + assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{ rem_ff }} & r_ff[31:0] ) | ( {32{ twos_comp_q_sel}} & twos_comp_out[31:0] ); - // *** *** *** START : SMALLNUM {{ + // *** *** *** START : SMALLNUM {{ - assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | + assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | ( (a_ff[31:0] == 32'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel); - assign 
smallnum[3] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ); + assign smallnum[3] = (a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]); - assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & ~b_ff[3] & ~b_ff[2] ); - assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[3] & ~a_ff[2] & ~b_ff[3] & ~b_ff[2] & b_ff[1] & b_ff[0]) | @@ -1196,7 +1376,7 @@ module el2_exu_div_new_3bit_fullshortq ( a_ff[3] & a_ff[1] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & ~b_ff[3] & b_ff[2] ); - assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | + assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & ~a_ff[2] & a_ff[0] & ~b_ff[3] & b_ff[1] & b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | @@ -1223,41 +1403,43 @@ module el2_exu_div_new_3bit_fullshortq ( a_ff[3] & a_ff[1] & a_ff[0] & ~b_ff[2] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & a_ff[0] & b_ff[3] ); - // *** *** *** END : SMALLNUM }} + // *** *** *** END : SMALLNUM }} - // *** *** *** Start : Short Q {{ + // *** *** *** Start : Short Q {{ - assign shortq_dividend[32:0] = {dividend_sign_ff,a_ff[31:0]}; + assign shortq_dividend[32:0] = {dividend_sign_ff, a_ff[31:0]}; - logic [5:0] dw_a_enc; - logic [5:0] dw_b_enc; - logic [6:0] dw_shortq_raw; + logic [5:0] dw_a_enc; + logic [5:0] dw_b_enc; + logic [6:0] dw_shortq_raw; - el2_exu_div_cls i_a_cls ( - .operand ( shortq_dividend[32:0] ), - .cls ( dw_a_enc[4:0] )); + el2_exu_div_cls i_a_cls ( + .operand(shortq_dividend[32:0]), + .cls (dw_a_enc[4:0]) + ); - el2_exu_div_cls i_b_cls ( - .operand ( b_ff[32:0] ), - .cls ( dw_b_enc[4:0] )); + el2_exu_div_cls 
i_b_cls ( + .operand(b_ff[32:0]), + .cls (dw_b_enc[4:0]) + ); - assign dw_a_enc[5] = 1'b0; - assign dw_b_enc[5] = 1'b0; + assign dw_a_enc[5] = 1'b0; + assign dw_b_enc[5] = 1'b0; - assign dw_shortq_raw[6:0] = {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1; - assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; + assign dw_shortq_raw[6:0] = {1'b0, dw_b_enc[5:0]} - {1'b0, dw_a_enc[5:0]} + 7'd1; + assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; - assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:2] == 3'b111) & ~cancel; + assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:2] == 3'b111) & ~cancel; - assign shortq_decode[4:0] = ( {5{shortq[4:0] == 5'd31}} & 5'd00) | + assign shortq_decode[4:0] = ( {5{shortq[4:0] == 5'd31}} & 5'd00) | ( {5{shortq[4:0] == 5'd30}} & 5'd00) | ( {5{shortq[4:0] == 5'd29}} & 5'd00) | ( {5{shortq[4:0] == 5'd28}} & 5'd00) | @@ -1291,16 +1473,16 @@ module el2_exu_div_new_3bit_fullshortq ( {5{shortq[4:0] == 5'd00}} & 5'd27); - assign shortq_shift[4:0] = ~shortq_enable ? 5'd0 : shortq_decode[4:0]; + assign shortq_shift[4:0] = ~shortq_enable ? 
5'd0 : shortq_decode[4:0]; - // *** *** *** End : Short Q }} + // *** *** *** End : Short Q }} -endmodule // el2_exu_div_new_3bit_fullshortq +endmodule // el2_exu_div_new_3bit_fullshortq @@ -1308,163 +1490,205 @@ endmodule // el2_exu_div_new_3bit_fullshortq // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_new_4bit_fullshortq - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode +module el2_exu_div_new_4bit_fullshortq ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input logic cancel, // Flush pipeline - input logic valid_in, - input logic signed_in, - input logic rem_in, - input logic [31:0] dividend_in, - input logic [31:0] divisor_in, + input logic cancel, // Flush pipeline + input logic valid_in, + input logic signed_in, + input logic rem_in, + input logic [31:0] dividend_in, + input logic [31:0] divisor_in, - output logic valid_out, - output logic [31:0] data_out + output logic valid_out, + output logic [31:0] data_out +); + + + logic valid_ff_in, valid_ff; + logic finish_raw, finish, finish_ff; + logic running_state; + logic misc_enable; + logic [2:0] control_in, control_ff; + logic dividend_sign_ff, divisor_sign_ff, rem_ff; + logic count_enable; + logic [6:0] count_in, count_ff; + + logic smallnum_case; + logic [3:0] smallnum; + + logic a_enable, a_shift; + logic [31:0] a_in, a_ff; + + logic b_enable, b_twos_comp; + logic [32:0] b_in; + logic [37:0] b_ff; + + logic [31:0] q_in, q_ff; + + logic rq_enable; + logic r_sign_sel; + logic r_restore_sel; + logic r_adder01_sel, r_adder02_sel, r_adder03_sel; + logic r_adder04_sel, r_adder05_sel, r_adder06_sel, r_adder07_sel; + logic r_adder08_sel, r_adder09_sel, r_adder10_sel, r_adder11_sel; + logic r_adder12_sel, r_adder13_sel, r_adder14_sel, r_adder15_sel; + logic [32:0] r_in, r_ff; + + logic twos_comp_q_sel, twos_comp_b_sel; + logic [31:0] 
twos_comp_in, twos_comp_out; + + logic [15:1] quotient_raw; + logic [ 3:0] quotient_new; + logic [34:0] adder01_out; + logic [35:0] adder02_out; + logic [36:0] adder03_out; + logic [37:0] adder04_out; + logic [37:0] adder05_out; + logic [37:0] adder06_out; + logic [37:0] adder07_out; + logic [37:0] adder08_out; + logic [37:0] adder09_out; + logic [37:0] adder10_out; + logic [37:0] adder11_out; + logic [37:0] adder12_out; + logic [37:0] adder13_out; + logic [37:0] adder14_out; + logic [37:0] adder15_out; + + logic [64:0] ar_shifted; + logic [ 5:0] shortq; + logic [ 4:0] shortq_shift; + logic [ 4:0] shortq_decode; + logic [ 4:0] shortq_shift_ff; + logic shortq_enable; + logic shortq_enable_ff; + logic [32:0] shortq_dividend; + + logic by_zero_case; + logic by_zero_case_ff; + + + + rvdffe #(19) i_misc_ff ( + .*, + .clk(clk), + .en(misc_enable), + .din({ + valid_ff_in, + control_in[2:0], + by_zero_case, + shortq_enable, + shortq_shift[4:0], + finish, + count_in[6:0] + }), + .dout({ + valid_ff, + control_ff[2:0], + by_zero_case_ff, + shortq_enable_ff, + shortq_shift_ff[4:0], + finish_ff, + count_ff[6:0] + }) + ); + + rvdffe #(32) i_a_ff ( + .*, + .clk (clk), + .en (a_enable), + .din (a_in[31:0]), + .dout(a_ff[31:0]) + ); + rvdffe #(33) i_b_ff ( + .*, + .clk (clk), + .en (b_enable), + .din (b_in[32:0]), + .dout(b_ff[32:0]) + ); + rvdffe #(33) i_r_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (r_in[32:0]), + .dout(r_ff[32:0]) + ); + rvdffe #(32) i_q_ff ( + .*, + .clk (clk), + .en (rq_enable), + .din (q_in[31:0]), + .dout(q_ff[31:0]) ); - logic valid_ff_in, valid_ff; - logic finish_raw, finish, finish_ff; - logic running_state; - logic misc_enable; - logic [2:0] control_in, control_ff; - logic dividend_sign_ff, divisor_sign_ff, rem_ff; - logic count_enable; - logic [6:0] count_in, count_ff; - - logic smallnum_case; - logic [3:0] smallnum; - - logic a_enable, a_shift; - logic [31:0] a_in, a_ff; - - logic b_enable, b_twos_comp; - logic [32:0] b_in; - logic [37:0] b_ff; 
- - logic [31:0] q_in, q_ff; - - logic rq_enable; - logic r_sign_sel; - logic r_restore_sel; - logic r_adder01_sel, r_adder02_sel, r_adder03_sel; - logic r_adder04_sel, r_adder05_sel, r_adder06_sel, r_adder07_sel; - logic r_adder08_sel, r_adder09_sel, r_adder10_sel, r_adder11_sel; - logic r_adder12_sel, r_adder13_sel, r_adder14_sel, r_adder15_sel; - logic [32:0] r_in, r_ff; - - logic twos_comp_q_sel, twos_comp_b_sel; - logic [31:0] twos_comp_in, twos_comp_out; - - logic [15:1] quotient_raw; - logic [3:0] quotient_new; - logic [34:0] adder01_out; - logic [35:0] adder02_out; - logic [36:0] adder03_out; - logic [37:0] adder04_out; - logic [37:0] adder05_out; - logic [37:0] adder06_out; - logic [37:0] adder07_out; - logic [37:0] adder08_out; - logic [37:0] adder09_out; - logic [37:0] adder10_out; - logic [37:0] adder11_out; - logic [37:0] adder12_out; - logic [37:0] adder13_out; - logic [37:0] adder14_out; - logic [37:0] adder15_out; - - logic [64:0] ar_shifted; - logic [5:0] shortq; - logic [4:0] shortq_shift; - logic [4:0] shortq_decode; - logic [4:0] shortq_shift_ff; - logic shortq_enable; - logic shortq_enable_ff; - logic [32:0] shortq_dividend; - - logic by_zero_case; - logic by_zero_case_ff; + assign valid_ff_in = valid_in & ~cancel; - rvdffe #(19) i_misc_ff (.*, .clk(clk), .en(misc_enable), .din ({valid_ff_in, control_in[2:0], by_zero_case, shortq_enable, shortq_shift[4:0], finish, count_in[6:0]}), - .dout({valid_ff, control_ff[2:0], by_zero_case_ff, shortq_enable_ff, shortq_shift_ff[4:0], finish_ff, count_ff[6:0]})); + assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); + assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); + assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - rvdffe #(32) i_a_ff (.*, .clk(clk), .en(a_enable), .din(a_in[31:0]), .dout(a_ff[31:0])); - rvdffe #(33) i_b_ff (.*, .clk(clk), .en(b_enable), .din(b_in[32:0]), .dout(b_ff[32:0])); - 
rvdffe #(33) i_r_ff (.*, .clk(clk), .en(rq_enable), .din(r_in[32:0]), .dout(r_ff[32:0])); - rvdffe #(32) i_q_ff (.*, .clk(clk), .en(rq_enable), .din(q_in[31:0]), .dout(q_ff[31:0])); + assign dividend_sign_ff = control_ff[2]; + assign divisor_sign_ff = control_ff[1]; + assign rem_ff = control_ff[0]; + assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + + assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; + assign running_state = (|count_ff[6:0]) | shortq_enable_ff; + assign finish_raw = smallnum_case | by_zero_case | (count_ff[6:0] == 7'd32); - assign valid_ff_in = valid_in & ~cancel; - - assign control_in[2] = (~valid_in & control_ff[2]) | (valid_in & signed_in & dividend_in[31]); - assign control_in[1] = (~valid_in & control_ff[1]) | (valid_in & signed_in & divisor_in[31]); - assign control_in[0] = (~valid_in & control_ff[0]) | (valid_in & rem_in); - - assign dividend_sign_ff = control_ff[2]; - assign divisor_sign_ff = control_ff[1]; - assign rem_ff = control_ff[0]; + assign finish = finish_raw & ~cancel; + assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; + assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + 7'd4 + {2'b0, shortq_shift_ff[4:0]}); - assign by_zero_case = valid_ff & (b_ff[31:0] == 32'b0); + assign a_enable = valid_in | running_state; + assign a_shift = running_state & ~shortq_enable_ff; - assign misc_enable = valid_in | valid_ff | cancel | running_state | finish_ff; - assign running_state = (| count_ff[6:0]) | shortq_enable_ff; - assign finish_raw = smallnum_case | - by_zero_case | - (count_ff[6:0] == 7'd32); + assign ar_shifted[64:0] = {{33{dividend_sign_ff}}, a_ff[31:0]} << {shortq_shift_ff[4:0]}; - - assign finish = finish_raw & ~cancel; - assign count_enable = (valid_ff | running_state) & ~finish & ~finish_ff & ~cancel & ~shortq_enable; - assign count_in[6:0] = {7{count_enable}} & (count_ff[6:0] + 7'd4 + {2'b0,shortq_shift_ff[4:0]}); - - - assign a_enable 
= valid_in | running_state; - assign a_shift = running_state & ~shortq_enable_ff; - - assign ar_shifted[64:0] = { {33{dividend_sign_ff}} , a_ff[31:0]} << {shortq_shift_ff[4:0]}; - - assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | + assign a_in[31:0] = ( {32{~a_shift & ~shortq_enable_ff}} & dividend_in[31:0] ) | ( {32{ a_shift }} & {a_ff[27:0],4'b0} ) | ( {32{ shortq_enable_ff}} & ar_shifted[31:0] ); - assign b_enable = valid_in | b_twos_comp; - assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign b_enable = valid_in | b_twos_comp; + assign b_twos_comp = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | + assign b_in[32:0] = ( {33{~b_twos_comp}} & { (signed_in & divisor_in[31]),divisor_in[31:0] } ) | ( {33{ b_twos_comp}} & {~divisor_sign_ff,twos_comp_out[31:0] } ); - assign rq_enable = valid_in | valid_ff | running_state; - assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; - assign r_restore_sel = running_state & (quotient_new[3:0] == 4'd00) & ~shortq_enable_ff; - assign r_adder01_sel = running_state & (quotient_new[3:0] == 4'd01) & ~shortq_enable_ff; - assign r_adder02_sel = running_state & (quotient_new[3:0] == 4'd02) & ~shortq_enable_ff; - assign r_adder03_sel = running_state & (quotient_new[3:0] == 4'd03) & ~shortq_enable_ff; - assign r_adder04_sel = running_state & (quotient_new[3:0] == 4'd04) & ~shortq_enable_ff; - assign r_adder05_sel = running_state & (quotient_new[3:0] == 4'd05) & ~shortq_enable_ff; - assign r_adder06_sel = running_state & (quotient_new[3:0] == 4'd06) & ~shortq_enable_ff; - assign r_adder07_sel = running_state & (quotient_new[3:0] == 4'd07) & ~shortq_enable_ff; - assign r_adder08_sel = running_state & (quotient_new[3:0] == 4'd08) & ~shortq_enable_ff; - assign r_adder09_sel = running_state & (quotient_new[3:0] == 4'd09) & ~shortq_enable_ff; - assign r_adder10_sel = 
running_state & (quotient_new[3:0] == 4'd10) & ~shortq_enable_ff; - assign r_adder11_sel = running_state & (quotient_new[3:0] == 4'd11) & ~shortq_enable_ff; - assign r_adder12_sel = running_state & (quotient_new[3:0] == 4'd12) & ~shortq_enable_ff; - assign r_adder13_sel = running_state & (quotient_new[3:0] == 4'd13) & ~shortq_enable_ff; - assign r_adder14_sel = running_state & (quotient_new[3:0] == 4'd14) & ~shortq_enable_ff; - assign r_adder15_sel = running_state & (quotient_new[3:0] == 4'd15) & ~shortq_enable_ff; + assign rq_enable = valid_in | valid_ff | running_state; + assign r_sign_sel = valid_ff & dividend_sign_ff & ~by_zero_case; + assign r_restore_sel = running_state & (quotient_new[3:0] == 4'd00) & ~shortq_enable_ff; + assign r_adder01_sel = running_state & (quotient_new[3:0] == 4'd01) & ~shortq_enable_ff; + assign r_adder02_sel = running_state & (quotient_new[3:0] == 4'd02) & ~shortq_enable_ff; + assign r_adder03_sel = running_state & (quotient_new[3:0] == 4'd03) & ~shortq_enable_ff; + assign r_adder04_sel = running_state & (quotient_new[3:0] == 4'd04) & ~shortq_enable_ff; + assign r_adder05_sel = running_state & (quotient_new[3:0] == 4'd05) & ~shortq_enable_ff; + assign r_adder06_sel = running_state & (quotient_new[3:0] == 4'd06) & ~shortq_enable_ff; + assign r_adder07_sel = running_state & (quotient_new[3:0] == 4'd07) & ~shortq_enable_ff; + assign r_adder08_sel = running_state & (quotient_new[3:0] == 4'd08) & ~shortq_enable_ff; + assign r_adder09_sel = running_state & (quotient_new[3:0] == 4'd09) & ~shortq_enable_ff; + assign r_adder10_sel = running_state & (quotient_new[3:0] == 4'd10) & ~shortq_enable_ff; + assign r_adder11_sel = running_state & (quotient_new[3:0] == 4'd11) & ~shortq_enable_ff; + assign r_adder12_sel = running_state & (quotient_new[3:0] == 4'd12) & ~shortq_enable_ff; + assign r_adder13_sel = running_state & (quotient_new[3:0] == 4'd13) & ~shortq_enable_ff; + assign r_adder14_sel = running_state & (quotient_new[3:0] == 4'd14) & 
~shortq_enable_ff; + assign r_adder15_sel = running_state & (quotient_new[3:0] == 4'd15) & ~shortq_enable_ff; - assign r_in[32:0] = ( {33{r_sign_sel }} & {33{1'b1}} ) | + assign r_in[32:0] = ( {33{r_sign_sel }} & {33{1'b1}} ) | ( {33{r_restore_sel }} & {r_ff[28:0],a_ff[31:28]} ) | ( {33{r_adder01_sel }} & adder01_out[32:0] ) | ( {33{r_adder02_sel }} & adder02_out[32:0] ) | @@ -1485,115 +1709,118 @@ module el2_exu_div_new_4bit_fullshortq ( {33{by_zero_case }} & {1'b0,a_ff[31:0]} ); - assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[27:0], quotient_new[3:0]} ) | + assign q_in[31:0] = ( {32{~valid_ff }} & {q_ff[27:0], quotient_new[3:0]} ) | ( {32{ smallnum_case}} & {28'b0 , smallnum[3:0]} ) | ( {32{ by_zero_case }} & {32{1'b1}} ); - assign b_ff[37:33] = {b_ff[32],b_ff[32],b_ff[32],b_ff[32],b_ff[32]}; + assign b_ff[37:33] = {b_ff[32], b_ff[32], b_ff[32], b_ff[32], b_ff[32]}; - assign adder01_out[34:0] = { r_ff[30:0],a_ff[31:28]} + b_ff[34:0]; - assign adder02_out[35:0] = { r_ff[31:0],a_ff[31:28]} + {b_ff[34:0],1'b0}; - assign adder03_out[36:0] = { r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],1'b0} + b_ff[36:0]; - assign adder04_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0}; - assign adder05_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0} + b_ff[37:0]; - assign adder06_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0}; - assign adder07_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; - assign adder08_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0}; - assign adder09_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + b_ff[37:0]; - assign adder10_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[36:0],1'b0}; - assign adder11_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; - assign adder12_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + 
{b_ff[34:0],3'b0} + {b_ff[35:0],2'b0}; - assign adder13_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + b_ff[37:0]; - assign adder14_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0}; - assign adder15_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; + assign adder01_out[34:0] = {r_ff[30:0], a_ff[31:28]} + b_ff[34:0]; + assign adder02_out[35:0] = {r_ff[31:0], a_ff[31:28]} + {b_ff[34:0], 1'b0}; + assign adder03_out[36:0] = {r_ff[32:0], a_ff[31:28]} + {b_ff[35:0], 1'b0} + b_ff[36:0]; + assign adder04_out[37:0] = {r_ff[32], r_ff[32:0], a_ff[31:28]} + {b_ff[35:0], 2'b0}; + assign adder05_out[37:0] = {r_ff[32], r_ff[32:0], a_ff[31:28]} + {b_ff[35:0], 2'b0} + b_ff[37:0]; + assign adder06_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0}; + assign adder07_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; + assign adder08_out[37:0] = {r_ff[32], r_ff[32:0], a_ff[31:28]} + {b_ff[34:0], 3'b0}; + assign adder09_out[37:0] = {r_ff[32], r_ff[32:0], a_ff[31:28]} + {b_ff[34:0], 3'b0} + b_ff[37:0]; + assign adder10_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[36:0],1'b0}; + assign adder11_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; + assign adder12_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0}; + assign adder13_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + b_ff[37:0]; + assign adder14_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0}; + assign adder15_out[37:0] = {r_ff[32],r_ff[32:0],a_ff[31:28]} + {b_ff[34:0],3'b0} + {b_ff[35:0],2'b0} + {b_ff[36:0],1'b0} + b_ff[37:0]; - assign quotient_raw[01] = (~adder01_out[34] ^ 
dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder01_out[34:0] == 35'b0) ); - assign quotient_raw[02] = (~adder02_out[35] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder02_out[35:0] == 36'b0) ); - assign quotient_raw[03] = (~adder03_out[36] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder03_out[36:0] == 37'b0) ); - assign quotient_raw[04] = (~adder04_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder04_out[37:0] == 38'b0) ); - assign quotient_raw[05] = (~adder05_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder05_out[37:0] == 38'b0) ); - assign quotient_raw[06] = (~adder06_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder06_out[37:0] == 38'b0) ); - assign quotient_raw[07] = (~adder07_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder07_out[37:0] == 38'b0) ); - assign quotient_raw[08] = (~adder08_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder08_out[37:0] == 38'b0) ); - assign quotient_raw[09] = (~adder09_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder09_out[37:0] == 38'b0) ); - assign quotient_raw[10] = (~adder10_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder10_out[37:0] == 38'b0) ); - assign quotient_raw[11] = (~adder11_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder11_out[37:0] == 38'b0) ); - assign quotient_raw[12] = (~adder12_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder12_out[37:0] == 38'b0) ); - assign quotient_raw[13] = (~adder13_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder13_out[37:0] == 38'b0) ); - assign quotient_raw[14] = (~adder14_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder14_out[37:0] == 38'b0) ); - assign quotient_raw[15] = (~adder15_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder15_out[37:0] == 38'b0) ); + assign quotient_raw[01] = (~adder01_out[34] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder01_out[34:0] == 35'b0) ); + assign quotient_raw[02] = 
(~adder02_out[35] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder02_out[35:0] == 36'b0) ); + assign quotient_raw[03] = (~adder03_out[36] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder03_out[36:0] == 37'b0) ); + assign quotient_raw[04] = (~adder04_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder04_out[37:0] == 38'b0) ); + assign quotient_raw[05] = (~adder05_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder05_out[37:0] == 38'b0) ); + assign quotient_raw[06] = (~adder06_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder06_out[37:0] == 38'b0) ); + assign quotient_raw[07] = (~adder07_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder07_out[37:0] == 38'b0) ); + assign quotient_raw[08] = (~adder08_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder08_out[37:0] == 38'b0) ); + assign quotient_raw[09] = (~adder09_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder09_out[37:0] == 38'b0) ); + assign quotient_raw[10] = (~adder10_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder10_out[37:0] == 38'b0) ); + assign quotient_raw[11] = (~adder11_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder11_out[37:0] == 38'b0) ); + assign quotient_raw[12] = (~adder12_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder12_out[37:0] == 38'b0) ); + assign quotient_raw[13] = (~adder13_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder13_out[37:0] == 38'b0) ); + assign quotient_raw[14] = (~adder14_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder14_out[37:0] == 38'b0) ); + assign quotient_raw[15] = (~adder15_out[37] ^ dividend_sign_ff) | ( (a_ff[27:0] == 28'b0) & (adder15_out[37:0] == 38'b0) ); - assign quotient_new[0] = ( quotient_raw[15:01] == 15'b000_0000_0000_0001 ) | // 1 - ( quotient_raw[15:03] == 13'b000_0000_0000_01 ) | // 3 - ( quotient_raw[15:05] == 11'b000_0000_0001 ) | // 5 - ( quotient_raw[15:07] == 9'b000_0000_01 ) | // 7 - ( 
quotient_raw[15:09] == 7'b000_0001 ) | // 9 - ( quotient_raw[15:11] == 5'b000_01 ) | // 11 - ( quotient_raw[15:13] == 3'b001 ) | // 13 - ( quotient_raw[ 15] == 1'b1 ); // 15 + assign quotient_new[0] = (quotient_raw[15:01] == 15'b000_0000_0000_0001) | // 1 + (quotient_raw[15:03] == 13'b000_0000_0000_01) | // 3 + (quotient_raw[15:05] == 11'b000_0000_0001) | // 5 + (quotient_raw[15:07] == 9'b000_0000_01) | // 7 + (quotient_raw[15:09] == 7'b000_0001) | // 9 + (quotient_raw[15:11] == 5'b000_01) | // 11 + (quotient_raw[15:13] == 3'b001) | // 13 + (quotient_raw[15] == 1'b1); // 15 - assign quotient_new[1] = ( quotient_raw[15:02] == 14'b000_0000_0000_001 ) | // 2 - ( quotient_raw[15:03] == 13'b000_0000_0000_01 ) | // 3 - ( quotient_raw[15:06] == 10'b000_0000_001 ) | // 6 - ( quotient_raw[15:07] == 9'b000_0000_01 ) | // 7 - ( quotient_raw[15:10] == 6'b000_001 ) | // 10 - ( quotient_raw[15:11] == 5'b000_01 ) | // 11 - ( quotient_raw[15:14] == 2'b01 ) | // 14 - ( quotient_raw[ 15] == 1'b1 ); // 15 + assign quotient_new[1] = (quotient_raw[15:02] == 14'b000_0000_0000_001) | // 2 + (quotient_raw[15:03] == 13'b000_0000_0000_01) | // 3 + (quotient_raw[15:06] == 10'b000_0000_001) | // 6 + (quotient_raw[15:07] == 9'b000_0000_01) | // 7 + (quotient_raw[15:10] == 6'b000_001) | // 10 + (quotient_raw[15:11] == 5'b000_01) | // 11 + (quotient_raw[15:14] == 2'b01) | // 14 + (quotient_raw[15] == 1'b1); // 15 - assign quotient_new[2] = ( quotient_raw[15:04] == 12'b000_0000_0000_1 ) | // 4 - ( quotient_raw[15:05] == 11'b000_0000_0001 ) | // 5 - ( quotient_raw[15:06] == 10'b000_0000_001 ) | // 6 - ( quotient_raw[15:07] == 9'b000_0000_01 ) | // 7 - ( quotient_raw[15:12] == 4'b000_1 ) | // 12 - ( quotient_raw[15:13] == 3'b001 ) | // 13 - ( quotient_raw[15:14] == 2'b01 ) | // 14 - ( quotient_raw[ 15] == 1'b1 ); // 15 + assign quotient_new[2] = (quotient_raw[15:04] == 12'b000_0000_0000_1) | // 4 + (quotient_raw[15:05] == 11'b000_0000_0001) | // 5 + (quotient_raw[15:06] == 10'b000_0000_001) | // 6 
+ (quotient_raw[15:07] == 9'b000_0000_01) | // 7 + (quotient_raw[15:12] == 4'b000_1) | // 12 + (quotient_raw[15:13] == 3'b001) | // 13 + (quotient_raw[15:14] == 2'b01) | // 14 + (quotient_raw[15] == 1'b1); // 15 - assign quotient_new[3] = ( quotient_raw[15:08] == 8'b000_0000_1 ) | // 8 - ( quotient_raw[15:09] == 7'b000_0001 ) | // 9 - ( quotient_raw[15:10] == 6'b000_001 ) | // 10 - ( quotient_raw[15:11] == 5'b000_01 ) | // 11 - ( quotient_raw[15:12] == 4'b000_1 ) | // 12 - ( quotient_raw[15:13] == 3'b001 ) | // 13 - ( quotient_raw[15:14] == 2'b01 ) | // 14 - ( quotient_raw[ 15] == 1'b1 ); // 15 + assign quotient_new[3] = (quotient_raw[15:08] == 8'b000_0000_1) | // 8 + (quotient_raw[15:09] == 7'b000_0001) | // 9 + (quotient_raw[15:10] == 6'b000_001) | // 10 + (quotient_raw[15:11] == 5'b000_01) | // 11 + (quotient_raw[15:12] == 4'b000_1) | // 12 + (quotient_raw[15:13] == 3'b001) | // 13 + (quotient_raw[15:14] == 2'b01) | // 14 + (quotient_raw[15] == 1'b1); // 15 - assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); - assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; + assign twos_comp_b_sel = valid_ff & ~(dividend_sign_ff ^ divisor_sign_ff); + assign twos_comp_q_sel = ~valid_ff & ~rem_ff & (dividend_sign_ff ^ divisor_sign_ff) & ~by_zero_case_ff; - assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | + assign twos_comp_in[31:0] = ( {32{twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{twos_comp_b_sel}} & b_ff[31:0] ); - rvtwoscomp #(32) i_twos_comp (.din(twos_comp_in[31:0]), .dout(twos_comp_out[31:0])); + rvtwoscomp #(32) i_twos_comp ( + .din (twos_comp_in[31:0]), + .dout(twos_comp_out[31:0]) + ); - assign valid_out = finish_ff & ~cancel; + assign valid_out = finish_ff & ~cancel; - assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | + assign data_out[31:0] = ( {32{~rem_ff & ~twos_comp_q_sel}} & q_ff[31:0] ) | ( {32{ rem_ff }} & r_ff[31:0] ) | ( {32{ 
twos_comp_q_sel}} & twos_comp_out[31:0] ); - // *** *** *** START : SMALLNUM {{ + // *** *** *** START : SMALLNUM {{ - assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | + assign smallnum_case = ( (a_ff[31:4] == 28'b0) & (b_ff[31:4] == 28'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel) | ( (a_ff[31:0] == 32'b0) & ~by_zero_case & ~rem_ff & valid_ff & ~cancel); - assign smallnum[3] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ); + assign smallnum[3] = (a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1]); - assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[2] = ( a_ff[3] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & ~b_ff[3] & ~b_ff[2] ); - assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | + assign smallnum[1] = ( a_ff[2] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[1] ) | ( a_ff[3] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[3] & ~a_ff[2] & ~b_ff[3] & ~b_ff[2] & b_ff[1] & b_ff[0]) | @@ -1603,7 +1830,7 @@ module el2_exu_div_new_4bit_fullshortq ( a_ff[3] & a_ff[1] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & ~b_ff[3] & b_ff[2] ); - assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | + assign smallnum[0] = ( a_ff[2] & a_ff[1] & a_ff[0] & ~b_ff[3] & ~b_ff[1] ) | ( a_ff[3] & ~a_ff[2] & a_ff[0] & ~b_ff[3] & b_ff[1] & b_ff[0]) | ( a_ff[2] & ~b_ff[3] & ~b_ff[1] & ~b_ff[0]) | ( a_ff[1] & ~b_ff[3] & ~b_ff[2] & ~b_ff[0]) | @@ -1630,40 +1857,42 @@ module el2_exu_div_new_4bit_fullshortq ( a_ff[3] & a_ff[1] & a_ff[0] & ~b_ff[2] ) | ( a_ff[3] & a_ff[2] & a_ff[1] & a_ff[0] & b_ff[3] ); - // *** *** *** END : SMALLNUM }} + // *** *** *** END : SMALLNUM }} - // *** *** *** Start : Short Q {{ + // *** *** *** Start : Short Q {{ - assign shortq_dividend[32:0] = {dividend_sign_ff,a_ff[31:0]}; + assign shortq_dividend[32:0] = {dividend_sign_ff, 
a_ff[31:0]}; - logic [5:0] dw_a_enc; - logic [5:0] dw_b_enc; - logic [6:0] dw_shortq_raw; + logic [5:0] dw_a_enc; + logic [5:0] dw_b_enc; + logic [6:0] dw_shortq_raw; - el2_exu_div_cls i_a_cls ( - .operand ( shortq_dividend[32:0] ), - .cls ( dw_a_enc[4:0] )); + el2_exu_div_cls i_a_cls ( + .operand(shortq_dividend[32:0]), + .cls (dw_a_enc[4:0]) + ); - el2_exu_div_cls i_b_cls ( - .operand ( b_ff[32:0] ), - .cls ( dw_b_enc[4:0] )); + el2_exu_div_cls i_b_cls ( + .operand(b_ff[32:0]), + .cls (dw_b_enc[4:0]) + ); - assign dw_a_enc[5] = 1'b0; - assign dw_b_enc[5] = 1'b0; + assign dw_a_enc[5] = 1'b0; + assign dw_b_enc[5] = 1'b0; - assign dw_shortq_raw[6:0] = {1'b0,dw_b_enc[5:0]} - {1'b0,dw_a_enc[5:0]} + 7'd1; - assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; + assign dw_shortq_raw[6:0] = {1'b0, dw_b_enc[5:0]} - {1'b0, dw_a_enc[5:0]} + 7'd1; + assign shortq[5:0] = dw_shortq_raw[6] ? 6'd0 : dw_shortq_raw[5:0]; - assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:2] == 3'b111) & ~cancel; + assign shortq_enable = valid_ff & ~shortq[5] & ~(shortq[4:2] == 3'b111) & ~cancel; - assign shortq_decode[4:0] = ( {5{shortq[4:0] == 5'd31}} & 5'd00) | + assign shortq_decode[4:0] = ( {5{shortq[4:0] == 5'd31}} & 5'd00) | ( {5{shortq[4:0] == 5'd30}} & 5'd00) | ( {5{shortq[4:0] == 5'd29}} & 5'd00) | ( {5{shortq[4:0] == 5'd28}} & 5'd00) | @@ -1697,16 +1926,16 @@ module el2_exu_div_new_4bit_fullshortq ( {5{shortq[4:0] == 5'd00}} & 5'd28); - assign shortq_shift[4:0] = ~shortq_enable ? 5'd0 : shortq_decode[4:0]; + assign shortq_shift[4:0] = ~shortq_enable ? 
5'd0 : shortq_decode[4:0]; - // *** *** *** End : Short Q }} + // *** *** *** End : Short Q }} -endmodule // el2_exu_div_new_4bit_fullshortq +endmodule // el2_exu_div_new_4bit_fullshortq @@ -1715,19 +1944,18 @@ endmodule // el2_exu_div_new_4bit_fullshortq // * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * -module el2_exu_div_cls - ( - input logic [32:0] operand, +module el2_exu_div_cls ( + input logic [32:0] operand, - output logic [4:0] cls // Count leading sign bits - "n" format ignoring [32] - ); + output logic [4:0] cls // Count leading sign bits - "n" format ignoring [32] +); - logic [4:0] cls_zeros; - logic [4:0] cls_ones; + logic [4:0] cls_zeros; + logic [4:0] cls_ones; -assign cls_zeros[4:0] = ({5{operand[31] == { 1'b1} }} & 5'd00) | + assign cls_zeros[4:0] = ({5{operand[31] == { 1'b1} }} & 5'd00) | ({5{operand[31:30] == {{ 1{1'b0}},1'b1} }} & 5'd01) | ({5{operand[31:29] == {{ 2{1'b0}},1'b1} }} & 5'd02) | ({5{operand[31:28] == {{ 3{1'b0}},1'b1} }} & 5'd03) | @@ -1762,7 +1990,7 @@ assign cls_zeros[4:0] = ({5{operand[31] == { 1'b1} }} ({5{operand[31:00] == {{32{1'b0}} } }} & 5'd00); // Don't care case as it will be handled as special case -assign cls_ones[4:0] = ({5{operand[31:30] == {{ 1{1'b1}},1'b0} }} & 5'd00) | + assign cls_ones[4:0] = ({5{operand[31:30] == {{ 1{1'b1}},1'b0} }} & 5'd00) | ({5{operand[31:29] == {{ 2{1'b1}},1'b0} }} & 5'd01) | ({5{operand[31:28] == {{ 3{1'b1}},1'b0} }} & 5'd02) | ({5{operand[31:27] == {{ 4{1'b1}},1'b0} }} & 5'd03) | @@ -1796,6 +2024,6 @@ assign cls_ones[4:0] = ({5{operand[31:30] == {{ 1{1'b1}},1'b0} }} ({5{operand[31:00] == {{32{1'b1}} } }} & 5'd31); -assign cls[4:0] = operand[32] ? cls_ones[4:0] : cls_zeros[4:0]; + assign cls[4:0] = operand[32] ? 
cls_ones[4:0] : cls_zeros[4:0]; -endmodule // el2_exu_div_cls +endmodule // el2_exu_div_cls diff --git a/Flow/design/exu/el2_exu_mul_ctl.sv b/Flow/design/exu/el2_exu_mul_ctl.sv index e47a7d8..3484ad6 100644 --- a/Flow/design/exu/el2_exu_mul_ctl.sv +++ b/Flow/design/exu/el2_exu_mul_ctl.sv @@ -15,241 +15,231 @@ module el2_exu_mul_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, // Top level clock - input logic rst_l, // Reset - input logic scan_mode, // Scan mode + `include "el2_param.vh" +) ( + input logic clk, // Top level clock + input logic rst_l, // Reset + input logic scan_mode, // Scan mode - input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result} + input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result} - input logic [31:0] rs1_in, // A operand - input logic [31:0] rs2_in, // B operand + input logic [31:0] rs1_in, // A operand + input logic [31:0] rs2_in, // B operand - output logic [31:0] result_x // Result + output logic [31:0] result_x // Result +); + + + logic mul_x_enable; + logic bit_x_enable; + logic signed [32:0] rs1_ext_in; + logic signed [32:0] rs2_ext_in; + logic [65:0] prod_x; + logic low_x; + + + + // *** Start - BitManip *** + + logic bitmanip_sel_d; + logic bitmanip_sel_x; + logic [31:0] bitmanip_d; + logic [31:0] bitmanip_x; + + + + // ZBE + logic ap_bcompress; + logic ap_bdecompress; + + // ZBC + logic ap_clmul; + logic ap_clmulh; + logic ap_clmulr; + + // ZBP + logic ap_grev; + logic ap_gorc; + logic ap_shfl; + logic ap_unshfl; + logic ap_xperm_n; + logic ap_xperm_b; + logic ap_xperm_h; + + // ZBR + logic ap_crc32_b; + logic ap_crc32_h; + logic ap_crc32_w; + logic ap_crc32c_b; + logic ap_crc32c_h; + logic ap_crc32c_w; + + // ZBF + logic ap_bfp; + + + if (pt.BITMANIP_ZBE == 1) begin + assign ap_bcompress = mul_p.bcompress; + assign ap_bdecompress = mul_p.bdecompress; + end else begin + 
assign ap_bcompress = 1'b0; + assign ap_bdecompress = 1'b0; + end + + if (pt.BITMANIP_ZBC == 1) begin + assign ap_clmul = mul_p.clmul; + assign ap_clmulh = mul_p.clmulh; + assign ap_clmulr = mul_p.clmulr; + end else begin + assign ap_clmul = 1'b0; + assign ap_clmulh = 1'b0; + assign ap_clmulr = 1'b0; + end + + if (pt.BITMANIP_ZBP == 1) begin + assign ap_grev = mul_p.grev; + assign ap_gorc = mul_p.gorc; + assign ap_shfl = mul_p.shfl; + assign ap_unshfl = mul_p.unshfl; + assign ap_xperm_n = mul_p.xperm_n; + assign ap_xperm_b = mul_p.xperm_b; + assign ap_xperm_h = mul_p.xperm_h; + end else begin + assign ap_grev = 1'b0; + assign ap_gorc = 1'b0; + assign ap_shfl = 1'b0; + assign ap_unshfl = 1'b0; + assign ap_xperm_n = 1'b0; + assign ap_xperm_b = 1'b0; + assign ap_xperm_h = 1'b0; + end + + if (pt.BITMANIP_ZBR == 1) begin + assign ap_crc32_b = mul_p.crc32_b; + assign ap_crc32_h = mul_p.crc32_h; + assign ap_crc32_w = mul_p.crc32_w; + assign ap_crc32c_b = mul_p.crc32c_b; + assign ap_crc32c_h = mul_p.crc32c_h; + assign ap_crc32c_w = mul_p.crc32c_w; + end else begin + assign ap_crc32_b = 1'b0; + assign ap_crc32_h = 1'b0; + assign ap_crc32_w = 1'b0; + assign ap_crc32c_b = 1'b0; + assign ap_crc32c_h = 1'b0; + assign ap_crc32c_w = 1'b0; + end + + if (pt.BITMANIP_ZBF == 1) begin + assign ap_bfp = mul_p.bfp; + end else begin + assign ap_bfp = 1'b0; + end + + + // *** End - BitManip *** + + + + assign mul_x_enable = mul_p.valid; + assign bit_x_enable = mul_p.valid; + + assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31]; + assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31]; + + assign rs1_ext_in[31:0] = rs1_in[31:0]; + assign rs2_ext_in[31:0] = rs2_in[31:0]; + + + + // --------------------------- Multiply ---------------------------------- + + + logic signed [32:0] rs1_x; + logic signed [32:0] rs2_x; + + rvdffe #(34) i_a_x_ff ( + .*, + .clk (clk), + .din ({mul_p.low, rs1_ext_in[32:0]}), + .dout({low_x, rs1_x[32:0]}), + .en (mul_x_enable) + ); + rvdffe #(33) i_b_x_ff ( + .*, + 
.clk (clk), + .din (rs2_ext_in[32:0]), + .dout(rs2_x[32:0]), + .en (mul_x_enable) ); - logic mul_x_enable; - logic bit_x_enable; - logic signed [32:0] rs1_ext_in; - logic signed [32:0] rs2_ext_in; - logic [65:0] prod_x; - logic low_x; - - - - // *** Start - BitManip *** - - logic bitmanip_sel_d; - logic bitmanip_sel_x; - logic [31:0] bitmanip_d; - logic [31:0] bitmanip_x; - - - - // ZBE - logic ap_bcompress; - logic ap_bdecompress; - - // ZBC - logic ap_clmul; - logic ap_clmulh; - logic ap_clmulr; - - // ZBP - logic ap_grev; - logic ap_gorc; - logic ap_shfl; - logic ap_unshfl; - logic ap_xperm_n; - logic ap_xperm_b; - logic ap_xperm_h; - - // ZBR - logic ap_crc32_b; - logic ap_crc32_h; - logic ap_crc32_w; - logic ap_crc32c_b; - logic ap_crc32c_h; - logic ap_crc32c_w; - - // ZBF - logic ap_bfp; - - - if (pt.BITMANIP_ZBE == 1) - begin - assign ap_bcompress = mul_p.bcompress; - assign ap_bdecompress = mul_p.bdecompress; - end - else - begin - assign ap_bcompress = 1'b0; - assign ap_bdecompress = 1'b0; - end - - if (pt.BITMANIP_ZBC == 1) - begin - assign ap_clmul = mul_p.clmul; - assign ap_clmulh = mul_p.clmulh; - assign ap_clmulr = mul_p.clmulr; - end - else - begin - assign ap_clmul = 1'b0; - assign ap_clmulh = 1'b0; - assign ap_clmulr = 1'b0; - end - - if (pt.BITMANIP_ZBP == 1) - begin - assign ap_grev = mul_p.grev; - assign ap_gorc = mul_p.gorc; - assign ap_shfl = mul_p.shfl; - assign ap_unshfl = mul_p.unshfl; - assign ap_xperm_n = mul_p.xperm_n; - assign ap_xperm_b = mul_p.xperm_b; - assign ap_xperm_h = mul_p.xperm_h; - end - else - begin - assign ap_grev = 1'b0; - assign ap_gorc = 1'b0; - assign ap_shfl = 1'b0; - assign ap_unshfl = 1'b0; - assign ap_xperm_n = 1'b0; - assign ap_xperm_b = 1'b0; - assign ap_xperm_h = 1'b0; - end - - if (pt.BITMANIP_ZBR == 1) - begin - assign ap_crc32_b = mul_p.crc32_b; - assign ap_crc32_h = mul_p.crc32_h; - assign ap_crc32_w = mul_p.crc32_w; - assign ap_crc32c_b = mul_p.crc32c_b; - assign ap_crc32c_h = mul_p.crc32c_h; - assign 
ap_crc32c_w = mul_p.crc32c_w; - end - else - begin - assign ap_crc32_b = 1'b0; - assign ap_crc32_h = 1'b0; - assign ap_crc32_w = 1'b0; - assign ap_crc32c_b = 1'b0; - assign ap_crc32c_h = 1'b0; - assign ap_crc32c_w = 1'b0; - end - - if (pt.BITMANIP_ZBF == 1) - begin - assign ap_bfp = mul_p.bfp; - end - else - begin - assign ap_bfp = 1'b0; - end - - - // *** End - BitManip *** - - - - assign mul_x_enable = mul_p.valid; - assign bit_x_enable = mul_p.valid; - - assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31]; - assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31]; - - assign rs1_ext_in[31:0] = rs1_in[31:0]; - assign rs2_ext_in[31:0] = rs2_in[31:0]; - - - - // --------------------------- Multiply ---------------------------------- - - - logic signed [32:0] rs1_x; - logic signed [32:0] rs2_x; - - rvdffe #(34) i_a_x_ff (.*, .clk(clk), .din({mul_p.low,rs1_ext_in[32:0]}), .dout({low_x,rs1_x[32:0]}), .en(mul_x_enable)); - rvdffe #(33) i_b_x_ff (.*, .clk(clk), .din( rs2_ext_in[32:0] ), .dout( rs2_x[32:0] ), .en(mul_x_enable)); - - - assign prod_x[65:0] = rs1_x * rs2_x; + assign prod_x[65:0] = rs1_x * rs2_x; - // * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * * - // *** BCOMPRESS == "gather" *** + // *** BCOMPRESS == "gather" *** - logic [31:0] bcompress_d; - logic bcompress_test_bit_d; - integer bcompress_i, bcompress_j; + logic [31:0] bcompress_d; + logic bcompress_test_bit_d; + integer bcompress_i, bcompress_j; - always_comb - begin + always_comb begin - bcompress_j = 0; - bcompress_test_bit_d = 1'b0; - bcompress_d[31:0] = 32'b0; + bcompress_j = 0; + bcompress_test_bit_d = 1'b0; + bcompress_d[31:0] = 32'b0; - for (bcompress_i=0; bcompress_i<32; bcompress_i++) - begin - bcompress_test_bit_d = rs2_in[bcompress_i]; - if (bcompress_test_bit_d) - begin - bcompress_d[bcompress_j] = rs1_in[bcompress_i]; - bcompress_j = bcompress_j + 
1; - end // IF bcompress_test_bit - end // FOR bcompress_i - end // ALWAYS_COMB + for (bcompress_i = 0; bcompress_i < 32; bcompress_i++) begin + bcompress_test_bit_d = rs2_in[bcompress_i]; + if (bcompress_test_bit_d) begin + bcompress_d[bcompress_j] = rs1_in[bcompress_i]; + bcompress_j = bcompress_j + 1; + end // IF bcompress_test_bit + end // FOR bcompress_i + end // ALWAYS_COMB - // *** BDECOMPRESS == "scatter" *** + // *** BDECOMPRESS == "scatter" *** - logic [31:0] bdecompress_d; - logic bdecompress_test_bit_d; - integer bdecompress_i, bdecompress_j; + logic [31:0] bdecompress_d; + logic bdecompress_test_bit_d; + integer bdecompress_i, bdecompress_j; - always_comb - begin + always_comb begin - bdecompress_j = 0; - bdecompress_test_bit_d = 1'b0; - bdecompress_d[31:0] = 32'b0; + bdecompress_j = 0; + bdecompress_test_bit_d = 1'b0; + bdecompress_d[31:0] = 32'b0; - for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++) - begin - bdecompress_test_bit_d = rs2_in[bdecompress_i]; - if (bdecompress_test_bit_d) - begin - bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j]; - bdecompress_j = bdecompress_j + 1; - end // IF bdecompress_test_bit - end // FOR bdecompress_i - end // ALWAYS_COMB + for (bdecompress_i = 0; bdecompress_i < 32; bdecompress_i++) begin + bdecompress_test_bit_d = rs2_in[bdecompress_i]; + if (bdecompress_test_bit_d) begin + bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j]; + bdecompress_j = bdecompress_j + 1; + end // IF bdecompress_test_bit + end // FOR bdecompress_i + end // ALWAYS_COMB - // * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * * - logic [62:0] clmul_raw_d; + logic [62:0] clmul_raw_d; - assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^ + assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^ ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^ ( 
{63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^ ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^ @@ -285,137 +275,137 @@ import el2_pkg::*; - // * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * * - // uint32_t grev32(uint32_t rs1, uint32_t rs2) - // { - // uint32_t x = rs1; - // int shamt = rs2 & 31; - // - // if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1); - // if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2); - // if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4); - // if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8); - // if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16); - // - // return x; - // } + // uint32_t grev32(uint32_t rs1, uint32_t rs2) + // { + // uint32_t x = rs1; + // int shamt = rs2 & 31; + // + // if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1); + // if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2); + // if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4); + // if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8); + // if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16); + // + // return x; + // } - logic [31:0] grev1_d; - logic [31:0] grev2_d; - logic [31:0] grev4_d; - logic [31:0] grev8_d; - logic [31:0] grev_d; + logic [31:0] grev1_d; + logic [31:0] grev2_d; + logic [31:0] grev4_d; + logic [31:0] grev8_d; + logic [31:0] grev_d; - assign grev1_d[31:0] = (rs2_in[0]) ? {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], + assign grev1_d[31:0] = (rs2_in[0]) ? 
{rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17], rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09], rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} : rs1_in[31:0]; - assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26], + assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26], grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18], grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10], grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]} : grev1_d[31:0]; - assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20], + assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20], grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]} : grev2_d[31:0]; - assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0]; + assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0]; - assign grev_d[31:0] = (rs2_in[4]) ? {grev8_d[15:00],grev8_d[31:16]} : grev8_d[31:0]; + assign grev_d[31:0] = (rs2_in[4]) ? 
{grev8_d[15:00], grev8_d[31:16]} : grev8_d[31:0]; - // * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * * - // uint32_t gorc32(uint32_t rs1, uint32_t rs2) - // { - // uint32_t x = rs1; - // int shamt = rs2 & 31; - // - // if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1); - // if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2); - // if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4); - // if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8); - // if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16); - // - // return x; - // } + // uint32_t gorc32(uint32_t rs1, uint32_t rs2) + // { + // uint32_t x = rs1; + // int shamt = rs2 & 31; + // + // if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1); + // if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2); + // if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4); + // if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8); + // if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16); + // + // return x; + // } - logic [31:0] gorc1_d; - logic [31:0] gorc2_d; - logic [31:0] gorc4_d; - logic [31:0] gorc8_d; - logic [31:0] gorc_d; + logic [31:0] gorc1_d; + logic [31:0] gorc2_d; + logic [31:0] gorc4_d; + logic [31:0] gorc8_d; + logic [31:0] gorc_d; - assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], + assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17], rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09], 
rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0]; - assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26], + assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26], gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18], gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10], gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0]; - assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20], + assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20], gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0]; - assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0]; + assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0]; - assign gorc_d[31:0] = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0]; + assign gorc_d[31:0] = ({32{rs2_in[4]}} & {gorc8_d[15:00], gorc8_d[31:16]}) | gorc8_d[31:0]; - // * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHLF * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHLF * * * * * * * * * * * * * * * * * * - // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N) - // { - // uint32_t x = src & ~(maskL | maskR); - // x |= ((src << N) & maskL) | ((src >> N) & maskR); - // return x; - // } - // - // - // - // uint32_t shfl32(uint32_t rs1, uint32_t rs2) - // { - // uint32_t x = rs1; - // int shamt = rs2 & 15 - // - // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); - // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); - // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 
0xc0c0c0c0, 2); - // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); - // - // return x; - // } + // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N) + // { + // uint32_t x = src & ~(maskL | maskR); + // x |= ((src << N) & maskL) | ((src >> N) & maskR); + // return x; + // } + // + // + // + // uint32_t shfl32(uint32_t rs1, uint32_t rs2) + // { + // uint32_t x = rs1; + // int shamt = rs2 & 15 + // + // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2); + // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + // + // return x; + // } - logic [31:0] shfl8_d; - logic [31:0] shfl4_d; - logic [31:0] shfl2_d; - logic [31:0] shfl_d; + logic [31:0] shfl8_d; + logic [31:0] shfl4_d; + logic [31:0] shfl2_d; + logic [31:0] shfl_d; - assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0]; + assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0]; - assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16], + assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16], shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]} : shfl8_d[31:0]; - assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24], + assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24], shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16], shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08], shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]} : shfl4_d[31:0]; - assign shfl_d[31:0] = (rs2_in[0]) ? 
{shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24], + assign shfl_d[31:0] = (rs2_in[0]) ? {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24], shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16], shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08], shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]} : shfl2_d[31:0]; @@ -423,285 +413,273 @@ import el2_pkg::*; - // uint32_t unshfl32(uint32_t rs1, uint32_t rs2) - // { - // uint32_t x = rs1; - // int shamt = rs2 & 15 - // - // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); - // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2); - // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); - // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); - // - // return x; - // } + // uint32_t unshfl32(uint32_t rs1, uint32_t rs2) + // { + // uint32_t x = rs1; + // int shamt = rs2 & 15 + // + // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2); + // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + // + // return x; + // } - logic [31:0] unshfl1_d; - logic [31:0] unshfl2_d; - logic [31:0] unshfl4_d; - logic [31:0] unshfl_d; + logic [31:0] unshfl1_d; + logic [31:0] unshfl2_d; + logic [31:0] unshfl4_d; + logic [31:0] unshfl_d; - assign unshfl1_d[31:0] = (rs2_in[0]) ? {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24], + assign unshfl1_d[31:0] = (rs2_in[0]) ? 
{rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24], rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16], rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08], rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]} : rs1_in[31:0]; - assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24], + assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24], unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16], unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08], unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]} : unshfl1_d[31:0]; - assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16], + assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16], unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]} : unshfl2_d[31:0]; - assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0]; + assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0]; - // * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * * -// -// These instructions operate on nibbles/bytes/half-words/words. -// rs1 is a vector of data words and rs2 is a vector of indices into rs1. -// The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1, -// or zero then the index in rs2 is out of bounds. 
-// -// uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) -// { -// uint_xlen_t r = 0; -// uint_xlen_t sz = 1LL << sz_log2; -// uint_xlen_t mask = (1LL << sz) - 1; -// for (int i = 0; i < XLEN; i += sz) -// { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2; -// if (pos < XLEN) -// r |= ((rs1 >> pos) & mask) << i; -// } -// return r; -// } -// -// uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); } -// uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); } -// uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); } -// uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32 -// -// The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbarswitch. + // + // These instructions operate on nibbles/bytes/half-words/words. + // rs1 is a vector of data words and rs2 is a vector of indices into rs1. + // The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1, + // or zero then the index in rs2 is out of bounds. 
+ // + // uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2) + // { + // uint_xlen_t r = 0; + // uint_xlen_t sz = 1LL << sz_log2; + // uint_xlen_t mask = (1LL << sz) - 1; + // for (int i = 0; i < XLEN; i += sz) + // { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2; + // if (pos < XLEN) + // r |= ((rs1 >> pos) & mask) << i; + // } + // return r; + // } + // + // uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); } + // uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); } + // uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); } + // uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32 + // + // The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbarswitch. -// *** XPERM_B *** + // *** XPERM_B *** - // XLEN = 32 - // SZ_LOG2 = 3 - // SZ = 4'd8; - // MASK = ( 1 << 8 ) - 1 - // = 8'hFF + // XLEN = 32 + // SZ_LOG2 = 3 + // SZ = 4'd8; + // MASK = ( 1 << 8 ) - 1 + // = 8'hFF - // integer xperm_b_i; - // logic [31:0] xperm_b_r; - // logic [3:0] xperm_b_sz; - // logic [7:0] xperm_b_mask; - // logic [31:0] xperm_b_pos; - // - // - // assign xperm_b_sz[3:0] = 4'd8; - // assign xperm_b_mask[7:0] = 8'hff; - // - // always_comb - // begin - // xperm_b_r[31:0] = 32'b0; - // - // for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work... 
- // begin - // xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3; - // if (xperm_b_pos[31:0] < 32'd32) - // xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i ); - // end - // end + // integer xperm_b_i; + // logic [31:0] xperm_b_r; + // logic [3:0] xperm_b_sz; + // logic [7:0] xperm_b_mask; + // logic [31:0] xperm_b_pos; + // + // + // assign xperm_b_sz[3:0] = 4'd8; + // assign xperm_b_mask[7:0] = 8'hff; + // + // always_comb + // begin + // xperm_b_r[31:0] = 32'b0; + // + // for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work... + // begin + // xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3; + // if (xperm_b_pos[31:0] < 32'd32) + // xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i ); + // end + // end - logic [31:0] xperm_n; - logic [31:0] xperm_b; - logic [31:0] xperm_h; + logic [31:0] xperm_n; + logic [31:0] xperm_b; + logic [31:0] xperm_h; - assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & ( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is a 8:1 mux with qualified selects - assign xperm_n[07:04] = { 4{ ~rs2_in[07] }} & ( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) & 4'hf ); - assign xperm_n[11:08] = { 4{ ~rs2_in[11] }} & ( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) & 4'hf ); - assign xperm_n[15:12] = { 4{ ~rs2_in[15] }} & ( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) & 4'hf ); - assign xperm_n[19:16] = { 4{ ~rs2_in[19] }} & ( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) & 4'hf ); - assign xperm_n[23:20] = { 4{ ~rs2_in[23] }} & ( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) & 4'hf ); - assign xperm_n[27:24] = { 4{ ~rs2_in[27] }} & ( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) & 4'hf ); - assign xperm_n[31:28] = { 4{ ~rs2_in[31] }} & ( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) & 4'hf ); + assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & ( 
(rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is a 8:1 mux with qualified selects + assign xperm_n[07:04] = {4{~rs2_in[07]}} & ((rs1_in[31:0] >> {rs2_in[06:04], 2'b0}) & 4'hf); + assign xperm_n[11:08] = {4{~rs2_in[11]}} & ((rs1_in[31:0] >> {rs2_in[10:08], 2'b0}) & 4'hf); + assign xperm_n[15:12] = {4{~rs2_in[15]}} & ((rs1_in[31:0] >> {rs2_in[14:12], 2'b0}) & 4'hf); + assign xperm_n[19:16] = {4{~rs2_in[19]}} & ((rs1_in[31:0] >> {rs2_in[18:16], 2'b0}) & 4'hf); + assign xperm_n[23:20] = {4{~rs2_in[23]}} & ((rs1_in[31:0] >> {rs2_in[22:20], 2'b0}) & 4'hf); + assign xperm_n[27:24] = {4{~rs2_in[27]}} & ((rs1_in[31:0] >> {rs2_in[26:24], 2'b0}) & 4'hf); + assign xperm_n[31:28] = {4{~rs2_in[31]}} & ((rs1_in[31:0] >> {rs2_in[30:28], 2'b0}) & 4'hf); - assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & ( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects - assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & ( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff ); - assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & ( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff ); - assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & ( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff ); + assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & ( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects + assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & ( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff ); + assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & ( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff ); + assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & ( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff ); - assign xperm_h[15:00] = {16{ ~(| rs2_in[15:01]) }} & ( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects - assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & ( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff ); + assign xperm_h[15:00] = {16{ ~(| 
rs2_in[15:01]) }} & ( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects + assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & ( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff ); - // * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * * - // *** computed from https: //crccalc.com *** - // - // "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e) - // - // Input must first be XORed with 32'hffff_ffff - // - // - // CRC32 - // - // Input Output Input Output - // ----- -------- -------- -------- - // "a" e8b7be43 ffffff9e 174841bc - // "aa" 078a19d7 ffff9e9e f875e628 - // "aaaa" ad98e545 9e9e9e9e 5267a1ba - // - // - // - // CRC32c - // - // Input Output Input Output - // ----- -------- -------- -------- - // "a" c1d04330 ffffff9e 3e2fbccf - // "aa" f1f2dac2 ffff9e9e 0e0d253d - // "aaaa" 6a52eeb0 9e9e9e9e 95ad114f + // *** computed from https: //crccalc.com *** + // + // "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e) + // + // Input must first be XORed with 32'hffff_ffff + // + // + // CRC32 + // + // Input Output Input Output + // ----- -------- -------- -------- + // "a" e8b7be43 ffffff9e 174841bc + // "aa" 078a19d7 ffff9e9e f875e628 + // "aaaa" ad98e545 9e9e9e9e 5267a1ba + // + // + // + // CRC32c + // + // Input Output Input Output + // ----- -------- -------- -------- + // "a" c1d04330 ffffff9e 3e2fbccf + // "aa" f1f2dac2 ffff9e9e 0e0d253d + // "aaaa" 6a52eeb0 9e9e9e9e 95ad114f - logic crc32_all; - logic [31:0] crc32_poly_rev; - logic [31:0] crc32c_poly_rev; - integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi; - logic [31:0] crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd; + logic crc32_all; + logic [31:0] crc32_poly_rev; + logic [31:0] crc32c_poly_rev; + integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi; + logic [31:0] 
crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd; - assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w; + assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w; - assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7 - assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41 + assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7 + assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41 - always_comb - begin - crc32_bd[31:0] = rs1_in[31:0]; + always_comb begin + crc32_bd[31:0] = rs1_in[31:0]; - for (crc32_bi=0; crc32_bi<8; crc32_bi++) - begin - crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}}); - end // FOR crc32_bi - end // ALWAYS_COMB + for (crc32_bi = 0; crc32_bi < 8; crc32_bi++) begin + crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}}); + end // FOR crc32_bi + end // ALWAYS_COMB - always_comb - begin - crc32_hd[31:0] = rs1_in[31:0]; + always_comb begin + crc32_hd[31:0] = rs1_in[31:0]; - for (crc32_hi=0; crc32_hi<16; crc32_hi++) - begin - crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}}); - end // FOR crc32_hi - end // ALWAYS_COMB + for (crc32_hi = 0; crc32_hi < 16; crc32_hi++) begin + crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}}); + end // FOR crc32_hi + end // ALWAYS_COMB - always_comb - begin - crc32_wd[31:0] = rs1_in[31:0]; + always_comb begin + crc32_wd[31:0] = rs1_in[31:0]; - for (crc32_wi=0; crc32_wi<32; crc32_wi++) - begin - crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}}); - end // FOR crc32_wi - end // ALWAYS_COMB + for (crc32_wi = 0; crc32_wi < 32; crc32_wi++) begin + crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}}); + end // FOR crc32_wi + end // ALWAYS_COMB - 
always_comb - begin - crc32c_bd[31:0] = rs1_in[31:0]; + always_comb begin + crc32c_bd[31:0] = rs1_in[31:0]; - for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++) - begin - crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}}); - end // FOR crc32c_bi - end // ALWAYS_COMB + for (crc32c_bi = 0; crc32c_bi < 8; crc32c_bi++) begin + crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}}); + end // FOR crc32c_bi + end // ALWAYS_COMB - always_comb - begin - crc32c_hd[31:0] = rs1_in[31:0]; + always_comb begin + crc32c_hd[31:0] = rs1_in[31:0]; - for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++) - begin - crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}}); - end // FOR crc32c_hi - end // ALWAYS_COMB + for (crc32c_hi = 0; crc32c_hi < 16; crc32c_hi++) begin + crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}}); + end // FOR crc32c_hi + end // ALWAYS_COMB - always_comb - begin - crc32c_wd[31:0] = rs1_in[31:0]; + always_comb begin + crc32c_wd[31:0] = rs1_in[31:0]; - for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++) - begin - crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}}); - end // FOR crc32c_wi - end // ALWAYS_COMB + for (crc32c_wi = 0; crc32c_wi < 32; crc32c_wi++) begin + crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}}); + end // FOR crc32c_wi + end // ALWAYS_COMB - // * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * * - // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2) - // { - // uint_xlen_t cfg = rs2 >> (XLEN/2); - // if ((cfg >> 30) == 2) cfg = cfg >> 16; - // int len = (cfg >> 8) & (XLEN/2-1); - // int off = cfg & (XLEN-1); - // len = len ? 
len : XLEN/2; - // uint_xlen_t mask = slo(0, len) << off; - // uint_xlen_t data = rs2 << off; - // return (data & mask) | (rs1 & ~mask); + // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2) + // { + // uint_xlen_t cfg = rs2 >> (XLEN/2); + // if ((cfg >> 30) == 2) cfg = cfg >> 16; + // int len = (cfg >> 8) & (XLEN/2-1); + // int off = cfg & (XLEN-1); + // len = len ? len : XLEN/2; + // uint_xlen_t mask = slo(0, len) << off; + // uint_xlen_t data = rs2 << off; + // return (data & mask) | (rs1 & ~mask); - logic [4:0] bfp_len; - logic [4:0] bfp_off; - logic [31:0] bfp_len_mask_; - logic [31:0] bfp_off_mask_; - logic [15:0] bfp_preshift_data; - logic [31:0] bfp_shift_data; - logic [31:0] bfp_shift_mask; - logic [31:0] bfp_result_d; + logic [ 4:0] bfp_len; + logic [ 4:0] bfp_off; + logic [31:0] bfp_len_mask_; + logic [31:0] bfp_off_mask_; + logic [15:0] bfp_preshift_data; + logic [31:0] bfp_shift_data; + logic [31:0] bfp_shift_mask; + logic [31:0] bfp_result_d; - assign bfp_len[3:0] = rs2_in[27:24]; - assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16 - assign bfp_off[4:0] = rs2_in[20:16]; + assign bfp_len[3:0] = rs2_in[27:24]; + assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16 + assign bfp_off[4:0] = rs2_in[20:16]; - assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0]; - assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0]; - assign bfp_preshift_data[15:0]= rs2_in[15:0] & ~bfp_len_mask_[15:0]; + assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0]; + assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0]; + assign bfp_preshift_data[15:0] = rs2_in[15:0] & ~bfp_len_mask_[15:0]; - assign bfp_shift_data[31:0] = {16'b0,bfp_preshift_data[15:0]} << bfp_off[4:0]; - assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << bfp_off[4:0]) | ~bfp_off_mask_[31:0]; + assign bfp_shift_data[31:0] = {16'b0, bfp_preshift_data[15:0]} << bfp_off[4:0]; + assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << 
bfp_off[4:0]) | ~bfp_off_mask_[31:0]; - assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]); + assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]); - // * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * * + // * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * * - assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h; + assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h; - assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) | + assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) | ( {32{ap_bdecompress}} & bdecompress_d[31:0] ) | ( {32{ap_clmul}} & clmul_raw_d[31:0] ) | ( {32{ap_clmulh}} & {1'b0,clmul_raw_d[62:32]} ) | @@ -723,12 +701,18 @@ import el2_pkg::*; - rvdffe #(33) i_bitmanip_ff (.*, .clk(clk), .din({bitmanip_sel_d,bitmanip_d[31:0]}), .dout({bitmanip_sel_x,bitmanip_x[31:0]}), .en(bit_x_enable)); + rvdffe #(33) i_bitmanip_ff ( + .*, + .clk (clk), + .din ({bitmanip_sel_d, bitmanip_d[31:0]}), + .dout({bitmanip_sel_x, bitmanip_x[31:0]}), + .en (bit_x_enable) + ); - assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) | + assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) | ( {32{~bitmanip_sel_x & low_x}} & prod_x[31:0] ) | bitmanip_x[31:0]; diff --git a/Flow/design/ifu/el2_ifu.sv b/Flow/design/ifu/el2_ifu.sv index 66b3350..52ce438 100644 --- a/Flow/design/ifu/el2_ifu.sv +++ b/Flow/design/ifu/el2_ifu.sv @@ -20,352 +20,411 @@ //******************************************************************************** module el2_ifu -import el2_pkg::*; + import 
el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic rst_l, // reset, active low + `include "el2_param.vh" +) ( + input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic rst_l, // reset, active low - input logic dec_i0_decode_d, // Valid instruction at D and not blocked + input logic dec_i0_decode_d, // Valid instruction at D and not blocked - input logic exu_flush_final, // flush, includes upper and lower - input logic dec_tlu_i0_commit_cmt , // committed i0 - input logic dec_tlu_flush_err_wb , // flush due to parity error. - input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final - input logic [31:1] exu_flush_path_final, // flush fetch address + input logic exu_flush_final, // flush, includes upper and lower + input logic dec_tlu_i0_commit_cmt, // committed i0 + input logic dec_tlu_flush_err_wb, // flush due to parity error. 
+ input logic dec_tlu_flush_noredir_wb, // don't fetch, validated with exu_flush_final + input logic [31:1] exu_flush_path_final, // flush fetch address - input logic [31:0] dec_tlu_mrac_ff ,// Side_effect , cacheable for each region - input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final - input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches + input logic [31:0] dec_tlu_mrac_ff, // Side_effect , cacheable for each region + input logic dec_tlu_fence_i_wb, // fence.i, invalidate icache, validated with exu_flush_final + input logic dec_tlu_flush_leak_one_wb, // ignore bp for leak one fetches - input logic dec_tlu_bpred_disable, // disable all branch prediction - input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging - input logic dec_tlu_force_halt, // force halt + input logic dec_tlu_bpred_disable, // disable all branch prediction + input logic dec_tlu_core_ecc_disable, // disable ecc checking and flagging + input logic dec_tlu_force_halt, // force halt - //-------------------------- IFU AXI signals-------------------------- - // AXI Write Channels - output logic ifu_axi_awvalid, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, - output logic [31:0] ifu_axi_awaddr, - output logic [3:0] ifu_axi_awregion, - output logic [7:0] ifu_axi_awlen, - output logic [2:0] ifu_axi_awsize, - output logic [1:0] ifu_axi_awburst, - output logic ifu_axi_awlock, - output logic [3:0] ifu_axi_awcache, - output logic [2:0] ifu_axi_awprot, - output logic [3:0] ifu_axi_awqos, + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [ 31:0] ifu_axi_awaddr, + output logic [ 3:0] ifu_axi_awregion, + output logic [ 7:0] ifu_axi_awlen, + output logic [ 2:0] ifu_axi_awsize, + output logic [ 1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [ 3:0] ifu_axi_awcache, + 
output logic [ 2:0] ifu_axi_awprot, + output logic [ 3:0] ifu_axi_awqos, - output logic ifu_axi_wvalid, - output logic [63:0] ifu_axi_wdata, - output logic [7:0] ifu_axi_wstrb, - output logic ifu_axi_wlast, + output logic ifu_axi_wvalid, + output logic [63:0] ifu_axi_wdata, + output logic [ 7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, - output logic ifu_axi_bready, + output logic ifu_axi_bready, - // AXI Read Channels - output logic ifu_axi_arvalid, - input logic ifu_axi_arready, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, - output logic [31:0] ifu_axi_araddr, - output logic [3:0] ifu_axi_arregion, - output logic [7:0] ifu_axi_arlen, - output logic [2:0] ifu_axi_arsize, - output logic [1:0] ifu_axi_arburst, - output logic ifu_axi_arlock, - output logic [3:0] ifu_axi_arcache, - output logic [2:0] ifu_axi_arprot, - output logic [3:0] ifu_axi_arqos, + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [ 31:0] ifu_axi_araddr, + output logic [ 3:0] ifu_axi_arregion, + output logic [ 7:0] ifu_axi_arlen, + output logic [ 2:0] ifu_axi_arsize, + output logic [ 1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [ 3:0] ifu_axi_arcache, + output logic [ 2:0] ifu_axi_arprot, + output logic [ 3:0] ifu_axi_arqos, - input logic ifu_axi_rvalid, - output logic ifu_axi_rready, - input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, - input logic [63:0] ifu_axi_rdata, - input logic [1:0] ifu_axi_rresp, + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [ 63:0] ifu_axi_rdata, + input logic [ 1:0] ifu_axi_rresp, - input logic ifu_bus_clk_en, + input logic ifu_bus_clk_en, - input logic dma_iccm_req, - input logic [31:0] dma_mem_addr, - input logic [2:0] dma_mem_sz, - input logic dma_mem_write, - input logic [63:0] dma_mem_wdata, - input logic [2:0] dma_mem_tag, // DMA Buffer entry number + input logic 
dma_iccm_req, + input logic [31:0] dma_mem_addr, + input logic [ 2:0] dma_mem_sz, + input logic dma_mem_write, + input logic [63:0] dma_mem_wdata, + input logic [ 2:0] dma_mem_tag, // DMA Buffer entry number - input logic dma_iccm_stall_any, - output logic iccm_dma_ecc_error, - output logic iccm_dma_rvalid, - output logic [63:0] iccm_dma_rdata, - output logic [2:0] iccm_dma_rtag, // Tag of the DMA req - output logic iccm_ready, + input logic dma_iccm_stall_any, + output logic iccm_dma_ecc_error, + output logic iccm_dma_rvalid, + output logic [63:0] iccm_dma_rdata, + output logic [ 2:0] iccm_dma_rtag, // Tag of the DMA req + output logic iccm_ready, - output logic ifu_pmu_instr_aligned, - output logic ifu_pmu_fetch_stall, - output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag + output logic ifu_pmu_instr_aligned, + output logic ifu_pmu_fetch_stall, + output logic ifu_ic_error_start, // has all of the I$ ecc/parity for data/tag -// I$ & ITAG Ports - output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache. - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache. - output logic ic_rd_en, // Icache read enable. + // I$ & ITAG Ports + output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache. + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache. + output logic ic_rd_en, // Icache read enable. - output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC - input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [25:0] ictag_debug_rd_data,// Debug icache tag. - output logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. 
With ECC + input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [25:0] ictag_debug_rd_data, // Debug icache tag. + output logic [70:0] ic_debug_wr_data, // Debug wr cache. - output logic [70:0] ifu_ic_debug_rd_data, + output logic [70:0] ifu_ic_debug_rd_data, - input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // - input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, - output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. - output logic ic_sel_premux_data, // Select the premux data. + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, + output logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + output logic ic_sel_premux_data, // Select the premux data. - output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. - output logic ic_debug_rd_en, // Icache debug rd - output logic ic_debug_wr_en, // Icache debug wr - output logic ic_debug_tag_array, // Debug tag array - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + output logic [ pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + output logic ic_debug_rd_en, // Icache debug rd + output logic ic_debug_wr_en, // Icache debug wr + output logic ic_debug_tag_array, // Debug tag array + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. 
F2 stage - input logic ic_tag_perr, // Icache Tag parity error + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage + input logic ic_tag_perr, // Icache Tag parity error - // ICCM ports - output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address. - output logic iccm_wren, // ICCM write enable (through the DMA) - output logic iccm_rden, // ICCM read enable. - output logic [77:0] iccm_wr_data, // ICCM write data. - output logic [2:0] iccm_wr_size, // ICCM write location within DW. + // ICCM ports + output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address. + output logic iccm_wren, // ICCM write enable (through the DMA) + output logic iccm_rden, // ICCM read enable. + output logic [ 77:0] iccm_wr_data, // ICCM write data. + output logic [ 2:0] iccm_wr_size, // ICCM write location within DW. - input logic [63:0] iccm_rd_data, // Data read from ICCM. - input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM. + input logic [63:0] iccm_rd_data, // Data read from ICCM. + input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM. - output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. + output logic ifu_iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. -// Perf counter sigs - output logic ifu_pmu_ic_miss, // ic miss - output logic ifu_pmu_ic_hit, // ic hit - output logic ifu_pmu_bus_error, // iside bus error - output logic ifu_pmu_bus_busy, // iside bus busy - output logic ifu_pmu_bus_trxn, // iside bus transactions + // Perf counter sigs + output logic ifu_pmu_ic_miss, // ic miss + output logic ifu_pmu_ic_hit, // ic hit + output logic ifu_pmu_bus_error, // iside bus error + output logic ifu_pmu_bus_busy, // iside bus busy + output logic ifu_pmu_bus_trxn, // iside bus transactions - output logic ifu_i0_icaf, // Instruction 0 access fault. 
From Aligner to Decode - output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type + output logic ifu_i0_icaf, // Instruction 0 access fault. From Aligner to Decode + output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type - output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode - output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst - output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error - output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access - output logic[31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode - output logic[31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode - output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode + output logic ifu_i0_valid, // Instruction 0 valid. From Aligner to Decode + output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst + output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error + output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access + output logic [31:0] ifu_i0_instr, // Instruction 0 . From Aligner to Decode + output logic [31:1] ifu_i0_pc, // Instruction 0 pc. From Aligner to Decode + output logic ifu_i0_pc4, // Instruction 0 is 4 byte. From Aligner to Decode - output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle. + output logic ifu_miss_state_idle, // There is no outstanding miss. Cache miss state is idle. - output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. From Aligner to Decode - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index - output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR - output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag - output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index + output el2_br_pkt_t i0_brp, // Instruction 0 branch packet. 
From Aligner to Decode + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index - input el2_predict_pkt_t exu_mp_pkt, // mispredict packet - input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr - input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index - input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag + input el2_predict_pkt_t exu_mp_pkt, // mispredict packet + input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr + input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index + input logic [ pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag - input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt - input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index - input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index + input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // slot0 update/error pkt + input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index + input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associt btb error index - input dec_tlu_flush_lower_wb, + input dec_tlu_flush_lower_wb, - output logic [15:0] ifu_i0_cinst, + output logic [15:0] ifu_i0_cinst, -/// Icache debug - input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt , - output logic ifu_ic_debug_rd_data_valid, - output logic iccm_buf_correct_ecc, - output logic iccm_correction_state, + /// Icache debug + input el2_cache_debug_pkt_t 
dec_tlu_ic_diag_pkt, + output logic ifu_ic_debug_rd_data_valid, + output logic iccm_buf_correct_ecc, + output logic iccm_correction_state, - input logic scan_mode - ); + input logic scan_mode +); - localparam TAGWIDTH = 2 ; - localparam IDWIDTH = 2 ; + localparam TAGWIDTH = 2; + localparam IDWIDTH = 2; - logic ifu_fb_consume1, ifu_fb_consume2; - logic [31:1] ifc_fetch_addr_f; - logic [31:1] ifc_fetch_addr_bf; + logic ifu_fb_consume1, ifu_fb_consume2; + logic [31:1] ifc_fetch_addr_f; + logic [31:1] ifc_fetch_addr_bf; - logic [1:0] ifu_fetch_val; // valids on a 2B boundary, left justified [7] implies valid fetch - logic [31:1] ifu_fetch_pc; // starting pc of fetch + logic [ 1:0] ifu_fetch_val; // valids on a 2B boundary, left justified [7] implies valid fetch + logic [31:1] ifu_fetch_pc; // starting pc of fetch - logic iccm_rd_ecc_single_err, ic_error_start; - assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err; - assign ifu_ic_error_start = ic_error_start; + logic iccm_rd_ecc_single_err, ic_error_start; + assign ifu_iccm_rd_ecc_single_err = iccm_rd_ecc_single_err; + assign ifu_ic_error_start = ic_error_start; - logic ic_write_stall; - logic ic_dma_active; - logic ifc_dma_access_ok; - logic [1:0] ic_access_fault_f; - logic [1:0] ic_access_fault_type_f; - logic ifu_ic_mb_empty; + logic ic_write_stall; + logic ic_dma_active; + logic ifc_dma_access_ok; + logic [1:0] ic_access_fault_f; + logic [1:0] ic_access_fault_type_f; + logic ifu_ic_mb_empty; - logic ic_hit_f; + logic ic_hit_f; - logic [1:0] ifu_bp_way_f; // way indication; right justified - logic ifu_bp_hit_taken_f; // kill next fetch; taken target found - logic [31:1] ifu_bp_btb_target_f; // predicted target PC - logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified - logic [1:0] ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified - logic [1:0] ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified - 
logic [11:0] ifu_bp_poffset_f; // predicted target - logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified - logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified - logic [1:0] ifu_bp_valid_f; // branch valid, right justified - logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f; - logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f; + logic [1:0] ifu_bp_way_f; // way indication; right justified + logic ifu_bp_hit_taken_f; // kill next fetch; taken target found + logic [31:1] ifu_bp_btb_target_f; // predicted target PC + logic ifu_bp_inst_mask_f; // tell ic which valids to kill because of a taken branch; right justified + logic [1:0] ifu_bp_hist1_f; // history counters for all 4 potential branches; right justified + logic [1:0] ifu_bp_hist0_f; // history counters for all 4 potential branches; right justified + logic [11:0] ifu_bp_poffset_f; // predicted target + logic [1:0] ifu_bp_ret_f; // predicted ret ; right justified + logic [1:0] ifu_bp_pc4_f; // pc4 indication; right justified + logic [1:0] ifu_bp_valid_f; // branch valid, right justified + logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f; + logic [1:0][$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f; - // fetch control - el2_ifu_ifc_ctl #(.pt(pt)) ifc (.* - ); + // fetch control + el2_ifu_ifc_ctl #(.pt(pt)) ifc (.*); - // branch predictor - if (pt.BTB_ENABLE==1) begin : bpred - el2_ifu_bp_ctl #(.pt(pt)) bp (.*); - end - else begin : bpred - assign ifu_bp_hit_taken_f = '0; - // verif wires - logic btb_wr_en_way0, btb_wr_en_way1,dec_tlu_error_wb; - logic [16+pt.BTB_BTAG_SIZE:0] btb_wr_data; - assign btb_wr_en_way0 = '0; - assign btb_wr_en_way1 = '0; - assign btb_wr_data = '0; - assign dec_tlu_error_wb ='0; - assign ifu_bp_inst_mask_f = 1'b1; - end + // branch predictor + if (pt.BTB_ENABLE == 1) begin : bpred + el2_ifu_bp_ctl #(.pt(pt)) bp (.*); + end else begin : bpred + assign ifu_bp_hit_taken_f = '0; + // verif wires + logic btb_wr_en_way0, btb_wr_en_way1, dec_tlu_error_wb; + logic [16+pt.BTB_BTAG_SIZE:0] 
btb_wr_data; + assign btb_wr_en_way0 = '0; + assign btb_wr_en_way1 = '0; + assign btb_wr_data = '0; + assign dec_tlu_error_wb = '0; + assign ifu_bp_inst_mask_f = 1'b1; + end - logic [1:0] ic_fetch_val_f; - logic [31:0] ic_data_f; - logic [31:0] ifu_fetch_data_f; - logic ifc_fetch_req_f; - logic ifc_fetch_req_f_raw; - logic [1:0] iccm_rd_ecc_double_err; // This fetch has an iccm double error. + logic [1:0] ic_fetch_val_f; + logic [31:0] ic_data_f; + logic [31:0] ifu_fetch_data_f; + logic ifc_fetch_req_f; + logic ifc_fetch_req_f_raw; + logic [1:0] iccm_rd_ecc_double_err; // This fetch has an iccm double error. - logic ifu_async_error_start; + logic ifu_async_error_start; - assign ifu_fetch_data_f[31:0] = ic_data_f[31:0]; - assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0]; - assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1]; + assign ifu_fetch_data_f[31:0] = ic_data_f[31:0]; + assign ifu_fetch_val[1:0] = ic_fetch_val_f[1:0]; + assign ifu_fetch_pc[31:1] = ifc_fetch_addr_f[31:1]; - logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage - logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage - logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage - logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus. - logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM. + logic ifc_fetch_uncacheable_bf; // The fetch request is uncacheable space. BF stage + logic ifc_fetch_req_bf; // Fetch request. Comes with the address. BF stage + logic ifc_fetch_req_bf_raw; // Fetch request without some qualifications. Used for clock-gating. BF stage + logic ifc_iccm_access_bf; // This request is to the ICCM. Do not generate misses to the bus. + logic ifc_region_acc_fault_bf; // Access fault. in ICCM region but offset is outside defined ICCM. 
- // aligner + // aligner - el2_ifu_aln_ctl #(.pt(pt)) aln ( - .* - ); + el2_ifu_aln_ctl #(.pt(pt)) aln (.*); - // icache - el2_ifu_mem_ctl #(.pt(pt)) mem_ctl - (.*, + // icache + el2_ifu_mem_ctl #( + .pt(pt) + ) mem_ctl ( + .*, .ic_data_f(ic_data_f[31:0]) + ); + + + + // Performance debug info + // + // +`ifdef DUMP_BTB_ON + logic exu_mp_valid; // conditional branch mispredict + logic exu_mp_way; // conditional branch mispredict + logic exu_mp_ataken; // direction is actual taken + logic exu_mp_boffset; // branch offsett + logic exu_mp_pc4; // branch is a 4B inst + logic exu_mp_call; // branch is a call inst + logic exu_mp_ret; // branch is a ret inst + logic exu_mp_ja; // branch is a jump always + logic [ 1:0] exu_mp_hist; // new history + logic [ 11:0] exu_mp_tgt; // target offset + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + + assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict + assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken + assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset + assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst + assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst + assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst + assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always + assign exu_mp_way = exu_mp_pkt.way; // branch is a jump always + assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history + assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0]; // target offset + assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address + + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f; + `define DEC top.rvtop.swerv.dec + `define EXU top.rvtop.swerv.exu + el2_btb_addr_hash f2hash ( + .pc (ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), + .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); + logic [31:0] mppc_ns, mppc; + logic exu_flush_final_d1; + assign 
mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d; + assign mppc_ns[0] = 1'b0; + rvdff #(33) junk_ff ( + .*, + .clk (active_clk), + .din ({mppc_ns[31:0], exu_flush_final}), + .dout({mppc[31:0], exu_flush_final_d1}) + ); + logic tmp_bnk; + assign tmp_bnk = bpred.bp.btb_sel_f[1]; + + always @(negedge clk) begin + if (`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin + $display("BTB_CONFIG: %d", pt.BTB_SIZE); +`ifndef BP_NOGSHARE + $display("BHT_CONFIG: %d gshare: 1", pt.BHT_SIZE); +`else + $display("BHT_CONFIG: %d gshare: 0", pt.BHT_SIZE); +`endif + $display("RS_CONFIG: %d", pt.RET_STACK_SIZE); + end + if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken)) + $display( + "%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", + `DEC.tlu.mcyclel[31:0] + 32'ha, + exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], + 1'b0, + exu_mp_call, + exu_mp_ret, + exu_mp_ataken, + exu_mp_hist[1:0], + exu_mp_valid, + exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], + { + exu_flush_path_final[31:1], 1'b0 + }, + exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], + exu_mp_valid, + bpred.bp.bht_wr_addr0, + mppc[31:0], + exu_mp_pkt.way ); + for (int i = 0; i < 8; i++) begin + if (ifu_bp_valid_f[i] & ifc_fetch_req_f) + $display( + "%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", + `DEC.tlu.mcyclel[31:0] + 32'ha, + btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], + bpred.bp.btb_sel_f[1], + bpred.bp.btb_rd_call_f, + bpred.bp.btb_rd_ret_f, + ifu_bp_hist1_f[tmp_bnk], + ifu_bp_hist0_f[tmp_bnk], + bpred.bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], + { + ifu_bp_btb_target_f[31:1], 1'b0 + }, + bpred.bp.fghr[pt.BHT_GHR_SIZE-1:0], + bpred.bp.bht_rd_addr_f, + ifu_bp_way_f[tmp_bnk] + ); + end + if (dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | 
dec_tlu_br0_r_pkt.br_start_error)) + $display( + "%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", + `DEC.tlu.mcyclel[31:0] + 32'ha, + bpred.bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO], + { + dec_tlu_br0_r_pkt.middle + }, + dec_tlu_br0_r_pkt.hist, + dec_tlu_br0_r_pkt.way + ); + if (dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) + $display( + "%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", + `DEC.tlu.mcyclel[31:0] + 32'ha, + exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], + 1'b0, + dec_tlu_br0_r_pkt.br_start_error, + { + exu_flush_path_final[31:1], 1'b0 + }, + dec_tlu_br0_r_pkt.way + ); + end // always @ (negedge clk) + function [1:0] encode4_2; + input [3:0] in; - // Performance debug info - // - // -`ifdef DUMP_BTB_ON - logic exu_mp_valid; // conditional branch mispredict - logic exu_mp_way; // conditional branch mispredict - logic exu_mp_ataken; // direction is actual taken - logic exu_mp_boffset; // branch offsett - logic exu_mp_pc4; // branch is a 4B inst - logic exu_mp_call; // branch is a call inst - logic exu_mp_ret; // branch is a ret inst - logic exu_mp_ja; // branch is a jump always - logic [1:0] exu_mp_hist; // new history - logic [11:0] exu_mp_tgt; // target offset - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + encode4_2[1] = in[3] | in[2]; + encode4_2[0] = in[3] | in[1]; - assign exu_mp_valid = exu_mp_pkt.misp; // conditional branch mispredict - assign exu_mp_ataken = exu_mp_pkt.ataken; // direction is actual taken - assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset - assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst - assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst - assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst - assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always - assign exu_mp_way = exu_mp_pkt.way; // branch is a jump always - assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history - assign exu_mp_tgt[11:0] 
= exu_mp_pkt.toffset[11:0] ; // target offset - assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address - - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_f; - `define DEC top.rvtop.swerv.dec - `define EXU top.rvtop.swerv.exu - el2_btb_addr_hash f2hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); - logic [31:0] mppc_ns, mppc; - logic exu_flush_final_d1; - assign mppc_ns[31:1] = `EXU.i0_flush_upper_x ? `EXU.exu_i0_pc_x : `EXU.dec_i0_pc_d; - assign mppc_ns[0] = 1'b0; - rvdff #(33) junk_ff (.*, .clk(active_clk), .din({mppc_ns[31:0], exu_flush_final}), .dout({mppc[31:0], exu_flush_final_d1})); - logic tmp_bnk; - assign tmp_bnk = bpred.bp.btb_sel_f[1]; - - always @(negedge clk) begin - if(`DEC.tlu.mcyclel[31:0] == 32'h0000_0010) begin - $display("BTB_CONFIG: %d",pt.BTB_SIZE); - `ifndef BP_NOGSHARE - $display("BHT_CONFIG: %d gshare: 1",pt.BHT_SIZE); - `else - $display("BHT_CONFIG: %d gshare: 0",pt.BHT_SIZE); - `endif - $display("RS_CONFIG: %d", pt.RET_STACK_SIZE); - end - if(exu_flush_final_d1 & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) & (exu_mp_pkt.misp | exu_mp_pkt.ataken)) - $display("%7d BTB_MP : index: %0h bank: %0h call: %b ret: %b ataken: %b hist: %h valid: %b tag: %h targ: %h eghr: %b pred: %b ghr_index: %h brpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha, exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO], 1'b0, exu_mp_call, exu_mp_ret, exu_mp_ataken, exu_mp_hist[1:0], exu_mp_valid, exu_mp_btag[pt.BTB_BTAG_SIZE-1:0], {exu_flush_path_final[31:1], 1'b0}, exu_mp_eghr[pt.BHT_GHR_SIZE-1:0], exu_mp_valid, bpred.bp.bht_wr_addr0, mppc[31:0], exu_mp_pkt.way); - - for(int i = 0; i < 8; i++) begin - if(ifu_bp_valid_f[i] & ifc_fetch_req_f) - $display("%7d BTB_HIT : index: %0h bank: %0h call: %b ret: %b taken: %b strength: %b tag: %h targ: %0h ghr: %4b ghr_index: %h way: %h", 
`DEC.tlu.mcyclel[31:0]+32'ha,btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],bpred.bp.btb_sel_f[1], bpred.bp.btb_rd_call_f, bpred.bp.btb_rd_ret_f, ifu_bp_hist1_f[tmp_bnk], ifu_bp_hist0_f[tmp_bnk], bpred.bp.fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0], {ifu_bp_btb_target_f[31:1], 1'b0}, bpred.bp.fghr[pt.BHT_GHR_SIZE-1:0], bpred.bp.bht_rd_addr_f, ifu_bp_way_f[tmp_bnk]); - end - if(dec_tlu_br0_r_pkt.valid & ~(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error)) - $display("%7d BTB_UPD0: ghr_index: %0h bank: %0h hist: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,bpred.bp.br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO],{dec_tlu_br0_r_pkt.middle}, dec_tlu_br0_r_pkt.hist, dec_tlu_br0_r_pkt.way); - - if(dec_tlu_br0_r_pkt.br_error | dec_tlu_br0_r_pkt.br_start_error) - $display("%7d BTB_ERR0: index: %0h bank: %0h start: %b rfpc: %h way: %h", `DEC.tlu.mcyclel[31:0]+32'ha,exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO],1'b0, dec_tlu_br0_r_pkt.br_start_error, {exu_flush_path_final[31:1], 1'b0}, dec_tlu_br0_r_pkt.way); - end // always @ (negedge clk) - function [1:0] encode4_2; - input [3:0] in; - - encode4_2[1] = in[3] | in[2]; - encode4_2[0] = in[3] | in[1]; - - endfunction + endfunction `endif -endmodule // el2_ifu +endmodule // el2_ifu diff --git a/Flow/design/ifu/el2_ifu_aln_ctl.sv b/Flow/design/ifu/el2_ifu_aln_ctl.sv index 59eb97f..7e579f7 100644 --- a/Flow/design/ifu/el2_ifu_aln_ctl.sv +++ b/Flow/design/ifu/el2_ifu_aln_ctl.sv @@ -19,245 +19,332 @@ // Function: Instruction aligner //******************************************************************************** module el2_ifu_aln_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( + `include "el2_param.vh" +) ( - input logic scan_mode, // Flop scan mode control - input logic rst_l, // reset, active low - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. 
- input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic scan_mode, // Flop scan mode control + input logic rst_l, // reset, active low + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic ifu_async_error_start, // ecc/parity related errors with current fetch - not sent down the pipe + input logic ifu_async_error_start, // ecc/parity related errors with current fetch - not sent down the pipe - input logic [1:0] iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + input logic [1:0] iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. - input logic [1:0] ic_access_fault_f, // Instruction access fault for the current fetch. - input logic [1:0] ic_access_fault_type_f, // Instruction access fault types + input logic [1:0] ic_access_fault_f, // Instruction access fault for the current fetch. + input logic [1:0] ic_access_fault_type_f, // Instruction access fault types - input logic exu_flush_final, // Flush from the pipeline. + input logic exu_flush_final, // Flush from the pipeline. 
- input logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked + input logic dec_i0_decode_d, // Valid instruction at D-stage and not blocked - input logic [31:0] ifu_fetch_data_f, // fetch data in memory format - not right justified + input logic [31:0] ifu_fetch_data_f, // fetch data in memory format - not right justified - input logic [1:0] ifu_fetch_val, // valids on a 2B boundary, right justified - input logic [31:1] ifu_fetch_pc, // starting pc of fetch + input logic [ 1:0] ifu_fetch_val, // valids on a 2B boundary, right justified + input logic [31:1] ifu_fetch_pc, // starting pc of fetch - output logic ifu_i0_valid, // Instruction 0 is valid - output logic ifu_i0_icaf, // Instruction 0 has access fault - output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type - output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst + output logic ifu_i0_valid, // Instruction 0 is valid + output logic ifu_i0_icaf, // Instruction 0 has access fault + output logic [1:0] ifu_i0_icaf_type, // Instruction 0 access fault type + output logic ifu_i0_icaf_second, // Instruction 0 has access fault on second 2B of 4B inst - output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error - output logic [31:0] ifu_i0_instr, // Instruction 0 - output logic [31:1] ifu_i0_pc, // Instruction 0 PC - output logic ifu_i0_pc4, + output logic ifu_i0_dbecc, // Instruction 0 has double bit ecc error + output logic [31:0] ifu_i0_instr, // Instruction 0 + output logic [31:1] ifu_i0_pc, // Instruction 0 PC + output logic ifu_i0_pc4, - output logic ifu_fb_consume1, // Consumed one buffer. To fetch control fetch for buffer mass balance - output logic ifu_fb_consume2, // Consumed two buffers.To fetch control fetch for buffer mass balance + output logic ifu_fb_consume1, // Consumed one buffer. 
To fetch control fetch for buffer mass balance + output logic ifu_fb_consume2, // Consumed two buffers.To fetch control fetch for buffer mass balance - input logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch GHR - input logic [31:1] ifu_bp_btb_target_f, // predicted RET target - input logic [11:0] ifu_bp_poffset_f, // predicted target offset - input logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option) + input logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch GHR + input logic [31:1] ifu_bp_btb_target_f, // predicted RET target + input logic [11:0] ifu_bp_poffset_f, // predicted target offset + input logic [1:0][$clog2( +pt.BTB_SIZE +)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option) - input logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 1, right justified - input logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified - input logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified - input logic [1:0] ifu_bp_way_f, // way indication, right justified - input logic [1:0] ifu_bp_valid_f, // branch valid, right justified - input logic [1:0] ifu_bp_ret_f, // predicted ret indication, right justified + input logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 1, right justified + input logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified + input logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified + input logic [1:0] ifu_bp_way_f, // way indication, right justified + input logic [1:0] ifu_bp_valid_f, // branch valid, right justified + input logic [1:0] ifu_bp_ret_f, // predicted ret indication, right justified - output el2_br_pkt_t i0_brp, // Branch packet for I0. 
- output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index - output logic [pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR - output logic [pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag + output el2_br_pkt_t i0_brp, // Branch packet for I0. + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ifu_i0_bp_index, // BP index + output logic [ pt.BHT_GHR_SIZE-1:0] ifu_i0_bp_fghr, // BP FGHR + output logic [ pt.BTB_BTAG_SIZE-1:0] ifu_i0_bp_btag, // BP tag - output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index + output logic [$clog2(pt.BTB_SIZE)-1:0] ifu_i0_fa_index, // Fully associt btb index - output logic ifu_pmu_instr_aligned, // number of inst aligned this cycle + output logic ifu_pmu_instr_aligned, // number of inst aligned this cycle - output logic [15:0] ifu_i0_cinst // 16b compress inst for i0 - ); + output logic [15:0] ifu_i0_cinst // 16b compress inst for i0 +); - logic ifvalid; - logic shift_f1_f0, shift_f2_f0, shift_f2_f1; - logic fetch_to_f0, fetch_to_f1, fetch_to_f2; + logic ifvalid; + logic shift_f1_f0, shift_f2_f0, shift_f2_f1; + logic fetch_to_f0, fetch_to_f1, fetch_to_f2; - logic [1:0] f2val_in, f2val; - logic [1:0] f1val_in, f1val; - logic [1:0] f0val_in, f0val; - logic [1:0] sf1val, sf0val; + logic [1:0] f2val_in, f2val; + logic [1:0] f1val_in, f1val; + logic [1:0] f0val_in, f0val; + logic [1:0] sf1val, sf0val; - logic [31:0] aligndata; - logic first4B, first2B; + logic [31:0] aligndata; + logic first4B, first2B; - logic [31:0] uncompress0; - logic i0_shift; - logic shift_2B, shift_4B; - logic f1_shift_2B; - logic f2_valid, sf1_valid, sf0_valid; + logic [31:0] uncompress0; + logic i0_shift; + logic shift_2B, shift_4B; + logic f1_shift_2B; + logic f2_valid, sf1_valid, sf0_valid; - logic [31:0] ifirst; - logic [1:0] alignval; - logic [31:1] firstpc, secondpc; + logic [31:0] ifirst; + logic [ 1:0] alignval; + logic [31:1] firstpc, secondpc; - logic [11:0] f1poffset; - logic [11:0] f0poffset; - logic 
[pt.BHT_GHR_SIZE-1:0] f1fghr; - logic [pt.BHT_GHR_SIZE-1:0] f0fghr; - logic [1:0] f1hist1; - logic [1:0] f0hist1; - logic [1:0] f1hist0; - logic [1:0] f0hist0; + logic [ 11:0] f1poffset; + logic [ 11:0] f0poffset; + logic [pt.BHT_GHR_SIZE-1:0] f1fghr; + logic [pt.BHT_GHR_SIZE-1:0] f0fghr; + logic [ 1:0] f1hist1; + logic [ 1:0] f0hist1; + logic [ 1:0] f1hist0; + logic [ 1:0] f0hist0; - logic [1:0][$clog2(pt.BTB_SIZE)-1:0] f0index, f1index, alignindex; + logic [1:0][$clog2(pt.BTB_SIZE)-1:0] f0index, f1index, alignindex; - logic [1:0] f1ictype; - logic [1:0] f0ictype; + logic [ 1:0] f1ictype; + logic [ 1:0] f0ictype; - logic [1:0] f1pc4; - logic [1:0] f0pc4; + logic [ 1:0] f1pc4; + logic [ 1:0] f0pc4; - logic [1:0] f1ret; - logic [1:0] f0ret; - logic [1:0] f1way; - logic [1:0] f0way; + logic [ 1:0] f1ret; + logic [ 1:0] f0ret; + logic [ 1:0] f1way; + logic [ 1:0] f0way; - logic [1:0] f1brend; - logic [1:0] f0brend; + logic [ 1:0] f1brend; + logic [ 1:0] f0brend; - logic [1:0] alignbrend; - logic [1:0] alignpc4; + logic [ 1:0] alignbrend; + logic [ 1:0] alignpc4; - logic [1:0] alignret; - logic [1:0] alignway; - logic [1:0] alignhist1; - logic [1:0] alignhist0; - logic [1:1] alignfromf1; - logic i0_ends_f1; - logic i0_br_start_error; + logic [ 1:0] alignret; + logic [ 1:0] alignway; + logic [ 1:0] alignhist1; + logic [ 1:0] alignhist0; + logic [ 1:1] alignfromf1; + logic i0_ends_f1; + logic i0_br_start_error; - logic [31:1] f1prett; - logic [31:1] f0prett; - logic [1:0] f1dbecc; - logic [1:0] f0dbecc; - logic [1:0] f1icaf; - logic [1:0] f0icaf; + logic [31:1] f1prett; + logic [31:1] f0prett; + logic [ 1:0] f1dbecc; + logic [ 1:0] f0dbecc; + logic [ 1:0] f1icaf; + logic [ 1:0] f0icaf; - logic [1:0] aligndbecc; - logic [1:0] alignicaf; - logic i0_brp_pc4; + logic [ 1:0] aligndbecc; + logic [ 1:0] alignicaf; + logic i0_brp_pc4; - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] firstpc_hash, secondpc_hash; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] firstpc_hash, secondpc_hash; - logic 
first_legal; + logic first_legal; - logic [1:0] wrptr, wrptr_in; - logic [1:0] rdptr, rdptr_in; - logic [2:0] qwen; - logic [31:0] q2,q1,q0; - logic q2off_in, q2off; - logic q1off_in, q1off; - logic q0off_in, q0off; - logic f0_shift_2B; + logic [1:0] wrptr, wrptr_in; + logic [1:0] rdptr, rdptr_in; + logic [2:0] qwen; + logic [31:0] q2, q1, q0; + logic q2off_in, q2off; + logic q1off_in, q1off; + logic q0off_in, q0off; + logic f0_shift_2B; - logic [31:0] q0eff; - logic [31:0] q0final; - logic q0ptr; - logic [1:0] q0sel; + logic [31:0] q0eff; + logic [31:0] q0final; + logic q0ptr; + logic [ 1:0] q0sel; - logic [31:0] q1eff; - logic [15:0] q1final; - logic q1ptr; - logic [1:0] q1sel; + logic [31:0] q1eff; + logic [15:0] q1final; + logic q1ptr; + logic [ 1:0] q1sel; - logic [2:0] qren; + logic [ 2:0] qren; - logic consume_fb1, consume_fb0; - logic [1:0] icaf_eff; + logic consume_fb1, consume_fb0; + logic [1:0] icaf_eff; - localparam BRDATA_SIZE = pt.BTB_ENABLE ? 16+($clog2(pt.BTB_SIZE)*2*pt.BTB_FULLYA) : 4; - localparam BRDATA_WIDTH = pt.BTB_ENABLE ? 8+($clog2(pt.BTB_SIZE)*pt.BTB_FULLYA) : 2; - logic [BRDATA_SIZE-1:0] brdata_in, brdata2, brdata1, brdata0; - logic [BRDATA_SIZE-1:0] brdata1eff, brdata0eff; - logic [BRDATA_SIZE-1:0] brdata1final, brdata0final; + localparam BRDATA_SIZE = pt.BTB_ENABLE ? 16 + ($clog2(pt.BTB_SIZE) * 2 * pt.BTB_FULLYA) : 4; + localparam BRDATA_WIDTH = pt.BTB_ENABLE ? 
8 + ($clog2(pt.BTB_SIZE) * pt.BTB_FULLYA) : 2; + logic [BRDATA_SIZE-1:0] brdata_in, brdata2, brdata1, brdata0; + logic [BRDATA_SIZE-1:0] brdata1eff, brdata0eff; + logic [BRDATA_SIZE-1:0] brdata1final, brdata0final; - localparam MHI = 1+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE)); - localparam MSIZE = 2+(pt.BTB_ENABLE * (43+pt.BHT_GHR_SIZE)); + localparam MHI = 1 + (pt.BTB_ENABLE * (43 + pt.BHT_GHR_SIZE)); + localparam MSIZE = 2 + (pt.BTB_ENABLE * (43 + pt.BHT_GHR_SIZE)); - logic [MHI:0] misc_data_in, misc2, misc1, misc0; - logic [MHI:0] misc1eff, misc0eff; + logic [MHI:0] misc_data_in, misc2, misc1, misc0; + logic [MHI:0] misc1eff, misc0eff; - logic [pt.BTB_BTAG_SIZE-1:0] firstbrtag_hash, secondbrtag_hash; + logic [pt.BTB_BTAG_SIZE-1:0] firstbrtag_hash, secondbrtag_hash; - logic error_stall_in, error_stall; + logic error_stall_in, error_stall; - assign error_stall_in = (error_stall | ifu_async_error_start) & ~exu_flush_final; + assign error_stall_in = (error_stall | ifu_async_error_start) & ~exu_flush_final; - rvdff #(.WIDTH(7)) bundle1ff (.*, - .clk(active_clk), - .din ({wrptr_in[1:0],rdptr_in[1:0],q2off_in,q1off_in,q0off_in}), - .dout({wrptr[1:0], rdptr[1:0], q2off, q1off, q0off}) - ); + rvdff #( + .WIDTH(7) + ) bundle1ff ( + .*, + .clk (active_clk), + .din ({wrptr_in[1:0], rdptr_in[1:0], q2off_in, q1off_in, q0off_in}), + .dout({wrptr[1:0], rdptr[1:0], q2off, q1off, q0off}) + ); - rvdffie #(.WIDTH(7),.OVERRIDE(1)) bundle2ff (.*, - .din ({error_stall_in,f2val_in[1:0],f1val_in[1:0],f0val_in[1:0]}), - .dout({error_stall, f2val[1:0], f1val[1:0], f0val[1:0] }) - ); + rvdffie #( + .WIDTH(7), + .OVERRIDE(1) + ) bundle2ff ( + .*, + .din ({error_stall_in, f2val_in[1:0], f1val_in[1:0], f0val_in[1:0]}), + .dout({error_stall, f2val[1:0], f1val[1:0], f0val[1:0]}) + ); -if(pt.BTB_ENABLE==1) begin - rvdffe #(BRDATA_SIZE) brdata2ff (.*, .clk(clk), .en(qwen[2]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata2[BRDATA_SIZE-1:0])); - rvdffe #(BRDATA_SIZE) brdata1ff (.*, .clk(clk), 
.en(qwen[1]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata1[BRDATA_SIZE-1:0])); - rvdffe #(BRDATA_SIZE) brdata0ff (.*, .clk(clk), .en(qwen[0]), .din(brdata_in[BRDATA_SIZE-1:0]), .dout(brdata0[BRDATA_SIZE-1:0])); - rvdffe #(MSIZE) misc2ff (.*, .clk(clk), .en(qwen[2]), .din(misc_data_in[MHI:0]), .dout(misc2[MHI:0])); - rvdffe #(MSIZE) misc1ff (.*, .clk(clk), .en(qwen[1]), .din(misc_data_in[MHI:0]), .dout(misc1[MHI:0])); - rvdffe #(MSIZE) misc0ff (.*, .clk(clk), .en(qwen[0]), .din(misc_data_in[MHI:0]), .dout(misc0[MHI:0])); -end -else begin + if (pt.BTB_ENABLE == 1) begin + rvdffe #(BRDATA_SIZE) brdata2ff ( + .*, + .clk (clk), + .en (qwen[2]), + .din (brdata_in[BRDATA_SIZE-1:0]), + .dout(brdata2[BRDATA_SIZE-1:0]) + ); + rvdffe #(BRDATA_SIZE) brdata1ff ( + .*, + .clk (clk), + .en (qwen[1]), + .din (brdata_in[BRDATA_SIZE-1:0]), + .dout(brdata1[BRDATA_SIZE-1:0]) + ); + rvdffe #(BRDATA_SIZE) brdata0ff ( + .*, + .clk (clk), + .en (qwen[0]), + .din (brdata_in[BRDATA_SIZE-1:0]), + .dout(brdata0[BRDATA_SIZE-1:0]) + ); + rvdffe #(MSIZE) misc2ff ( + .*, + .clk (clk), + .en (qwen[2]), + .din (misc_data_in[MHI:0]), + .dout(misc2[MHI:0]) + ); + rvdffe #(MSIZE) misc1ff ( + .*, + .clk (clk), + .en (qwen[1]), + .din (misc_data_in[MHI:0]), + .dout(misc1[MHI:0]) + ); + rvdffe #(MSIZE) misc0ff ( + .*, + .clk (clk), + .en (qwen[0]), + .din (misc_data_in[MHI:0]), + .dout(misc0[MHI:0]) + ); + end else begin - rvdffie #((MSIZE*3)+(BRDATA_SIZE*3)) miscff (.*, - .din({qwen[2] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc2[MHI:0], brdata2[BRDATA_SIZE-1:0]}, - qwen[1] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc1[MHI:0], brdata1[BRDATA_SIZE-1:0]}, - qwen[0] ? 
{misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]}}), - .dout({misc2[MHI:0], brdata2[BRDATA_SIZE-1:0], - misc1[MHI:0], brdata1[BRDATA_SIZE-1:0], - misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]}) - ); -end + rvdffie #((MSIZE * 3) + (BRDATA_SIZE * 3)) miscff ( + .*, + .din({ + qwen[2] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc2[MHI:0], brdata2[BRDATA_SIZE-1:0]}, + qwen[1] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc1[MHI:0], brdata1[BRDATA_SIZE-1:0]}, + qwen[0] ? {misc_data_in[MHI:0], brdata_in[BRDATA_SIZE-1:0]} : {misc0[MHI:0], brdata0[BRDATA_SIZE-1:0]} + }), + .dout({ + misc2[MHI:0], + brdata2[BRDATA_SIZE-1:0], + misc1[MHI:0], + brdata1[BRDATA_SIZE-1:0], + misc0[MHI:0], + brdata0[BRDATA_SIZE-1:0] + }) + ); + end logic [31:1] q2pc, q1pc, q0pc; - rvdffe #(31) q2pcff (.*, .clk(clk), .en(qwen[2]), .din(ifu_fetch_pc[31:1]), .dout(q2pc[31:1])); - rvdffe #(31) q1pcff (.*, .clk(clk), .en(qwen[1]), .din(ifu_fetch_pc[31:1]), .dout(q1pc[31:1])); - rvdffe #(31) q0pcff (.*, .clk(clk), .en(qwen[0]), .din(ifu_fetch_pc[31:1]), .dout(q0pc[31:1])); + rvdffe #(31) q2pcff ( + .*, + .clk (clk), + .en (qwen[2]), + .din (ifu_fetch_pc[31:1]), + .dout(q2pc[31:1]) + ); + rvdffe #(31) q1pcff ( + .*, + .clk (clk), + .en (qwen[1]), + .din (ifu_fetch_pc[31:1]), + .dout(q1pc[31:1]) + ); + rvdffe #(31) q0pcff ( + .*, + .clk (clk), + .en (qwen[0]), + .din (ifu_fetch_pc[31:1]), + .dout(q0pc[31:1]) + ); - rvdffe #(32) q2ff (.*, .clk(clk), .en(qwen[2]), .din(ifu_fetch_data_f[31:0]), .dout(q2[31:0])); - rvdffe #(32) q1ff (.*, .clk(clk), .en(qwen[1]), .din(ifu_fetch_data_f[31:0]), .dout(q1[31:0])); - rvdffe #(32) q0ff (.*, .clk(clk), .en(qwen[0]), .din(ifu_fetch_data_f[31:0]), .dout(q0[31:0])); + rvdffe #(32) q2ff ( + .*, + .clk (clk), + .en (qwen[2]), + .din (ifu_fetch_data_f[31:0]), + .dout(q2[31:0]) + ); + rvdffe #(32) q1ff ( + .*, + .clk (clk), + .en (qwen[1]), + .din (ifu_fetch_data_f[31:0]), + .dout(q1[31:0]) + ); + rvdffe #(32) 
q0ff ( + .*, + .clk (clk), + .en (qwen[0]), + .din (ifu_fetch_data_f[31:0]), + .dout(q0[31:0]) + ); - // new queue control logic + // new queue control logic - assign qren[2:0] = { rdptr[1:0] == 2'b10, - rdptr[1:0] == 2'b01, - rdptr[1:0] == 2'b00 }; + assign qren[2:0] = {rdptr[1:0] == 2'b10, rdptr[1:0] == 2'b01, rdptr[1:0] == 2'b00}; - assign qwen[2:0] = { (wrptr[1:0] == 2'b10) & ifvalid, - (wrptr[1:0] == 2'b01) & ifvalid, - (wrptr[1:0] == 2'b00) & ifvalid }; + assign qwen[2:0] = { + (wrptr[1:0] == 2'b10) & ifvalid, + (wrptr[1:0] == 2'b01) & ifvalid, + (wrptr[1:0] == 2'b00) & ifvalid + }; - assign rdptr_in[1:0] = ({2{ qren[0] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b01 ) | + assign rdptr_in[1:0] = ({2{ qren[0] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b01 ) | ({2{ qren[1] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b10 ) | ({2{ qren[2] & ifu_fb_consume1 & ~exu_flush_final}} & 2'b00 ) | ({2{ qren[0] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b10 ) | @@ -265,436 +352,468 @@ end ({2{ qren[2] & ifu_fb_consume2 & ~exu_flush_final}} & 2'b01 ) | ({2{~ifu_fb_consume1 & ~ifu_fb_consume2 & ~exu_flush_final}} & rdptr[1:0]); - assign wrptr_in[1:0] = ({2{ qwen[0] & ~exu_flush_final}} & 2'b01 ) | + assign wrptr_in[1:0] = ({2{ qwen[0] & ~exu_flush_final}} & 2'b01 ) | ({2{ qwen[1] & ~exu_flush_final}} & 2'b10 ) | ({2{ qwen[2] & ~exu_flush_final}} & 2'b00 ) | ({2{~ifvalid & ~exu_flush_final}} & wrptr[1:0]); - assign q2off_in = ( ~qwen[2] & (rdptr[1:0]==2'd2) & (q2off | f0_shift_2B) ) | + assign q2off_in = ( ~qwen[2] & (rdptr[1:0]==2'd2) & (q2off | f0_shift_2B) ) | ( ~qwen[2] & (rdptr[1:0]==2'd1) & (q2off | f1_shift_2B) ) | ( ~qwen[2] & (rdptr[1:0]==2'd0) & q2off ); - assign q1off_in = ( ~qwen[1] & (rdptr[1:0]==2'd1) & (q1off | f0_shift_2B) ) | + assign q1off_in = ( ~qwen[1] & (rdptr[1:0]==2'd1) & (q1off | f0_shift_2B) ) | ( ~qwen[1] & (rdptr[1:0]==2'd0) & (q1off | f1_shift_2B) ) | ( ~qwen[1] & (rdptr[1:0]==2'd2) & q1off ); - assign q0off_in = ( ~qwen[0] & (rdptr[1:0]==2'd0) & 
(q0off | f0_shift_2B) ) | + assign q0off_in = ( ~qwen[0] & (rdptr[1:0]==2'd0) & (q0off | f0_shift_2B) ) | ( ~qwen[0] & (rdptr[1:0]==2'd2) & (q0off | f1_shift_2B) ) | ( ~qwen[0] & (rdptr[1:0]==2'd1) & q0off ); - assign q0ptr = ( (rdptr[1:0]==2'b00) & q0off ) | + assign q0ptr = ( (rdptr[1:0]==2'b00) & q0off ) | ( (rdptr[1:0]==2'b01) & q1off ) | ( (rdptr[1:0]==2'b10) & q2off ); - assign q1ptr = ( (rdptr[1:0]==2'b00) & q1off ) | + assign q1ptr = ( (rdptr[1:0]==2'b00) & q1off ) | ( (rdptr[1:0]==2'b01) & q2off ) | ( (rdptr[1:0]==2'b10) & q0off ); - assign q0sel[1:0] = {q0ptr,~q0ptr}; + assign q0sel[1:0] = {q0ptr, ~q0ptr}; - assign q1sel[1:0] = {q1ptr,~q1ptr}; + assign q1sel[1:0] = {q1ptr, ~q1ptr}; - // end new queue control logic + // end new queue control logic - // misc data that is associated with each fetch buffer + // misc data that is associated with each fetch buffer - if(pt.BTB_ENABLE==1) - assign misc_data_in[MHI:0] = { + if (pt.BTB_ENABLE == 1) + assign misc_data_in[MHI:0] = { - ic_access_fault_type_f[1:0], - ifu_bp_btb_target_f[31:1], - ifu_bp_poffset_f[11:0], - ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] - }; - else - assign misc_data_in[MHI:0] = { - ic_access_fault_type_f[1:0] - }; + ic_access_fault_type_f[1:0], + ifu_bp_btb_target_f[31:1], + ifu_bp_poffset_f[11:0], + ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] + }; + else assign misc_data_in[MHI:0] = {ic_access_fault_type_f[1:0]}; - assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) | + assign {misc1eff[MHI:0],misc0eff[MHI:0]} = (({MSIZE*2{qren[0]}} & {misc1[MHI:0],misc0[MHI:0]}) | ({MSIZE*2{qren[1]}} & {misc2[MHI:0],misc1[MHI:0]}) | ({MSIZE*2{qren[2]}} & {misc0[MHI:0],misc2[MHI:0]})); - if(pt.BTB_ENABLE==1) begin - assign { + if (pt.BTB_ENABLE == 1) begin + assign { f1ictype[1:0], f1prett[31:1], f1poffset[11:0], f1fghr[pt.BHT_GHR_SIZE-1:0] } = misc1eff[MHI:0]; - assign { + assign { f0ictype[1:0], f0prett[31:1], f0poffset[11:0], f0fghr[pt.BHT_GHR_SIZE-1:0] } = misc0eff[MHI:0]; - 
if(pt.BTB_FULLYA) begin - assign brdata_in[BRDATA_SIZE-1:0] = { - ifu_bp_fa_index_f[1], iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1], - ifu_bp_fa_index_f[0], iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0] - }; - assign {f0index[1],f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], + if (pt.BTB_FULLYA) begin + assign brdata_in[BRDATA_SIZE-1:0] = { + ifu_bp_fa_index_f[1], + iccm_rd_ecc_double_err[1], + ic_access_fault_f[1], + ifu_bp_hist1_f[1], + ifu_bp_hist0_f[1], + ifu_bp_pc4_f[1], + ifu_bp_way_f[1], + ifu_bp_valid_f[1], + ifu_bp_ret_f[1], + ifu_bp_fa_index_f[0], + iccm_rd_ecc_double_err[0], + ic_access_fault_f[0], + ifu_bp_hist1_f[0], + ifu_bp_hist0_f[0], + ifu_bp_pc4_f[0], + ifu_bp_way_f[0], + ifu_bp_valid_f[0], + ifu_bp_ret_f[0] + }; + assign {f0index[1],f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], f0index[0],f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0]; - assign {f1index[1],f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], + assign {f1index[1],f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], f1index[0],f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0]; - end - else begin - assign brdata_in[BRDATA_SIZE-1:0] = { - iccm_rd_ecc_double_err[1],ic_access_fault_f[1],ifu_bp_hist1_f[1],ifu_bp_hist0_f[1],ifu_bp_pc4_f[1],ifu_bp_way_f[1],ifu_bp_valid_f[1],ifu_bp_ret_f[1], - iccm_rd_ecc_double_err[0],ic_access_fault_f[0],ifu_bp_hist1_f[0],ifu_bp_hist0_f[0],ifu_bp_pc4_f[0],ifu_bp_way_f[0],ifu_bp_valid_f[0],ifu_bp_ret_f[0] - }; - assign 
{f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], + end else begin + assign brdata_in[BRDATA_SIZE-1:0] = { + iccm_rd_ecc_double_err[1], + ic_access_fault_f[1], + ifu_bp_hist1_f[1], + ifu_bp_hist0_f[1], + ifu_bp_pc4_f[1], + ifu_bp_way_f[1], + ifu_bp_valid_f[1], + ifu_bp_ret_f[1], + iccm_rd_ecc_double_err[0], + ic_access_fault_f[0], + ifu_bp_hist1_f[0], + ifu_bp_hist0_f[0], + ifu_bp_pc4_f[0], + ifu_bp_way_f[0], + ifu_bp_valid_f[0], + ifu_bp_ret_f[0] + }; + assign {f0dbecc[1],f0icaf[1],f0hist1[1],f0hist0[1],f0pc4[1],f0way[1],f0brend[1],f0ret[1], f0dbecc[0],f0icaf[0],f0hist1[0],f0hist0[0],f0pc4[0],f0way[0],f0brend[0],f0ret[0]} = brdata0final[BRDATA_SIZE-1:0]; - assign {f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], + assign {f1dbecc[1],f1icaf[1],f1hist1[1],f1hist0[1],f1pc4[1],f1way[1],f1brend[1],f1ret[1], f1dbecc[0],f1icaf[0],f1hist1[0],f1hist0[0],f1pc4[0],f1way[0],f1brend[0],f1ret[0]} = brdata1final[BRDATA_SIZE-1:0]; - end + end - assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) | + assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) | ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) | ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]})); - assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) | + assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) | ({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); - assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) | + assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) | 
({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); - end // if (pt.BTB_ENABLE==1) + end // if (pt.BTB_ENABLE==1) else begin - assign { - f1ictype[1:0] - } = misc1eff[MHI:0]; + assign {f1ictype[1:0]} = misc1eff[MHI:0]; - assign { - f0ictype[1:0] - } = misc0eff[MHI:0]; + assign {f0ictype[1:0]} = misc0eff[MHI:0]; - assign brdata_in[BRDATA_SIZE-1:0] = { - iccm_rd_ecc_double_err[1],ic_access_fault_f[1], - iccm_rd_ecc_double_err[0],ic_access_fault_f[0] - }; - assign {f0dbecc[1],f0icaf[1], - f0dbecc[0],f0icaf[0]} = brdata0final[BRDATA_SIZE-1:0]; + assign brdata_in[BRDATA_SIZE-1:0] = { + iccm_rd_ecc_double_err[1], + ic_access_fault_f[1], + iccm_rd_ecc_double_err[0], + ic_access_fault_f[0] + }; + assign {f0dbecc[1], f0icaf[1], f0dbecc[0], f0icaf[0]} = brdata0final[BRDATA_SIZE-1:0]; - assign {f1dbecc[1],f1icaf[1], - f1dbecc[0],f1icaf[0]} = brdata1final[BRDATA_SIZE-1:0]; + assign {f1dbecc[1], f1icaf[1], f1dbecc[0], f1icaf[0]} = brdata1final[BRDATA_SIZE-1:0]; - assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) | + assign {brdata1eff[BRDATA_SIZE-1:0],brdata0eff[BRDATA_SIZE-1:0]} = (({BRDATA_SIZE*2{qren[0]}} & {brdata1[BRDATA_SIZE-1:0],brdata0[BRDATA_SIZE-1:0]}) | ({BRDATA_SIZE*2{qren[1]}} & {brdata2[BRDATA_SIZE-1:0],brdata1[BRDATA_SIZE-1:0]}) | ({BRDATA_SIZE*2{qren[2]}} & {brdata0[BRDATA_SIZE-1:0],brdata2[BRDATA_SIZE-1:0]})); - assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) | + assign brdata0final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q0sel[0]}} & { brdata0eff[2*BRDATA_WIDTH-1:0]}) | ({BRDATA_SIZE{q0sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata0eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); - assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) | + assign brdata1final[BRDATA_SIZE-1:0] = (({BRDATA_SIZE{q1sel[0]}} & { brdata1eff[2*BRDATA_WIDTH-1:0]}) | 
({BRDATA_SIZE{q1sel[1]}} & {{BRDATA_WIDTH{1'b0}},brdata1eff[BRDATA_SIZE-1:BRDATA_WIDTH]})); - end // else: !if(pt.BTB_ENABLE==1) + end // else: !if(pt.BTB_ENABLE==1) - // possible states of { sf0_valid, sf1_valid, f2_valid } - // - // 000 if->f0 - // 100 if->f1 - // 101 illegal - // 010 if->f1, f1->f0 - // 110 if->f2 - // 001 if->f1, f2->f0 - // 011 if->f2, f2->f1, f1->f0 - // 111 !if, no shift + // possible states of { sf0_valid, sf1_valid, f2_valid } + // + // 000 if->f0 + // 100 if->f1 + // 101 illegal + // 010 if->f1, f1->f0 + // 110 if->f2 + // 001 if->f1, f2->f0 + // 011 if->f2, f2->f1, f1->f0 + // 111 !if, no shift - assign f2_valid = f2val[0]; - assign sf1_valid = sf1val[0]; - assign sf0_valid = sf0val[0]; + assign f2_valid = f2val[0]; + assign sf1_valid = sf1val[0]; + assign sf0_valid = sf0val[0]; - // interface to fetch + // interface to fetch - assign consume_fb0 = ~sf0val[0] & f0val[0]; + assign consume_fb0 = ~sf0val[0] & f0val[0]; - assign consume_fb1 = ~sf1val[0] & f1val[0]; + assign consume_fb1 = ~sf1val[0] & f1val[0]; - assign ifu_fb_consume1 = consume_fb0 & ~consume_fb1 & ~exu_flush_final; - assign ifu_fb_consume2 = consume_fb0 & consume_fb1 & ~exu_flush_final; + assign ifu_fb_consume1 = consume_fb0 & ~consume_fb1 & ~exu_flush_final; + assign ifu_fb_consume2 = consume_fb0 & consume_fb1 & ~exu_flush_final; - assign ifvalid = ifu_fetch_val[0]; + assign ifvalid = ifu_fetch_val[0]; - assign shift_f1_f0 = ~sf0_valid & sf1_valid; - assign shift_f2_f0 = ~sf0_valid & ~sf1_valid & f2_valid; - assign shift_f2_f1 = ~sf0_valid & sf1_valid & f2_valid; + assign shift_f1_f0 = ~sf0_valid & sf1_valid; + assign shift_f2_f0 = ~sf0_valid & ~sf1_valid & f2_valid; + assign shift_f2_f1 = ~sf0_valid & sf1_valid & f2_valid; - assign fetch_to_f0 = ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid; + assign fetch_to_f0 = ~sf0_valid & ~sf1_valid & ~f2_valid & ifvalid; - assign fetch_to_f1 = (~sf0_valid & ~sf1_valid & f2_valid & ifvalid) | + assign fetch_to_f1 = (~sf0_valid & 
~sf1_valid & f2_valid & ifvalid) | (~sf0_valid & sf1_valid & ~f2_valid & ifvalid) | ( sf0_valid & ~sf1_valid & ~f2_valid & ifvalid); - assign fetch_to_f2 = (~sf0_valid & sf1_valid & f2_valid & ifvalid) | + assign fetch_to_f2 = (~sf0_valid & sf1_valid & f2_valid & ifvalid) | ( sf0_valid & sf1_valid & ~f2_valid & ifvalid); - assign f2val_in[1:0] = ({2{ fetch_to_f2 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + assign f2val_in[1:0] = ({2{ fetch_to_f2 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | ({2{~fetch_to_f2 & ~shift_f2_f1 & ~shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] ); - assign sf1val[1:0] = ({2{ f1_shift_2B}} & {1'b0,f1val[1]}) | - ({2{~f1_shift_2B}} & f1val[1:0] ); + assign sf1val[1:0] = ({2{f1_shift_2B}} & {1'b0, f1val[1]}) | ({2{~f1_shift_2B}} & f1val[1:0]); - assign f1val_in[1:0] = ({2{ fetch_to_f1 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + assign f1val_in[1:0] = ({2{ fetch_to_f1 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | ({2{ shift_f2_f1 & ~exu_flush_final}} & f2val[1:0] ) | ({2{~fetch_to_f1 & ~shift_f2_f1 & ~shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] ); - assign sf0val[1:0] = ({2{ shift_2B }} & {1'b0,f0val[1]}) | + assign sf0val[1:0] = ({2{ shift_2B }} & {1'b0,f0val[1]}) | ({2{~shift_2B & ~shift_4B}} & f0val[1:0]); - assign f0val_in[1:0] = ({2{fetch_to_f0 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | + assign f0val_in[1:0] = ({2{fetch_to_f0 & ~exu_flush_final}} & ifu_fetch_val[1:0]) | ({2{ shift_f2_f0 & ~exu_flush_final}} & f2val[1:0] ) | ({2{ shift_f1_f0 & ~exu_flush_final}} & sf1val[1:0] ) | ({2{~fetch_to_f0 & ~shift_f2_f0 & ~shift_f1_f0 & ~exu_flush_final}} & sf0val[1:0] ); - assign {q1eff[31:0],q0eff[31:0]} = (({64{qren[0]}} & {q1[31:0],q0[31:0]}) | + assign {q1eff[31:0],q0eff[31:0]} = (({64{qren[0]}} & {q1[31:0],q0[31:0]}) | ({64{qren[1]}} & {q2[31:0],q1[31:0]}) | ({64{qren[2]}} & {q0[31:0],q2[31:0]})); - assign q0final[31:0] = ({32{q0sel[0]}} & { q0eff[31:0]}) | + assign q0final[31:0] = ({32{q0sel[0]}} & { q0eff[31:0]}) | ({32{q0sel[1]}} 
& {16'b0,q0eff[31:16]}); - assign q1final[15:0] = ({16{q1sel[0]}} & q1eff[15:0] ) | - ({16{q1sel[1]}} & q1eff[31:16]); - logic [31:1] q0pceff, q0pcfinal; - logic [31:1] q1pceff; + assign q1final[15:0] = ({16{q1sel[0]}} & q1eff[15:0]) | ({16{q1sel[1]}} & q1eff[31:16]); + logic [31:1] q0pceff, q0pcfinal; + logic [31:1] q1pceff; - assign {q1pceff[31:1],q0pceff[31:1]} = (({62{qren[0]}} & {q1pc[31:1],q0pc[31:1]}) | + assign {q1pceff[31:1],q0pceff[31:1]} = (({62{qren[0]}} & {q1pc[31:1],q0pc[31:1]}) | ({62{qren[1]}} & {q2pc[31:1],q1pc[31:1]}) | ({62{qren[2]}} & {q0pc[31:1],q2pc[31:1]})); - assign q0pcfinal[31:1] = ({31{q0sel[0]}} & ( q0pceff[31:1])) | + assign q0pcfinal[31:1] = ({31{q0sel[0]}} & ( q0pceff[31:1])) | ({31{q0sel[1]}} & ( q0pceff[31:1] + 31'd1)); - assign aligndata[31:0] = ({32{ f0val[1] }} & {q0final[31:0]}) | + assign aligndata[31:0] = ({32{ f0val[1] }} & {q0final[31:0]}) | ({32{~f0val[1] & f0val[0]}} & {q1final[15:0],q0final[15:0]}); - assign alignval[1:0] = ({ 2{ f0val[1] }} & {2'b11}) | - ({ 2{~f0val[1] & f0val[0]}} & {f1val[0],1'b1}); + assign alignval[1:0] = ({2{f0val[1]}} & {2'b11}) | ({2{~f0val[1] & f0val[0]}} & {f1val[0], 1'b1}); - assign alignicaf[1:0] = ({ 2{ f0val[1] }} & f0icaf[1:0] ) | + assign alignicaf[1:0] = ({ 2{ f0val[1] }} & f0icaf[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1icaf[0],f0icaf[0]}); - assign aligndbecc[1:0] = ({ 2{ f0val[1] }} & f0dbecc[1:0] ) | + assign aligndbecc[1:0] = ({ 2{ f0val[1] }} & f0dbecc[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1dbecc[0],f0dbecc[0]}); - if (pt.BTB_ENABLE==1) begin + if (pt.BTB_ENABLE == 1) begin - // for branch prediction + // for branch prediction - assign alignbrend[1:0] = ({ 2{ f0val[1] }} & f0brend[1:0] ) | + assign alignbrend[1:0] = ({ 2{ f0val[1] }} & f0brend[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1brend[0],f0brend[0]}); - assign alignpc4[1:0] = ({ 2{ f0val[1] }} & f0pc4[1:0] ) | + assign alignpc4[1:0] = ({ 2{ f0val[1] }} & f0pc4[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1pc4[0],f0pc4[0]}); - 
if(pt.BTB_FULLYA) begin - assign alignindex[0] = f0index[0]; - assign alignindex[1] = f0val[1] ? f0index[1] : f1index[0]; - end + if (pt.BTB_FULLYA) begin + assign alignindex[0] = f0index[0]; + assign alignindex[1] = f0val[1] ? f0index[1] : f1index[0]; + end - assign alignret[1:0] = ({ 2{ f0val[1] }} & f0ret[1:0] ) | + assign alignret[1:0] = ({ 2{ f0val[1] }} & f0ret[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1ret[0],f0ret[0]}); - assign alignway[1:0] = ({ 2{ f0val[1] }} & f0way[1:0] ) | + assign alignway[1:0] = ({ 2{ f0val[1] }} & f0way[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1way[0],f0way[0]}); - assign alignhist1[1:0] = ({ 2{ f0val[1] }} & f0hist1[1:0] ) | + assign alignhist1[1:0] = ({ 2{ f0val[1] }} & f0hist1[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1hist1[0],f0hist1[0]}); - assign alignhist0[1:0] = ({ 2{ f0val[1] }} & f0hist0[1:0] ) | + assign alignhist0[1:0] = ({ 2{ f0val[1] }} & f0hist0[1:0] ) | ({ 2{~f0val[1] & f0val[0]}} & {f1hist0[0],f0hist0[0]}); - assign secondpc[31:1] = ({31{ f0val[1] }} & (q0pceff[31:1] + 31'd1)) | - // you need the base pc for 2nd one only (4B max, 2B for the 1st and 2B for the 2nd) - ({31{~f0val[1] & f0val[0]}} & q1pceff[31:1] ); + assign secondpc[31:1] = ({31{f0val[1]}} & (q0pceff[31:1] + 31'd1)) | + // you need the base pc for 2nd one only (4B max, 2B for the 1st and 2B for the 2nd) + ({31{~f0val[1] & f0val[0]}} & q1pceff[31:1]); - assign firstpc[31:1] = q0pcfinal[31:1]; - end // if (pt.BTB_ENABLE==1) + assign firstpc[31:1] = q0pcfinal[31:1]; + end // if (pt.BTB_ENABLE==1) - assign alignfromf1[1] = ~f0val[1] & f0val[0]; + assign alignfromf1[1] = ~f0val[1] & f0val[0]; - assign ifu_i0_pc[31:1] = q0pcfinal[31:1]; + assign ifu_i0_pc[31:1] = q0pcfinal[31:1]; - assign ifu_i0_pc4 = first4B; + assign ifu_i0_pc4 = first4B; - assign ifu_i0_cinst[15:0] = aligndata[15:0]; + assign ifu_i0_cinst[15:0] = aligndata[15:0]; - assign first4B = (aligndata[1:0] == 2'b11); - assign first2B = ~first4B; + assign first4B = (aligndata[1:0] == 2'b11); + 
assign first2B = ~first4B; - assign ifu_i0_valid = (first4B & alignval[1]) | - (first2B & alignval[0]); + assign ifu_i0_valid = (first4B & alignval[1]) | (first2B & alignval[0]); - // inst access fault on any byte of inst results in access fault for the inst - assign ifu_i0_icaf = (first4B & (|alignicaf[1:0])) | - (first2B & alignicaf[0] ); + // inst access fault on any byte of inst results in access fault for the inst + assign ifu_i0_icaf = (first4B & (|alignicaf[1:0])) | (first2B & alignicaf[0]); - assign ifu_i0_icaf_type[1:0] = (first4B & ~f0val[1] & f0val[0] & ~alignicaf[0] & ~aligndbecc[0]) ? f1ictype[1:0] : f0ictype[1:0]; + assign ifu_i0_icaf_type[1:0] = (first4B & ~f0val[1] & f0val[0] & ~alignicaf[0] & ~aligndbecc[0]) ? f1ictype[1:0] : f0ictype[1:0]; - assign icaf_eff[1:0] = alignicaf[1:0] | aligndbecc[1:0]; + assign icaf_eff[1:0] = alignicaf[1:0] | aligndbecc[1:0]; - assign ifu_i0_icaf_second = first4B & ~icaf_eff[0] & icaf_eff[1]; + assign ifu_i0_icaf_second = first4B & ~icaf_eff[0] & icaf_eff[1]; - assign ifu_i0_dbecc = (first4B & (|aligndbecc[1:0])) | - (first2B & aligndbecc[0] ); + assign ifu_i0_dbecc = (first4B & (|aligndbecc[1:0])) | (first2B & aligndbecc[0]); - assign ifirst[31:0] = aligndata[31:0]; + assign ifirst[31:0] = aligndata[31:0]; - assign ifu_i0_instr[31:0] = ({32{first4B & alignval[1]}} & ifirst[31:0]) | + assign ifu_i0_instr[31:0] = ({32{first4B & alignval[1]}} & ifirst[31:0]) | ({32{first2B & alignval[0]}} & uncompress0[31:0]); -if(pt.BTB_ENABLE==1) begin + if (pt.BTB_ENABLE == 1) begin - // if you detect br does not start on instruction boundary + // if you detect br does not start on instruction boundary - el2_btb_addr_hash #(.pt(pt)) firsthash (.pc(firstpc [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), - .hash(firstpc_hash [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); - el2_btb_addr_hash #(.pt(pt)) secondhash(.pc(secondpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), - .hash(secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + el2_btb_addr_hash #( + .pt(pt) + ) 
firsthash ( + .pc (firstpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), + .hash(firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); + el2_btb_addr_hash #( + .pt(pt) + ) secondhash ( + .pc (secondpc[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), + .hash(secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); - if(pt.BTB_FULLYA) begin - assign firstbrtag_hash = firstpc; + if (pt.BTB_FULLYA) begin + assign firstbrtag_hash = firstpc; assign secondbrtag_hash = secondpc; - end - else begin - if(pt.BTB_BTAG_FOLD) begin : btbfold - el2_btb_tag_hash_fold #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), - .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0])); - el2_btb_tag_hash_fold #(.pt(pt)) second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), - .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0])); + end else begin + if (pt.BTB_BTAG_FOLD) begin : btbfold + el2_btb_tag_hash_fold #( + .pt(pt) + ) first_brhash ( + .pc (firstpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), + .hash(firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0]) + ); + el2_btb_tag_hash_fold #( + .pt(pt) + ) second_brhash ( + .pc (secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), + .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]) + ); + end else begin + el2_btb_tag_hash #( + .pt(pt) + ) first_brhash ( + .pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), + .hash(firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0]) + ); + el2_btb_tag_hash #( + .pt(pt) + ) second_brhash ( + .pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), + .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]) + ); end - else begin - el2_btb_tag_hash #(.pt(pt)) first_brhash (.pc(firstpc [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), - .hash(firstbrtag_hash [pt.BTB_BTAG_SIZE-1:0])); - el2_btb_tag_hash #(.pt(pt)) 
second_brhash(.pc(secondpc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]), - .hash(secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0])); - end - end // else: !if(pt.BTB_FULLYA) + end // else: !if(pt.BTB_FULLYA) - // start_indexing - you want pc to be based on where the end of branch is prediction - // normal indexing pc based that's incorrect now for pc4 cases it's pc4 + 2 + // start_indexing - you want pc to be based on where the end of branch is prediction + // normal indexing pc based that's incorrect now for pc4 cases it's pc4 + 2 - always_comb begin + always_comb begin - i0_brp = '0; + i0_brp = '0; - i0_br_start_error = (first4B & alignval[1] & alignbrend[0]); + i0_br_start_error = (first4B & alignval[1] & alignbrend[0]); - i0_brp.valid = (first2B & alignbrend[0]) | - (first4B & alignbrend[1]) | - i0_br_start_error; + i0_brp.valid = (first2B & alignbrend[0]) | (first4B & alignbrend[1]) | i0_br_start_error; - i0_brp_pc4 = (first2B & alignpc4[0]) | - (first4B & alignpc4[1]); + i0_brp_pc4 = (first2B & alignpc4[0]) | (first4B & alignpc4[1]); - i0_brp.ret = (first2B & alignret[0]) | - (first4B & alignret[1]); + i0_brp.ret = (first2B & alignret[0]) | (first4B & alignret[1]); - i0_brp.way = (first2B | alignbrend[0]) ? alignway[0] : alignway[1]; + i0_brp.way = (first2B | alignbrend[0]) ? alignway[0] : alignway[1]; - i0_brp.hist[1] = (first2B & alignhist1[0]) | - (first4B & alignhist1[1]); + i0_brp.hist[1] = (first2B & alignhist1[0]) | (first4B & alignhist1[1]); - i0_brp.hist[0] = (first2B & alignhist0[0]) | - (first4B & alignhist0[1]); + i0_brp.hist[0] = (first2B & alignhist0[0]) | (first4B & alignhist0[1]); - i0_ends_f1 = first4B & alignfromf1[1]; + i0_ends_f1 = first4B & alignfromf1[1]; - i0_brp.toffset[11:0] = (i0_ends_f1) ? f1poffset[11:0] : f0poffset[11:0]; + i0_brp.toffset[11:0] = (i0_ends_f1) ? f1poffset[11:0] : f0poffset[11:0]; - i0_brp.prett[31:1] = (i0_ends_f1) ? f1prett[31:1] : f0prett[31:1]; + i0_brp.prett[31:1] = (i0_ends_f1) ? 
f1prett[31:1] : f0prett[31:1]; - i0_brp.br_start_error = i0_br_start_error; + i0_brp.br_start_error = i0_br_start_error; - i0_brp.bank = (first2B | alignbrend[0]) ? firstpc[1] : secondpc[1]; + i0_brp.bank = (first2B | alignbrend[0]) ? firstpc[1] : secondpc[1]; i0_brp.br_error = (i0_brp.valid & i0_brp_pc4 & first2B) | (i0_brp.valid & ~i0_brp_pc4 & first4B); - if(pt.BTB_FULLYA) - ifu_i0_fa_index = (first2B | alignbrend[0]) ? alignindex[0] : alignindex[1]; - else - ifu_i0_fa_index = '0; + if (pt.BTB_FULLYA) + ifu_i0_fa_index = (first2B | alignbrend[0]) ? alignindex[0] : alignindex[1]; + else ifu_i0_fa_index = '0; - end + end - assign ifu_i0_bp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = (first2B | alignbrend[0]) ? firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : + assign ifu_i0_bp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = (first2B | alignbrend[0]) ? firstpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : secondpc_hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign ifu_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0] = (i0_ends_f1) ? f1fghr[pt.BHT_GHR_SIZE-1:0] : + assign ifu_i0_bp_fghr[pt.BHT_GHR_SIZE-1:0] = (i0_ends_f1) ? f1fghr[pt.BHT_GHR_SIZE-1:0] : f0fghr[pt.BHT_GHR_SIZE-1:0]; - assign ifu_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0] = (first2B | alignbrend[0]) ? firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0] : + assign ifu_i0_bp_btag[pt.BTB_BTAG_SIZE-1:0] = (first2B | alignbrend[0]) ? firstbrtag_hash[pt.BTB_BTAG_SIZE-1:0] : secondbrtag_hash[pt.BTB_BTAG_SIZE-1:0]; -end -else begin - assign i0_brp = '0; - assign ifu_i0_bp_index = '0; - assign ifu_i0_bp_fghr = '0; - assign ifu_i0_bp_btag = '0; -end // else: !if(pt.BTB_ENABLE==1) + end else begin + assign i0_brp = '0; + assign ifu_i0_bp_index = '0; + assign ifu_i0_bp_fghr = '0; + assign ifu_i0_bp_btag = '0; + end // else: !if(pt.BTB_ENABLE==1) - // decompress + // decompress - // quiet inputs for 4B inst - el2_ifu_compress_ctl compress0 (.din((first2B) ? 
aligndata[15:0] : '0), .dout(uncompress0[31:0])); + // quiet inputs for 4B inst + el2_ifu_compress_ctl compress0 ( + .din ((first2B) ? aligndata[15:0] : '0), + .dout(uncompress0[31:0]) + ); - assign i0_shift = dec_i0_decode_d & ~error_stall; + assign i0_shift = dec_i0_decode_d & ~error_stall; - assign ifu_pmu_instr_aligned = i0_shift; + assign ifu_pmu_instr_aligned = i0_shift; - // compute how many bytes are being shifted from f0 + // compute how many bytes are being shifted from f0 - assign shift_2B = i0_shift & first2B; + assign shift_2B = i0_shift & first2B; - assign shift_4B = i0_shift & first4B; + assign shift_4B = i0_shift & first4B; - // exact equations for the queue logic - assign f0_shift_2B = (shift_2B & f0val[0] ) | - (shift_4B & f0val[0] & ~f0val[1]); + // exact equations for the queue logic + assign f0_shift_2B = (shift_2B & f0val[0]) | (shift_4B & f0val[0] & ~f0val[1]); - // f0 valid states - // 11 - // 10 - // 00 + // f0 valid states + // 11 + // 10 + // 00 - assign f1_shift_2B = f0val[0] & ~f0val[1] & shift_4B; + assign f1_shift_2B = f0val[0] & ~f0val[1] & shift_4B; diff --git a/Flow/design/ifu/el2_ifu_bp_ctl.sv b/Flow/design/ifu/el2_ifu_bp_ctl.sv index dd361d5..78ccc66 100644 --- a/Flow/design/ifu/el2_ifu_bp_ctl.sv +++ b/Flow/design/ifu/el2_ifu_bp_ctl.sv @@ -25,868 +25,998 @@ //******************************************************************************** module el2_ifu_bp_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( + `include "el2_param.vh" +) ( - input logic clk, - input logic rst_l, + input logic clk, + input logic rst_l, - input logic ic_hit_f, // Icache hit, enables F address capture + input logic ic_hit_f, // Icache hit, enables F address capture - input logic [31:1] ifc_fetch_addr_f, // look up btb address - input logic ifc_fetch_req_f, // F1 valid + input logic [31:1] ifc_fetch_addr_f, // look up btb address + input logic ifc_fetch_req_f, // F1 valid - input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP 
commit update packet, includes errors - input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index + input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors + input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index - input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index + input logic [$clog2( +pt.BTB_SIZE +)-1:0] dec_fa_error_index, // Fully associative btb error index - input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F - input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches + input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F + input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches - input logic dec_tlu_bpred_disable, // disable all branch prediction + input logic dec_tlu_bpred_disable, // disable all branch prediction - input el2_predict_pkt_t exu_mp_pkt, // mispredict packet + input el2_predict_pkt_t exu_mp_pkt, // mispredict packet - input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr) - input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index - input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag + input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr) + input logic [ pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index + input logic [ pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag - input logic exu_flush_final, // all flushes + input logic exu_flush_final, // all flushes - output logic ifu_bp_hit_taken_f, // btb hit, select target - output logic [31:1] ifu_bp_btb_target_f, // predicted target PC - 
output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified + output logic ifu_bp_hit_taken_f, // btb hit, select target + output logic [31:1] ifu_bp_btb_target_f, // predicted target PC + output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified - output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr + output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr - output logic [1:0] ifu_bp_way_f, // way - output logic [1:0] ifu_bp_ret_f, // predicted ret - output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified - output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified - output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified - output logic [1:0] ifu_bp_valid_f, // branch valid, right justified - output logic [11:0] ifu_bp_poffset_f, // predicted target + output logic [1:0] ifu_bp_way_f, // way + output logic [1:0] ifu_bp_ret_f, // predicted ret + output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified + output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified + output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified + output logic [1:0] ifu_bp_valid_f, // branch valid, right justified + output logic [11:0] ifu_bp_poffset_f, // predicted target - output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option) + output logic [1:0][$clog2( +pt.BTB_SIZE +)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option) - input logic scan_mode - ); + input logic scan_mode +); - localparam BTB_DWIDTH = pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5; - localparam BTB_DWIDTH_TOP = int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4; - localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1; 
- localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ); - localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails - localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER); - localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE); + localparam BTB_DWIDTH = pt.BTB_TOFFSET_SIZE + pt.BTB_BTAG_SIZE + 5; + localparam BTB_DWIDTH_TOP = int'(pt.BTB_TOFFSET_SIZE) + int'(pt.BTB_BTAG_SIZE) + 4; + localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE) - 1; + localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ); + localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails + localparam FA_TAG_START_LOWER = 3 + int'(pt.BTB_TOFFSET_SIZE) + int'(FA_CMP_LOWER); + localparam FA_TAG_END_LOWER = 5 + int'(pt.BTB_TOFFSET_SIZE); - localparam TAG_START=BTB_DWIDTH-1; - localparam PC4=4; - localparam BOFF=3; - localparam CALL=2; - localparam RET=1; - localparam BV=0; + localparam TAG_START = BTB_DWIDTH - 1; + localparam PC4 = 4; + localparam BOFF = 3; + localparam CALL = 2; + localparam RET = 1; + localparam BV = 0; - localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH; - localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH; - localparam NUM_BHT_LOOP_INNER_HI = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI; - localparam NUM_BHT_LOOP_OUTER_LO = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO; - localparam BHT_NO_ADDR_MATCH = ( pt.BHT_ARRAY_DEPTH <= 16 ); + localparam LRU_SIZE = pt.BTB_ARRAY_DEPTH; + localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16) ? 
16 : pt.BHT_ARRAY_DEPTH; + localparam NUM_BHT_LOOP_INNER_HI = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI; + localparam NUM_BHT_LOOP_OUTER_LO = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO; + localparam BHT_NO_ADDR_MATCH = (pt.BHT_ARRAY_DEPTH <= 16); - logic exu_mp_valid_write; - logic exu_mp_ataken; - logic exu_mp_valid; // conditional branch mispredict - logic exu_mp_boffset; // branch offsett - logic exu_mp_pc4; // branch is a 4B inst - logic exu_mp_call; // branch is a call inst - logic exu_mp_ret; // branch is a ret inst - logic exu_mp_ja; // branch is a jump always - logic [1:0] exu_mp_hist; // new history - logic [11:0] exu_mp_tgt; // target offset - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address - logic dec_tlu_br0_v_wb; // WB stage history update - logic [1:0] dec_tlu_br0_hist_wb; // new history - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr - logic dec_tlu_br0_error_wb; // error; invalidate bank - logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg - logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_wb; + logic exu_mp_valid_write; + logic exu_mp_ataken; + logic exu_mp_valid; // conditional branch mispredict + logic exu_mp_boffset; // branch offsett + logic exu_mp_pc4; // branch is a 4B inst + logic exu_mp_call; // branch is a call inst + logic exu_mp_ret; // branch is a ret inst + logic exu_mp_ja; // branch is a jump always + logic [1:0] exu_mp_hist; // new history + logic [11:0] exu_mp_tgt; // target offset + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address + logic dec_tlu_br0_v_wb; // WB stage history update + logic [1:0] dec_tlu_br0_hist_wb; // new history + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr + logic dec_tlu_br0_error_wb; // error; invalidate bank + logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg + logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_wb; - logic use_mp_way, use_mp_way_p1; - 
logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in; - logic [pt.RET_STACK_SIZE-1:0] rsenable; + logic use_mp_way, use_mp_way_p1; + logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in; + logic [pt.RET_STACK_SIZE-1:0] rsenable; - logic [11:0] btb_rd_tgt_f; - logic btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f; - logic [1:1] bp_total_branch_offset_f; + logic [ 11:0] btb_rd_tgt_f; + logic btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f; + logic [ 1:1] bp_total_branch_offset_f; - logic [31:1] bp_btb_target_adder_f; - logic [31:1] bp_rs_call_target_f; - logic rs_push, rs_pop, rs_hold; - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f; - logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f; - logic [BTB_DWIDTH-1:0] btb_wr_data; - logic btb_wr_en_way0, btb_wr_en_way1; + logic [31:1] bp_btb_target_adder_f; + logic [31:1] bp_rs_call_target_f; + logic rs_push, rs_pop, rs_hold; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f; + logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f; + logic [BTB_DWIDTH-1:0] btb_wr_data; + logic btb_wr_en_way0, btb_wr_en_way1; - logic dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb; - logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_error_addr_wb; - logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f; + logic dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb; + logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_error_addr_wb; + logic + branch_error_collision_f, + fetch_mp_collision_f, + branch_error_collision_p1_f, + fetch_mp_collision_p1_f; - logic branch_error_bank_conflict_f; - logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr; - logic [1:0] num_valids; - logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns, - fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0, - mp_wrindex_dec, mp_wrlru_b0; - logic btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f; - 
logic tag_match_way0_f, tag_match_way1_f; - logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f; - logic [1:0] bht_valid_f, bht_force_taken_f; + logic branch_error_bank_conflict_f; + logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr; + logic [1:0] num_valids; + logic [LRU_SIZE-1:0] + btb_lru_b0_f, + btb_lru_b0_hold, + btb_lru_b0_ns, + fetch_wrindex_dec, + fetch_wrindex_p1_dec, + fetch_wrlru_b0, + fetch_wrlru_p1_b0, + mp_wrindex_dec, + mp_wrlru_b0; + logic btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f; + logic tag_match_way0_f, tag_match_way1_f; + logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f; + logic [1:0] bht_valid_f, bht_force_taken_f; - logic leak_one_f, leak_one_f_d1; + logic leak_one_f, leak_one_f_d1; - logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_out ; + logic [ LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_out; - logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_out ; + logic [ LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_out; - logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ; - logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ; + logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f; + logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f; - logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ; - logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ; + logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f; + logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f; - logic [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f; + logic [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f; - logic final_h; - logic btb_fg_crossing_f; - logic middle_of_bank; + logic final_h; + logic btb_fg_crossing_f; + logic middle_of_bank; - logic [1:0] bht_vbank0_rd_data_f, bht_vbank1_rd_data_f; - logic branch_error_bank_conflict_p1_f; - logic tag_match_way0_p1_f, tag_match_way1_p1_f; + logic [1:0] bht_vbank0_rd_data_f, bht_vbank1_rd_data_f; + logic branch_error_bank_conflict_p1_f; + 
logic tag_match_way0_p1_f, tag_match_way1_p1_f; - logic [1:0] btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f; - logic [31:2] fetch_addr_p1_f; + logic [1:0] + btb_vlru_rd_f, + fetch_start_f, + tag_match_vway1_expanded_f, + tag_match_way0_expanded_p1_f, + tag_match_way1_expanded_p1_f; + logic [31:2] fetch_addr_p1_f; - logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb; - logic [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f; + logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb; + logic [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f; - logic [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f; + logic [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f; - logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f; + logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f; - logic [1:0] bht_bank0_rd_data_f; - logic [1:0] bht_bank1_rd_data_f; - logic [1:0] bht_bank0_rd_data_p1_f; - genvar j, i; + logic [1:0] bht_bank0_rd_data_f; + logic [1:0] bht_bank1_rd_data_f; + logic [1:0] bht_bank0_rd_data_p1_f; + genvar j, i; - assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // conditional branch mispredict - assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset - assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst - assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst - assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst - assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always - assign exu_mp_way = exu_mp_pkt.way; // repl way - assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history - assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset - assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address - assign exu_mp_ataken = exu_mp_pkt.ataken; + assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // conditional branch mispredict + assign 
exu_mp_boffset = exu_mp_pkt.boffset; // branch offset + assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst + assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst + assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst + assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always + assign exu_mp_way = exu_mp_pkt.way; // repl way + assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history + assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0]; // target offset + assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address + assign exu_mp_ataken = exu_mp_pkt.ataken; - assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid; - assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_r_pkt.hist[1:0]; - assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error; - assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle; - assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way; - assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error; - assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0]; + assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid; + assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_r_pkt.hist[1:0]; + assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error; + assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle; + assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way; + assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error; + assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0]; - // ---------------------------------------------------------------------- - // READ - // ---------------------------------------------------------------------- + // 
---------------------------------------------------------------------- + // READ + // ---------------------------------------------------------------------- - // hash the incoming fetch PC, first guess at hashing algorithm - el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + // hash the incoming fetch PC, first guess at hashing algorithm + el2_btb_addr_hash #( + .pt(pt) + ) f1hash ( + .pc (ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), + .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); - assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1; - el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])); + assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1; + el2_btb_addr_hash #( + .pt(pt) + ) f1hash_p1 ( + .pc (fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), + .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) + ); - assign btb_sel_f[1] = ~bht_dir_f[0]; - assign btb_sel_f[0] = bht_dir_f[0]; + assign btb_sel_f[1] = ~bht_dir_f[0]; + assign btb_sel_f[0] = bht_dir_f[0]; - assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]}; + assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]}; - // Errors colliding with fetches must kill the btb/bht hit. + // Errors colliding with fetches must kill the btb/bht hit. 
- assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); - assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); + assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); + assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]); - assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb; - assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb; + assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb; + assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb; - // set on leak one, hold until next flush without leak one - assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb); + // set on leak one, hold until next flush without leak one + assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb); -logic exu_flush_final_d1; + logic exu_flush_final_d1; - if(!pt.BTB_FULLYA) begin - assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + if (!pt.BTB_FULLYA) begin + assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & exu_mp_valid & ifc_fetch_req_f & (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) ); - assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] 
== fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & exu_mp_valid & ifc_fetch_req_f & (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]) ); - // 2 -way SA, figure out the way hit and mux accordingly - assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + // 2 -way SA, figure out the way hit and mux accordingly + assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f; - assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & + assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) & ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f; - assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f; - assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & + assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) & ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f; - // Both ways could hit, use the offset bit to reorder + // Both ways could hit, use the offset bit to reorder - assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f & 
(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]), - tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])}; + assign tag_match_way0_expanded_f[1:0] = { + tag_match_way0_f & (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]), + tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]) + }; - assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f & (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]), - tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])}; + assign tag_match_way1_expanded_f[1:0] = { + tag_match_way1_f & (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]), + tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]) + }; - assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f & (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]), - tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])}; + assign tag_match_way0_expanded_p1_f[1:0] = { + tag_match_way0_p1_f & (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]), + tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]) + }; - assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f & (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]), - tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])}; + assign tag_match_way1_expanded_p1_f[1:0] = { + tag_match_way1_p1_f & (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]), + tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]) + }; - assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0]; - assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0]; + assign wayhit_f[1:0] = 
tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0]; + assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0]; - assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) | + assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) | ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) ); - assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) | + assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) | ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) ); - assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) | + assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) | ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) ); - // virtual bank order + // virtual bank order - assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) | + assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) | ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) ); - assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) | + assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( 
({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) | ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) ); - assign way_raw[1:0] = tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]); + assign way_raw[1:0] = tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]); - // -------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------- - // update lru - // mp + // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- + // update lru + // mp - // create a onehot lru write vector - assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + // create a onehot lru write vector + assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - // fetch - assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + // fetch + assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}}; + assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}}; - assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0]; + assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0]; - // Forward the mp lru information to the 
fetch, avoids multiple way hits later - assign use_mp_way = fetch_mp_collision_f; - assign use_mp_way_p1 = fetch_mp_collision_p1_f; + // Forward the mp lru information to the fetch, avoids multiple way hits later + assign use_mp_way = fetch_mp_collision_f; + assign use_mp_way_p1 = fetch_mp_collision_p1_f; - assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f; + assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f; - assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & + assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{lru_update_valid_f}}; - assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] & + assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] & {LRU_SIZE{lru_update_valid_f}}; - assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) | + assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) | (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) | (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) | (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) ); - assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); + assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); - assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); + assign btb_lru_rd_p1_f = use_mp_way_p1 ? 
exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]); - // rotated - assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) | + // rotated + assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) | ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f})); - assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) | + assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) | ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) ); - rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid), - .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]), - .dout(btb_lru_b0_f[(LRU_SIZE)-1:0])); + rvdffe #(LRU_SIZE) btb_lru_ff ( + .*, + .en (ifc_fetch_req_f | exu_mp_valid), + .din (btb_lru_b0_ns[(LRU_SIZE)-1:0]), + .dout(btb_lru_b0_f[(LRU_SIZE)-1:0]) + ); - end // if (!pt.BTB_FULLYA) - // Detect end of cache line and mask as needed - logic eoc_near; - logic eoc_mask; - assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3]; - assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1])); + end // if (!pt.BTB_FULLYA) + // Detect end of cache line and mask as needed + logic eoc_near; + logic eoc_mask; + assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3]; + assign eoc_mask = ~eoc_near | (|(~ifc_fetch_addr_f[2:1])); - // -------------------------------------------------------------------------------- - // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- + // -------------------------------------------------------------------------------- - // mux out critical hit bank for pc computation - // This is only useful for the first taken branch in the fetch group - logic [16:1] btb_sel_data_f; + // mux out critical hit bank for pc computation + // 
This is only useful for the first taken branch in the fetch group + logic [16:1] btb_sel_data_f; - assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5]; - assign btb_rd_pc4_f = btb_sel_data_f[4]; - assign btb_rd_call_f = btb_sel_data_f[2]; - assign btb_rd_ret_f = btb_sel_data_f[1]; + assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5]; + assign btb_rd_pc4_f = btb_sel_data_f[4]; + assign btb_rd_call_f = btb_sel_data_f[2]; + assign btb_rd_ret_f = btb_sel_data_f[1]; - assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) | + assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) | ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) ); - logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw; + logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw; - // a valid taken target needs to kill the next fetch as we compute the target address - assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable; + // a valid taken target needs to kill the next fetch as we compute the target address + assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable; - // Don't put calls/rets/ja in the predictor, force the bht taken instead - assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]), - (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])}; + // Don't put calls/rets/ja in the predictor, force the bht taken instead + assign bht_force_taken_f[1:0] = { + (btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]), + (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET]) + }; - // taken and valid, otherwise, branch errors must clear the bht - assign bht_valid_f[1:0] = vwayhit_f[1:0]; + // taken and valid, otherwise, branch errors must clear the bht + assign bht_valid_f[1:0] = vwayhit_f[1:0]; - assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) | 
+ assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) | ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) ); - assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) | + assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) | ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) ); - assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1], - (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]}; + assign bht_dir_f[1:0] = { + (bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1], + (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0] + }; - assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f; + assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f; - // Branch prediction info is sent with the 2byte lane associated with the end of the branch. - // Cases - // BANK1 BANK0 - // ------------------------------- - // | : | : | - // ------------------------------- - // <------------> : PC4 branch, offset, should be in B1 (indicated on [2]) - // <------------> : PC4 branch, no offset, indicate PC4, VALID, HIST on [1] - // <------------> : PC4 branch, offset, indicate PC4, VALID, HIST on [0] - // <------> : PC2 branch, offset, indicate VALID, HIST on [1] - // <------> : PC2 branch, no offset, indicate VALID, HIST on [0] - // + // Branch prediction info is sent with the 2byte lane associated with the end of the branch. 
+ // Cases + // BANK1 BANK0 + // ------------------------------- + // | : | : | + // ------------------------------- + // <------------> : PC4 branch, offset, should be in B1 (indicated on [2]) + // <------------> : PC4 branch, no offset, indicate PC4, VALID, HIST on [1] + // <------------> : PC4 branch, offset, indicate PC4, VALID, HIST on [0] + // <------> : PC2 branch, offset, indicate VALID, HIST on [1] + // <------> : PC2 branch, no offset, indicate VALID, HIST on [0] + // - assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1], + assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1], bht_vbank0_rd_data_f[1]}; - assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0], - bht_vbank0_rd_data_f[0]}; + assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0], bht_vbank0_rd_data_f[0]}; - assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4], - vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]}; + assign pc4_raw[1:0] = { + vwayhit_f[1] & btb_vbank1_rd_data_f[PC4], vwayhit_f[0] & btb_vbank0_rd_data_f[PC4] + }; - assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET], - vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]}; + assign pret_raw[1:0] = { + vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET], + vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET] + }; - // GHR + // GHR // count the valids with masking based on first taken - assign num_valids[1:0] = countones(bht_valid_f[1:0]); + assign num_valids[1:0] = countones(bht_valid_f[1:0]); - // Note that the following property holds - // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0) - // Num valid branches What new GHR must be - // 2 0H - // 1 PH - // 0 PP + // Note that the following property holds + // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0) + // Num valid branches What new GHR must be + // 2 0H + // 1 PH + // 0 PP - assign 
final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]); + assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]); - assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = ( + assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H - ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH - ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP + ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH + ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]})); // PP - logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr; - assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0]; + logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr; + assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0]; - assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) | + assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) | ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) | ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0])); - rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*, - .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}), - .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]})); + rvdffie #( + .WIDTH(pt.BHT_GHR_SIZE + 3), + .OVERRIDE(1) + ) fetchghr ( + .*, + .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}), + .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}) + ); - assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0]; + assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = 
fghr[pt.BHT_GHR_SIZE-1:0]; - assign ifu_bp_way_f[1:0] = way_raw[1:0]; - assign ifu_bp_hist1_f[1:0] = hist1_raw[1:0]; - assign ifu_bp_hist0_f[1:0] = hist0_raw[1:0]; - assign ifu_bp_pc4_f[1:0] = pc4_raw[1:0]; + assign ifu_bp_way_f[1:0] = way_raw[1:0]; + assign ifu_bp_hist1_f[1:0] = hist1_raw[1:0]; + assign ifu_bp_hist0_f[1:0] = hist0_raw[1:0]; + assign ifu_bp_pc4_f[1:0] = pc4_raw[1:0]; - assign ifu_bp_valid_f[1:0] = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}}; - assign ifu_bp_ret_f[1:0] = pret_raw[1:0]; + assign ifu_bp_valid_f[1:0] = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}}; + assign ifu_bp_ret_f[1:0] = pret_raw[1:0]; - // compute target - // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk + // compute target + // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk -// .i 5 -// .o 3 -// .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f -// .ob bloc_f[1] bloc_f[0] use_fa_plus -// .type fr -// -// -// ## rotdir[1:0] fs pc4 off fapl -// -1 01 - 01 0 -// 10 01 - 10 0 -// -// -1 10 - 10 0 -// 10 10 0 01 1 -// 10 10 1 01 0 -logic [1:0] bloc_f; -logic use_fa_plus; -assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0] - & fetch_start_f[0]); -assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0] - & ~fetch_start_f[0]); -assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f); + // .i 5 + // .o 3 + // .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f + // .ob bloc_f[1] bloc_f[0] use_fa_plus + // .type fr + // + // + // ## rotdir[1:0] fs pc4 off fapl + // -1 01 - 01 0 + // 10 01 - 10 0 + // + // -1 10 - 10 0 + // 10 10 0 01 1 + // 10 10 1 01 0 + logic [1:0] bloc_f; + logic use_fa_plus; + assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0] & fetch_start_f[0]); + assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | 
(~bht_dir_f[0] & ~fetch_start_f[0]); + assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f); - assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f; + assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f; - assign bp_total_branch_offset_f = bloc_f[1] ^ btb_rd_pc4_f; + assign bp_total_branch_offset_f = bloc_f[1] ^ btb_rd_pc4_f; - logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior; - rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2])); + logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior; + rvdfflie #( + .WIDTH(30), + .LEFT (19) + ) faddrf_ff ( + .*, + .en (ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), + .din (ifc_fetch_addr_f[31:2]), + .dout(ifc_fetch_adder_prior[31:2]) + ); - assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0]; + assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0]; - assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) | + assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) | ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) | ({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2])); - rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), - .offset(btb_rd_tgt_f[11:0]), - .dout(bp_btb_target_adder_f[31:1]) - ); - // mux in the return stack address here for a predicted return assuming the RS is valid, quite if no prediction - assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) | + rvbradder predtgt_addr ( + .pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), + .offset(btb_rd_tgt_f[11:0]), + .dout(bp_btb_target_adder_f[31:1]) + ); + // mux in the return stack address here for a predicted return assuming the RS is valid, quite if no prediction + assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & 
~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) | ({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) ); - // ---------------------------------------------------------------------- - // Return Stack - // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + // Return Stack + // ---------------------------------------------------------------------- - rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), - .offset({11'b0, ~btb_rd_pc4_f}), - .dout(bp_rs_call_target_f[31:1]) - ); + rvbradder rs_addr ( + .pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}), + .offset({11'b0, ~btb_rd_pc4_f}), + .dout(bp_rs_call_target_f[31:1]) + ); - assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f); - assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f); - assign rs_hold = ~rs_push & ~rs_pop; + assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f); + assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f); + assign rs_hold = ~rs_push & ~rs_pop; - // Fetch based (bit 0 is a valid) - assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid + // Fetch based (bit 0 is a valid) + assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid ({32{rs_pop}} & rets_out[1][31:0]) ); - assign rsenable[0] = ~rs_hold; + assign rsenable[0] = ~rs_hold; - for (i=0; i0) begin - assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) | + // for the last entry in the stack, we don't have a pop position + if (i == pt.RET_STACK_SIZE - 1) begin + assign rets_in[i][31:0] = rets_out[i-1][31:0]; + assign rsenable[i] = rs_push; + end else if (i > 0) begin + assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) | ({32{rs_pop}} & 
rets_out[i+1][31:0]) ); - assign rsenable[i] = rs_push | rs_pop; - end - rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0])); + assign rsenable[i] = rs_push | rs_pop; + end + rvdffe #(32) rets_ff ( + .*, + .en (rsenable[i]), + .din (rets_in[i][31:0]), + .dout(rets_out[i][31:0]) + ); - end : retstack + end : retstack - // ---------------------------------------------------------------------- - // WRITE - // ---------------------------------------------------------------------- + // ---------------------------------------------------------------------- + // WRITE + // ---------------------------------------------------------------------- - assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb; + assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb; - assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign dec_tlu_way_wb = dec_tlu_br0_way_wb; + assign dec_tlu_way_wb = dec_tlu_br0_way_wb; - assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb; + assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb; - assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]; + assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0]; - if(!pt.BTB_FULLYA) begin + if (!pt.BTB_FULLYA) begin - if(pt.BTB_BTAG_FOLD) begin : btbfold - el2_btb_tag_hash_fold #(.pt(pt)) rdtagf (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), - .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); - el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), - .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); - end - else begin - el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), - 
.pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); - el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), - .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]})); - end + if (pt.BTB_BTAG_FOLD) begin : btbfold + el2_btb_tag_hash_fold #( + .pt(pt) + ) rdtagf ( + .hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), + .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}) + ); + el2_btb_tag_hash_fold #( + .pt(pt) + ) rdtagp1f ( + .hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), + .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}) + ); + end else begin + el2_btb_tag_hash #( + .pt(pt) + ) rdtagf ( + .hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]), + .pc({ + ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] + }) + ); + el2_btb_tag_hash #( + .pt(pt) + ) rdtagp1f ( + .hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]), + .pc({ + fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] + }) + ); + end - assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | + assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | ({{~dec_tlu_way_wb & dec_tlu_error_wb}})); - assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | + assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) | ({{dec_tlu_way_wb & dec_tlu_error_wb}})); - assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; + assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? 
btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; - assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) | + assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) | ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1}; - end // if (!pt.BTB_FULLYA) + end // if (!pt.BTB_FULLYA) - assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset, - exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ; + assign btb_wr_data[BTB_DWIDTH-1:0] = { + btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], + exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], + exu_mp_pc4, + exu_mp_boffset, + exu_mp_call | exu_mp_ja, + exu_mp_ret | exu_mp_ja, + btb_valid + }; - assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid; - logic [1:0] bht_wr_data0, bht_wr_data2; - logic [1:0] bht_wr_en0, bht_wr_en2; + assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid; + logic [1:0] bht_wr_data0, bht_wr_data2; + logic [1:0] bht_wr_en0, bht_wr_en2; - assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset; - assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank}; - assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ; + assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset; + assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank}; + assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb}; - // Experiments show this is the best priority scheme for same bank/index writes at the same time. 
- assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority - assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority + // Experiments show this is the best priority scheme for same bank/index writes at the same time. + assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority + assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority - logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2; + logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] + bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2; - logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f; - el2_btb_ghr_hash #(.pt(pt)) mpghrhs (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); - el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); - el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); - el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO])); + logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] + mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f; + el2_btb_ghr_hash #( + .pt(pt) + ) mpghrhs ( + .hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), + .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), + .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) + ); + el2_btb_ghr_hash #( + .pt(pt) + ) br0ghrhs ( + .hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), + .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), + .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) + ); + 
el2_btb_ghr_hash #( + .pt(pt) + ) fghrhs ( + .hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), + .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), + .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) + ); + el2_btb_ghr_hash #( + .pt(pt) + ) fghrhs_p1 ( + .hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), + .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), + .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) + ); - assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; - assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; - assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; - assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; + assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]; - // ---------------------------------------------------------------------- - // Structures. Using FLOPS - // ---------------------------------------------------------------------- - // BTB - // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid + // ---------------------------------------------------------------------- + // Structures. 
Using FLOPS + // ---------------------------------------------------------------------- + // BTB + // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid - if(!pt.BTB_FULLYA) begin + if (!pt.BTB_FULLYA) begin - for (j=0 ; j direction, strength - // - //----------------------------------------------------------------------------- + //----------------------------------------------------------------------------- + // BHT + // 2 bit Entry -> direction, strength + // + //----------------------------------------------------------------------------- -// logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ; - logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0] bht_bank_rd_data_out ; - logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken ; - logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk ; -// logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ; + // logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ; + logic [1:0][ pt.BHT_ARRAY_DEPTH-1:0][1:0] bht_bank_rd_data_out; + logic [1:0][(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken; + logic [1:0][(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk; + // logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ; - for ( i=0; i<2; i++) begin : BANKS - wire[pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1; - assign wr0 = bht_wr_en0[i] << bht_wr_addr0; - assign wr1 = bht_wr_en2[i] << bht_wr_addr2; - for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP - assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | + for (i = 0; i < 2; i++) begin : BANKS + wire [pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1; + assign wr0 = bht_wr_en0[i] << bht_wr_addr0; + assign wr1 = bht_wr_en2[i] << bht_wr_addr2; + for (genvar k = 0; k < (pt.BHT_ARRAY_DEPTH) / NUM_BHT_LOOP; k++) begin 
: BHT_CLK_GROUP + assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) | (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)); - rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); + rvclkhdr bht_bank_grp_cgc ( + .en(bht_bank_clken[i][k]), + .l1clk(bht_bank_clk[i][k]), + .* + ); - for (j=0 ; j cdecode.e + // 1) coredecode -in cdecode > cdecode.e -// 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations + // 2) espresso -Dso -oeqntott cdecode.e | addassign > compress_equations -// to generate the legal (16b compressed instruction is legal) equation below: + // to generate the legal (16b compressed instruction is legal) equation below: -// 1) coredecode -in cdecode -legal > clegal.e + // 1) coredecode -in cdecode -legal > clegal.e -// 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation + // 2) espresso -Dso -oeqntott clegal.e | addassign > clegal_equation -// espresso decodes -assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | ( + // espresso decodes + assign rdrd = (!i[14]&i[6]&i[1]) | (!i[15]&i[14]&i[11]&i[0]) | (!i[14]&i[5]&i[1]) | ( !i[15]&i[14]&i[10]&i[0]) | (!i[14]&i[4]&i[1]) | (!i[15]&i[14]&i[9] &i[0]) | (!i[14]&i[3]&i[1]) | (!i[15]&i[14]&!i[8]&i[0]) | (!i[14] &i[2]&i[1]) | (!i[15]&i[14]&i[7]&i[0]) | (!i[15]&i[1]) | (!i[15] &!i[13]&i[0]); -assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14] + assign rdrs1 = (!i[14]&i[12]&i[11]&i[1]) | (!i[14]&i[12]&i[10]&i[1]) | (!i[14] &i[12]&i[9]&i[1]) | (!i[14]&i[12]&i[8]&i[1]) | (!i[14]&i[12]&i[7] &i[1]) | (!i[14]&!i[12]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] &i[12]&i[6]&i[1]) | (!i[14]&i[12]&i[5]&i[1]) | (!i[14]&i[12]&i[4] &i[1]) | (!i[14]&i[12]&i[3]&i[1]) | (!i[14]&i[12]&i[2]&i[1]) | ( !i[15]&!i[14]&!i[13]&i[0]) | (!i[15]&!i[14]&i[1]); -assign rs2rs2 = (i[15]&i[6]&i[1]) | 
(i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | ( + assign rs2rs2 = (i[15]&i[6]&i[1]) | (i[15]&i[5]&i[1]) | (i[15]&i[4]&i[1]) | ( i[15]&i[3]&i[1]) | (i[15]&i[2]&i[1]) | (i[15]&i[14]&i[1]); -assign rdprd = (i[15]&!i[14]&!i[13]&i[0]); + assign rdprd = (i[15] & !i[14] & !i[13] & i[0]); -assign rdprs1 = (i[15]&!i[13]&i[0]) | (i[15]&i[14]&i[0]) | (i[14]&!i[1]&!i[0]); + assign rdprs1 = (i[15] & !i[13] & i[0]) | (i[15] & i[14] & i[0]) | (i[14] & !i[1] & !i[0]); -assign rs2prs2 = (i[15]&!i[14]&!i[13]&i[11]&i[10]&i[0]) | (i[15]&!i[1]&!i[0]); + assign rs2prs2 = (i[15] & !i[14] & !i[13] & i[11] & i[10] & i[0]) | (i[15] & !i[1] & !i[0]); -assign rs2prd = (!i[15]&!i[1]&!i[0]); + assign rs2prd = (!i[15] & !i[1] & !i[0]); -assign uimm9_2 = (!i[14]&!i[1]&!i[0]); + assign uimm9_2 = (!i[14] & !i[1] & !i[0]); -assign ulwimm6_2 = (!i[15]&i[14]&!i[1]&!i[0]); + assign ulwimm6_2 = (!i[15] & i[14] & !i[1] & !i[0]); -assign ulwspimm7_2 = (!i[15]&i[14]&i[1]); + assign ulwspimm7_2 = (!i[15] & i[14] & i[1]); -assign rdeq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + assign rdeq2 = (!i[15] & i[14] & i[13] & !i[11] & !i[10] & !i[9] & i[8] & !i[7]); -assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + assign rdeq1 = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] &!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] &!i[2]&i[1]) | (!i[15]&!i[14]&i[13]); -assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14] + assign rs1eq2 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]) | (i[14] &i[1]) | (!i[14]&!i[1]&!i[0]); -assign sbroffset8_1 = (i[15]&i[14]&i[0]); + assign sbroffset8_1 = (i[15] & i[14] & i[0]); -assign simm9_4 = (!i[15]&i[14]&i[13]&!i[11]&!i[10]&!i[9]&i[8]&!i[7]); + assign simm9_4 = (!i[15] & i[14] & i[13] & !i[11] & !i[10] & !i[9] & i[8] & !i[7]); -assign 
simm5_0 = (!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (!i[15]&!i[13]&i[0]); + assign simm5_0 = (!i[14] & !i[13] & i[11] & !i[10] & i[0]) | (!i[15] & !i[13] & i[0]); -assign sjaloffset11_1 = (!i[14]&i[13]); + assign sjaloffset11_1 = (!i[14] & i[13]); -assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | ( + assign sluimm17_12 = (!i[15]&i[14]&i[13]&i[7]) | (!i[15]&i[14]&i[13]&!i[8]) | ( !i[15]&i[14]&i[13]&i[9]) | (!i[15]&i[14]&i[13]&i[10]) | (!i[15]&i[14] &i[13]&i[11]); -assign uimm5_0 = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (!i[15]&!i[14]&i[1]); + assign uimm5_0 = (i[15] & !i[14] & !i[13] & !i[11] & i[0]) | (!i[15] & !i[14] & i[1]); -assign uswimm6_2 = (i[15]&!i[1]&!i[0]); + assign uswimm6_2 = (i[15] & !i[1] & !i[0]); -assign uswspimm7_2 = (i[15]&i[14]&i[1]); + assign uswspimm7_2 = (i[15] & i[14] & i[1]); -assign o[31] = 1'b0; + assign o[31] = 1'b0; -assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14] + assign o[30] = (i[15]&!i[14]&!i[13]&i[10]&!i[6]&!i[5]&i[0]) | (i[15]&!i[14] &!i[13]&!i[11]&i[10]&i[0]); -assign o[29] = 1'b0; + assign o[29] = 1'b0; -assign o[28] = 1'b0; + assign o[28] = 1'b0; -assign o[27] = 1'b0; + assign o[27] = 1'b0; -assign o[26] = 1'b0; + assign o[26] = 1'b0; -assign o[25] = 1'b0; + assign o[25] = 1'b0; -assign o[24] = 1'b0; + assign o[24] = 1'b0; -assign o[23] = 1'b0; + assign o[23] = 1'b0; -assign o[22] = 1'b0; + assign o[22] = 1'b0; -assign o[21] = 1'b0; + assign o[21] = 1'b0; -assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4] + assign o[20] = (!i[14]&i[12]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[6]&!i[5]&!i[4] &!i[3]&!i[2]&i[1]); -assign o[19] = 1'b0; + assign o[19] = 1'b0; -assign o[18] = 1'b0; + assign o[18] = 1'b0; -assign o[17] = 1'b0; + assign o[17] = 1'b0; -assign o[16] = 1'b0; + assign o[16] = 1'b0; -assign o[15] = 1'b0; + assign o[15] = 1'b0; -assign o[14] = (i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10] + assign o[14] = 
(i[15]&!i[14]&!i[13]&!i[11]&i[0]) | (i[15]&!i[14]&!i[13]&!i[10] &i[0]) | (i[15]&!i[14]&!i[13]&i[6]&i[0]) | (i[15]&!i[14]&!i[13]&i[5] &i[0]); -assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13] + assign o[13] = (i[15]&!i[14]&!i[13]&i[11]&!i[10]&i[0]) | (i[15]&!i[14]&!i[13] &i[11]&i[6]&i[0]) | (i[14]&!i[0]); -assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11] + assign o[12] = (i[15]&!i[14]&!i[13]&i[6]&i[5]&i[0]) | (i[15]&!i[14]&!i[13]&!i[11] &i[0]) | (i[15]&!i[14]&!i[13]&!i[10]&i[0]) | (!i[15]&!i[14]&i[1]) | ( i[15]&i[14]&i[13]); -assign o[11] = 1'b0; + assign o[11] = 1'b0; -assign o[10] = 1'b0; + assign o[10] = 1'b0; -assign o[9] = 1'b0; + assign o[9] = 1'b0; -assign o[8] = 1'b0; + assign o[8] = 1'b0; -assign o[7] = 1'b0; + assign o[7] = 1'b0; -assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | ( + assign o[6] = (i[15]&!i[14]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&!i[0]) | (!i[14]&i[13]) | ( i[15]&i[14]&i[0]); -assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | ( + assign o[5] = (i[15]&!i[0]) | (i[15]&i[11]&i[10]) | (i[13]&!i[8]) | (i[13]&i[7]) | ( i[13]&i[9]) | (i[13]&i[10]) | (i[13]&i[11]) | (!i[14]&i[13]) | ( i[15]&i[14]); -assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14] + assign o[4] = (!i[14]&!i[11]&!i[10]&!i[9]&!i[8]&!i[7]&!i[0]) | (!i[15]&!i[14] &!i[0]) | (!i[14]&i[6]&!i[0]) | (!i[15]&i[14]&i[0]) | (!i[14]&i[5] &!i[0]) | (!i[14]&i[4]&!i[0]) | (!i[14]&!i[13]&i[0]) | (!i[14]&i[3] &!i[0]) | (!i[14]&i[2]&!i[0]); -assign o[3] = (!i[14]&i[13]); + assign o[3] = (!i[14] & i[13]); -assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] + assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] &i[12]&i[10]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[9] &!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14]&i[12]&i[8]&!i[6]&!i[5] &!i[4]&!i[3]&!i[2]&i[1]) | 
(!i[14]&i[12]&i[7]&!i[6]&!i[5]&!i[4]&!i[3] @@ -356,13 +353,13 @@ assign o[2] = (!i[14]&i[12]&i[11]&!i[6]&!i[5]&!i[4]&!i[3]&!i[2]&i[1]) | (!i[14] &!i[0]) | (!i[15]&i[13]&!i[8]) | (!i[15]&i[13]&i[7]) | (!i[15]&i[13] &i[9]) | (!i[15]&i[13]&i[10]) | (!i[15]&i[13]&i[11]) | (!i[14]&i[13]); -// 32b instruction has lower two bits 2'b11 + // 32b instruction has lower two bits 2'b11 -assign o[1] = 1'b1; + assign o[1] = 1'b1; -assign o[0] = 1'b1; + assign o[0] = 1'b1; -assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | ( + assign legal = (!i[13]&!i[12]&i[11]&i[1]&!i[0]) | (!i[13]&!i[12]&i[6]&i[1]&!i[0]) | ( !i[15]&!i[13]&i[11]&!i[1]) | (!i[13]&!i[12]&i[5]&i[1]&!i[0]) | ( !i[13]&!i[12]&i[10]&i[1]&!i[0]) | (!i[15]&!i[13]&i[6]&!i[1]) | ( i[15]&!i[12]&!i[1]&i[0]) | (!i[13]&!i[12]&i[9]&i[1]&!i[0]) | (!i[12] diff --git a/Flow/design/ifu/el2_ifu_ic_mem.sv b/Flow/design/ifu/el2_ifu_ic_mem.sv index 7e9765a..9b40eed 100644 --- a/Flow/design/ifu/el2_ifu_ic_mem.sv +++ b/Flow/design/ifu/el2_ifu_ic_mem.sv @@ -18,227 +18,240 @@ // ICACHE DATA & TAG MODULE WRAPPER // ///////////////////////////////////////////////////// module el2_ifu_ic_mem -import el2_pkg::*; - #( -`include "el2_param.vh" - ) - ( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic rst_l, // reset, active low - input logic clk_override, // Override non-functional clock gating - input logic dec_tlu_core_ecc_disable, // Disable ECC checking + import el2_pkg::*; +#( + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. 
For flops without second clock header built in. + input logic rst_l, // reset, active low + input logic clk_override, // Override non-functional clock gating + input logic dec_tlu_core_ecc_disable, // Disable ECC checking - input logic [31:1] ic_rw_addr, - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en , // Which way to write - input logic ic_rd_en , // Read enable - input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. - input logic ic_debug_rd_en, // Icache debug rd - input logic ic_debug_wr_en, // Icache debug wr - input logic ic_debug_tag_array, // Debug tag array - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. - input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. - input logic ic_sel_premux_data, // Select the pre_muxed data + input logic [31:1] ic_rw_addr, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Which way to write + input logic ic_rd_en, // Read enable + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Select the pre_muxed data - input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC - output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - output logic [25:0] ictag_debug_rd_data,// Debug icache tag. - input logic [70:0] ic_debug_wr_data, // Debug wr cache. + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. 
With ECC + output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [25:0] ictag_debug_rd_data, // Debug icache tag. + input logic [70:0] ic_debug_wr_data, // Debug wr cache. - output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank - output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // ecc error per bank - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid from the I$ tag valid outside (in flops). - input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, // this is being driven by the top level for soc testing/etc - input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, // this is being driven by the top level for soc testing/etc + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, // ecc error per bank + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid from the I$ tag valid outside (in flops). 
+ input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, // this is being driven by the top level for soc testing/etc + input el2_ic_tag_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0] ic_tag_ext_in_pkt, // this is being driven by the top level for soc testing/etc - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // ic_rd_hit[3:0] - output logic ic_tag_perr, // Tag Parity error - input logic scan_mode // Flop scan mode control - ) ; + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // ic_rd_hit[3:0] + output logic ic_tag_perr, // Tag Parity error + input logic scan_mode // Flop scan mode control +); - EL2_IC_TAG #(.pt(pt)) ic_tag_inst - ( - .*, - .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), - .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), - .ic_rw_addr (ic_rw_addr[31:3]) - ) ; + EL2_IC_TAG #( + .pt(pt) + ) ic_tag_inst ( + .*, + .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), + .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), + .ic_rw_addr (ic_rw_addr[31:3]) + ); - EL2_IC_DATA #(.pt(pt)) ic_data_inst - ( - .*, - .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), - .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), - .ic_rw_addr (ic_rw_addr[31:1]) - ) ; + EL2_IC_DATA #( + .pt(pt) + ) ic_data_inst ( + .*, + .ic_wr_en (ic_wr_en[pt.ICACHE_NUM_WAYS-1:0]), + .ic_debug_addr(ic_debug_addr[pt.ICACHE_INDEX_HI:3]), + .ic_rw_addr (ic_rw_addr[31:1]) + ); - endmodule +endmodule ///////////////////////////////////////////////// ////// ICACHE DATA MODULE //////////////////// ///////////////////////////////////////////////// module EL2_IC_DATA -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, - input logic active_clk, - input logic rst_l, - input logic clk_override, + `include "el2_param.vh" +) ( + input logic clk, + input logic active_clk, + input logic rst_l, + input logic clk_override, - input logic [31:1] ic_rw_addr, - input logic [pt.ICACHE_NUM_WAYS-1:0]ic_wr_en, - input logic ic_rd_en, // 
Read enable + input logic [ 31:1] ic_rw_addr, + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, + input logic ic_rd_en, // Read enable - input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC - output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [70:0] ic_debug_wr_data, // Debug wr cache. - output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, - output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank - input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. - input logic ic_debug_rd_en, // Icache debug rd - input logic ic_debug_wr_en, // Icache debug wr - input logic ic_debug_tag_array, // Debug tag array - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. - input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. - input logic ic_sel_premux_data, // Select the pre_muxed data + input logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_wr_data, // Data to fill to the Icache. With ECC + output logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, + output logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // ecc error per bank + input logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + input logic ic_debug_rd_en, // Icache debug rd + input logic ic_debug_wr_en, // Icache debug wr + input logic ic_debug_tag_array, // Debug tag array + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. 
+ input logic [63:0] ic_premux_data, // Premux data to be muxed with each way of the Icache. + input logic ic_sel_premux_data, // Select the pre_muxed data - input logic [pt.ICACHE_NUM_WAYS-1:0]ic_rd_hit, - input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, // this is being driven by the top level for soc testing/etc - input logic scan_mode + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, + input el2_ic_data_ext_in_pkt_t [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_data_ext_in_pkt, // this is being driven by the top level for soc testing/etc + input logic scan_mode - ) ; +); - logic [pt.ICACHE_TAG_INDEX_LO-1:1] ic_rw_addr_ff; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_wren; //bank x ways - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_rden; //bank x ways + logic [pt.ICACHE_TAG_INDEX_LO-1:1] ic_rw_addr_ff; + logic [ pt.ICACHE_BANKS_WAY-1:0][ pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_wren; //bank x ways + logic [ pt.ICACHE_BANKS_WAY-1:0][ pt.ICACHE_NUM_WAYS-1:0] ic_b_sb_rden; //bank x ways - logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_rden; //bank - logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_rden_ff; //bank - logic [pt.ICACHE_BANKS_WAY-1:0] ic_debug_sel_sb; + logic [ pt.ICACHE_BANKS_WAY-1:0] ic_b_rden; //bank + logic [ pt.ICACHE_BANKS_WAY-1:0] ic_b_rden_ff; //bank + logic [ pt.ICACHE_BANKS_WAY-1:0] ic_debug_sel_sb; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][70:0] wb_dout ; // ways x bank - logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_sb_wr_data, ic_bank_wr_data, wb_dout_ecc_bank; - logic [pt.ICACHE_NUM_WAYS-1:0] [141:0] wb_dout_way_pre; - logic [pt.ICACHE_NUM_WAYS-1:0] [63:0] wb_dout_way, wb_dout_way_with_premux; - logic [141:0] wb_dout_ecc; + logic [ pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][70:0] wb_dout; // ways x bank + logic [pt.ICACHE_BANKS_WAY-1:0][70:0] ic_sb_wr_data, ic_bank_wr_data, wb_dout_ecc_bank; + logic [pt.ICACHE_NUM_WAYS-1:0][141:0] wb_dout_way_pre; + logic 
[pt.ICACHE_NUM_WAYS-1:0][63:0] wb_dout_way, wb_dout_way_with_premux; + logic [141:0] wb_dout_ecc; - logic [pt.ICACHE_BANKS_WAY-1:0] bank_check_en; + logic [pt.ICACHE_BANKS_WAY-1:0] bank_check_en; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_bank_way_clken; - logic [pt.ICACHE_BANKS_WAY-1:0] ic_bank_way_clken_final; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_bank_way_clken_final_up; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_WAYS-1:0] ic_bank_way_clken; + logic [pt.ICACHE_BANKS_WAY-1:0] ic_bank_way_clken_final; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_bank_way_clken_final_up; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en; // debug wr_way - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en_ff; // debug wr_way - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_wr_way_en; // debug wr_way - logic [pt.ICACHE_INDEX_HI:1] ic_rw_addr_q; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en; // debug wr_way + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_rd_way_en_ff; // debug wr_way + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_wr_way_en; // debug wr_way + logic [pt.ICACHE_INDEX_HI:1] ic_rw_addr_q; - logic [pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_INDEX_HI : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_bank_q; + logic [pt.ICACHE_BANKS_WAY-1:0] [pt.ICACHE_INDEX_HI : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_bank_q; - logic [pt.ICACHE_TAG_LO-1 : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_q_inc; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit_q; + logic [pt.ICACHE_TAG_LO-1 : pt.ICACHE_DATA_INDEX_LO] ic_rw_addr_q_inc; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit_q; - logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_sram_en; - logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_read_en; - logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_write_en; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] wb_index_hold; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en; //bank - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] 
write_bypass_en_ff; //bank - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] index_valid; //bank - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_clear_en; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match; - logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_index_only; + logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_sram_en; + logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_read_en; + logic [pt.ICACHE_BANKS_WAY-1:0] ic_b_write_en; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] wb_index_hold; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en; //bank + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en_ff; //bank + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] index_valid; //bank + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_clear_en; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match; + logic [pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_index_only; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_sram_en_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_read_en_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_write_en_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] wb_index_hold_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en_up; //bank - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en_ff_up; //bank - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] index_valid_up; //bank - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_clear_en_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_up; - logic 
[pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_index_only_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_sram_en_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_read_en_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] ic_b_write_en_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] wb_index_hold_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en_up; //bank + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] write_bypass_en_ff_up; //bank + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] index_valid_up; //bank + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_clear_en_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0][pt.ICACHE_NUM_BYPASS-1:0] ic_b_addr_match_index_only_up; - logic [pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr; - logic [pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only; + logic [pt.ICACHE_BANKS_WAY-1:0][31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr; + logic [pt.ICACHE_BANKS_WAY-1:0][31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_up; - logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_up; + logic [pt.ICACHE_NUM_WAYS-1:0][pt.ICACHE_BANKS_WAY-1:0] [31 : pt.ICACHE_DATA_INDEX_LO] ic_b_rw_addr_index_only_up; - logic ic_rd_en_with_debug; - logic ic_rw_addr_wrap, ic_cacheline_wrap_ff; - logic 
ic_debug_rd_en_ff; + logic ic_rd_en_with_debug; + logic ic_rw_addr_wrap, ic_cacheline_wrap_ff; + logic ic_debug_rd_en_ff; -//----------------------------------------------------------- -// ----------- Logic section starts here -------------------- -//----------------------------------------------------------- - assign ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_rd_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; - assign ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; + //----------------------------------------------------------- + // ----------- Logic section starts here -------------------- + //----------------------------------------------------------- + assign ic_debug_rd_way_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_rd_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; + assign ic_debug_wr_way_en[pt.ICACHE_NUM_WAYS-1:0] = {pt.ICACHE_NUM_WAYS{ic_debug_wr_en & ~ic_debug_tag_array}} & ic_debug_way[pt.ICACHE_NUM_WAYS-1:0] ; - logic end_of_cache_line; - assign end_of_cache_line = (pt.ICACHE_LN_SZ==7'h40) ? (&ic_rw_addr_q[5:4]) : ic_rw_addr_q[4]; - always_comb begin : clkens - ic_bank_way_clken = '0; + logic end_of_cache_line; + assign end_of_cache_line = (pt.ICACHE_LN_SZ == 7'h40) ? 
(&ic_rw_addr_q[5:4]) : ic_rw_addr_q[4]; + always_comb begin : clkens + ic_bank_way_clken = '0; - for ( int i=0; i> (16*iccm_rd_addr_lo_q[1]))}); - assign iccm_rd_data[63:0] = {iccm_data[63:0]}; - assign iccm_rd_data_ecc[77:0] = {iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0], iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0]}; + assign iccm_rd_data_pre[63:0] = { + iccm_bank_dout_fn[iccm_rd_addr_hi_q][31:0], + iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][31:0] + }; + assign iccm_data[63:0] = 64'({16'b0, (iccm_rd_data_pre[63:0] >> (16 * iccm_rd_addr_lo_q[1]))}); + assign iccm_rd_data[63:0] = {iccm_data[63:0]}; + assign iccm_rd_data_ecc[77:0] = { + iccm_bank_dout_fn[iccm_rd_addr_hi_q][38:0], + iccm_bank_dout_fn[iccm_rd_addr_lo_q[pt.ICCM_BANK_HI:2]][38:0] + }; -endmodule // el2_ifu_iccm_mem +endmodule // el2_ifu_iccm_mem diff --git a/Flow/design/ifu/el2_ifu_ifc_ctl.sv b/Flow/design/ifu/el2_ifu_ifc_ctl.sv index 3ab758e..fc37828 100644 --- a/Flow/design/ifu/el2_ifu_ifc_ctl.sv +++ b/Flow/design/ifu/el2_ifu_ifc_ctl.sv @@ -21,118 +21,119 @@ //******************************************************************************** module el2_ifu_ifc_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. 
- input logic rst_l, // reset enable, from core pin - input logic scan_mode, // scan + input logic rst_l, // reset enable, from core pin + input logic scan_mode, // scan - input logic ic_hit_f, // Icache hit - input logic ifu_ic_mb_empty, // Miss buffer empty + input logic ic_hit_f, // Icache hit + input logic ifu_ic_mb_empty, // Miss buffer empty - input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer - input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers + input logic ifu_fb_consume1, // Aligner consumed 1 fetch buffer + input logic ifu_fb_consume2, // Aligner consumed 2 fetch buffers - input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush - input logic exu_flush_final, // FLush - input logic [31:1] exu_flush_path_final, // Flush path + input logic dec_tlu_flush_noredir_wb, // Don't fetch on flush + input logic exu_flush_final, // FLush + input logic [31:1] exu_flush_path_final, // Flush path - input logic ifu_bp_hit_taken_f, // btb hit, select the target path - input logic [31:1] ifu_bp_btb_target_f, // predicted target PC + input logic ifu_bp_hit_taken_f, // btb hit, select the target path + input logic [31:1] ifu_bp_btb_target_f, // predicted target PC - input logic ic_dma_active, // IC DMA active, stop fetching - input logic ic_write_stall, // IC is writing, stop fetching - input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access + input logic ic_dma_active, // IC DMA active, stop fetching + input logic ic_write_stall, // IC is writing, stop fetching + input logic dma_iccm_stall_any, // force a stall in the fetch pipe for DMA ICCM access - input logic [31:0] dec_tlu_mrac_ff , // side_effect and cacheable for each region + input logic [31:0] dec_tlu_mrac_ff, // side_effect and cacheable for each region - output logic [31:1] ifc_fetch_addr_f, // fetch addr F - output logic [31:1] ifc_fetch_addr_bf, // fetch addr BF + output logic [31:1] ifc_fetch_addr_f, // fetch addr F + output logic [31:1] 
ifc_fetch_addr_bf, // fetch addr BF - output logic ifc_fetch_req_f, // fetch request valid F + output logic ifc_fetch_req_f, // fetch request valid F - output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall + output logic ifu_pmu_fetch_stall, // pmu event measuring fetch stall - output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage - output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage - output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage - output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus. - output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM. + output logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. BF stage + output logic ifc_fetch_req_bf, // Fetch request. Comes with the address. BF stage + output logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. BF stage + output logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus. + output logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM. 
- output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed + output logic ifc_dma_access_ok // fetch is not accessing the ICCM, DMA can proceed - ); +); - logic [31:1] fetch_addr_bf; - logic [31:1] fetch_addr_next; - logic [3:0] fb_write_f, fb_write_ns; + logic [31:1] fetch_addr_bf; + logic [31:1] fetch_addr_next; + logic [3:0] fb_write_f, fb_write_ns; - logic fb_full_f_ns, fb_full_f; - logic fb_right, fb_right2, fb_left, wfm, idle; - logic sel_last_addr_bf, sel_next_addr_bf; - logic miss_f, miss_a; - logic flush_fb, dma_iccm_stall_any_f; - logic mb_empty_mod, goto_idle, leave_idle; - logic fetch_bf_en; - logic line_wrap; - logic fetch_addr_next_1; + logic fb_full_f_ns, fb_full_f; + logic fb_right, fb_right2, fb_left, wfm, idle; + logic sel_last_addr_bf, sel_next_addr_bf; + logic miss_f, miss_a; + logic flush_fb, dma_iccm_stall_any_f; + logic mb_empty_mod, goto_idle, leave_idle; + logic fetch_bf_en; + logic line_wrap; + logic fetch_addr_next_1; - // FSM assignment - typedef enum logic [1:0] { IDLE = 2'b00 , - FETCH = 2'b01 , - STALL = 2'b10 , - WFM = 2'b11 } state_t ; - state_t state ; - state_t next_state ; + // FSM assignment + typedef enum logic [1:0] { + IDLE = 2'b00, + FETCH = 2'b01, + STALL = 2'b10, + WFM = 2'b11 + } state_t; + state_t state; + state_t next_state; - logic dma_stall; - assign dma_stall = ic_dma_active | dma_iccm_stall_any_f; + logic dma_stall; + assign dma_stall = ic_dma_active | dma_iccm_stall_any_f; - // Fetch address mux - // - flush - // - Miss *or* flush during WFM (icache miss buffer is blocking) - // - Sequential + // Fetch address mux + // - flush + // - Miss *or* flush during WFM (icache miss buffer is blocking) + // - Sequential -if(pt.BTB_ENABLE==1) begin - logic sel_btb_addr_bf; + if (pt.BTB_ENABLE == 1) begin + logic sel_btb_addr_bf; - assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f); - assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f; 
- assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f; + assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f); + assign sel_btb_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ifu_bp_hit_taken_f & ic_hit_f; + assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f; - assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path - ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path - ({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]})| // BTB target - ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path + assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path + ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path + ({31{sel_btb_addr_bf}} & {ifu_bp_btb_target_f[31:1]}) | // BTB target + ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path -end // if (pt.BTB_ENABLE=1) + end // if (pt.BTB_ENABLE=1) else begin - assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f); - assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f; + assign sel_last_addr_bf = ~exu_flush_final & (~ifc_fetch_req_f | ~ic_hit_f); + assign sel_next_addr_bf = ~exu_flush_final & ifc_fetch_req_f & ic_hit_f; - assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path - ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path - ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path + assign fetch_addr_bf[31:1] = ( ({31{exu_flush_final}} & exu_flush_path_final[31:1]) | // FLUSH path + ({31{sel_last_addr_bf}} & ifc_fetch_addr_f[31:1]) | // MISS path + ({31{sel_next_addr_bf}} & {fetch_addr_next[31:1]})); // SEQ path -end - assign fetch_addr_next[31:1] = {({ifc_fetch_addr_f[31:2]} + 31'b1), fetch_addr_next_1 }; - assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ 
ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]); + end + assign fetch_addr_next[31:1] = {({ifc_fetch_addr_f[31:2]} + 31'b1), fetch_addr_next_1}; + assign line_wrap = (fetch_addr_next[pt.ICACHE_TAG_INDEX_LO] ^ ifc_fetch_addr_f[pt.ICACHE_TAG_INDEX_LO]); - assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1]; + assign fetch_addr_next_1 = line_wrap ? 1'b0 : ifc_fetch_addr_f[1]; - assign ifc_fetch_req_bf_raw = ~idle; - assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw & + assign ifc_fetch_req_bf_raw = ~idle; + assign ifc_fetch_req_bf = ifc_fetch_req_bf_raw & ~(fb_full_f_ns & ~(ifu_fb_consume2 | ifu_fb_consume1)) & ~dma_stall & @@ -140,107 +141,125 @@ end ~dec_tlu_flush_noredir_wb; - assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f; + assign fetch_bf_en = exu_flush_final | ifc_fetch_req_f; - assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final; + assign miss_f = ifc_fetch_req_f & ~ic_hit_f & ~exu_flush_final; - assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a; + assign mb_empty_mod = (ifu_ic_mb_empty | exu_flush_final) & ~dma_stall & ~miss_f & ~miss_a; - // Halt flushes and takes us to IDLE - assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb; - // If we're in IDLE, and we get a flush, goto FETCH - assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle; + // Halt flushes and takes us to IDLE + assign goto_idle = exu_flush_final & dec_tlu_flush_noredir_wb; + // If we're in IDLE, and we get a flush, goto FETCH + assign leave_idle = exu_flush_final & ~dec_tlu_flush_noredir_wb & idle; -//.i 7 -//.o 2 -//.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle leave_idle -//.ob next_state[1] next_state[0] -//.type fr -// -//# fetch 01, stall 10, wfm 11, idle 00 -//-- 1---- 01 -//-- 0--1- 00 -//00 0--00 00 -//00 0--01 01 -// -//01 01-0- 11 -//01 00-0- 01 -// -//11 0-10- 01 -//11 0-00- 11 + //.i 7 + //.o 2 + //.ilb state[1] state[0] reset_delayed miss_f mb_empty_mod goto_idle 
leave_idle + //.ob next_state[1] next_state[0] + //.type fr + // + //# fetch 01, stall 10, wfm 11, idle 00 + //-- 1---- 01 + //-- 0--1- 00 + //00 0--00 00 + //00 0--01 01 + // + //01 01-0- 11 + //01 00-0- 01 + // + //11 0-10- 01 + //11 0-00- 11 - assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) | + assign next_state[1] = (~state[1] & state[0] & miss_f & ~goto_idle) | (state[1] & ~mb_empty_mod & ~goto_idle); - assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle); + assign next_state[0] = (~goto_idle & leave_idle) | (state[0] & ~goto_idle); - assign flush_fb = exu_flush_final; + assign flush_fb = exu_flush_final; - // model fb write logic to mass balance the fetch buffers - assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch + // model fb write logic to mass balance the fetch buffers + assign fb_right = ( ifu_fb_consume1 & ~ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)) | // Consumed and no new fetch (ifu_fb_consume2 & ifc_fetch_req_f); // Consumed 2 and new fetch - assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch + assign fb_right2 = (ifu_fb_consume2 & (~ifc_fetch_req_f | miss_f)); // Consumed 2 and no new fetch - assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f; + assign fb_left = ifc_fetch_req_f & ~(ifu_fb_consume1 | ifu_fb_consume2) & ~miss_f; -// CBH - assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) | + // CBH + assign fb_write_ns[3:0] = ( ({4{(flush_fb)}} & 4'b0001) | ({4{~flush_fb & fb_right }} & {1'b0, fb_write_f[3:1]}) | ({4{~flush_fb & fb_right2}} & {2'b0, fb_write_f[3:2]}) | ({4{~flush_fb & fb_left }} & {fb_write_f[2:0], 1'b0}) | ({4{~flush_fb & ~fb_right & ~fb_right2 & ~fb_left}} & fb_write_f[3:0])); - assign fb_full_f_ns = fb_write_ns[3]; + assign fb_full_f_ns = fb_write_ns[3]; - assign idle = state == IDLE ; - assign wfm = state == WFM ; + assign idle = state 
== IDLE; + assign wfm = state == WFM; - rvdffie #(10) fbwrite_ff (.*, .clk(free_l2clk), - .din( {dma_iccm_stall_any, miss_f, ifc_fetch_req_bf, next_state[1:0], fb_full_f_ns, fb_write_ns[3:0]}), - .dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]})); + rvdffie #(10) fbwrite_ff ( + .*, + .clk(free_l2clk), + .din({ + dma_iccm_stall_any, + miss_f, + ifc_fetch_req_bf, + next_state[1:0], + fb_full_f_ns, + fb_write_ns[3:0] + }), + .dout({dma_iccm_stall_any_f, miss_a, ifc_fetch_req_f, state[1:0], fb_full_f, fb_write_f[3:0]}) + ); - assign ifu_pmu_fetch_stall = wfm | + assign ifu_pmu_fetch_stall = wfm | (ifc_fetch_req_bf_raw & ( (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1 | exu_flush_final)) | dma_stall)); - assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1]; + assign ifc_fetch_addr_bf[31:1] = fetch_addr_bf[31:1]; - rvdffpcie #(31) faddrf1_ff (.*, .en(fetch_bf_en), .din(fetch_addr_bf[31:1]), .dout(ifc_fetch_addr_f[31:1])); + rvdffpcie #(31) faddrf1_ff ( + .*, + .en (fetch_bf_en), + .din (fetch_addr_bf[31:1]), + .dout(ifc_fetch_addr_f[31:1]) + ); - if (pt.ICCM_ENABLE) begin - logic iccm_acc_in_region_bf; - logic iccm_acc_in_range_bf; - rvrangecheck #( .CCM_SADR (pt.ICCM_SADR), - .CCM_SIZE (pt.ICCM_SIZE) ) iccm_rangecheck ( - .addr ({ifc_fetch_addr_bf[31:1],1'b0}) , - .in_range (iccm_acc_in_range_bf) , - .in_region(iccm_acc_in_region_bf) - ); + if (pt.ICCM_ENABLE) begin + logic iccm_acc_in_region_bf; + logic iccm_acc_in_range_bf; + rvrangecheck #( + .CCM_SADR(pt.ICCM_SADR), + .CCM_SIZE(pt.ICCM_SIZE) + ) iccm_rangecheck ( + .addr ({ifc_fetch_addr_bf[31:1], 1'b0}), + .in_range (iccm_acc_in_range_bf), + .in_region(iccm_acc_in_region_bf) + ); - assign ifc_iccm_access_bf = iccm_acc_in_range_bf ; + assign ifc_iccm_access_bf = iccm_acc_in_range_bf; - assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf | + assign ifc_dma_access_ok = ( (~ifc_iccm_access_bf | (fb_full_f & ~(ifu_fb_consume2 | ifu_fb_consume1)) | (wfm & ~ifc_fetch_req_bf) | 
idle ) & ~exu_flush_final) | dma_iccm_stall_any_f; - assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf ; - end - else begin - assign ifc_iccm_access_bf = 1'b0 ; - assign ifc_dma_access_ok = 1'b0 ; - assign ifc_region_acc_fault_bf = 1'b0 ; - end + assign ifc_region_acc_fault_bf = ~iccm_acc_in_range_bf & iccm_acc_in_region_bf; + end else begin + assign ifc_iccm_access_bf = 1'b0; + assign ifc_dma_access_ok = 1'b0; + assign ifc_region_acc_fault_bf = 1'b0; + end - assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{ifc_fetch_addr_bf[31:28] , 1'b0 }] ; // bit 0 of each region description is the cacheable bit + assign ifc_fetch_uncacheable_bf = ~dec_tlu_mrac_ff[{ + ifc_fetch_addr_bf[31:28], 1'b0 + }]; // bit 0 of each region description is the cacheable bit -endmodule // el2_ifu_ifc_ctl +endmodule // el2_ifu_ifc_ctl diff --git a/Flow/design/ifu/el2_ifu_mem_ctl.sv b/Flow/design/ifu/el2_ifu_mem_ctl.sv index 096aa43..4741145 100644 --- a/Flow/design/ifu/el2_ifu_mem_ctl.sv +++ b/Flow/design/ifu/el2_ifu_mem_ctl.sv @@ -22,469 +22,496 @@ //******************************************************************************** module el2_ifu_mem_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. - input logic rst_l, // reset, active low - - input logic exu_flush_final, // Flush from the pipeline., includes flush lower - input logic dec_tlu_flush_lower_wb, // Flush lower from the pipeline. - input logic dec_tlu_flush_err_wb, // Flush from the pipeline due to perr. 
- input logic dec_tlu_i0_commit_cmt, // committed i0 instruction - input logic dec_tlu_force_halt, // force halt. - - input logic [31:1] ifc_fetch_addr_bf, // Fetch Address byte aligned always. F1 stage. - input logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. F1 stage - input logic ifc_fetch_req_bf, // Fetch request. Comes with the address. F1 stage - input logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. F1 stage - input logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus. - input logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM. - input logic ifc_dma_access_ok, // It is OK to give dma access to the ICCM. (ICCM is not busy this cycle). - input logic dec_tlu_fence_i_wb, // Fence.i instruction is committing. Clear all Icache valids. - input logic ifu_bp_hit_taken_f, // Branch is predicted taken. Kill the fetch next cycle. - - input logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified - - output logic ifu_miss_state_idle, // No icache misses are outstanding. - output logic ifu_ic_mb_empty, // Continue with normal fetching. This does not mean that miss is finished. - output logic ic_dma_active , // In the middle of servicing dma request to ICCM. Do not make any new requests. - output logic ic_write_stall, // Stall fetch the cycle we are writing the cache. 
- -/// PMU signals - output logic ifu_pmu_ic_miss, // IC miss event - output logic ifu_pmu_ic_hit, // IC hit event - output logic ifu_pmu_bus_error, // Bus error event - output logic ifu_pmu_bus_busy, // Bus busy event - output logic ifu_pmu_bus_trxn, // Bus transaction - - //-------------------------- IFU AXI signals-------------------------- - // AXI Write Channels - output logic ifu_axi_awvalid, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, - output logic [31:0] ifu_axi_awaddr, - output logic [3:0] ifu_axi_awregion, - output logic [7:0] ifu_axi_awlen, - output logic [2:0] ifu_axi_awsize, - output logic [1:0] ifu_axi_awburst, - output logic ifu_axi_awlock, - output logic [3:0] ifu_axi_awcache, - output logic [2:0] ifu_axi_awprot, - output logic [3:0] ifu_axi_awqos, - - output logic ifu_axi_wvalid, - output logic [63:0] ifu_axi_wdata, - output logic [7:0] ifu_axi_wstrb, - output logic ifu_axi_wlast, - - output logic ifu_axi_bready, - - // AXI Read Channels - output logic ifu_axi_arvalid, - input logic ifu_axi_arready, - output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, - output logic [31:0] ifu_axi_araddr, - output logic [3:0] ifu_axi_arregion, - output logic [7:0] ifu_axi_arlen, - output logic [2:0] ifu_axi_arsize, - output logic [1:0] ifu_axi_arburst, - output logic ifu_axi_arlock, - output logic [3:0] ifu_axi_arcache, - output logic [2:0] ifu_axi_arprot, - output logic [3:0] ifu_axi_arqos, - - input logic ifu_axi_rvalid, - output logic ifu_axi_rready, - input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, - input logic [63:0] ifu_axi_rdata, - input logic [1:0] ifu_axi_rresp, - - input logic ifu_bus_clk_en, - - - input logic dma_iccm_req, // dma iccm command (read or write) - input logic [31:0] dma_mem_addr, // dma address - input logic [2:0] dma_mem_sz, // size - input logic dma_mem_write, // write - input logic [63:0] dma_mem_wdata, // write data - input logic [2:0] dma_mem_tag, // DMA Buffer entry number - - output logic iccm_dma_ecc_error,// Data read from iccm 
has an ecc error - output logic iccm_dma_rvalid, // Data read from iccm is valid - output logic [63:0] iccm_dma_rdata, // dma data read from iccm - output logic [2:0] iccm_dma_rtag, // Tag of the DMA req - output logic iccm_ready, // iccm ready to accept new command. - - -// I$ & ITAG Ports - output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache. - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache. - output logic ic_rd_en, // Icache read enable. - - output logic [pt.ICACHE_BANKS_WAY-1:0] [70:0] ic_wr_data, // Data to fill to the Icache. With ECC - input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC - input logic [25:0] ictag_debug_rd_data, // Debug icache tag. - output logic [70:0] ic_debug_wr_data, // Debug wr cache. - output logic [70:0] ifu_ic_debug_rd_data, // debug data read - - - input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // - input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, - - output logic [pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. - output logic ic_debug_rd_en, // Icache debug rd - output logic ic_debug_wr_en, // Icache debug wr - output logic ic_debug_tag_array, // Debug tag array - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. - - - output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage - - input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage - input logic ic_tag_perr, // Icache Tag parity error - - // ICCM ports - output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address. - output logic iccm_wren, // ICCM write enable (through the DMA) - output logic iccm_rden, // ICCM read enable. - output logic [77:0] iccm_wr_data, // ICCM write data. 
- output logic [2:0] iccm_wr_size, // ICCM write location within DW. - - input logic [63:0] iccm_rd_data, // Data read from ICCM. - input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM. - input logic [1:0] ifu_fetch_val, - // IFU control signals - output logic ic_hit_f, // Hit in Icache(if Icache access) or ICCM access( ICCM always has ic_hit_f) - output logic [1:0] ic_access_fault_f, // Access fault (bus error or ICCM access in region but out of offset range). - output logic [1:0] ic_access_fault_type_f, // Access fault types - output logic iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. - output logic [1:0] iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. - output logic ic_error_start, // This has any I$ errors ( data/tag/ecc/parity ) - - output logic ifu_async_error_start, // Or of the sb iccm, and all the icache errors sent to aligner to stop - output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access - output logic [1:0] ic_fetch_val_f, // valid bytes for fetch. To the Aligner. - output logic [31:0] ic_data_f, // Data read from Icache or ICCM. To the Aligner. - output logic [63:0] ic_premux_data, // Premuxed data to be muxed with Icache data - output logic ic_sel_premux_data, // Select premux data. - -///// Debug - input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt , // Icache/tag debug read/write packet - input logic dec_tlu_core_ecc_disable, // disable the ecc checking and flagging - output logic ifu_ic_debug_rd_data_valid, // debug data valid. 
- output logic iccm_buf_correct_ecc, - output logic iccm_correction_state, - - - input logic scan_mode - ); - -// Create different defines for ICACHE and ICCM enable combinations - - localparam NUM_OF_BEATS = 8 ; - - - - logic [31:3] ifu_ic_req_addr_f; - logic uncacheable_miss_in ; - logic uncacheable_miss_ff; - - - - logic bus_ifu_wr_en ; - logic bus_ifu_wr_en_ff ; - logic bus_ifu_wr_en_ff_q ; - logic bus_ifu_wr_en_ff_wo_err ; - logic [pt.ICACHE_NUM_WAYS-1:0] bus_ic_wr_en ; - - logic reset_tag_valid_for_miss ; - - - logic [pt.ICACHE_STATUS_BITS-1:0] way_status; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_in; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_rep_new; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_ff; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_hit_new; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_w_debug; - logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_in; - logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_ff; - - - logic ifu_wr_data_comb_err ; - logic ifu_byp_data_err_new; - logic [1:0] ifu_byp_data_err_f; - logic ifu_wr_cumulative_err_data; - logic ifu_wr_cumulative_err; - logic ifu_wr_data_comb_err_ff; - logic scnd_miss_index_match ; - - - logic ifc_dma_access_q_ok; - logic ifc_iccm_access_f ; - logic ifc_region_acc_fault_f; - logic ifc_region_acc_fault_final_f; - logic [1:0] ifc_bus_acc_fault_f; - logic ic_act_miss_f; - logic ic_miss_under_miss_f; - logic ic_ignore_2nd_miss_f; - logic ic_act_hit_f; - logic miss_pending; - logic [31:1] imb_in , imb_ff ; - logic [31:pt.ICACHE_BEAT_ADDR_HI+1] miss_addr_in , miss_addr ; - logic miss_wrap_f ; - logic flush_final_f; - logic ifc_fetch_req_f; - logic ifc_fetch_req_f_raw; - logic fetch_req_f_qual ; - logic ifc_fetch_req_qual_bf ; - logic [pt.ICACHE_NUM_WAYS-1:0] replace_way_mb_any; - logic last_beat; - logic reset_beat_cnt ; - logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_req_addr_bits_hi_3 ; - logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_wr_addr_bits_hi_3 ; - logic [31:1] 
ifu_fetch_addr_int_f ; - logic [31:1] ifu_ic_rw_int_addr ; - logic crit_wd_byp_ok_ff ; - logic ic_crit_wd_rdy_new_ff; - logic [79:0] ic_byp_data_only_pre_new; - logic [79:0] ic_byp_data_only_new; - logic ic_byp_hit_f ; - logic ic_valid ; - logic ic_valid_ff; - logic reset_all_tags; - logic ic_valid_w_debug; - - logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren,ifu_tag_wren_ff; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_tag_wr_en; - logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren_w_debug; - logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way_ff; - logic ic_debug_rd_en_ff ; - logic fetch_bf_f_c1_clken ; - logic fetch_bf_f_c1_clk; - logic debug_c1_clken; - logic debug_c1_clk; - - logic reset_ic_in ; - logic reset_ic_ff ; - logic [pt.ICACHE_BEAT_ADDR_HI:1] vaddr_f ; - logic [31:1] ifu_status_wr_addr; - logic sel_mb_addr ; - logic sel_mb_addr_ff ; - logic sel_mb_status_addr ; - logic [63:0] ic_final_data; - - logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_ff ; - logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_ff ; - logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_w_debug ; - logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_w_debug ; - - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_ff ; - logic way_status_wr_en_ff ; - logic [pt.ICACHE_TAG_DEPTH-1:0][pt.ICACHE_STATUS_BITS-1:0] way_status_out ; - logic [1:0] ic_debug_way_enc; - - logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rid_ff; - - logic fetch_req_icache_f; - logic fetch_req_iccm_f; - logic ic_iccm_hit_f; - logic fetch_uncacheable_ff; - logic way_status_wr_en; - logic sel_byp_data; - logic sel_ic_data; - logic sel_iccm_data; - logic ic_rd_parity_final_err; - logic ic_act_miss_f_delayed; - logic bus_ifu_wr_data_error; - logic bus_ifu_wr_data_error_ff; - logic way_status_wr_en_w_debug; - logic ic_debug_tag_val_rd_out; - logic ifu_pmu_ic_miss_in; - logic ifu_pmu_ic_hit_in; - logic ifu_pmu_bus_error_in; - logic ifu_pmu_bus_trxn_in; - logic ifu_pmu_bus_busy_in; - 
logic ic_debug_ict_array_sel_in; - logic ic_debug_ict_array_sel_ff; - logic debug_data_clken; - logic last_data_recieved_in ; - logic last_data_recieved_ff ; - - logic ifu_bus_rvalid ; - logic ifu_bus_rvalid_ff ; - logic ifu_bus_rvalid_unq_ff ; - logic ifu_bus_arready_unq ; - logic ifu_bus_arready_unq_ff ; - logic ifu_bus_arvalid ; - logic ifu_bus_arvalid_ff ; - logic ifu_bus_arready ; - logic ifu_bus_arready_ff ; - logic [63:0] ifu_bus_rdata_ff ; - logic [1:0] ifu_bus_rresp_ff ; - logic ifu_bus_rsp_valid ; - logic ifu_bus_rsp_ready ; - logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rsp_tag; - logic [63:0] ifu_bus_rsp_rdata; - logic [1:0] ifu_bus_rsp_opc; - - logic [pt.ICACHE_NUM_BEATS-1:0] write_fill_data; - logic [pt.ICACHE_NUM_BEATS-1:0] wr_data_c1_clk; - logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid_in; - logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid; - logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error_in; - logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error; - logic [pt.ICACHE_BEAT_ADDR_HI:1] byp_fetch_index; - logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_0; - logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_1; - logic [pt.ICACHE_BEAT_ADDR_HI:3] byp_fetch_index_inc; - logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_0; - logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_1; - logic miss_buff_hit_unq_f ; - logic stream_hit_f ; - logic stream_miss_f ; - logic stream_eol_f ; - logic crit_byp_hit_f ; - logic [pt.IFU_BUS_TAG-1:0] other_tag ; - logic [(2*pt.ICACHE_NUM_BEATS)-1:0] [31:0] ic_miss_buff_data; - logic [63:0] ic_miss_buff_half; - logic scnd_miss_req, scnd_miss_req_q; - logic scnd_miss_req_in; - - - logic [pt.ICCM_BITS-1:2] iccm_ecc_corr_index_ff; - logic [pt.ICCM_BITS-1:2] iccm_ecc_corr_index_in; - logic [38:0] iccm_ecc_corr_data_ff; - logic iccm_ecc_write_status ; - logic iccm_rd_ecc_single_err_ff ; - logic iccm_error_start; // start the error fsm - logic perr_state_en; - logic miss_state_en; - - logic busclk; - logic 
busclk_force; - logic busclk_reset; - logic bus_ifu_bus_clk_en_ff; - logic bus_ifu_bus_clk_en ; - - logic ifc_bus_ic_req_ff_in; - logic ifu_bus_cmd_valid ; - logic ifu_bus_cmd_ready ; - - logic bus_inc_data_beat_cnt ; - logic bus_reset_data_beat_cnt ; - logic bus_hold_data_beat_cnt ; - - logic bus_inc_cmd_beat_cnt ; - logic bus_reset_cmd_beat_cnt_0 ; - logic bus_reset_cmd_beat_cnt_secondlast ; - logic bus_hold_cmd_beat_cnt ; - - logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_data_beat_count ; - logic [pt.ICACHE_BEAT_BITS-1:0] bus_data_beat_count ; - - logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_cmd_beat_count ; - logic [pt.ICACHE_BEAT_BITS-1:0] bus_cmd_beat_count ; - - - logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_rd_addr_count; - logic [pt.ICACHE_BEAT_BITS-1:0] bus_rd_addr_count; - - - logic bus_cmd_sent ; - logic bus_last_data_beat ; - - - logic [pt.ICACHE_NUM_WAYS-1:0] bus_wren ; - - logic [pt.ICACHE_NUM_WAYS-1:0] bus_wren_last ; - logic [pt.ICACHE_NUM_WAYS-1:0] wren_reset_miss ; - logic ifc_dma_access_ok_d; - logic ifc_dma_access_ok_prev; - - logic bus_cmd_req_in ; - logic bus_cmd_req_hold ; - - logic second_half_available ; - logic write_ic_16_bytes ; - - logic ifc_region_acc_fault_final_bf; - logic ifc_region_acc_fault_memory_bf; - logic ifc_region_acc_fault_memory_f; - logic ifc_region_acc_okay; - - logic iccm_correct_ecc; - logic dma_sb_err_state, dma_sb_err_state_ff; - logic two_byte_instr; + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic free_l2clk, // Clock always. Through one clock header. For flops with second header built in. 
+ input logic rst_l, // reset, active low + + input logic exu_flush_final, // Flush from the pipeline., includes flush lower + input logic dec_tlu_flush_lower_wb, // Flush lower from the pipeline. + input logic dec_tlu_flush_err_wb, // Flush from the pipeline due to perr. + input logic dec_tlu_i0_commit_cmt, // committed i0 instruction + input logic dec_tlu_force_halt, // force halt. + + input logic [31:1] ifc_fetch_addr_bf, // Fetch Address byte aligned always. F1 stage. + input logic ifc_fetch_uncacheable_bf, // The fetch request is uncacheable space. F1 stage + input logic ifc_fetch_req_bf, // Fetch request. Comes with the address. F1 stage + input logic ifc_fetch_req_bf_raw, // Fetch request without some qualifications. Used for clock-gating. F1 stage + input logic ifc_iccm_access_bf, // This request is to the ICCM. Do not generate misses to the bus. + input logic ifc_region_acc_fault_bf, // Access fault. in ICCM region but offset is outside defined ICCM. + input logic ifc_dma_access_ok, // It is OK to give dma access to the ICCM. (ICCM is not busy this cycle). + input logic dec_tlu_fence_i_wb, // Fence.i instruction is committing. Clear all Icache valids. + input logic ifu_bp_hit_taken_f, // Branch is predicted taken. Kill the fetch next cycle. + + input logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified + + output logic ifu_miss_state_idle, // No icache misses are outstanding. + output logic ifu_ic_mb_empty, // Continue with normal fetching. This does not mean that miss is finished. + output logic ic_dma_active , // In the middle of servicing dma request to ICCM. Do not make any new requests. + output logic ic_write_stall, // Stall fetch the cycle we are writing the cache. 
+ + /// PMU signals + output logic ifu_pmu_ic_miss, // IC miss event + output logic ifu_pmu_ic_hit, // IC hit event + output logic ifu_pmu_bus_error, // Bus error event + output logic ifu_pmu_bus_busy, // Bus busy event + output logic ifu_pmu_bus_trxn, // Bus transaction + + //-------------------------- IFU AXI signals-------------------------- + // AXI Write Channels + output logic ifu_axi_awvalid, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_awid, + output logic [ 31:0] ifu_axi_awaddr, + output logic [ 3:0] ifu_axi_awregion, + output logic [ 7:0] ifu_axi_awlen, + output logic [ 2:0] ifu_axi_awsize, + output logic [ 1:0] ifu_axi_awburst, + output logic ifu_axi_awlock, + output logic [ 3:0] ifu_axi_awcache, + output logic [ 2:0] ifu_axi_awprot, + output logic [ 3:0] ifu_axi_awqos, + + output logic ifu_axi_wvalid, + output logic [63:0] ifu_axi_wdata, + output logic [ 7:0] ifu_axi_wstrb, + output logic ifu_axi_wlast, + + output logic ifu_axi_bready, + + // AXI Read Channels + output logic ifu_axi_arvalid, + input logic ifu_axi_arready, + output logic [pt.IFU_BUS_TAG-1:0] ifu_axi_arid, + output logic [ 31:0] ifu_axi_araddr, + output logic [ 3:0] ifu_axi_arregion, + output logic [ 7:0] ifu_axi_arlen, + output logic [ 2:0] ifu_axi_arsize, + output logic [ 1:0] ifu_axi_arburst, + output logic ifu_axi_arlock, + output logic [ 3:0] ifu_axi_arcache, + output logic [ 2:0] ifu_axi_arprot, + output logic [ 3:0] ifu_axi_arqos, + + input logic ifu_axi_rvalid, + output logic ifu_axi_rready, + input logic [pt.IFU_BUS_TAG-1:0] ifu_axi_rid, + input logic [ 63:0] ifu_axi_rdata, + input logic [ 1:0] ifu_axi_rresp, + + input logic ifu_bus_clk_en, + + + input logic dma_iccm_req, // dma iccm command (read or write) + input logic [31:0] dma_mem_addr, // dma address + input logic [ 2:0] dma_mem_sz, // size + input logic dma_mem_write, // write + input logic [63:0] dma_mem_wdata, // write data + input logic [ 2:0] dma_mem_tag, // DMA Buffer entry number + + output logic iccm_dma_ecc_error, 
// Data read from iccm has an ecc error + output logic iccm_dma_rvalid, // Data read from iccm is valid + output logic [63:0] iccm_dma_rdata, // dma data read from iccm + output logic [ 2:0] iccm_dma_rtag, // Tag of the DMA req + output logic iccm_ready, // iccm ready to accept new command. + + + // I$ & ITAG Ports + output logic [31:1] ic_rw_addr, // Read/Write addresss to the Icache. + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_wr_en, // Icache write enable, when filling the Icache. + output logic ic_rd_en, // Icache read enable. + + output logic [pt.ICACHE_BANKS_WAY-1:0] [70:0] ic_wr_data, // Data to fill to the Icache. With ECC + input logic [63:0] ic_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [70:0] ic_debug_rd_data , // Data read from Icache. 2x64bits + parity bits. F2 stage. With ECC + input logic [25:0] ictag_debug_rd_data, // Debug icache tag. + output logic [70:0] ic_debug_wr_data, // Debug wr cache. + output logic [70:0] ifu_ic_debug_rd_data, // debug data read + + + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_eccerr, // + input logic [pt.ICACHE_BANKS_WAY-1:0] ic_parerr, + + output logic [ pt.ICACHE_INDEX_HI:3] ic_debug_addr, // Read/Write addresss to the Icache. + output logic ic_debug_rd_en, // Icache debug rd + output logic ic_debug_wr_en, // Icache debug wr + output logic ic_debug_tag_array, // Debug tag array + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way, // Debug way. Rd or Wr. + + + output logic [pt.ICACHE_NUM_WAYS-1:0] ic_tag_valid, // Valid bits when accessing the Icache. One valid bit per way. F2 stage + + input logic [pt.ICACHE_NUM_WAYS-1:0] ic_rd_hit, // Compare hits from Icache tags. Per way. F2 stage + input logic ic_tag_perr, // Icache Tag parity error + + // ICCM ports + output logic [pt.ICCM_BITS-1:1] iccm_rw_addr, // ICCM read/write address. + output logic iccm_wren, // ICCM write enable (through the DMA) + output logic iccm_rden, // ICCM read enable. 
+ output logic [ 77:0] iccm_wr_data, // ICCM write data. + output logic [ 2:0] iccm_wr_size, // ICCM write location within DW. + + input logic [63:0] iccm_rd_data, // Data read from ICCM. + input logic [77:0] iccm_rd_data_ecc, // Data + ECC read from ICCM. + input logic [1:0] ifu_fetch_val, + // IFU control signals + output logic ic_hit_f, // Hit in Icache(if Icache access) or ICCM access( ICCM always has ic_hit_f) + output logic [1:0] ic_access_fault_f, // Access fault (bus error or ICCM access in region but out of offset range). + output logic [1:0] ic_access_fault_type_f, // Access fault types + output logic iccm_rd_ecc_single_err, // This fetch has a single ICCM ecc error. + output logic [1:0] iccm_rd_ecc_double_err, // This fetch has a double ICCM ecc error. + output logic ic_error_start, // This has any I$ errors ( data/tag/ecc/parity ) + + output logic ifu_async_error_start, // Or of the sb iccm, and all the icache errors sent to aligner to stop + output logic iccm_dma_sb_error, // Single Bit ECC error from a DMA access + output logic [1:0] ic_fetch_val_f, // valid bytes for fetch. To the Aligner. + output logic [31:0] ic_data_f, // Data read from Icache or ICCM. To the Aligner. + output logic [63:0] ic_premux_data, // Premuxed data to be muxed with Icache data + output logic ic_sel_premux_data, // Select premux data. + + ///// Debug + input el2_cache_debug_pkt_t dec_tlu_ic_diag_pkt, // Icache/tag debug read/write packet + input logic dec_tlu_core_ecc_disable, // disable the ecc checking and flagging + output logic ifu_ic_debug_rd_data_valid, // debug data valid. 
+ output logic iccm_buf_correct_ecc, + output logic iccm_correction_state, + + + input logic scan_mode +); + + // Create different defines for ICACHE and ICCM enable combinations + + localparam NUM_OF_BEATS = 8; + + + + logic [ 31:3] ifu_ic_req_addr_f; + logic uncacheable_miss_in; + logic uncacheable_miss_ff; + + + + logic bus_ifu_wr_en; + logic bus_ifu_wr_en_ff; + logic bus_ifu_wr_en_ff_q; + logic bus_ifu_wr_en_ff_wo_err; + logic [ pt.ICACHE_NUM_WAYS-1:0] bus_ic_wr_en; + + logic reset_tag_valid_for_miss; + + + logic [pt.ICACHE_STATUS_BITS-1:0] way_status; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_in; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_rep_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_ff; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_hit_new; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_w_debug; + logic [ pt.ICACHE_NUM_WAYS-1:0] tagv_mb_in; + logic [ pt.ICACHE_NUM_WAYS-1:0] tagv_mb_ff; + + + logic ifu_wr_data_comb_err; + logic ifu_byp_data_err_new; + logic [ 1:0] ifu_byp_data_err_f; + logic ifu_wr_cumulative_err_data; + logic ifu_wr_cumulative_err; + logic ifu_wr_data_comb_err_ff; + logic scnd_miss_index_match; + + + logic ifc_dma_access_q_ok; + logic ifc_iccm_access_f; + logic ifc_region_acc_fault_f; + logic ifc_region_acc_fault_final_f; + logic [ 1:0] ifc_bus_acc_fault_f; + logic ic_act_miss_f; + logic ic_miss_under_miss_f; + logic ic_ignore_2nd_miss_f; + logic ic_act_hit_f; + logic miss_pending; + logic [31:1] imb_in, imb_ff; + logic [31:pt.ICACHE_BEAT_ADDR_HI+1] miss_addr_in, miss_addr; + logic miss_wrap_f; + logic flush_final_f; + logic ifc_fetch_req_f; + logic ifc_fetch_req_f_raw; + logic fetch_req_f_qual; + logic ifc_fetch_req_qual_bf; + logic [ pt.ICACHE_NUM_WAYS-1:0] replace_way_mb_any; + logic last_beat; + logic reset_beat_cnt; + logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_req_addr_bits_hi_3; + logic [pt.ICACHE_BEAT_ADDR_HI:3] ic_wr_addr_bits_hi_3; + logic [ 31:1] 
ifu_fetch_addr_int_f; + logic [ 31:1] ifu_ic_rw_int_addr; + logic crit_wd_byp_ok_ff; + logic ic_crit_wd_rdy_new_ff; + logic [ 79:0] ic_byp_data_only_pre_new; + logic [ 79:0] ic_byp_data_only_new; + logic ic_byp_hit_f; + logic ic_valid; + logic ic_valid_ff; + logic reset_all_tags; + logic ic_valid_w_debug; + + logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren, ifu_tag_wren_ff; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_tag_wr_en; + logic [pt.ICACHE_NUM_WAYS-1:0] ifu_tag_wren_w_debug; + logic [pt.ICACHE_NUM_WAYS-1:0] ic_debug_way_ff; + logic ic_debug_rd_en_ff; + logic fetch_bf_f_c1_clken; + logic fetch_bf_f_c1_clk; + logic debug_c1_clken; + logic debug_c1_clk; + + logic reset_ic_in; + logic reset_ic_ff; + logic [pt.ICACHE_BEAT_ADDR_HI:1] vaddr_f; + logic [31:1] ifu_status_wr_addr; + logic sel_mb_addr; + logic sel_mb_addr_ff; + logic sel_mb_status_addr; + logic [63:0] ic_final_data; + + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_ff; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_ff; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_ic_rw_int_addr_w_debug; + logic [pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] ifu_status_wr_addr_w_debug; + + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_new_ff; + logic way_status_wr_en_ff; + logic [pt.ICACHE_TAG_DEPTH-1:0][pt.ICACHE_STATUS_BITS-1:0] way_status_out; + logic [1:0] ic_debug_way_enc; + + logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rid_ff; + + logic fetch_req_icache_f; + logic fetch_req_iccm_f; + logic ic_iccm_hit_f; + logic fetch_uncacheable_ff; + logic way_status_wr_en; + logic sel_byp_data; + logic sel_ic_data; + logic sel_iccm_data; + logic ic_rd_parity_final_err; + logic ic_act_miss_f_delayed; + logic bus_ifu_wr_data_error; + logic bus_ifu_wr_data_error_ff; + logic way_status_wr_en_w_debug; + logic ic_debug_tag_val_rd_out; + logic ifu_pmu_ic_miss_in; + logic ifu_pmu_ic_hit_in; + logic ifu_pmu_bus_error_in; + logic ifu_pmu_bus_trxn_in; + logic ifu_pmu_bus_busy_in; + logic 
ic_debug_ict_array_sel_in; + logic ic_debug_ict_array_sel_ff; + logic debug_data_clken; + logic last_data_recieved_in; + logic last_data_recieved_ff; + + logic ifu_bus_rvalid; + logic ifu_bus_rvalid_ff; + logic ifu_bus_rvalid_unq_ff; + logic ifu_bus_arready_unq; + logic ifu_bus_arready_unq_ff; + logic ifu_bus_arvalid; + logic ifu_bus_arvalid_ff; + logic ifu_bus_arready; + logic ifu_bus_arready_ff; + logic [63:0] ifu_bus_rdata_ff; + logic [1:0] ifu_bus_rresp_ff; + logic ifu_bus_rsp_valid; + logic ifu_bus_rsp_ready; + logic [pt.IFU_BUS_TAG-1:0] ifu_bus_rsp_tag; + logic [63:0] ifu_bus_rsp_rdata; + logic [1:0] ifu_bus_rsp_opc; + + logic [pt.ICACHE_NUM_BEATS-1:0] write_fill_data; + logic [pt.ICACHE_NUM_BEATS-1:0] wr_data_c1_clk; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid_in; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_valid; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error_in; + logic [pt.ICACHE_NUM_BEATS-1:0] ic_miss_buff_data_error; + logic [pt.ICACHE_BEAT_ADDR_HI:1] byp_fetch_index; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_0; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_1; + logic [pt.ICACHE_BEAT_ADDR_HI:3] byp_fetch_index_inc; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_0; + logic [pt.ICACHE_BEAT_ADDR_HI:2] byp_fetch_index_inc_1; + logic miss_buff_hit_unq_f; + logic stream_hit_f; + logic stream_miss_f; + logic stream_eol_f; + logic crit_byp_hit_f; + logic [pt.IFU_BUS_TAG-1:0] other_tag; + logic [(2*pt.ICACHE_NUM_BEATS)-1:0][31:0] ic_miss_buff_data; + logic [63:0] ic_miss_buff_half; + logic scnd_miss_req, scnd_miss_req_q; + logic scnd_miss_req_in; + + + logic [ pt.ICCM_BITS-1:2] iccm_ecc_corr_index_ff; + logic [ pt.ICCM_BITS-1:2] iccm_ecc_corr_index_in; + logic [ 38:0] iccm_ecc_corr_data_ff; + logic iccm_ecc_write_status; + logic iccm_rd_ecc_single_err_ff; + logic iccm_error_start; // start the error fsm + logic perr_state_en; + logic miss_state_en; + + logic busclk; + logic busclk_force; + logic 
busclk_reset; + logic bus_ifu_bus_clk_en_ff; + logic bus_ifu_bus_clk_en; + + logic ifc_bus_ic_req_ff_in; + logic ifu_bus_cmd_valid; + logic ifu_bus_cmd_ready; + + logic bus_inc_data_beat_cnt; + logic bus_reset_data_beat_cnt; + logic bus_hold_data_beat_cnt; + + logic bus_inc_cmd_beat_cnt; + logic bus_reset_cmd_beat_cnt_0; + logic bus_reset_cmd_beat_cnt_secondlast; + logic bus_hold_cmd_beat_cnt; + + logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_data_beat_count; + logic [pt.ICACHE_BEAT_BITS-1:0] bus_data_beat_count; + + logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_cmd_beat_count; + logic [pt.ICACHE_BEAT_BITS-1:0] bus_cmd_beat_count; + + + logic [pt.ICACHE_BEAT_BITS-1:0] bus_new_rd_addr_count; + logic [pt.ICACHE_BEAT_BITS-1:0] bus_rd_addr_count; + + + logic bus_cmd_sent; + logic bus_last_data_beat; + + + logic [ pt.ICACHE_NUM_WAYS-1:0] bus_wren; + + logic [ pt.ICACHE_NUM_WAYS-1:0] bus_wren_last; + logic [ pt.ICACHE_NUM_WAYS-1:0] wren_reset_miss; + logic ifc_dma_access_ok_d; + logic ifc_dma_access_ok_prev; + + logic bus_cmd_req_in; + logic bus_cmd_req_hold; + + logic second_half_available; + logic write_ic_16_bytes; + + logic ifc_region_acc_fault_final_bf; + logic ifc_region_acc_fault_memory_bf; + logic ifc_region_acc_fault_memory_f; + logic ifc_region_acc_okay; + + logic iccm_correct_ecc; + logic dma_sb_err_state, dma_sb_err_state_ff; + logic two_byte_instr; + + typedef enum logic [2:0] { + IDLE = 3'b000, + CRIT_BYP_OK = 3'b001, + HIT_U_MISS = 3'b010, + MISS_WAIT = 3'b011, + CRIT_WRD_RDY = 3'b100, + SCND_MISS = 3'b101, + STREAM = 3'b110, + STALL_SCND_MISS = 3'b111 + } miss_state_t; + miss_state_t miss_state, miss_nxtstate; + + typedef enum logic [1:0] { + ERR_STOP_IDLE = 2'b00, + ERR_FETCH1 = 2'b01, + ERR_FETCH2 = 2'b10, + ERR_STOP_FETCH = 2'b11 + } err_stop_state_t; + err_stop_state_t err_stop_state, err_stop_nxtstate; + logic err_stop_state_en; + logic err_stop_fetch; + + logic ic_crit_wd_rdy; // Critical fetch is ready to be bypassed. 
+ + logic ifu_bp_hit_taken_q_f; + logic ifu_bus_rvalid_unq; + logic bus_cmd_beat_en; + - typedef enum logic [2:0] {IDLE=3'b000, CRIT_BYP_OK=3'b001, HIT_U_MISS=3'b010, MISS_WAIT=3'b011,CRIT_WRD_RDY=3'b100,SCND_MISS=3'b101,STREAM=3'b110 , STALL_SCND_MISS=3'b111} miss_state_t; - miss_state_t miss_state, miss_nxtstate; + // ---- Clock gating section ----- + // c1 clock enables - typedef enum logic [1:0] {ERR_STOP_IDLE=2'b00, ERR_FETCH1=2'b01 , ERR_FETCH2=2'b10 , ERR_STOP_FETCH=2'b11} err_stop_state_t; - err_stop_state_t err_stop_state, err_stop_nxtstate; - logic err_stop_state_en ; - logic err_stop_fetch ; - logic ic_crit_wd_rdy; // Critical fetch is ready to be bypassed. + assign fetch_bf_f_c1_clken = ifc_fetch_req_bf_raw | ifc_fetch_req_f | miss_pending | exu_flush_final | scnd_miss_req; + assign debug_c1_clken = ic_debug_rd_en | ic_debug_wr_en; + // C1 - 1 clock pulse for data - logic ifu_bp_hit_taken_q_f; - logic ifu_bus_rvalid_unq; - logic bus_cmd_beat_en; + rvclkhdr fetch_bf_f_c1_cgc ( + .en(fetch_bf_f_c1_clken), + .l1clk(fetch_bf_f_c1_clk), + .* + ); + rvclkhdr debug_c1_cgc ( + .en(debug_c1_clken), + .l1clk(debug_c1_clk), + .* + ); + // ------ end clock gating section ------------------------ -// ---- Clock gating section ----- -// c1 clock enables + logic [1:0] iccm_single_ecc_error; + logic dma_iccm_req_f; + assign iccm_dma_sb_error = (|iccm_single_ecc_error[1:0]) & dma_iccm_req_f; + assign ifu_async_error_start = iccm_rd_ecc_single_err | ic_error_start; - assign fetch_bf_f_c1_clken = ifc_fetch_req_bf_raw | ifc_fetch_req_f | miss_pending | exu_flush_final | scnd_miss_req; - assign debug_c1_clken = ic_debug_rd_en | ic_debug_wr_en ; - // C1 - 1 clock pulse for data + typedef enum logic [2:0] { + ERR_IDLE = 3'b000, + IC_WFF = 3'b001, + ECC_WFF = 3'b010, + ECC_CORR = 3'b011, + DMA_SB_ERR = 3'b100 + } perr_state_t; + perr_state_t perr_state, perr_nxtstate; - rvclkhdr fetch_bf_f_c1_cgc ( .en(fetch_bf_f_c1_clken), .l1clk(fetch_bf_f_c1_clk), .* ); - rvclkhdr 
debug_c1_cgc ( .en(debug_c1_clken), .l1clk(debug_c1_clk), .* ); -// ------ end clock gating section ------------------------ - - logic [1:0] iccm_single_ecc_error; - logic dma_iccm_req_f ; - assign iccm_dma_sb_error = (|iccm_single_ecc_error[1:0] ) & dma_iccm_req_f ; - assign ifu_async_error_start = iccm_rd_ecc_single_err | ic_error_start; - - - typedef enum logic [2:0] {ERR_IDLE=3'b000, IC_WFF=3'b001 , ECC_WFF=3'b010 , ECC_CORR=3'b011, DMA_SB_ERR=3'b100} perr_state_t; - perr_state_t perr_state, perr_nxtstate; - - - assign ic_dma_active = iccm_correct_ecc | (perr_state == DMA_SB_ERR) | (err_stop_state == ERR_STOP_FETCH) | err_stop_fetch | + assign ic_dma_active = iccm_correct_ecc | (perr_state == DMA_SB_ERR) | (err_stop_state == ERR_STOP_FETCH) | err_stop_fetch | dec_tlu_flush_err_wb; // The last term is to give a error-correction a chance to finish before refetch starts - assign scnd_miss_req_in = ifu_bus_rsp_valid & bus_ifu_bus_clk_en & ifu_bus_rsp_ready & + assign scnd_miss_req_in = ifu_bus_rsp_valid & bus_ifu_bus_clk_en & ifu_bus_rsp_ready & (&bus_new_data_beat_count[pt.ICACHE_BEAT_BITS-1:0]) & ~uncacheable_miss_ff & ((miss_state == SCND_MISS) | (miss_nxtstate == SCND_MISS)) & ~exu_flush_final; - assign ifu_bp_hit_taken_q_f = ifu_bp_hit_taken_f & ic_hit_f ; + assign ifu_bp_hit_taken_q_f = ifu_bp_hit_taken_f & ic_hit_f; - //////////////////////////////////// Create Miss State Machine /////////////////////// - // Create Miss State Machine // - // Create Miss State Machine // - // Create Miss State Machine // - //////////////////////////////////// Create Miss State Machine /////////////////////// - // FIFO state machine - always_comb begin : MISS_SM - miss_nxtstate = IDLE; - miss_state_en = 1'b0; - case (miss_state) - IDLE: begin : idle - miss_nxtstate = (ic_act_miss_f & ~exu_flush_final) ? CRIT_BYP_OK : HIT_U_MISS ; - miss_state_en = ic_act_miss_f & ~dec_tlu_force_halt ; - end - CRIT_BYP_OK: begin : crit_byp_ok - miss_nxtstate = (dec_tlu_force_halt ) ? 
IDLE : + //////////////////////////////////// Create Miss State Machine /////////////////////// + // Create Miss State Machine // + // Create Miss State Machine // + // Create Miss State Machine // + //////////////////////////////////// Create Miss State Machine /////////////////////// + // FIFO state machine + always_comb begin : MISS_SM + miss_nxtstate = IDLE; + miss_state_en = 1'b0; + case (miss_state) + IDLE: begin : idle + miss_nxtstate = (ic_act_miss_f & ~exu_flush_final) ? CRIT_BYP_OK : HIT_U_MISS; + miss_state_en = ic_act_miss_f & ~dec_tlu_force_halt; + end + CRIT_BYP_OK: begin : crit_byp_ok + miss_nxtstate = (dec_tlu_force_halt ) ? IDLE : ( ic_byp_hit_f & (last_data_recieved_ff | (bus_ifu_wr_en_ff & last_beat)) & uncacheable_miss_ff) ? IDLE : ( ic_byp_hit_f & ~last_data_recieved_ff & uncacheable_miss_ff) ? MISS_WAIT : (~ic_byp_hit_f & ~exu_flush_final & (bus_ifu_wr_en_ff & last_beat) & uncacheable_miss_ff) ? CRIT_WRD_RDY : @@ -493,354 +520,455 @@ import el2_pkg::*; ( bus_ifu_wr_en_ff & ~exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~ifu_bp_hit_taken_q_f & ~uncacheable_miss_ff) ? STREAM : (~ic_byp_hit_f & ~exu_flush_final & (bus_ifu_wr_en_ff & last_beat) & ~uncacheable_miss_ff) ? IDLE : ( (exu_flush_final | ifu_bp_hit_taken_q_f) & ~(bus_ifu_wr_en_ff & last_beat) ) ? HIT_U_MISS : IDLE; - miss_state_en = dec_tlu_force_halt | exu_flush_final | ic_byp_hit_f | ifu_bp_hit_taken_q_f | (bus_ifu_wr_en_ff & last_beat) | (bus_ifu_wr_en_ff & ~uncacheable_miss_ff) ; - end - CRIT_WRD_RDY: begin : crit_wrd_rdy - miss_nxtstate = IDLE ; - miss_state_en = exu_flush_final | flush_final_f | ic_byp_hit_f | dec_tlu_force_halt ; - end - STREAM: begin : stream - miss_nxtstate = ((exu_flush_final | ifu_bp_hit_taken_q_f | stream_eol_f ) & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? 
HIT_U_MISS : IDLE ; - miss_state_en = exu_flush_final | ifu_bp_hit_taken_q_f | stream_eol_f | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ; - end - MISS_WAIT: begin : miss_wait - miss_nxtstate = (exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS : IDLE ; - miss_state_en = exu_flush_final | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ; - end - HIT_U_MISS: begin : hit_u_miss - miss_nxtstate = ic_miss_under_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? SCND_MISS : + miss_state_en = dec_tlu_force_halt | exu_flush_final | ic_byp_hit_f | ifu_bp_hit_taken_q_f | (bus_ifu_wr_en_ff & last_beat) | (bus_ifu_wr_en_ff & ~uncacheable_miss_ff) ; + end + CRIT_WRD_RDY: begin : crit_wrd_rdy + miss_nxtstate = IDLE; + miss_state_en = exu_flush_final | flush_final_f | ic_byp_hit_f | dec_tlu_force_halt; + end + STREAM: begin : stream + miss_nxtstate = ((exu_flush_final | ifu_bp_hit_taken_q_f | stream_eol_f ) & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS : IDLE ; + miss_state_en = exu_flush_final | ifu_bp_hit_taken_q_f | stream_eol_f | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt ; + end + MISS_WAIT: begin : miss_wait + miss_nxtstate = (exu_flush_final & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt) ? HIT_U_MISS : IDLE ; + miss_state_en = exu_flush_final | (bus_ifu_wr_en_ff & last_beat) | dec_tlu_force_halt; + end + HIT_U_MISS: begin : hit_u_miss + miss_nxtstate = ic_miss_under_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? SCND_MISS : ic_ignore_2nd_miss_f & ~(bus_ifu_wr_en_ff & last_beat) & ~dec_tlu_force_halt ? STALL_SCND_MISS : IDLE ; - miss_state_en = (bus_ifu_wr_en_ff & last_beat) | ic_miss_under_miss_f | ic_ignore_2nd_miss_f | dec_tlu_force_halt; - end - SCND_MISS: begin : scnd_miss - miss_nxtstate = dec_tlu_force_halt ? 
IDLE : + miss_state_en = (bus_ifu_wr_en_ff & last_beat) | ic_miss_under_miss_f | ic_ignore_2nd_miss_f | dec_tlu_force_halt; + end + SCND_MISS: begin : scnd_miss + miss_nxtstate = dec_tlu_force_halt ? IDLE : exu_flush_final ? ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : CRIT_BYP_OK; - miss_state_en = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt; - end - STALL_SCND_MISS: begin : stall_scnd_miss - miss_nxtstate = dec_tlu_force_halt ? IDLE : + miss_state_en = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt; + end + STALL_SCND_MISS: begin : stall_scnd_miss + miss_nxtstate = dec_tlu_force_halt ? IDLE : exu_flush_final ? ((bus_ifu_wr_en_ff & last_beat) ? IDLE : HIT_U_MISS) : IDLE; - miss_state_en = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt; - end - default: begin : def_case - miss_nxtstate = IDLE; - miss_state_en = 1'b0; - end - endcase - end - rvdffs #(($bits(miss_state_t))) miss_state_ff (.clk(active_clk), .din(miss_nxtstate), .dout({miss_state}), .en(miss_state_en), .*); + miss_state_en = (bus_ifu_wr_en_ff & last_beat) | exu_flush_final | dec_tlu_force_halt; + end + default: begin : def_case + miss_nxtstate = IDLE; + miss_state_en = 1'b0; + end + endcase + end + rvdffs #(($bits( + miss_state_t + ))) miss_state_ff ( + .clk (active_clk), + .din (miss_nxtstate), + .dout({miss_state}), + .en (miss_state_en), + .* + ); - logic sel_hold_imb ; + logic sel_hold_imb; - assign miss_pending = (miss_state != IDLE) ; - assign crit_wd_byp_ok_ff = (miss_state == CRIT_BYP_OK) | ((miss_state == CRIT_WRD_RDY) & ~flush_final_f); - assign sel_hold_imb = (miss_pending & ~(bus_ifu_wr_en_ff & last_beat) & ~((miss_state == CRIT_WRD_RDY) & exu_flush_final) & + assign miss_pending = (miss_state != IDLE); + assign crit_wd_byp_ok_ff = (miss_state == CRIT_BYP_OK) | ((miss_state == CRIT_WRD_RDY) & ~flush_final_f); + assign sel_hold_imb = (miss_pending & ~(bus_ifu_wr_en_ff & last_beat) & ~((miss_state == 
CRIT_WRD_RDY) & exu_flush_final) & ~((miss_state == CRIT_WRD_RDY) & crit_byp_hit_f) ) | ic_act_miss_f | (miss_pending & (miss_nxtstate == CRIT_WRD_RDY)) ; - logic sel_hold_imb_scnd; - logic [31:1] imb_scnd_in; - logic [31:1] imb_scnd_ff; - logic uncacheable_miss_scnd_in ; - logic uncacheable_miss_scnd_ff ; + logic sel_hold_imb_scnd; + logic [ 31:1] imb_scnd_in; + logic [ 31:1] imb_scnd_ff; + logic uncacheable_miss_scnd_in; + logic uncacheable_miss_scnd_ff; - logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_in; - logic [pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_ff; + logic [ pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_in; + logic [ pt.ICACHE_NUM_WAYS-1:0] tagv_mb_scnd_ff; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_in; - logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_ff; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_in; + logic [pt.ICACHE_STATUS_BITS-1:0] way_status_mb_scnd_ff; - assign sel_hold_imb_scnd =((miss_state == SCND_MISS) | ic_miss_under_miss_f) & ~flush_final_f ; - assign way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0] = (miss_state == SCND_MISS) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : {way_status[pt.ICACHE_STATUS_BITS-1:0]} ; - assign tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0] = (miss_state == SCND_MISS) ? tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags & ~exu_flush_final}}); - assign uncacheable_miss_scnd_in = sel_hold_imb_scnd ? uncacheable_miss_scnd_ff : ifc_fetch_uncacheable_bf ; + assign sel_hold_imb_scnd = ((miss_state == SCND_MISS) | ic_miss_under_miss_f) & ~flush_final_f; + assign way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0] = (miss_state == SCND_MISS) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : {way_status[pt.ICACHE_STATUS_BITS-1:0]} ; + assign tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0] = (miss_state == SCND_MISS) ? 
tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags & ~exu_flush_final}}); + assign uncacheable_miss_scnd_in = sel_hold_imb_scnd ? uncacheable_miss_scnd_ff : ifc_fetch_uncacheable_bf ; - rvdff_fpga #(1) unc_miss_scnd_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din (uncacheable_miss_scnd_in), .dout(uncacheable_miss_scnd_ff)); - rvdffpcie #(31) imb_f_scnd_ff (.*, .en(fetch_bf_f_c1_clken), .din ({imb_scnd_in[31:1]}), .dout({imb_scnd_ff[31:1]})); - rvdff_fpga #(pt.ICACHE_STATUS_BITS) mb_rep_wayf2_scnd_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0]})); - rvdff_fpga #(pt.ICACHE_NUM_WAYS) mb_tagv_scnd_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0]})); + rvdff_fpga #(1) unc_miss_scnd_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din(uncacheable_miss_scnd_in), + .dout(uncacheable_miss_scnd_ff) + ); + rvdffpcie #(31) imb_f_scnd_ff ( + .*, + .en (fetch_bf_f_c1_clken), + .din ({imb_scnd_in[31:1]}), + .dout({imb_scnd_ff[31:1]}) + ); + rvdff_fpga #(pt.ICACHE_STATUS_BITS) mb_rep_wayf2_scnd_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din({way_status_mb_scnd_in[pt.ICACHE_STATUS_BITS-1:0]}), + .dout({way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0]}) + ); + rvdff_fpga #(pt.ICACHE_NUM_WAYS) mb_tagv_scnd_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din({tagv_mb_scnd_in[pt.ICACHE_NUM_WAYS-1:0]}), + .dout({tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0]}) + ); - assign ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] ; - assign 
ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = ifu_bus_rid_ff[pt.ICACHE_BEAT_BITS-1:0] & {pt.ICACHE_BEAT_BITS{bus_ifu_wr_en_ff}}; - // NOTE: Cacheline size is 16 bytes in this example. - // Tag Index Bank Offset - // [31:16] [15:5] [4] [3:0] + assign ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = bus_rd_addr_count[pt.ICACHE_BEAT_BITS-1:0] ; + assign ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] = ifu_bus_rid_ff[pt.ICACHE_BEAT_BITS-1:0] & {pt.ICACHE_BEAT_BITS{bus_ifu_wr_en_ff}}; + // NOTE: Cacheline size is 16 bytes in this example. + // Tag Index Bank Offset + // [31:16] [15:5] [4] [3:0] - assign fetch_req_icache_f = ifc_fetch_req_f & ~ifc_iccm_access_f & ~ifc_region_acc_fault_final_f; - assign fetch_req_iccm_f = ifc_fetch_req_f & ifc_iccm_access_f; + assign fetch_req_icache_f = ifc_fetch_req_f & ~ifc_iccm_access_f & ~ifc_region_acc_fault_final_f; + assign fetch_req_iccm_f = ifc_fetch_req_f & ifc_iccm_access_f; - assign ic_iccm_hit_f = fetch_req_iccm_f & (~miss_pending | (miss_state==HIT_U_MISS) | (miss_state==STREAM)); - assign ic_byp_hit_f = (crit_byp_hit_f | stream_hit_f) & fetch_req_icache_f & miss_pending ; - assign ic_act_hit_f = (|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) & fetch_req_icache_f & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff; - assign ic_act_miss_f = (((~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & ~miss_pending) | scnd_miss_req) & ~ifc_region_acc_fault_final_f; - assign ic_miss_under_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) & + assign ic_iccm_hit_f = fetch_req_iccm_f & (~miss_pending | (miss_state==HIT_U_MISS) | (miss_state==STREAM)); + assign ic_byp_hit_f = (crit_byp_hit_f | stream_hit_f) & fetch_req_icache_f & miss_pending; + assign ic_act_hit_f = (|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) & fetch_req_icache_f & ~reset_all_tags & (~miss_pending | (miss_state==HIT_U_MISS)) & ~sel_mb_addr_ff; + assign 
ic_act_miss_f = (((~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & ~miss_pending) | scnd_miss_req) & ~ifc_region_acc_fault_final_f; + assign ic_miss_under_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) & (imb_ff[31:pt.ICACHE_TAG_INDEX_LO] != ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO]) & ~uncacheable_miss_ff & ~sel_mb_addr_ff & ~ifc_region_acc_fault_final_f; - assign ic_ignore_2nd_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) & + assign ic_ignore_2nd_miss_f = (~(|ic_rd_hit[pt.ICACHE_NUM_WAYS-1:0]) | reset_all_tags) & fetch_req_icache_f & (miss_state == HIT_U_MISS) & ((imb_ff[31:pt.ICACHE_TAG_INDEX_LO] == ifu_fetch_addr_int_f[31:pt.ICACHE_TAG_INDEX_LO]) | uncacheable_miss_ff) ; - assign ic_hit_f = ic_act_hit_f | ic_byp_hit_f | ic_iccm_hit_f | (ifc_region_acc_fault_final_f & ifc_fetch_req_f); + assign ic_hit_f = ic_act_hit_f | ic_byp_hit_f | ic_iccm_hit_f | (ifc_region_acc_fault_final_f & ifc_fetch_req_f); - assign uncacheable_miss_in = scnd_miss_req ? uncacheable_miss_scnd_ff : sel_hold_imb ? uncacheable_miss_ff : ifc_fetch_uncacheable_bf ; - assign imb_in[31:1] = scnd_miss_req ? imb_scnd_ff[31:1] : sel_hold_imb ? imb_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ; + assign uncacheable_miss_in = scnd_miss_req ? uncacheable_miss_scnd_ff : sel_hold_imb ? uncacheable_miss_ff : ifc_fetch_uncacheable_bf ; + assign imb_in[31:1] = scnd_miss_req ? imb_scnd_ff[31:1] : sel_hold_imb ? imb_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ; - assign imb_scnd_in[31:1] = sel_hold_imb_scnd ? imb_scnd_ff[31:1] : {ifc_fetch_addr_bf[31:1]} ; + assign imb_scnd_in[31:1] = sel_hold_imb_scnd ? 
imb_scnd_ff[31:1] : {ifc_fetch_addr_bf[31:1]}; - assign scnd_miss_index_match = (imb_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == imb_scnd_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]) & scnd_miss_req & ~ifu_wr_cumulative_err_data; - assign way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0] = (scnd_miss_req & ~scnd_miss_index_match) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : + assign scnd_miss_index_match = (imb_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO] == imb_scnd_ff[pt.ICACHE_INDEX_HI:pt.ICACHE_TAG_INDEX_LO]) & scnd_miss_req & ~ifu_wr_cumulative_err_data; + assign way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0] = (scnd_miss_req & ~scnd_miss_index_match) ? way_status_mb_scnd_ff[pt.ICACHE_STATUS_BITS-1:0] : (scnd_miss_req & scnd_miss_index_match) ? way_status_rep_new[pt.ICACHE_STATUS_BITS-1:0] : miss_pending ? way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0] : {way_status[pt.ICACHE_STATUS_BITS-1:0]} ; - assign tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0] = scnd_miss_req ? (tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] | ({pt.ICACHE_NUM_WAYS {scnd_miss_index_match}} & replace_way_mb_any[pt.ICACHE_NUM_WAYS-1:0])) : + assign tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0] = scnd_miss_req ? (tagv_mb_scnd_ff[pt.ICACHE_NUM_WAYS-1:0] | ({pt.ICACHE_NUM_WAYS {scnd_miss_index_match}} & replace_way_mb_any[pt.ICACHE_NUM_WAYS-1:0])) : miss_pending ? 
tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0] : ({ic_tag_valid[pt.ICACHE_NUM_WAYS-1:0]} & {pt.ICACHE_NUM_WAYS{~reset_all_tags & ~exu_flush_final}}) ; - assign reset_ic_in = miss_pending & ~scnd_miss_req_q & (reset_all_tags | reset_ic_ff) ; + assign reset_ic_in = miss_pending & ~scnd_miss_req_q & (reset_all_tags | reset_ic_ff); - rvdffpcie #(31) ifu_fetch_addr_f_ff (.*, .en(fetch_bf_f_c1_clken), .din ({ifc_fetch_addr_bf[31:1]}), .dout({ifu_fetch_addr_int_f[31:1]})); + rvdffpcie #(31) ifu_fetch_addr_f_ff ( + .*, + .en (fetch_bf_f_c1_clken), + .din ({ifc_fetch_addr_bf[31:1]}), + .dout({ifu_fetch_addr_int_f[31:1]}) + ); - assign vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] = ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1] ; + assign vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] = ifu_fetch_addr_int_f[pt.ICACHE_BEAT_ADDR_HI:1]; - rvdffpcie #(31) imb_f_ff (.*, .en(fetch_bf_f_c1_clken), .din (imb_in[31:1]), .dout(imb_ff[31:1])); - rvdff_fpga #(1) unc_miss_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ( uncacheable_miss_in), .dout( uncacheable_miss_ff)); + rvdffpcie #(31) imb_f_ff ( + .*, + .en (fetch_bf_f_c1_clken), + .din (imb_in[31:1]), + .dout(imb_ff[31:1]) + ); + rvdff_fpga #(1) unc_miss_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din(uncacheable_miss_in), + .dout(uncacheable_miss_ff) + ); - assign miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1] = (~miss_pending ) ? imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] : + assign miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1] = (~miss_pending ) ? imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] : ( scnd_miss_req_q ) ? 
imb_scnd_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] : miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] ; - rvdfflie #(.WIDTH(31-pt.ICACHE_BEAT_ADDR_HI),.LEFT(31-pt.ICACHE_BEAT_ADDR_HI-8)) miss_f_ff (.*, .en(bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), .din ({miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1]}), .dout({miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1]})); + rvdfflie #( + .WIDTH(31 - pt.ICACHE_BEAT_ADDR_HI), + .LEFT (31 - pt.ICACHE_BEAT_ADDR_HI - 8) + ) miss_f_ff ( + .*, + .en (bus_ifu_bus_clk_en | ic_act_miss_f | dec_tlu_force_halt), + .din ({miss_addr_in[31:pt.ICACHE_BEAT_ADDR_HI+1]}), + .dout({miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1]}) + ); - rvdff_fpga #(pt.ICACHE_STATUS_BITS) mb_rep_wayf2_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0]}), .dout({way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0]})); - rvdff_fpga #(pt.ICACHE_NUM_WAYS) mb_tagv_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din ({tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]}), .dout({tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0]})); + rvdff_fpga #(pt.ICACHE_STATUS_BITS) mb_rep_wayf2_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din({way_status_mb_in[pt.ICACHE_STATUS_BITS-1:0]}), + .dout({way_status_mb_ff[pt.ICACHE_STATUS_BITS-1:0]}) + ); + rvdff_fpga #(pt.ICACHE_NUM_WAYS) mb_tagv_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din({tagv_mb_in[pt.ICACHE_NUM_WAYS-1:0]}), + .dout({tagv_mb_ff[pt.ICACHE_NUM_WAYS-1:0]}) + ); - assign ifc_fetch_req_qual_bf = ifc_fetch_req_bf & ~((miss_state == CRIT_WRD_RDY) & flush_final_f) & ~stream_miss_f ;// & ~exu_flush_final ; + assign ifc_fetch_req_qual_bf = ifc_fetch_req_bf & ~((miss_state == CRIT_WRD_RDY) & flush_final_f) & ~stream_miss_f ;// & ~exu_flush_final ; - assign ifc_fetch_req_f = ifc_fetch_req_f_raw & ~exu_flush_final ; + assign ifc_fetch_req_f = ifc_fetch_req_f_raw & ~exu_flush_final; - rvdff_fpga #(1) 
ifu_iccm_acc_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din(ifc_iccm_access_bf), .dout(ifc_iccm_access_f)); - rvdff_fpga #(1) ifu_iccm_reg_acc_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din(ifc_region_acc_fault_final_bf), .dout(ifc_region_acc_fault_final_f)); - rvdff_fpga #(1) rgn_acc_ff (.*, .clk(fetch_bf_f_c1_clk), .clken(fetch_bf_f_c1_clken), .rawclk(clk), .din(ifc_region_acc_fault_bf), .dout(ifc_region_acc_fault_f)); + rvdff_fpga #(1) ifu_iccm_acc_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din(ifc_iccm_access_bf), + .dout(ifc_iccm_access_f) + ); + rvdff_fpga #(1) ifu_iccm_reg_acc_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din(ifc_region_acc_fault_final_bf), + .dout(ifc_region_acc_fault_final_f) + ); + rvdff_fpga #(1) rgn_acc_ff ( + .*, + .clk(fetch_bf_f_c1_clk), + .clken(fetch_bf_f_c1_clken), + .rawclk(clk), + .din(ifc_region_acc_fault_bf), + .dout(ifc_region_acc_fault_f) + ); - assign ifu_ic_req_addr_f[31:3] = {miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] }; - assign ifu_ic_mb_empty = (((miss_state == HIT_U_MISS) | (miss_state == STREAM)) & ~(bus_ifu_wr_en_ff & last_beat)) | ~miss_pending ; - assign ifu_miss_state_idle = (miss_state == IDLE) ; + assign ifu_ic_req_addr_f[31:3] = { + miss_addr[31:pt.ICACHE_BEAT_ADDR_HI+1], ic_req_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] + }; + assign ifu_ic_mb_empty = (((miss_state == HIT_U_MISS) | (miss_state == STREAM)) & ~(bus_ifu_wr_en_ff & last_beat)) | ~miss_pending ; + assign ifu_miss_state_idle = (miss_state == IDLE); - assign sel_mb_addr = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff) | reset_tag_valid_for_miss) ; - assign ifu_ic_rw_int_addr[31:1] = ({31{ sel_mb_addr}} & {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]}) | + assign sel_mb_addr = 
((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff) | reset_tag_valid_for_miss) ; + assign ifu_ic_rw_int_addr[31:1] = ({31{ sel_mb_addr}} & {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]}) | ({31{~sel_mb_addr}} & ifc_fetch_addr_bf[31:1] ) ; - assign sel_mb_status_addr = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff & last_beat & bus_ifu_wr_en_ff_q) | reset_tag_valid_for_miss) ; - assign ifu_status_wr_addr[31:1] = ({31{ sel_mb_status_addr}} & {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]}) | + assign sel_mb_status_addr = ((miss_pending & write_ic_16_bytes & ~uncacheable_miss_ff & last_beat & bus_ifu_wr_en_ff_q) | reset_tag_valid_for_miss) ; + assign ifu_status_wr_addr[31:1] = ({31{ sel_mb_status_addr}} & {imb_ff[31:pt.ICACHE_BEAT_ADDR_HI+1] , ic_wr_addr_bits_hi_3[pt.ICACHE_BEAT_ADDR_HI:3] , imb_ff[2:1]}) | ({31{~sel_mb_status_addr}} & ifu_fetch_addr_int_f[31:1] ) ; - assign ic_rw_addr[31:1] = ifu_ic_rw_int_addr[31:1] ; + assign ic_rw_addr[31:1] = ifu_ic_rw_int_addr[31:1]; -if (pt.ICACHE_ECC == 1) begin: icache_ecc_1 - logic [6:0] ic_wr_ecc; - logic [6:0] ic_miss_buff_ecc; - logic [141:0] ic_wr_16bytes_data ; - logic [70:0] ifu_ic_debug_rd_data_in ; + if (pt.ICACHE_ECC == 1) begin : icache_ecc_1 + logic [ 6:0] ic_wr_ecc; + logic [ 6:0] ic_miss_buff_ecc; + logic [141:0] ic_wr_16bytes_data; + logic [ 70:0] ifu_ic_debug_rd_data_in; - rvecc_encode_64 ic_ecc_encode_64_bus ( - .din (ifu_bus_rdata_ff[63:0]), - .ecc_out(ic_wr_ecc[6:0])); - rvecc_encode_64 ic_ecc_encode_64_buff ( - .din (ic_miss_buff_half[63:0]), - .ecc_out(ic_miss_buff_ecc[6:0])); + rvecc_encode_64 ic_ecc_encode_64_bus ( + .din (ifu_bus_rdata_ff[63:0]), + .ecc_out(ic_wr_ecc[6:0]) + ); + rvecc_encode_64 ic_ecc_encode_64_buff ( + .din (ic_miss_buff_half[63:0]), + .ecc_out(ic_miss_buff_ecc[6:0]) + ); - for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_wr_data_loop - assign 
ic_wr_data[i][70:0] = ic_wr_16bytes_data[((71*i)+70): (71*i)]; - end - - - assign ic_debug_wr_data[70:0] = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ; - assign ic_error_start = ((|ic_eccerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err; - - - - assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {2'b0,ictag_debug_rd_data[25:21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}}, way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} : - ic_debug_rd_data[70:0]; - - rvdffe #(71) ifu_debug_data_ff (.*, - .en (debug_data_clken), - .din ({ - ifu_ic_debug_rd_data_in[70:0] - }), - .dout({ - ifu_ic_debug_rd_data[70:0] - }) - ); - - assign ic_wr_16bytes_data[141:0] = ifu_bus_rid_ff[0] ? {ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] , ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] } : - {ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] , ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] } ; - - -end -else begin : icache_parity_1 - logic [3:0] ic_wr_parity; - logic [3:0] ic_miss_buff_parity; - logic [135:0] ic_wr_16bytes_data ; - logic [70:0] ifu_ic_debug_rd_data_in ; - for (genvar i=0 ; i < 4 ; i++) begin : DATA_PGEN - rveven_paritygen #(16) par_bus (.data_in (ifu_bus_rdata_ff[((16*i)+15):(16*i)]), - .parity_out(ic_wr_parity[i])); - rveven_paritygen #(16) par_buff (.data_in (ic_miss_buff_half[((16*i)+15):(16*i)]), - .parity_out(ic_miss_buff_parity[i])); + for (genvar i = 0; i < pt.ICACHE_BANKS_WAY; i++) begin : ic_wr_data_loop + assign ic_wr_data[i][70:0] = ic_wr_16bytes_data[((71*i)+70):(71*i)]; end - for (genvar i=0; i < pt.ICACHE_BANKS_WAY ; i++) begin : ic_wr_data_loop - assign ic_wr_data[i][70:0] = {3'b0, ic_wr_16bytes_data[((68*i)+67): (68*i)]}; - end + assign ic_debug_wr_data[70:0] = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]}; + assign ic_error_start = ((|ic_eccerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err; + + + + assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? 
{2'b0,ictag_debug_rd_data[25:21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}}, way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} : + ic_debug_rd_data[70:0]; + + rvdffe #(71) ifu_debug_data_ff ( + .*, + .en (debug_data_clken), + .din ({ifu_ic_debug_rd_data_in[70:0]}), + .dout({ifu_ic_debug_rd_data[70:0]}) + ); + + assign ic_wr_16bytes_data[141:0] = ifu_bus_rid_ff[0] ? {ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] , ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] } : + {ic_miss_buff_ecc[6:0] , ic_miss_buff_half[63:0] , ic_wr_ecc[6:0] , ifu_bus_rdata_ff[63:0] } ; + + + end else begin : icache_parity_1 + logic [ 3:0] ic_wr_parity; + logic [ 3:0] ic_miss_buff_parity; + logic [135:0] ic_wr_16bytes_data; + logic [ 70:0] ifu_ic_debug_rd_data_in; + for (genvar i = 0; i < 4; i++) begin : DATA_PGEN + rveven_paritygen #(16) par_bus ( + .data_in (ifu_bus_rdata_ff[((16*i)+15):(16*i)]), + .parity_out(ic_wr_parity[i]) + ); + rveven_paritygen #(16) par_buff ( + .data_in (ic_miss_buff_half[((16*i)+15):(16*i)]), + .parity_out(ic_miss_buff_parity[i]) + ); + end + + + for (genvar i = 0; i < pt.ICACHE_BANKS_WAY; i++) begin : ic_wr_data_loop + assign ic_wr_data[i][70:0] = {3'b0, ic_wr_16bytes_data[((68*i)+67):(68*i)]}; + end - assign ic_debug_wr_data[70:0] = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]} ; - assign ic_error_start = ((|ic_parerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err; + assign ic_debug_wr_data[70:0] = {dec_tlu_ic_diag_pkt.icache_wrdata[70:0]}; + assign ic_error_start = ((|ic_parerr[pt.ICACHE_BANKS_WAY-1:0]) & ic_act_hit_f) | ic_rd_parity_final_err; - assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? {6'b0,ictag_debug_rd_data[21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}},way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} : + assign ifu_ic_debug_rd_data_in[70:0] = ic_debug_ict_array_sel_ff ? 
{6'b0,ictag_debug_rd_data[21],32'b0,ictag_debug_rd_data[20:0],{7-pt.ICACHE_STATUS_BITS{1'b0}},way_status[pt.ICACHE_STATUS_BITS-1:0],3'b0,ic_debug_tag_val_rd_out} : ic_debug_rd_data[70:0] ; - rvdffe #(71) ifu_debug_data_ff (.*, - .en (debug_data_clken), - .din ({ - ifu_ic_debug_rd_data_in[70:0] - }), - .dout({ - ifu_ic_debug_rd_data[70:0] - }) - ); + rvdffe #(71) ifu_debug_data_ff ( + .*, + .en (debug_data_clken), + .din ({ifu_ic_debug_rd_data_in[70:0]}), + .dout({ifu_ic_debug_rd_data[70:0]}) + ); - assign ic_wr_16bytes_data[135:0] = ifu_bus_rid_ff[0] ? {ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] , ic_miss_buff_parity[3:0] , ic_miss_buff_half[63:0] } : + assign ic_wr_16bytes_data[135:0] = ifu_bus_rid_ff[0] ? {ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] , ic_miss_buff_parity[3:0] , ic_miss_buff_half[63:0] } : {ic_miss_buff_parity[3:0] , ic_miss_buff_half[63:0] , ic_wr_parity[3:0] , ifu_bus_rdata_ff[63:0] } ; -end + end - assign ifu_wr_data_comb_err = bus_ifu_wr_data_error_ff ; - assign ifu_wr_cumulative_err = (ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff) & ~reset_beat_cnt; - assign ifu_wr_cumulative_err_data = ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff ; + assign ifu_wr_data_comb_err = bus_ifu_wr_data_error_ff; + assign ifu_wr_cumulative_err = (ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff) & ~reset_beat_cnt; + assign ifu_wr_cumulative_err_data = ifu_wr_data_comb_err | ifu_wr_data_comb_err_ff; - assign sel_byp_data = (ic_crit_wd_rdy | (miss_state == STREAM) | (miss_state == CRIT_BYP_OK)); + assign sel_byp_data = (ic_crit_wd_rdy | (miss_state == STREAM) | (miss_state == CRIT_BYP_OK)); assign sel_ic_data = ~(ic_crit_wd_rdy | (miss_state == STREAM) | (miss_state == CRIT_BYP_OK) | (miss_state == MISS_WAIT)) & ~fetch_req_iccm_f & ~ifc_region_acc_fault_final_f; - if (pt.ICCM_ICACHE==1) begin: iccm_icache - assign sel_iccm_data = fetch_req_iccm_f ; + if (pt.ICCM_ICACHE == 1) begin : iccm_icache + assign sel_iccm_data = fetch_req_iccm_f; - assign ic_final_data[63:0] = 
({64{sel_byp_data | sel_iccm_data | sel_ic_data}} & {ic_rd_data[63:0]} ) ; + assign ic_final_data[63:0] = ({64{sel_byp_data | sel_iccm_data | sel_ic_data}} & {ic_rd_data[63:0]} ) ; - assign ic_premux_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) | + assign ic_premux_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) | ({64{sel_iccm_data}} & {iccm_rd_data[63:0]}); - assign ic_sel_premux_data = sel_iccm_data | sel_byp_data ; - end + assign ic_sel_premux_data = sel_iccm_data | sel_byp_data; + end -if (pt.ICCM_ONLY == 1 ) begin: iccm_only - assign sel_iccm_data = fetch_req_iccm_f ; - assign ic_final_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) | + if (pt.ICCM_ONLY == 1) begin : iccm_only + assign sel_iccm_data = fetch_req_iccm_f; + assign ic_final_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) | ({64{sel_iccm_data}} & {iccm_rd_data[63:0]}); - assign ic_premux_data = '0 ; - assign ic_sel_premux_data = '0 ; -end + assign ic_premux_data = '0; + assign ic_sel_premux_data = '0; + end -if (pt.ICACHE_ONLY == 1 ) begin: icache_only - assign ic_final_data[63:0] = ({64{sel_byp_data | sel_ic_data}} & {ic_rd_data[63:0]} ) ; - assign ic_premux_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) ; - assign ic_sel_premux_data = sel_byp_data ; -end + if (pt.ICACHE_ONLY == 1) begin : icache_only + assign ic_final_data[63:0] = ({64{sel_byp_data | sel_ic_data}} & {ic_rd_data[63:0]}); + assign ic_premux_data[63:0] = ({64{sel_byp_data}} & {ic_byp_data_only_new[63:0]}); + assign ic_sel_premux_data = sel_byp_data; + end -if (pt.NO_ICCM_NO_ICACHE == 1 ) begin: no_iccm_no_icache - assign ic_final_data[63:0] = ({64{sel_byp_data }} & {ic_byp_data_only_new[63:0]} ) ; - assign ic_premux_data = 0 ; - assign ic_sel_premux_data = '0 ; -end + if (pt.NO_ICCM_NO_ICACHE == 1) begin : no_iccm_no_icache + assign ic_final_data[63:0] = ({64{sel_byp_data}} & {ic_byp_data_only_new[63:0]}); + assign ic_premux_data = 0; 
+ assign ic_sel_premux_data = '0; + end - assign ifc_bus_acc_fault_f[1:0] = {2{ic_byp_hit_f}} & ifu_byp_data_err_f[1:0] ; - assign ic_data_f[31:0] = ic_final_data[31:0]; + assign ifc_bus_acc_fault_f[1:0] = {2{ic_byp_hit_f}} & ifu_byp_data_err_f[1:0]; + assign ic_data_f[31:0] = ic_final_data[31:0]; -assign fetch_req_f_qual = ic_hit_f & ~exu_flush_final; -assign ic_access_fault_f[1:0] = ({2{ifc_region_acc_fault_final_f}} | ifc_bus_acc_fault_f[1:0]) & {2{~exu_flush_final}}; -assign ic_access_fault_type_f[1:0] = |iccm_rd_ecc_double_err ? 2'b01 : + assign fetch_req_f_qual = ic_hit_f & ~exu_flush_final; + assign ic_access_fault_f[1:0] = ({2{ifc_region_acc_fault_final_f}} | ifc_bus_acc_fault_f[1:0]) & {2{~exu_flush_final}}; + assign ic_access_fault_type_f[1:0] = |iccm_rd_ecc_double_err ? 2'b01 : ifc_region_acc_fault_f ? 2'b10 : ifc_region_acc_fault_memory_f ? 2'b11 : 2'b00 ; // right justified -assign ic_fetch_val_f[1] = fetch_req_f_qual & ifu_bp_inst_mask_f & ~(vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] == {pt.ICACHE_BEAT_ADDR_HI{1'b1}}) & (err_stop_state != ERR_FETCH2); -assign ic_fetch_val_f[0] = fetch_req_f_qual ; -assign two_byte_instr = (ic_data_f[1:0] != 2'b11 ) ; + assign ic_fetch_val_f[1] = fetch_req_f_qual & ifu_bp_inst_mask_f & ~(vaddr_f[pt.ICACHE_BEAT_ADDR_HI:1] == {pt.ICACHE_BEAT_ADDR_HI{1'b1}}) & (err_stop_state != ERR_FETCH2); + assign ic_fetch_val_f[0] = fetch_req_f_qual; + assign two_byte_instr = (ic_data_f[1:0] != 2'b11); -///////////////////////////////////////////////////////////////////////////////////// -// Create full buffer... // -///////////////////////////////////////////////////////////////////////////////////// - logic [63:0] ic_miss_buff_data_in; - assign ic_miss_buff_data_in[63:0] = ifu_bus_rsp_rdata[63:0]; + ///////////////////////////////////////////////////////////////////////////////////// + // Create full buffer... 
// + ///////////////////////////////////////////////////////////////////////////////////// + logic [63:0] ic_miss_buff_data_in; + assign ic_miss_buff_data_in[63:0] = ifu_bus_rsp_rdata[63:0]; - for (genvar i=0; i=1 & clk_count<=3) rst_l <= 1'b0; - else rst_l <= 1'b1; + always @(posedge clk) begin + clk_count = clk_count + 1; + if (clk_count >= 1 & clk_count <= 3) rst_l <= 1'b0; + else rst_l <= 1'b1; - if (clk_count > 3) begin + if (clk_count > 3) begin - compressed_din[15:0] <= compressed[clk_count-3]; // c.mv - expected_val[31:0] <= expected[clk_count-3]; + compressed_din[15:0] <= compressed[clk_count-3]; // c.mv + expected_val[31:0] <= expected[clk_count-3]; - end + end - if (clk_count == 65000) begin - $dumpoff; - $finish; - end - end // always @ (posedge clk) + if (clk_count == 65000) begin + $dumpoff; + $finish; + end + end // always @ (posedge clk) - always @(negedge clk) begin - if (clk_count > 3 & error) begin - $display("clock: %d compressed %h error actual %h expected %h",clk_count,compressed_din,actual,expected_val); - end - end + always @(negedge clk) begin + if (clk_count > 3 & error) begin + $display("clock: %d compressed %h error actual %h expected %h", clk_count, compressed_din, + actual, expected_val); + end + end - el2_ifu_compress_ctl align (.*,.din(compressed_din[15:0]),.dout(actual[31:0])); + el2_ifu_compress_ctl align ( + .*, + .din (compressed_din[15:0]), + .dout(actual[31:0]) + ); - assign error = actual[31:0] != expected_val[31:0]; + assign error = actual[31:0] != expected_val[31:0]; -endmodule // el2_ifu_tb_memread +endmodule // el2_ifu_tb_memread diff --git a/Flow/design/include/el2_def.sv b/Flow/design/include/el2_def.sv index cae80f4..ad239d1 100644 --- a/Flow/design/include/el2_def.sv +++ b/Flow/design/include/el2_def.sv @@ -3,405 +3,405 @@ //`define EL2_DEF_SV package el2_pkg; -typedef struct packed { - logic trace_rv_i_valid_ip; - logic [31:0] trace_rv_i_insn_ip; - logic [31:0] trace_rv_i_address_ip; - logic trace_rv_i_exception_ip; - 
logic [4:0] trace_rv_i_ecause_ip; - logic trace_rv_i_interrupt_ip; - logic [31:0] trace_rv_i_tval_ip; - } el2_trace_pkt_t; + typedef struct packed { + logic trace_rv_i_valid_ip; + logic [31:0] trace_rv_i_insn_ip; + logic [31:0] trace_rv_i_address_ip; + logic trace_rv_i_exception_ip; + logic [4:0] trace_rv_i_ecause_ip; + logic trace_rv_i_interrupt_ip; + logic [31:0] trace_rv_i_tval_ip; + } el2_trace_pkt_t; -typedef enum logic [3:0] { - NULL = 4'b0000, - MUL = 4'b0001, - LOAD = 4'b0010, - STORE = 4'b0011, - ALU = 4'b0100, - CSRREAD = 4'b0101, - CSRWRITE = 4'b0110, - CSRRW = 4'b0111, - EBREAK = 4'b1000, - ECALL = 4'b1001, - FENCE = 4'b1010, - FENCEI = 4'b1011, - MRET = 4'b1100, - CONDBR = 4'b1101, - JAL = 4'b1110, - BITMANIPU = 4'b1111 - } el2_inst_pkt_t; + typedef enum logic [3:0] { + NULL = 4'b0000, + MUL = 4'b0001, + LOAD = 4'b0010, + STORE = 4'b0011, + ALU = 4'b0100, + CSRREAD = 4'b0101, + CSRWRITE = 4'b0110, + CSRRW = 4'b0111, + EBREAK = 4'b1000, + ECALL = 4'b1001, + FENCE = 4'b1010, + FENCEI = 4'b1011, + MRET = 4'b1100, + CONDBR = 4'b1101, + JAL = 4'b1110, + BITMANIPU = 4'b1111 + } el2_inst_pkt_t; -typedef struct packed { - logic valid; - logic wb; - logic [2:0] tag; - logic [4:0] rd; - } el2_load_cam_pkt_t; + typedef struct packed { + logic valid; + logic wb; + logic [2:0] tag; + logic [4:0] rd; + } el2_load_cam_pkt_t; -typedef struct packed { - logic pc0_call; - logic pc0_ret; - logic pc0_pc4; - } el2_rets_pkt_t; -typedef struct packed { - logic valid; - logic [11:0] toffset; - logic [1:0] hist; - logic br_error; - logic br_start_error; - logic bank; - logic [31:1] prett; // predicted ret target - logic way; - logic ret; - } el2_br_pkt_t; + typedef struct packed { + logic pc0_call; + logic pc0_ret; + logic pc0_pc4; + } el2_rets_pkt_t; + typedef struct packed { + logic valid; + logic [11:0] toffset; + logic [1:0] hist; + logic br_error; + logic br_start_error; + logic bank; + logic [31:1] prett; // predicted ret target + logic way; + logic ret; + } 
el2_br_pkt_t; -typedef struct packed { - logic valid; - logic [1:0] hist; - logic br_error; - logic br_start_error; - logic way; - logic middle; - } el2_br_tlu_pkt_t; + typedef struct packed { + logic valid; + logic [1:0] hist; + logic br_error; + logic br_start_error; + logic way; + logic middle; + } el2_br_tlu_pkt_t; -typedef struct packed { - logic misp; - logic ataken; - logic boffset; - logic pc4; - logic [1:0] hist; - logic [11:0] toffset; - logic valid; - logic br_error; - logic br_start_error; - logic pcall; - logic pja; - logic way; - logic pret; - // for power use the pret bit to clock the prett field - logic [31:1] prett; - } el2_predict_pkt_t; + typedef struct packed { + logic misp; + logic ataken; + logic boffset; + logic pc4; + logic [1:0] hist; + logic [11:0] toffset; + logic valid; + logic br_error; + logic br_start_error; + logic pcall; + logic pja; + logic way; + logic pret; + // for power use the pret bit to clock the prett field + logic [31:1] prett; + } el2_predict_pkt_t; -typedef struct packed { - // unlikely to change - logic icaf; - logic icaf_second; - logic [1:0] icaf_type; - logic fence_i; - logic [3:0] i0trigger; - logic pmu_i0_br_unpred; // pmu - logic pmu_divide; - // likely to change - logic legal; - logic pmu_lsu_misaligned; - el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type - } el2_trap_pkt_t; + typedef struct packed { + // unlikely to change + logic icaf; + logic icaf_second; + logic [1:0] icaf_type; + logic fence_i; + logic [3:0] i0trigger; + logic pmu_i0_br_unpred; // pmu + logic pmu_divide; + // likely to change + logic legal; + logic pmu_lsu_misaligned; + el2_inst_pkt_t pmu_i0_itype; // pmu - instruction type + } el2_trap_pkt_t; -typedef struct packed { - // unlikely to change - logic i0div; - logic csrwen; - logic csrwonly; - logic [11:0] csrwaddr; - // likely to change - logic [4:0] i0rd; - logic i0load; - logic i0store; - logic i0v; - logic i0valid; - } el2_dest_pkt_t; + typedef struct packed { + // unlikely to change 
+ logic i0div; + logic csrwen; + logic csrwonly; + logic [11:0] csrwaddr; + // likely to change + logic [4:0] i0rd; + logic i0load; + logic i0store; + logic i0v; + logic i0valid; + } el2_dest_pkt_t; -typedef struct packed { - logic mul; - logic load; - logic alu; - } el2_class_pkt_t; + typedef struct packed { + logic mul; + logic load; + logic alu; + } el2_class_pkt_t; -typedef struct packed { - logic [4:0] rs1; - logic [4:0] rs2; - logic [4:0] rd; - } el2_reg_pkt_t; + typedef struct packed { + logic [4:0] rs1; + logic [4:0] rs2; + logic [4:0] rd; + } el2_reg_pkt_t; -typedef struct packed { - logic clz; - logic ctz; - logic cpop; - logic sext_b; - logic sext_h; - logic min; - logic max; - logic pack; - logic packu; - logic packh; - logic rol; - logic ror; - logic grev; - logic gorc; - logic zbb; - logic bset; - logic bclr; - logic binv; - logic bext; - logic sh1add; - logic sh2add; - logic sh3add; - logic zba; - logic land; - logic lor; - logic lxor; - logic sll; - logic srl; - logic sra; - logic beq; - logic bne; - logic blt; - logic bge; - logic add; - logic sub; - logic slt; - logic unsign; - logic jal; - logic predict_t; - logic predict_nt; - logic csr_write; - logic csr_imm; - } el2_alu_pkt_t; + typedef struct packed { + logic clz; + logic ctz; + logic cpop; + logic sext_b; + logic sext_h; + logic min; + logic max; + logic pack; + logic packu; + logic packh; + logic rol; + logic ror; + logic grev; + logic gorc; + logic zbb; + logic bset; + logic bclr; + logic binv; + logic bext; + logic sh1add; + logic sh2add; + logic sh3add; + logic zba; + logic land; + logic lor; + logic lxor; + logic sll; + logic srl; + logic sra; + logic beq; + logic bne; + logic blt; + logic bge; + logic add; + logic sub; + logic slt; + logic unsign; + logic jal; + logic predict_t; + logic predict_nt; + logic csr_write; + logic csr_imm; + } el2_alu_pkt_t; -typedef struct packed { - logic fast_int; -/* verilator lint_off SYMRSVDWORD */ - logic stack; -/* verilator lint_on SYMRSVDWORD */ - 
logic by; - logic half; - logic word; - logic dword; // for dma - logic load; - logic store; - logic unsign; - logic dma; // dma pkt - logic store_data_bypass_d; - logic load_ldst_bypass_d; - logic store_data_bypass_m; - logic valid; - } el2_lsu_pkt_t; + typedef struct packed { + logic fast_int; + /* verilator lint_off SYMRSVDWORD */ + logic stack; + /* verilator lint_on SYMRSVDWORD */ + logic by; + logic half; + logic word; + logic dword; // for dma + logic load; + logic store; + logic unsign; + logic dma; // dma pkt + logic store_data_bypass_d; + logic load_ldst_bypass_d; + logic store_data_bypass_m; + logic valid; + } el2_lsu_pkt_t; -typedef struct packed { - logic inst_type; //0: Load, 1: Store - //logic dma_valid; - logic exc_type; //0: MisAligned, 1: Access Fault - logic [3:0] mscause; - logic [31:0] addr; - logic single_ecc_error; - logic exc_valid; - } el2_lsu_error_pkt_t; + typedef struct packed { + logic inst_type; //0: Load, 1: Store + //logic dma_valid; + logic exc_type; //0: MisAligned, 1: Access Fault + logic [3:0] mscause; + logic [31:0] addr; + logic single_ecc_error; + logic exc_valid; + } el2_lsu_error_pkt_t; -typedef struct packed { - logic clz; - logic ctz; - logic cpop; - logic sext_b; - logic sext_h; - logic min; - logic max; - logic pack; - logic packu; - logic packh; - logic rol; - logic ror; - logic grev; - logic gorc; - logic zbb; - logic bset; - logic bclr; - logic binv; - logic bext; - logic zbs; - logic bcompress; - logic bdecompress; - logic zbe; - logic clmul; - logic clmulh; - logic clmulr; - logic zbc; - logic shfl; - logic unshfl; - logic xperm_n; - logic xperm_b; - logic xperm_h; - logic zbp; - logic crc32_b; - logic crc32_h; - logic crc32_w; - logic crc32c_b; - logic crc32c_h; - logic crc32c_w; - logic zbr; - logic bfp; - logic zbf; - logic sh1add; - logic sh2add; - logic sh3add; - logic zba; - logic alu; - logic rs1; - logic rs2; - logic imm12; - logic rd; - logic shimm5; - logic imm20; - logic pc; - logic load; - logic store; - 
logic lsu; - logic add; - logic sub; - logic land; - logic lor; - logic lxor; - logic sll; - logic sra; - logic srl; - logic slt; - logic unsign; - logic condbr; - logic beq; - logic bne; - logic bge; - logic blt; - logic jal; - logic by; - logic half; - logic word; - logic csr_read; - logic csr_clr; - logic csr_set; - logic csr_write; - logic csr_imm; - logic presync; - logic postsync; - logic ebreak; - logic ecall; - logic mret; - logic mul; - logic rs1_sign; - logic rs2_sign; - logic low; - logic div; - logic rem; - logic fence; - logic fence_i; - logic pm_alu; - logic legal; - } el2_dec_pkt_t; + typedef struct packed { + logic clz; + logic ctz; + logic cpop; + logic sext_b; + logic sext_h; + logic min; + logic max; + logic pack; + logic packu; + logic packh; + logic rol; + logic ror; + logic grev; + logic gorc; + logic zbb; + logic bset; + logic bclr; + logic binv; + logic bext; + logic zbs; + logic bcompress; + logic bdecompress; + logic zbe; + logic clmul; + logic clmulh; + logic clmulr; + logic zbc; + logic shfl; + logic unshfl; + logic xperm_n; + logic xperm_b; + logic xperm_h; + logic zbp; + logic crc32_b; + logic crc32_h; + logic crc32_w; + logic crc32c_b; + logic crc32c_h; + logic crc32c_w; + logic zbr; + logic bfp; + logic zbf; + logic sh1add; + logic sh2add; + logic sh3add; + logic zba; + logic alu; + logic rs1; + logic rs2; + logic imm12; + logic rd; + logic shimm5; + logic imm20; + logic pc; + logic load; + logic store; + logic lsu; + logic add; + logic sub; + logic land; + logic lor; + logic lxor; + logic sll; + logic sra; + logic srl; + logic slt; + logic unsign; + logic condbr; + logic beq; + logic bne; + logic bge; + logic blt; + logic jal; + logic by; + logic half; + logic word; + logic csr_read; + logic csr_clr; + logic csr_set; + logic csr_write; + logic csr_imm; + logic presync; + logic postsync; + logic ebreak; + logic ecall; + logic mret; + logic mul; + logic rs1_sign; + logic rs2_sign; + logic low; + logic div; + logic rem; + logic fence; 
+ logic fence_i; + logic pm_alu; + logic legal; + } el2_dec_pkt_t; -typedef struct packed { - logic valid; - logic rs1_sign; - logic rs2_sign; - logic low; - logic bcompress; - logic bdecompress; - logic clmul; - logic clmulh; - logic clmulr; - logic grev; - logic gorc; - logic shfl; - logic unshfl; - logic crc32_b; - logic crc32_h; - logic crc32_w; - logic crc32c_b; - logic crc32c_h; - logic crc32c_w; - logic bfp; - logic xperm_n; - logic xperm_b; - logic xperm_h; - } el2_mul_pkt_t; + typedef struct packed { + logic valid; + logic rs1_sign; + logic rs2_sign; + logic low; + logic bcompress; + logic bdecompress; + logic clmul; + logic clmulh; + logic clmulr; + logic grev; + logic gorc; + logic shfl; + logic unshfl; + logic crc32_b; + logic crc32_h; + logic crc32_w; + logic crc32c_b; + logic crc32c_h; + logic crc32c_w; + logic bfp; + logic xperm_n; + logic xperm_b; + logic xperm_h; + } el2_mul_pkt_t; -typedef struct packed { - logic valid; - logic unsign; - logic rem; - } el2_div_pkt_t; + typedef struct packed { + logic valid; + logic unsign; + logic rem; + } el2_div_pkt_t; -typedef struct packed { - logic TEST1; - logic RME; - logic [3:0] RM; + typedef struct packed { + logic TEST1; + logic RME; + logic [3:0] RM; - logic LS; - logic DS; - logic SD; - logic TEST_RNM; - logic BC1; - logic BC2; - } el2_ccm_ext_in_pkt_t; + logic LS; + logic DS; + logic SD; + logic TEST_RNM; + logic BC1; + logic BC2; + } el2_ccm_ext_in_pkt_t; -typedef struct packed { - logic TEST1; - logic RME; - logic [3:0] RM; - logic LS; - logic DS; - logic SD; - logic TEST_RNM; - logic BC1; - logic BC2; - } el2_dccm_ext_in_pkt_t; + typedef struct packed { + logic TEST1; + logic RME; + logic [3:0] RM; + logic LS; + logic DS; + logic SD; + logic TEST_RNM; + logic BC1; + logic BC2; + } el2_dccm_ext_in_pkt_t; -typedef struct packed { - logic TEST1; - logic RME; - logic [3:0] RM; - logic LS; - logic DS; - logic SD; - logic TEST_RNM; - logic BC1; - logic BC2; - } el2_ic_data_ext_in_pkt_t; + typedef struct 
packed { + logic TEST1; + logic RME; + logic [3:0] RM; + logic LS; + logic DS; + logic SD; + logic TEST_RNM; + logic BC1; + logic BC2; + } el2_ic_data_ext_in_pkt_t; -typedef struct packed { - logic TEST1; - logic RME; - logic [3:0] RM; - logic LS; - logic DS; - logic SD; - logic TEST_RNM; - logic BC1; - logic BC2; - } el2_ic_tag_ext_in_pkt_t; + typedef struct packed { + logic TEST1; + logic RME; + logic [3:0] RM; + logic LS; + logic DS; + logic SD; + logic TEST_RNM; + logic BC1; + logic BC2; + } el2_ic_tag_ext_in_pkt_t; -typedef struct packed { - logic select; - logic match; - logic store; - logic load; - logic execute; - logic m; - logic [31:0] tdata2; - } el2_trigger_pkt_t; + typedef struct packed { + logic select; + logic match; + logic store; + logic load; + logic execute; + logic m; + logic [31:0] tdata2; + } el2_trigger_pkt_t; -typedef struct packed { - logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]} - logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3 - logic icache_rd_valid; - logic icache_wr_valid; - } el2_cache_debug_pkt_t; -//`endif + typedef struct packed { + logic [70:0] icache_wrdata; // {dicad1[1:0], dicad0h[31:0], dicad0[31:0]} + logic [16:0] icache_dicawics; // Arraysel:24, Waysel:21:20, Index:16:3 + logic icache_rd_valid; + logic icache_wr_valid; + } el2_cache_debug_pkt_t; + //`endif -endpackage // el2_pkg +endpackage // el2_pkg diff --git a/Flow/design/lib/ahb_to_axi4.sv b/Flow/design/lib/ahb_to_axi4.sv index 735244b..dee1faf 100644 --- a/Flow/design/lib/ahb_to_axi4.sv +++ b/Flow/design/lib/ahb_to_axi4.sv @@ -21,254 +21,412 @@ // //******************************************************************************** module ahb_to_axi4 -import el2_pkg::*; + import el2_pkg::*; #( - TAG = 1, - `include "el2_param.vh" + TAG = 1, + `include "el2_param.vh" ) // ,TAG = 1) ( - input clk, - input rst_l, - input scan_mode, - input bus_clk_en, - input clk_override, + input clk, + input rst_l, + input scan_mode, + 
input bus_clk_en, + input clk_override, - // AXI signals - // AXI Write Channels - output logic axi_awvalid, - input logic axi_awready, - output logic [TAG-1:0] axi_awid, - output logic [31:0] axi_awaddr, - output logic [2:0] axi_awsize, - output logic [2:0] axi_awprot, - output logic [7:0] axi_awlen, - output logic [1:0] axi_awburst, + // AXI signals + // AXI Write Channels + output logic axi_awvalid, + input logic axi_awready, + output logic [TAG-1:0] axi_awid, + output logic [ 31:0] axi_awaddr, + output logic [ 2:0] axi_awsize, + output logic [ 2:0] axi_awprot, + output logic [ 7:0] axi_awlen, + output logic [ 1:0] axi_awburst, - output logic axi_wvalid, - input logic axi_wready, - output logic [63:0] axi_wdata, - output logic [7:0] axi_wstrb, - output logic axi_wlast, + output logic axi_wvalid, + input logic axi_wready, + output logic [63:0] axi_wdata, + output logic [ 7:0] axi_wstrb, + output logic axi_wlast, - input logic axi_bvalid, - output logic axi_bready, - input logic [1:0] axi_bresp, - input logic [TAG-1:0] axi_bid, + input logic axi_bvalid, + output logic axi_bready, + input logic [ 1:0] axi_bresp, + input logic [TAG-1:0] axi_bid, - // AXI Read Channels - output logic axi_arvalid, - input logic axi_arready, - output logic [TAG-1:0] axi_arid, - output logic [31:0] axi_araddr, - output logic [2:0] axi_arsize, - output logic [2:0] axi_arprot, - output logic [7:0] axi_arlen, - output logic [1:0] axi_arburst, + // AXI Read Channels + output logic axi_arvalid, + input logic axi_arready, + output logic [TAG-1:0] axi_arid, + output logic [ 31:0] axi_araddr, + output logic [ 2:0] axi_arsize, + output logic [ 2:0] axi_arprot, + output logic [ 7:0] axi_arlen, + output logic [ 1:0] axi_arburst, - input logic axi_rvalid, - output logic axi_rready, - input logic [TAG-1:0] axi_rid, - input logic [63:0] axi_rdata, - input logic [1:0] axi_rresp, + input logic axi_rvalid, + output logic axi_rready, + input logic [TAG-1:0] axi_rid, + input logic [ 63:0] axi_rdata, + 
input logic [ 1:0] axi_rresp, - // AHB-Lite signals - input logic [31:0] ahb_haddr, // ahb bus address - input logic [2:0] ahb_hburst, // tied to 0 - input logic ahb_hmastlock, // tied to 0 - input logic [3:0] ahb_hprot, // tied to 4'b0011 - input logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) - input logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) - input logic ahb_hwrite, // ahb bus write - input logic [63:0] ahb_hwdata, // ahb bus write data - input logic ahb_hsel, // this slave was selected - input logic ahb_hreadyin, // previous hready was accepted or not + // AHB-Lite signals + input logic [31:0] ahb_haddr, // ahb bus address + input logic [ 2:0] ahb_hburst, // tied to 0 + input logic ahb_hmastlock, // tied to 0 + input logic [ 3:0] ahb_hprot, // tied to 4'b0011 + input logic [ 2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) + input logic [ 1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) + input logic ahb_hwrite, // ahb bus write + input logic [63:0] ahb_hwdata, // ahb bus write data + input logic ahb_hsel, // this slave was selected + input logic ahb_hreadyin, // previous hready was accepted or not - output logic [63:0] ahb_hrdata, // ahb bus read data - output logic ahb_hreadyout, // slave ready to accept transaction - output logic ahb_hresp // slave response (high indicates erro) + output logic [63:0] ahb_hrdata, // ahb bus read data + output logic ahb_hreadyout, // slave ready to accept transaction + output logic ahb_hresp // slave response (high indicates erro) ); - logic [7:0] master_wstrb; + logic [7:0] master_wstrb; - typedef enum logic [1:0] { IDLE = 2'b00, // Nothing in the buffer. 
No commands yet recieved - WR = 2'b01, // Write Command recieved - RD = 2'b10, // Read Command recieved - PEND = 2'b11 // Waiting on Read Data from core - } state_t; - state_t buf_state, buf_nxtstate; - logic buf_state_en; + typedef enum logic [1:0] { + IDLE = 2'b00, // Nothing in the buffer. No commands yet recieved + WR = 2'b01, // Write Command recieved + RD = 2'b10, // Read Command recieved + PEND = 2'b11 // Waiting on Read Data from core + } state_t; + state_t buf_state, buf_nxtstate; + logic buf_state_en; - // Buffer signals (one entry buffer) - logic buf_read_error_in, buf_read_error; - logic [63:0] buf_rdata; + // Buffer signals (one entry buffer) + logic buf_read_error_in, buf_read_error; + logic [63:0] buf_rdata; - logic ahb_hready; - logic ahb_hready_q; - logic [1:0] ahb_htrans_in, ahb_htrans_q; - logic [2:0] ahb_hsize_q; - logic ahb_hwrite_q; - logic [31:0] ahb_haddr_q; - logic [63:0] ahb_hwdata_q; - logic ahb_hresp_q; + logic ahb_hready; + logic ahb_hready_q; + logic [1:0] ahb_htrans_in, ahb_htrans_q; + logic [ 2:0] ahb_hsize_q; + logic ahb_hwrite_q; + logic [31:0] ahb_haddr_q; + logic [63:0] ahb_hwdata_q; + logic ahb_hresp_q; - //Miscellaneous signals - logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic; - logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc; - // signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus - logic buf_rdata_en; + //Miscellaneous signals + logic ahb_addr_in_dccm, ahb_addr_in_iccm, ahb_addr_in_pic; + logic ahb_addr_in_dccm_region_nc, ahb_addr_in_iccm_region_nc, ahb_addr_in_pic_region_nc; + // signals needed for the read data coming back from the core and to block any further commands as AHB is a blocking bus + logic buf_rdata_en; - logic ahb_addr_clk_en, buf_rdata_clk_en; - logic bus_clk, ahb_addr_clk, buf_rdata_clk; - // Command buffer is the holding station where we convert to AXI and send to core - logic cmdbuf_wr_en, 
cmdbuf_rst; - logic cmdbuf_full; - logic cmdbuf_vld, cmdbuf_write; - logic [1:0] cmdbuf_size; - logic [7:0] cmdbuf_wstrb; - logic [31:0] cmdbuf_addr; - logic [63:0] cmdbuf_wdata; + logic ahb_addr_clk_en, buf_rdata_clk_en; + logic bus_clk, ahb_addr_clk, buf_rdata_clk; + // Command buffer is the holding station where we convert to AXI and send to core + logic cmdbuf_wr_en, cmdbuf_rst; + logic cmdbuf_full; + logic cmdbuf_vld, cmdbuf_write; + logic [ 1:0] cmdbuf_size; + logic [ 7:0] cmdbuf_wstrb; + logic [31:0] cmdbuf_addr; + logic [63:0] cmdbuf_wdata; -// FSM to control the bus states and when to block the hready and load the command buffer - always_comb begin - buf_nxtstate = IDLE; - buf_state_en = 1'b0; - buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back - buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core - cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/dat for writes - case (buf_state) - IDLE: begin // No commands recieved - buf_nxtstate = ahb_hwrite ? WR : RD; - buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid hrtans - end - WR: begin // Write command recieved last cycle - buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD; - buf_state_en = (~cmdbuf_full | ahb_hresp) ; - cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Dont send command to the buffer in case of an error or when the master is not ready with the data now. - end - RD: begin // Read command recieved last cycle. - buf_nxtstate = ahb_hresp ? IDLE :PEND; // If error go to idle, else wait for read data - buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if its an error - cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error - end - PEND: begin // Read Command has been sent. Waiting on Data. 
- buf_nxtstate = IDLE; // go back for next command and present data next cycle - buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back - buf_rdata_en = buf_state_en; // buffer the read data coming back from core - buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC ) - end - endcase - end // always_comb begin + // FSM to control the bus states and when to block the hready and load the command buffer + always_comb begin + buf_nxtstate = IDLE; + buf_state_en = 1'b0; + buf_rdata_en = 1'b0; // signal to load the buffer when the core sends read data back + buf_read_error_in = 1'b0; // signal indicating that an error came back with the read from the core + cmdbuf_wr_en = 1'b0; // all clear from the gasket to load the buffer with the command for reads, command/dat for writes + case (buf_state) + IDLE: begin // No commands recieved + buf_nxtstate = ahb_hwrite ? WR : RD; + buf_state_en = ahb_hready & ahb_htrans[1] & ahb_hsel; // only transition on a valid hrtans + end + WR: begin // Write command recieved last cycle + buf_nxtstate = (ahb_hresp | (ahb_htrans[1:0] == 2'b0) | ~ahb_hsel) ? IDLE : ahb_hwrite ? WR : RD; + buf_state_en = (~cmdbuf_full | ahb_hresp); + cmdbuf_wr_en = ~cmdbuf_full & ~(ahb_hresp | ((ahb_htrans[1:0] == 2'b01) & ahb_hsel)); // Dont send command to the buffer in case of an error or when the master is not ready with the data now. + end + RD: begin // Read command recieved last cycle. + buf_nxtstate = ahb_hresp ? IDLE : PEND; // If error go to idle, else wait for read data + buf_state_en = (~cmdbuf_full | ahb_hresp); // only when command can go, or if its an error + cmdbuf_wr_en = ~ahb_hresp & ~cmdbuf_full; // send command only when no error + end + PEND: begin // Read Command has been sent. Waiting on Data. 
+ buf_nxtstate = IDLE; // go back for next command and present data next cycle + buf_state_en = axi_rvalid & ~cmdbuf_write; // read data is back + buf_rdata_en = buf_state_en; // buffer the read data coming back from core + buf_read_error_in = buf_state_en & |axi_rresp[1:0]; // buffer error flag if return has Error ( ECC ) + end + endcase + end // always_comb begin - rvdffs_fpga #($bits(state_t)) state_reg (.*, .din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk)); + rvdffs_fpga #($bits( + state_t + )) state_reg ( + .*, + .din(buf_nxtstate), + .dout({buf_state}), + .en(buf_state_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk) + ); - assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) | + assign master_wstrb[7:0] = ({8{ahb_hsize_q[2:0] == 3'b0}} & (8'b1 << ahb_haddr_q[2:0])) | ({8{ahb_hsize_q[2:0] == 3'b1}} & (8'b11 << ahb_haddr_q[2:0])) | ({8{ahb_hsize_q[2:0] == 3'b10}} & (8'b1111 << ahb_haddr_q[2:0])) | ({8{ahb_hsize_q[2:0] == 3'b11}} & 8'b1111_1111); - // AHB signals - assign ahb_hreadyout = ahb_hresp ? (ahb_hresp_q & ~ahb_hready_q) : + // AHB signals + assign ahb_hreadyout = ahb_hresp ? 
(ahb_hresp_q & ~ahb_hready_q) : ((~cmdbuf_full | (buf_state == IDLE)) & ~(buf_state == RD | buf_state == PEND) & ~buf_read_error); - assign ahb_hready = ahb_hreadyout & ahb_hreadyin; - assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0]; - assign ahb_hrdata[63:0] = buf_rdata[63:0]; - assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) & + assign ahb_hready = ahb_hreadyout & ahb_hreadyin; + assign ahb_htrans_in[1:0] = {2{ahb_hsel}} & ahb_htrans[1:0]; + assign ahb_hrdata[63:0] = buf_rdata[63:0]; + assign ahb_hresp = ((ahb_htrans_q[1:0] != 2'b0) & (buf_state != IDLE) & ((~(ahb_addr_in_dccm | ahb_addr_in_iccm)) | // request not for ICCM or DCCM - ((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size - ((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned - ((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned - ((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned - buf_read_error | // Read ECC error - (ahb_hresp_q & ~ahb_hready_q); + ((ahb_addr_in_iccm | (ahb_addr_in_dccm & ahb_hwrite_q)) & ~((ahb_hsize_q[1:0] == 2'b10) | (ahb_hsize_q[1:0] == 2'b11))) | // ICCM Rd/Wr OR DCCM Wr not the right size + ((ahb_hsize_q[2:0] == 3'h1) & ahb_haddr_q[0]) | // HW size but unaligned + ((ahb_hsize_q[2:0] == 3'h2) & (|ahb_haddr_q[1:0])) | // W size but unaligned + ((ahb_hsize_q[2:0] == 3'h3) & (|ahb_haddr_q[2:0])))) | // DW size but unaligned + buf_read_error | // Read ECC error + (ahb_hresp_q & ~ahb_hready_q); - // Buffer signals - needed for the read data and ECC error response - rvdff_fpga #(.WIDTH(64)) buf_rdata_ff (.din(axi_rdata[63:0]), .dout(buf_rdata[63:0]), .clk(buf_rdata_clk), .clken(buf_rdata_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) buf_read_error_ff(.din(buf_read_error_in), .dout(buf_read_error), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); // 
buf_read_error will be high only one cycle + // Buffer signals - needed for the read data and ECC error response + rvdff_fpga #( + .WIDTH(64) + ) buf_rdata_ff ( + .din(axi_rdata[63:0]), + .dout(buf_rdata[63:0]), + .clk(buf_rdata_clk), + .clken(buf_rdata_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) buf_read_error_ff ( + .din(buf_read_error_in), + .dout(buf_read_error), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); // buf_read_error will be high only one cycle - // All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer. - rvdff_fpga #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(2)) htrans_ff (.din(ahb_htrans_in[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(3)) hsize_ff (.din(ahb_hsize[2:0]), .dout(ahb_hsize_q[2:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(32)) haddr_ff (.din(ahb_haddr[31:0]), .dout(ahb_haddr_q[31:0]), .clk(ahb_addr_clk), .clken(ahb_addr_clk_en), .rawclk(clk), .*); + // All the Master signals are captured before presenting it to the command buffer. We check for Hresp before sending it to the cmd buffer. 
+ rvdff_fpga #( + .WIDTH(1) + ) hresp_ff ( + .din(ahb_hresp), + .dout(ahb_hresp_q), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) hready_ff ( + .din(ahb_hready), + .dout(ahb_hready_q), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(2) + ) htrans_ff ( + .din(ahb_htrans_in[1:0]), + .dout(ahb_htrans_q[1:0]), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(3) + ) hsize_ff ( + .din(ahb_hsize[2:0]), + .dout(ahb_hsize_q[2:0]), + .clk(ahb_addr_clk), + .clken(ahb_addr_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) hwrite_ff ( + .din(ahb_hwrite), + .dout(ahb_hwrite_q), + .clk(ahb_addr_clk), + .clken(ahb_addr_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(32) + ) haddr_ff ( + .din(ahb_haddr[31:0]), + .dout(ahb_haddr_q[31:0]), + .clk(ahb_addr_clk), + .clken(ahb_addr_clk_en), + .rawclk(clk), + .* + ); - // Address check dccm - rvrangecheck #(.CCM_SADR(pt.DCCM_SADR), - .CCM_SIZE(pt.DCCM_SIZE)) addr_dccm_rangecheck ( + // Address check dccm + rvrangecheck #( + .CCM_SADR(pt.DCCM_SADR), + .CCM_SIZE(pt.DCCM_SIZE) + ) addr_dccm_rangecheck ( .addr(ahb_haddr_q[31:0]), .in_range(ahb_addr_in_dccm), .in_region(ahb_addr_in_dccm_region_nc) - ); + ); - // Address check iccm - if (pt.ICCM_ENABLE == 1) begin: GenICCM - rvrangecheck #(.CCM_SADR(pt.ICCM_SADR), - .CCM_SIZE(pt.ICCM_SIZE)) addr_iccm_rangecheck ( - .addr(ahb_haddr_q[31:0]), - .in_range(ahb_addr_in_iccm), - .in_region(ahb_addr_in_iccm_region_nc) - ); - end else begin: GenNoICCM - assign ahb_addr_in_iccm = '0; - assign ahb_addr_in_iccm_region_nc = '0; - end + // Address check iccm + if (pt.ICCM_ENABLE == 1) begin : GenICCM + rvrangecheck #( + .CCM_SADR(pt.ICCM_SADR), + .CCM_SIZE(pt.ICCM_SIZE) + ) addr_iccm_rangecheck ( + .addr(ahb_haddr_q[31:0]), + .in_range(ahb_addr_in_iccm), + .in_region(ahb_addr_in_iccm_region_nc) + ); + end else begin : GenNoICCM + assign 
ahb_addr_in_iccm = '0; + assign ahb_addr_in_iccm_region_nc = '0; + end - // PIC memory address check - rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR), - .CCM_SIZE(pt.PIC_SIZE)) addr_pic_rangecheck ( + // PIC memory address check + rvrangecheck #( + .CCM_SADR(pt.PIC_BASE_ADDR), + .CCM_SIZE(pt.PIC_SIZE) + ) addr_pic_rangecheck ( .addr(ahb_haddr_q[31:0]), .in_range(ahb_addr_in_pic), .in_region(ahb_addr_in_pic_region_nc) - ); + ); - // Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals. - assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write); - assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready))); + // Command Buffer - Holding for the commands to be sent for the AXI. It will be converted to the AXI signals. + assign cmdbuf_rst = (((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready)) & ~cmdbuf_wr_en) | (ahb_hresp & ~cmdbuf_write); + assign cmdbuf_full = (cmdbuf_vld & ~((axi_awvalid & axi_awready) | (axi_arvalid & axi_arready))); - rvdffsc_fpga #(.WIDTH(1)) cmdbuf_vldff (.din(1'b1), .dout(cmdbuf_vld), .en(cmdbuf_wr_en), .clear(cmdbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) cmdbuf_writeff (.din(ahb_hwrite_q), .dout(cmdbuf_write), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(2)) cmdbuf_sizeff (.din(ahb_hsize_q[1:0]), .dout(cmdbuf_size[1:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(8)) cmdbuf_wstrbff (.din(master_wstrb[7:0]), .dout(cmdbuf_wstrb[7:0]), .en(cmdbuf_wr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) cmdbuf_addrff (.din(ahb_haddr_q[31:0]), .dout(cmdbuf_addr[31:0]), .en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*); - rvdffe #(.WIDTH(64)) cmdbuf_wdataff (.din(ahb_hwdata[63:0]), .dout(cmdbuf_wdata[63:0]), 
.en(cmdbuf_wr_en & bus_clk_en), .clk(clk), .*); + rvdffsc_fpga #( + .WIDTH(1) + ) cmdbuf_vldff ( + .din(1'b1), + .dout(cmdbuf_vld), + .en(cmdbuf_wr_en), + .clear(cmdbuf_rst), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) cmdbuf_writeff ( + .din(ahb_hwrite_q), + .dout(cmdbuf_write), + .en(cmdbuf_wr_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(2) + ) cmdbuf_sizeff ( + .din(ahb_hsize_q[1:0]), + .dout(cmdbuf_size[1:0]), + .en(cmdbuf_wr_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(8) + ) cmdbuf_wstrbff ( + .din(master_wstrb[7:0]), + .dout(cmdbuf_wstrb[7:0]), + .en(cmdbuf_wr_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) cmdbuf_addrff ( + .din (ahb_haddr_q[31:0]), + .dout(cmdbuf_addr[31:0]), + .en (cmdbuf_wr_en & bus_clk_en), + .clk (clk), + .* + ); + rvdffe #( + .WIDTH(64) + ) cmdbuf_wdataff ( + .din (ahb_hwdata[63:0]), + .dout(cmdbuf_wdata[63:0]), + .en (cmdbuf_wr_en & bus_clk_en), + .clk (clk), + .* + ); - // AXI Write Command Channel - assign axi_awvalid = cmdbuf_vld & cmdbuf_write; - assign axi_awid[TAG-1:0] = '0; - assign axi_awaddr[31:0] = cmdbuf_addr[31:0]; - assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]}; - assign axi_awprot[2:0] = 3'b0; - assign axi_awlen[7:0] = '0; - assign axi_awburst[1:0] = 2'b01; - // AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data. 
- assign axi_wvalid = cmdbuf_vld & cmdbuf_write; - assign axi_wdata[63:0] = cmdbuf_wdata[63:0]; - assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0]; - assign axi_wlast = 1'b1; + // AXI Write Command Channel + assign axi_awvalid = cmdbuf_vld & cmdbuf_write; + assign axi_awid[TAG-1:0] = '0; + assign axi_awaddr[31:0] = cmdbuf_addr[31:0]; + assign axi_awsize[2:0] = {1'b0, cmdbuf_size[1:0]}; + assign axi_awprot[2:0] = 3'b0; + assign axi_awlen[7:0] = '0; + assign axi_awburst[1:0] = 2'b01; + // AXI Write Data Channel - This is tied to the command channel as we only write the command buffer once we have the data. + assign axi_wvalid = cmdbuf_vld & cmdbuf_write; + assign axi_wdata[63:0] = cmdbuf_wdata[63:0]; + assign axi_wstrb[7:0] = cmdbuf_wstrb[7:0]; + assign axi_wlast = 1'b1; // AXI Write Response - Always ready. AHB does not require a write response. - assign axi_bready = 1'b1; - // AXI Read Channels - assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write; - assign axi_arid[TAG-1:0] = '0; - assign axi_araddr[31:0] = cmdbuf_addr[31:0]; - assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]}; - assign axi_arprot = 3'b0; - assign axi_arlen[7:0] = '0; - assign axi_arburst[1:0] = 2'b01; - // AXI Read Response Channel - Always ready as AHB reads are blocking and the the buffer is available for the read coming back always. - assign axi_rready = 1'b1; + assign axi_bready = 1'b1; + // AXI Read Channels + assign axi_arvalid = cmdbuf_vld & ~cmdbuf_write; + assign axi_arid[TAG-1:0] = '0; + assign axi_araddr[31:0] = cmdbuf_addr[31:0]; + assign axi_arsize[2:0] = {1'b0, cmdbuf_size[1:0]}; + assign axi_arprot = 3'b0; + assign axi_arlen[7:0] = '0; + assign axi_arburst[1:0] = 2'b01; + // AXI Read Response Channel - Always ready as AHB reads are blocking and the the buffer is available for the read coming back always. 
+ assign axi_rready = 1'b1; - // Clock header logic - assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]); - assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en; + // Clock header logic + assign ahb_addr_clk_en = bus_clk_en & (ahb_hready & ahb_htrans[1]); + assign buf_rdata_clk_en = bus_clk_en & buf_rdata_en; - rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*); - rvclkhdr ahb_addr_cgc (.en(ahb_addr_clk_en), .l1clk(ahb_addr_clk), .*); - rvclkhdr buf_rdata_cgc (.en(buf_rdata_clk_en), .l1clk(buf_rdata_clk), .*); + rvclkhdr bus_cgc ( + .en(bus_clk_en), + .l1clk(bus_clk), + .* + ); + rvclkhdr ahb_addr_cgc ( + .en(ahb_addr_clk_en), + .l1clk(ahb_addr_clk), + .* + ); + rvclkhdr buf_rdata_cgc ( + .en(buf_rdata_clk_en), + .l1clk(buf_rdata_clk), + .* + ); -endmodule // ahb_to_axi4 \ No newline at end of file +endmodule // ahb_to_axi4 diff --git a/Flow/design/lib/axi4_to_ahb.sv b/Flow/design/lib/axi4_to_ahb.sv index bfb784d..f93f0ec 100644 --- a/Flow/design/lib/axi4_to_ahb.sv +++ b/Flow/design/lib/axi4_to_ahb.sv @@ -22,433 +22,722 @@ // //******************************************************************************** module axi4_to_ahb -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" -,parameter TAG = 1) ( + `include "el2_param.vh", + parameter TAG = 1 +) ( - input clk, - input free_clk, - input rst_l, - input scan_mode, - input bus_clk_en, - input clk_override, - input dec_tlu_force_halt, + input clk, + input free_clk, + input rst_l, + input scan_mode, + input bus_clk_en, + input clk_override, + input dec_tlu_force_halt, - // AXI signals - // AXI Write Channels - input logic axi_awvalid, - output logic axi_awready, - input logic [TAG-1:0] axi_awid, - input logic [31:0] axi_awaddr, - input logic [2:0] axi_awsize, - input logic [2:0] axi_awprot, + // AXI signals + // AXI Write Channels + input logic axi_awvalid, + output logic axi_awready, + input logic [TAG-1:0] axi_awid, + input logic [ 31:0] axi_awaddr, + input logic [ 2:0] 
axi_awsize, + input logic [ 2:0] axi_awprot, - input logic axi_wvalid, - output logic axi_wready, - input logic [63:0] axi_wdata, - input logic [7:0] axi_wstrb, - input logic axi_wlast, + input logic axi_wvalid, + output logic axi_wready, + input logic [63:0] axi_wdata, + input logic [ 7:0] axi_wstrb, + input logic axi_wlast, - output logic axi_bvalid, - input logic axi_bready, - output logic [1:0] axi_bresp, - output logic [TAG-1:0] axi_bid, + output logic axi_bvalid, + input logic axi_bready, + output logic [ 1:0] axi_bresp, + output logic [TAG-1:0] axi_bid, - // AXI Read Channels - input logic axi_arvalid, - output logic axi_arready, - input logic [TAG-1:0] axi_arid, - input logic [31:0] axi_araddr, - input logic [2:0] axi_arsize, - input logic [2:0] axi_arprot, + // AXI Read Channels + input logic axi_arvalid, + output logic axi_arready, + input logic [TAG-1:0] axi_arid, + input logic [ 31:0] axi_araddr, + input logic [ 2:0] axi_arsize, + input logic [ 2:0] axi_arprot, - output logic axi_rvalid, - input logic axi_rready, - output logic [TAG-1:0] axi_rid, - output logic [63:0] axi_rdata, - output logic [1:0] axi_rresp, - output logic axi_rlast, + output logic axi_rvalid, + input logic axi_rready, + output logic [TAG-1:0] axi_rid, + output logic [ 63:0] axi_rdata, + output logic [ 1:0] axi_rresp, + output logic axi_rlast, - // AHB-Lite signals - output logic [31:0] ahb_haddr, // ahb bus address - output logic [2:0] ahb_hburst, // tied to 0 - output logic ahb_hmastlock, // tied to 0 - output logic [3:0] ahb_hprot, // tied to 4'b0011 - output logic [2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) - output logic [1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) - output logic ahb_hwrite, // ahb bus write - output logic [63:0] ahb_hwdata, // ahb bus write data + // AHB-Lite signals + output logic [31:0] ahb_haddr, // ahb bus address + output logic [ 2:0] ahb_hburst, // tied to 0 + output logic ahb_hmastlock, // tied to 0 
+ output logic [ 3:0] ahb_hprot, // tied to 4'b0011 + output logic [ 2:0] ahb_hsize, // size of bus transaction (possible values 0,1,2,3) + output logic [ 1:0] ahb_htrans, // Transaction type (possible values 0,2 only right now) + output logic ahb_hwrite, // ahb bus write + output logic [63:0] ahb_hwdata, // ahb bus write data - input logic [63:0] ahb_hrdata, // ahb bus read data - input logic ahb_hready, // slave ready to accept transaction - input logic ahb_hresp // slave response (high indicates erro) + input logic [63:0] ahb_hrdata, // ahb bus read data + input logic ahb_hready, // slave ready to accept transaction + input logic ahb_hresp // slave response (high indicates erro) ); - localparam ID = 1; - localparam PRTY = 1; - typedef enum logic [2:0] {IDLE=3'b000, CMD_RD=3'b001, CMD_WR=3'b010, DATA_RD=3'b011, DATA_WR=3'b100, DONE=3'b101, STREAM_RD=3'b110, STREAM_ERR_RD=3'b111} state_t; - state_t buf_state, buf_nxtstate; + localparam ID = 1; + localparam PRTY = 1; + typedef enum logic [2:0] { + IDLE = 3'b000, + CMD_RD = 3'b001, + CMD_WR = 3'b010, + DATA_RD = 3'b011, + DATA_WR = 3'b100, + DONE = 3'b101, + STREAM_RD = 3'b110, + STREAM_ERR_RD = 3'b111 + } state_t; + state_t buf_state, buf_nxtstate; - logic slave_valid; - logic slave_ready; - logic [TAG-1:0] slave_tag; - logic [63:0] slave_rdata; - logic [3:0] slave_opc; + logic slave_valid; + logic slave_ready; + logic [TAG-1:0] slave_tag; + logic [ 63:0] slave_rdata; + logic [ 3:0] slave_opc; - logic wrbuf_en, wrbuf_data_en; - logic wrbuf_cmd_sent, wrbuf_rst; - logic wrbuf_vld; - logic wrbuf_data_vld; - logic [TAG-1:0] wrbuf_tag; - logic [2:0] wrbuf_size; - logic [31:0] wrbuf_addr; - logic [63:0] wrbuf_data; - logic [7:0] wrbuf_byteen; + logic wrbuf_en, wrbuf_data_en; + logic wrbuf_cmd_sent, wrbuf_rst; + logic wrbuf_vld; + logic wrbuf_data_vld; + logic [TAG-1:0] wrbuf_tag; + logic [ 2:0] wrbuf_size; + logic [ 31:0] wrbuf_addr; + logic [ 63:0] wrbuf_data; + logic [ 7:0] wrbuf_byteen; - logic master_valid; - logic 
master_ready; - logic [TAG-1:0] master_tag; - logic [31:0] master_addr; - logic [63:0] master_wdata; - logic [2:0] master_size; - logic [2:0] master_opc; - logic [7:0] master_byteen; + logic master_valid; + logic master_ready; + logic [TAG-1:0] master_tag; + logic [ 31:0] master_addr; + logic [ 63:0] master_wdata; + logic [ 2:0] master_size; + logic [ 2:0] master_opc; + logic [ 7:0] master_byteen; - // Buffer signals (one entry buffer) - logic [31:0] buf_addr; - logic [1:0] buf_size; - logic buf_write; - logic [7:0] buf_byteen; - logic buf_aligned; - logic [63:0] buf_data; - logic [TAG-1:0] buf_tag; + // Buffer signals (one entry buffer) + logic [ 31:0] buf_addr; + logic [ 1:0] buf_size; + logic buf_write; + logic [ 7:0] buf_byteen; + logic buf_aligned; + logic [ 63:0] buf_data; + logic [TAG-1:0] buf_tag; - //Miscellaneous signals - logic buf_rst; - logic [TAG-1:0] buf_tag_in; - logic [31:0] buf_addr_in; - logic [7:0] buf_byteen_in; - logic [63:0] buf_data_in; - logic buf_write_in; - logic buf_aligned_in; - logic [2:0] buf_size_in; + //Miscellaneous signals + logic buf_rst; + logic [TAG-1:0] buf_tag_in; + logic [ 31:0] buf_addr_in; + logic [ 7:0] buf_byteen_in; + logic [ 63:0] buf_data_in; + logic buf_write_in; + logic buf_aligned_in; + logic [ 2:0] buf_size_in; - logic buf_state_en; - logic buf_wr_en; - logic buf_data_wr_en; - logic slvbuf_error_en; - logic wr_cmd_vld; + logic buf_state_en; + logic buf_wr_en; + logic buf_data_wr_en; + logic slvbuf_error_en; + logic wr_cmd_vld; - logic cmd_done_rst, cmd_done, cmd_doneQ; - logic trxn_done; - logic [2:0] buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr; - logic buf_cmd_byte_ptr_en; - logic found; + logic cmd_done_rst, cmd_done, cmd_doneQ; + logic trxn_done; + logic [2:0] buf_cmd_byte_ptr, buf_cmd_byte_ptrQ, buf_cmd_nxtbyte_ptr; + logic buf_cmd_byte_ptr_en; + logic found; - logic slave_valid_pre; - logic ahb_hready_q; - logic ahb_hresp_q; - logic [1:0] ahb_htrans_q; - logic ahb_hwrite_q; - logic [63:0] 
ahb_hrdata_q; + logic slave_valid_pre; + logic ahb_hready_q; + logic ahb_hresp_q; + logic [ 1:0] ahb_htrans_q; + logic ahb_hwrite_q; + logic [ 63:0] ahb_hrdata_q; - logic slvbuf_write; - logic slvbuf_error; - logic [TAG-1:0] slvbuf_tag; + logic slvbuf_write; + logic slvbuf_error; + logic [TAG-1:0] slvbuf_tag; - logic slvbuf_error_in; - logic slvbuf_wr_en; - logic bypass_en; - logic rd_bypass_idle; + logic slvbuf_error_in; + logic slvbuf_wr_en; + logic bypass_en; + logic rd_bypass_idle; - logic last_addr_en; - logic [31:0] last_bus_addr; + logic last_addr_en; + logic [ 31:0] last_bus_addr; - // Clocks - logic buf_clken; - logic ahbm_data_clken; + // Clocks + logic buf_clken; + logic ahbm_data_clken; - logic buf_clk; - logic bus_clk; - logic ahbm_data_clk; + logic buf_clk; + logic bus_clk; + logic ahbm_data_clk; - logic dec_tlu_force_halt_bus, dec_tlu_force_halt_bus_ns, dec_tlu_force_halt_bus_q; + logic dec_tlu_force_halt_bus, dec_tlu_force_halt_bus_ns, dec_tlu_force_halt_bus_q; - // Function to get the length from byte enable - function automatic logic [1:0] get_write_size; - input logic [7:0] byteen; + // Function to get the length from byte enable + function automatic logic [1:0] get_write_size; + input logic [7:0] byteen; - logic [1:0] size; + logic [1:0] size; - size[1:0] = (2'b11 & {2{(byteen[7:0] == 8'hff)}}) | + size[1:0] = (2'b11 & {2{(byteen[7:0] == 8'hff)}}) | (2'b10 & {2{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h0f))}}) | (2'b01 & {2{((byteen[7:0] == 8'hc0) | (byteen[7:0] == 8'h30) | (byteen[7:0] == 8'h0c) | (byteen[7:0] == 8'h03))}}); - return size[1:0]; - endfunction // get_write_size + return size[1:0]; + endfunction // get_write_size - // Function to get the length from byte enable - function automatic logic [2:0] get_write_addr; - input logic [7:0] byteen; + // Function to get the length from byte enable + function automatic logic [2:0] get_write_addr; + input logic [7:0] byteen; - logic [2:0] addr; + logic [2:0] addr; - addr[2:0] = (3'h0 & 
{3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) | + addr[2:0] = (3'h0 & {3{((byteen[7:0] == 8'hff) | (byteen[7:0] == 8'h0f) | (byteen[7:0] == 8'h03))}}) | (3'h2 & {3{(byteen[7:0] == 8'h0c)}}) | (3'h4 & {3{((byteen[7:0] == 8'hf0) | (byteen[7:0] == 8'h03))}}) | (3'h6 & {3{(byteen[7:0] == 8'hc0)}}); - return addr[2:0]; - endfunction // get_write_addr + return addr[2:0]; + endfunction // get_write_addr - // Function to get the next byte pointer - function automatic logic [2:0] get_nxtbyte_ptr (logic [2:0] current_byte_ptr, logic [7:0] byteen, logic get_next); - logic [2:0] start_ptr; - logic found; - found = '0; - //get_nxtbyte_ptr[2:0] = current_byte_ptr[2:0]; - start_ptr[2:0] = get_next ? (current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0]; - for (int j=0; j<8; j++) begin - if (~found) begin - get_nxtbyte_ptr[2:0] = 3'(j); - found |= (byteen[j] & (3'(j) >= start_ptr[2:0])) ; - end + // Function to get the next byte pointer + function automatic logic [2:0] get_nxtbyte_ptr(logic [2:0] current_byte_ptr, logic [7:0] byteen, + logic get_next); + logic [2:0] start_ptr; + logic found; + found = '0; + //get_nxtbyte_ptr[2:0] = current_byte_ptr[2:0]; + start_ptr[2:0] = get_next ? 
(current_byte_ptr[2:0] + 3'b1) : current_byte_ptr[2:0]; + for (int j = 0; j < 8; j++) begin + if (~found) begin + get_nxtbyte_ptr[2:0] = 3'(j); + found |= (byteen[j] & (3'(j) >= start_ptr[2:0])); end - endfunction // get_nextbyte_ptr + end + endfunction // get_nextbyte_ptr - // Create bus synchronized version of force halt - assign dec_tlu_force_halt_bus = dec_tlu_force_halt | dec_tlu_force_halt_bus_q; - assign dec_tlu_force_halt_bus_ns = ~bus_clk_en & dec_tlu_force_halt_bus; - rvdff #(.WIDTH(1)) force_halt_busff(.din(dec_tlu_force_halt_bus_ns), .dout(dec_tlu_force_halt_bus_q), .clk(free_clk), .*); + // Create bus synchronized version of force halt + assign dec_tlu_force_halt_bus = dec_tlu_force_halt | dec_tlu_force_halt_bus_q; + assign dec_tlu_force_halt_bus_ns = ~bus_clk_en & dec_tlu_force_halt_bus; + rvdff #( + .WIDTH(1) + ) force_halt_busff ( + .din (dec_tlu_force_halt_bus_ns), + .dout(dec_tlu_force_halt_bus_q), + .clk (free_clk), + .* + ); - // Write buffer - assign wrbuf_en = axi_awvalid & axi_awready & master_ready; - assign wrbuf_data_en = axi_wvalid & axi_wready & master_ready; - assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01); - assign wrbuf_rst = (wrbuf_cmd_sent & ~wrbuf_en) | dec_tlu_force_halt_bus; + // Write buffer + assign wrbuf_en = axi_awvalid & axi_awready & master_ready; + assign wrbuf_data_en = axi_wvalid & axi_wready & master_ready; + assign wrbuf_cmd_sent = master_valid & master_ready & (master_opc[2:1] == 2'b01); + assign wrbuf_rst = (wrbuf_cmd_sent & ~wrbuf_en) | dec_tlu_force_halt_bus; - assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready; - assign axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready; - assign axi_arready = ~(wrbuf_vld & wrbuf_data_vld) & master_ready; - assign axi_rlast = 1'b1; + assign axi_awready = ~(wrbuf_vld & ~wrbuf_cmd_sent) & master_ready; + assign axi_wready = ~(wrbuf_data_vld & ~wrbuf_cmd_sent) & master_ready; + assign axi_arready = ~(wrbuf_vld & 
wrbuf_data_vld) & master_ready; + assign axi_rlast = 1'b1; - assign wr_cmd_vld = (wrbuf_vld & wrbuf_data_vld); - assign master_valid = wr_cmd_vld | axi_arvalid; - assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0]; - assign master_opc[2:0] = wr_cmd_vld ? 3'b011 : 3'b0; - assign master_addr[31:0] = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0]; - assign master_size[2:0] = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0]; - assign master_byteen[7:0] = wrbuf_byteen[7:0]; - assign master_wdata[63:0] = wrbuf_data[63:0]; + assign wr_cmd_vld = (wrbuf_vld & wrbuf_data_vld); + assign master_valid = wr_cmd_vld | axi_arvalid; + assign master_tag[TAG-1:0] = wr_cmd_vld ? wrbuf_tag[TAG-1:0] : axi_arid[TAG-1:0]; + assign master_opc[2:0] = wr_cmd_vld ? 3'b011 : 3'b0; + assign master_addr[31:0] = wr_cmd_vld ? wrbuf_addr[31:0] : axi_araddr[31:0]; + assign master_size[2:0] = wr_cmd_vld ? wrbuf_size[2:0] : axi_arsize[2:0]; + assign master_byteen[7:0] = wrbuf_byteen[7:0]; + assign master_wdata[63:0] = wrbuf_data[63:0]; - // AXI response channel signals - assign axi_bvalid = slave_valid & slave_ready & slave_opc[3]; - assign axi_bresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0); - assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0]; + // AXI response channel signals + assign axi_bvalid = slave_valid & slave_ready & slave_opc[3]; + assign axi_bresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0); + assign axi_bid[TAG-1:0] = slave_tag[TAG-1:0]; - assign axi_rvalid = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0); - assign axi_rresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 2'b11 : 2'b0); - assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0]; - assign axi_rdata[63:0] = slave_rdata[63:0]; - assign slave_ready = axi_bready & axi_rready; + assign axi_rvalid = slave_valid & slave_ready & (slave_opc[3:2] == 2'b0); + assign axi_rresp[1:0] = slave_opc[0] ? 2'b10 : (slave_opc[1] ? 
2'b11 : 2'b0); + assign axi_rid[TAG-1:0] = slave_tag[TAG-1:0]; + assign axi_rdata[63:0] = slave_rdata[63:0]; + assign slave_ready = axi_bready & axi_rready; - // FIFO state machine - always_comb begin - buf_nxtstate = IDLE; - buf_state_en = 1'b0; - buf_wr_en = 1'b0; - buf_data_wr_en = 1'b0; - slvbuf_error_in = 1'b0; - slvbuf_error_en = 1'b0; - buf_write_in = 1'b0; - cmd_done = 1'b0; - trxn_done = 1'b0; - buf_cmd_byte_ptr_en = 1'b0; - buf_cmd_byte_ptr[2:0] = '0; - slave_valid_pre = 1'b0; - master_ready = 1'b0; - ahb_htrans[1:0] = 2'b0; - slvbuf_wr_en = 1'b0; - bypass_en = 1'b0; - rd_bypass_idle = 1'b0; + // FIFO state machine + always_comb begin + buf_nxtstate = IDLE; + buf_state_en = 1'b0; + buf_wr_en = 1'b0; + buf_data_wr_en = 1'b0; + slvbuf_error_in = 1'b0; + slvbuf_error_en = 1'b0; + buf_write_in = 1'b0; + cmd_done = 1'b0; + trxn_done = 1'b0; + buf_cmd_byte_ptr_en = 1'b0; + buf_cmd_byte_ptr[2:0] = '0; + slave_valid_pre = 1'b0; + master_ready = 1'b0; + ahb_htrans[1:0] = 2'b0; + slvbuf_wr_en = 1'b0; + bypass_en = 1'b0; + rd_bypass_idle = 1'b0; - case (buf_state) - IDLE: begin - master_ready = 1'b1; - buf_write_in = (master_opc[2:1] == 2'b01); - buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD; - buf_state_en = master_valid & master_ready; - buf_wr_en = buf_state_en; - buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR); - buf_cmd_byte_ptr_en = buf_state_en; - buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : master_addr[2:0]; - bypass_en = buf_state_en; - rd_bypass_idle = bypass_en & (buf_nxtstate == CMD_RD); - ahb_htrans[1:0] = {2{bypass_en}} & 2'b10; - end - CMD_RD: begin - buf_nxtstate = (master_valid & (master_opc[2:0] == 3'b000))? 
STREAM_RD : DATA_RD; - buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; - cmd_done = buf_state_en & ~master_valid; - slvbuf_wr_en = buf_state_en; - master_ready = buf_state_en & (buf_nxtstate == STREAM_RD); - buf_wr_en = master_ready; - bypass_en = master_ready & master_valid; - buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0]; - ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}}; - end - STREAM_RD: begin - master_ready = (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01); - buf_wr_en = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands - buf_nxtstate = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD); // assuming that the master accpets the slave response right away. - buf_state_en = (ahb_hready_q | ahb_hresp_q); - buf_data_wr_en = buf_state_en; - slvbuf_error_in = ahb_hresp_q; - slvbuf_error_en = buf_state_en; - slave_valid_pre = buf_state_en & ~ahb_hresp_q; // send a response right away if we are not going through an error response. - cmd_done = buf_state_en & ~master_valid; // last one of the stream should not send a htrans - bypass_en = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en; - buf_cmd_byte_ptr[2:0] = bypass_en ? 
master_addr[2:0] : buf_addr[2:0]; - ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}}; - slvbuf_wr_en = buf_wr_en; // shifting the contents from the buf to slv_buf for streaming cases - end // case: STREAM_RD - STREAM_ERR_RD: begin - buf_nxtstate = DATA_RD; - buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; - slave_valid_pre = buf_state_en; - slvbuf_wr_en = buf_state_en; // Overwrite slvbuf with buffer - buf_cmd_byte_ptr[2:0] = buf_addr[2:0]; - ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}}; - end - DATA_RD: begin - buf_nxtstate = DONE; - buf_state_en = (ahb_hready_q | ahb_hresp_q); - buf_data_wr_en = buf_state_en; - slvbuf_error_in= ahb_hresp_q; - slvbuf_error_en= buf_state_en; - slvbuf_wr_en = buf_state_en; + case (buf_state) + IDLE: begin + master_ready = 1'b1; + buf_write_in = (master_opc[2:1] == 2'b01); + buf_nxtstate = buf_write_in ? CMD_WR : CMD_RD; + buf_state_en = master_valid & master_ready; + buf_wr_en = buf_state_en; + buf_data_wr_en = buf_state_en & (buf_nxtstate == CMD_WR); + buf_cmd_byte_ptr_en = buf_state_en; + buf_cmd_byte_ptr[2:0] = buf_write_in ? get_nxtbyte_ptr(3'b0, buf_byteen_in[7:0], 1'b0) : + master_addr[2:0]; + bypass_en = buf_state_en; + rd_bypass_idle = bypass_en & (buf_nxtstate == CMD_RD); + ahb_htrans[1:0] = {2{bypass_en}} & 2'b10; + end + CMD_RD: begin + buf_nxtstate = (master_valid & (master_opc[2:0] == 3'b000)) ? STREAM_RD : DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + cmd_done = buf_state_en & ~master_valid; + slvbuf_wr_en = buf_state_en; + master_ready = buf_state_en & (buf_nxtstate == STREAM_RD); + buf_wr_en = master_ready; + bypass_en = master_ready & master_valid; + buf_cmd_byte_ptr[2:0] = bypass_en ? 
master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en | bypass_en}}; + end + STREAM_RD: begin + master_ready = (ahb_hready_q & ~ahb_hresp_q) & ~(master_valid & master_opc[2:1] == 2'b01); + buf_wr_en = (master_valid & master_ready & (master_opc[2:0] == 3'b000)); // update the fifo if we are streaming the read commands + buf_nxtstate = ahb_hresp_q ? STREAM_ERR_RD : (buf_wr_en ? STREAM_RD : DATA_RD); // assuming that the master accpets the slave response right away. + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + slave_valid_pre = buf_state_en & ~ahb_hresp_q; // send a response right away if we are not going through an error response. + cmd_done = buf_state_en & ~master_valid; // last one of the stream should not send a htrans + bypass_en = master_ready & master_valid & (buf_nxtstate == STREAM_RD) & buf_state_en; + buf_cmd_byte_ptr[2:0] = bypass_en ? master_addr[2:0] : buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~((buf_nxtstate != STREAM_RD) & buf_state_en)}}; + slvbuf_wr_en = buf_wr_en; // shifting the contents from the buf to slv_buf for streaming cases + end // case: STREAM_RD + STREAM_ERR_RD: begin + buf_nxtstate = DATA_RD; + buf_state_en = ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & ~ahb_hwrite_q; + slave_valid_pre = buf_state_en; + slvbuf_wr_en = buf_state_en; // Overwrite slvbuf with buffer + buf_cmd_byte_ptr[2:0] = buf_addr[2:0]; + ahb_htrans[1:0] = 2'b10 & {2{~buf_state_en}}; + end + DATA_RD: begin + buf_nxtstate = DONE; + buf_state_en = (ahb_hready_q | ahb_hresp_q); + buf_data_wr_en = buf_state_en; + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; + slvbuf_wr_en = buf_state_en; - end - CMD_WR: begin - buf_nxtstate = DATA_WR; - trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); - buf_state_en = trxn_done; - buf_cmd_byte_ptr_en = buf_state_en; - slvbuf_wr_en = buf_state_en; - buf_cmd_byte_ptr = 
trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; - cmd_done = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) | - (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)); - ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10; - end - DATA_WR: begin - buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q; - master_ready = buf_state_en & ~ahb_hresp_q & slave_ready; // Ready to accept new command if current command done and no error - buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE : + end + CMD_WR: begin + buf_nxtstate = DATA_WR; + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_state_en = trxn_done; + buf_cmd_byte_ptr_en = buf_state_en; + slvbuf_wr_en = buf_state_en; + buf_cmd_byte_ptr = trxn_done ? + get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0], buf_byteen[7:0], 1'b1) : buf_cmd_byte_ptrQ; + cmd_done = trxn_done & (buf_aligned | (buf_cmd_byte_ptrQ == 3'b111) | + (buf_byteen[get_nxtbyte_ptr( + buf_cmd_byte_ptrQ[2:0], buf_byteen[7:0], 1'b1)] == 1'b0)); + ahb_htrans[1:0] = {2{~(cmd_done | cmd_doneQ)}} & 2'b10; + end + DATA_WR: begin + buf_state_en = (cmd_doneQ & ahb_hready_q) | ahb_hresp_q; + master_ready = buf_state_en & ~ahb_hresp_q & slave_ready; // Ready to accept new command if current command done and no error + buf_nxtstate = (ahb_hresp_q | ~slave_ready) ? DONE : ((master_valid & master_ready) ? ((master_opc[2:1] == 2'b01) ? 
CMD_WR : CMD_RD) : IDLE); - slvbuf_error_in = ahb_hresp_q; - slvbuf_error_en = buf_state_en; + slvbuf_error_in = ahb_hresp_q; + slvbuf_error_en = buf_state_en; - buf_write_in = (master_opc[2:1] == 2'b01); - buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD)); - buf_data_wr_en = buf_wr_en; + buf_write_in = (master_opc[2:1] == 2'b01); + buf_wr_en = buf_state_en & ((buf_nxtstate == CMD_WR) | (buf_nxtstate == CMD_RD)); + buf_data_wr_en = buf_wr_en; - cmd_done = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & - ((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1)] == 1'b0)))); - bypass_en = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR); // Only bypass for writes for the time being - ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10; - slave_valid_pre = buf_state_en & (buf_nxtstate != DONE); + cmd_done = (ahb_hresp_q | (ahb_hready_q & (ahb_htrans_q[1:0] != 2'b0) & + ((buf_cmd_byte_ptrQ == 3'b111) | (buf_byteen[get_nxtbyte_ptr( + buf_cmd_byte_ptrQ[2:0], buf_byteen[7:0], 1'b1)] == 1'b0)))); + bypass_en = buf_state_en & buf_write_in & (buf_nxtstate == CMD_WR); // Only bypass for writes for the time being + ahb_htrans[1:0] = {2{(~(cmd_done | cmd_doneQ) | bypass_en)}} & 2'b10; + slave_valid_pre = buf_state_en & (buf_nxtstate != DONE); - trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); - buf_cmd_byte_ptr_en = trxn_done | bypass_en; - buf_cmd_byte_ptr = bypass_en ? get_nxtbyte_ptr(3'b0,buf_byteen_in[7:0],1'b0) : - trxn_done ? get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0],buf_byteen[7:0],1'b1) : buf_cmd_byte_ptrQ; - end - DONE: begin - buf_nxtstate = IDLE; - buf_state_en = slave_ready; - slvbuf_error_en = 1'b1; - slave_valid_pre = 1'b1; - end - endcase - end + trxn_done = ahb_hready_q & ahb_hwrite_q & (ahb_htrans_q[1:0] != 2'b0); + buf_cmd_byte_ptr_en = trxn_done | bypass_en; + buf_cmd_byte_ptr = bypass_en ? 
get_nxtbyte_ptr(3'b0, buf_byteen_in[7:0], 1'b0) : trxn_done ? + get_nxtbyte_ptr(buf_cmd_byte_ptrQ[2:0], buf_byteen[7:0], 1'b1) : buf_cmd_byte_ptrQ; + end + DONE: begin + buf_nxtstate = IDLE; + buf_state_en = slave_ready; + slvbuf_error_en = 1'b1; + slave_valid_pre = 1'b1; + end + endcase + end - assign buf_rst = dec_tlu_force_halt_bus; - assign cmd_done_rst = slave_valid_pre; - assign buf_addr_in[31:3] = master_addr[31:3]; - assign buf_addr_in[2:0] = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? get_write_addr(master_byteen[7:0]) : master_addr[2:0]; - assign buf_tag_in[TAG-1:0] = master_tag[TAG-1:0]; - assign buf_byteen_in[7:0] = wrbuf_byteen[7:0]; - assign buf_data_in[63:0] = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0]; - assign buf_size_in[1:0] = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? get_write_size(master_byteen[7:0]) : master_size[1:0]; - assign buf_aligned_in = (master_opc[2:0] == 3'b0) | // reads are always aligned since they are either DW or sideeffects - (master_size[1:0] == 2'b0) | (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned - ((master_size[1:0] == 2'b11) & + assign buf_rst = dec_tlu_force_halt_bus; + assign cmd_done_rst = slave_valid_pre; + assign buf_addr_in[31:3] = master_addr[31:3]; + assign buf_addr_in[2:0] = (buf_aligned_in & (master_opc[2:1] == 2'b01)) ? get_write_addr( + master_byteen[7:0] + ) : master_addr[2:0]; + assign buf_tag_in[TAG-1:0] = master_tag[TAG-1:0]; + assign buf_byteen_in[7:0] = wrbuf_byteen[7:0]; + assign buf_data_in[63:0] = (buf_state == DATA_RD) ? ahb_hrdata_q[63:0] : master_wdata[63:0]; + assign buf_size_in[1:0] = (buf_aligned_in & (master_size[1:0] == 2'b11) & (master_opc[2:1] == 2'b01)) ? 
get_write_size( + master_byteen[7:0] + ) : master_size[1:0]; + assign buf_aligned_in = (master_opc[2:0] == 3'b0) | // reads are always aligned since they are either DW or sideeffects + (master_size[1:0] == 2'b0) | (master_size[1:0] == 2'b01) | (master_size[1:0] == 2'b10) | // Always aligned for Byte/HW/Word since they can be only for non-idempotent. IFU/SB are always aligned + ((master_size[1:0] == 2'b11) & ((master_byteen[7:0] == 8'h3) | (master_byteen[7:0] == 8'hc) | (master_byteen[7:0] == 8'h30) | (master_byteen[7:0] == 8'hc0) | (master_byteen[7:0] == 8'hf) | (master_byteen[7:0] == 8'hf0) | (master_byteen[7:0] == 8'hff))); - // Generate the ahb signals - assign ahb_haddr[31:3] = bypass_en ? master_addr[31:3] : buf_addr[31:3]; - assign ahb_haddr[2:0] = {3{(ahb_htrans == 2'b10)}} & buf_cmd_byte_ptr[2:0]; // Trxn should be aligned during IDLE - assign ahb_hsize[2:0] = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} : + // Generate the ahb signals + assign ahb_haddr[31:3] = bypass_en ? master_addr[31:3] : buf_addr[31:3]; + assign ahb_haddr[2:0] = {3{(ahb_htrans == 2'b10)}} & buf_cmd_byte_ptr[2:0]; // Trxn should be aligned during IDLE + assign ahb_hsize[2:0] = bypass_en ? {1'b0, ({2{buf_aligned_in}} & buf_size_in[1:0])} : {1'b0, ({2{buf_aligned}} & buf_size[1:0])}; // Send the full size for aligned trxn - assign ahb_hburst[2:0] = 3'b0; - assign ahb_hmastlock = 1'b0; - assign ahb_hprot[3:0] = {3'b001,~axi_arprot[2]}; - assign ahb_hwrite = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write; - assign ahb_hwdata[63:0] = buf_data[63:0]; + assign ahb_hburst[2:0] = 3'b0; + assign ahb_hmastlock = 1'b0; + assign ahb_hprot[3:0] = {3'b001, ~axi_arprot[2]}; + assign ahb_hwrite = bypass_en ? (master_opc[2:1] == 2'b01) : buf_write; + assign ahb_hwdata[63:0] = buf_data[63:0]; - assign slave_valid = slave_valid_pre;// & (~slvbuf_posted_write | slvbuf_error); - assign slave_opc[3:2] = slvbuf_write ? 
2'b11 : 2'b00; - assign slave_opc[1:0] = {2{slvbuf_error}} & 2'b10; - assign slave_rdata[63:0] = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? buf_data[63:0] : ahb_hrdata_q[63:0]); - assign slave_tag[TAG-1:0] = slvbuf_tag[TAG-1:0]; + assign slave_valid = slave_valid_pre; // & (~slvbuf_posted_write | slvbuf_error); + assign slave_opc[3:2] = slvbuf_write ? 2'b11 : 2'b00; + assign slave_opc[1:0] = {2{slvbuf_error}} & 2'b10; + assign slave_rdata[63:0] = slvbuf_error ? {2{last_bus_addr[31:0]}} : ((buf_state == DONE) ? buf_data[63:0] : ahb_hrdata_q[63:0]); + assign slave_tag[TAG-1:0] = slvbuf_tag[TAG-1:0]; - assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite ; + assign last_addr_en = (ahb_htrans[1:0] != 2'b0) & ahb_hready & ahb_hwrite; - rvdffsc_fpga #(.WIDTH(1)) wrbuf_vldff (.din(1'b1), .dout(wrbuf_vld), .en(wrbuf_en), .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffsc_fpga #(.WIDTH(1)) wrbuf_data_vldff(.din(1'b1), .dout(wrbuf_data_vld), .en(wrbuf_data_en), .clear(wrbuf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(TAG)) wrbuf_tagff (.din(axi_awid[TAG-1:0]), .dout(wrbuf_tag[TAG-1:0]), .en(wrbuf_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(3)) wrbuf_sizeff (.din(axi_awsize[2:0]), .dout(wrbuf_size[2:0]), .en(wrbuf_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) wrbuf_addrff (.din(axi_awaddr[31:0]), .dout(wrbuf_addr[31:0]), .en(wrbuf_en & bus_clk_en), .clk(clk), .*); - rvdffe #(.WIDTH(64)) wrbuf_dataff (.din(axi_wdata[63:0]), .dout(wrbuf_data[63:0]), .en(wrbuf_data_en & bus_clk_en), .clk(clk), .*); - rvdffs_fpga #(.WIDTH(8)) wrbuf_byteenff (.din(axi_wstrb[7:0]), .dout(wrbuf_byteen[7:0]), .en(wrbuf_data_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #( + .WIDTH(1) + ) wrbuf_vldff ( + .din(1'b1), + .dout(wrbuf_vld), + .en(wrbuf_en), + .clear(wrbuf_rst), + .clk(bus_clk), 
+ .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffsc_fpga #( + .WIDTH(1) + ) wrbuf_data_vldff ( + .din(1'b1), + .dout(wrbuf_data_vld), + .en(wrbuf_data_en), + .clear(wrbuf_rst), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(TAG) + ) wrbuf_tagff ( + .din(axi_awid[TAG-1:0]), + .dout(wrbuf_tag[TAG-1:0]), + .en(wrbuf_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(3) + ) wrbuf_sizeff ( + .din(axi_awsize[2:0]), + .dout(wrbuf_size[2:0]), + .en(wrbuf_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) wrbuf_addrff ( + .din (axi_awaddr[31:0]), + .dout(wrbuf_addr[31:0]), + .en (wrbuf_en & bus_clk_en), + .clk (clk), + .* + ); + rvdffe #( + .WIDTH(64) + ) wrbuf_dataff ( + .din (axi_wdata[63:0]), + .dout(wrbuf_data[63:0]), + .en (wrbuf_data_en & bus_clk_en), + .clk (clk), + .* + ); + rvdffs_fpga #( + .WIDTH(8) + ) wrbuf_byteenff ( + .din(axi_wstrb[7:0]), + .dout(wrbuf_byteen[7:0]), + .en(wrbuf_data_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); - rvdffs_fpga #(.WIDTH(32)) last_bus_addrff (.din(ahb_haddr[31:0]), .dout(last_bus_addr[31:0]), .en(last_addr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #( + .WIDTH(32) + ) last_bus_addrff ( + .din(ahb_haddr[31:0]), + .dout(last_bus_addr[31:0]), + .en(last_addr_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); - rvdffsc_fpga #(.WIDTH($bits(state_t))) buf_state_ff (.din(buf_nxtstate), .dout({buf_state}), .en(buf_state_en), .clear(buf_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) buf_writeff (.din(buf_write_in), .dout(buf_write), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(TAG)) buf_tagff (.din(buf_tag_in[TAG-1:0]), .dout(buf_tag[TAG-1:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) 
buf_addrff (.din(buf_addr_in[31:0]), .dout(buf_addr[31:0]), .en(buf_wr_en & bus_clk_en), .clk(clk), .*); - rvdffs_fpga #(.WIDTH(2)) buf_sizeff (.din(buf_size_in[1:0]), .dout(buf_size[1:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) buf_alignedff (.din(buf_aligned_in), .dout(buf_aligned), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(8)) buf_byteenff (.din(buf_byteen_in[7:0]), .dout(buf_byteen[7:0]), .en(buf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffe #(.WIDTH(64)) buf_dataff (.din(buf_data_in[63:0]), .dout(buf_data[63:0]), .en(buf_data_wr_en & bus_clk_en), .clk(clk), .*); + rvdffsc_fpga #( + .WIDTH($bits(state_t)) + ) buf_state_ff ( + .din(buf_nxtstate), + .dout({buf_state}), + .en(buf_state_en), + .clear(buf_rst), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) buf_writeff ( + .din(buf_write_in), + .dout(buf_write), + .en(buf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(TAG) + ) buf_tagff ( + .din(buf_tag_in[TAG-1:0]), + .dout(buf_tag[TAG-1:0]), + .en(buf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) buf_addrff ( + .din (buf_addr_in[31:0]), + .dout(buf_addr[31:0]), + .en (buf_wr_en & bus_clk_en), + .clk (clk), + .* + ); + rvdffs_fpga #( + .WIDTH(2) + ) buf_sizeff ( + .din(buf_size_in[1:0]), + .dout(buf_size[1:0]), + .en(buf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) buf_alignedff ( + .din(buf_aligned_in), + .dout(buf_aligned), + .en(buf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(8) + ) buf_byteenff ( + .din(buf_byteen_in[7:0]), + .dout(buf_byteen[7:0]), + .en(buf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(64) + ) 
buf_dataff ( + .din (buf_data_in[63:0]), + .dout(buf_data[63:0]), + .en (buf_data_wr_en & bus_clk_en), + .clk (clk), + .* + ); - rvdffs_fpga #(.WIDTH(1)) slvbuf_writeff (.din(buf_write), .dout(slvbuf_write), .en(slvbuf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(TAG)) slvbuf_tagff (.din(buf_tag[TAG-1:0]), .dout(slvbuf_tag[TAG-1:0]), .en(slvbuf_wr_en), .clk(buf_clk), .clken(buf_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) slvbuf_errorff (.din(slvbuf_error_in), .dout(slvbuf_error), .en(slvbuf_error_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); + rvdffs_fpga #( + .WIDTH(1) + ) slvbuf_writeff ( + .din(buf_write), + .dout(slvbuf_write), + .en(slvbuf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(TAG) + ) slvbuf_tagff ( + .din(buf_tag[TAG-1:0]), + .dout(slvbuf_tag[TAG-1:0]), + .en(slvbuf_wr_en), + .clk(buf_clk), + .clken(buf_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) slvbuf_errorff ( + .din(slvbuf_error_in), + .dout(slvbuf_error), + .en(slvbuf_error_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); - rvdffsc_fpga #(.WIDTH(1)) buf_cmd_doneff (.din(1'b1), .dout(cmd_doneQ), .en(cmd_done), .clear(cmd_done_rst), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(3)) buf_cmd_byte_ptrff (.din(buf_cmd_byte_ptr[2:0]), .dout(buf_cmd_byte_ptrQ[2:0]), .en(buf_cmd_byte_ptr_en), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); + rvdffsc_fpga #( + .WIDTH(1) + ) buf_cmd_doneff ( + .din(1'b1), + .dout(cmd_doneQ), + .en(cmd_done), + .clear(cmd_done_rst), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(3) + ) buf_cmd_byte_ptrff ( + .din(buf_cmd_byte_ptr[2:0]), + .dout(buf_cmd_byte_ptrQ[2:0]), + .en(buf_cmd_byte_ptr_en), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); - rvdff_fpga #(.WIDTH(1)) hready_ff (.din(ahb_hready), .dout(ahb_hready_q), 
.clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(2)) htrans_ff (.din(ahb_htrans[1:0]), .dout(ahb_htrans_q[1:0]), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) hwrite_ff (.din(ahb_hwrite), .dout(ahb_hwrite_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) hresp_ff (.din(ahb_hresp), .dout(ahb_hresp_q), .clk(bus_clk), .clken(bus_clk_en), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(64)) hrdata_ff (.din(ahb_hrdata[63:0]), .dout(ahb_hrdata_q[63:0]), .clk(ahbm_data_clk), .clken(ahbm_data_clken), .rawclk(clk), .*); + rvdff_fpga #( + .WIDTH(1) + ) hready_ff ( + .din(ahb_hready), + .dout(ahb_hready_q), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(2) + ) htrans_ff ( + .din(ahb_htrans[1:0]), + .dout(ahb_htrans_q[1:0]), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) hwrite_ff ( + .din(ahb_hwrite), + .dout(ahb_hwrite_q), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) hresp_ff ( + .din(ahb_hresp), + .dout(ahb_hresp_q), + .clk(bus_clk), + .clken(bus_clk_en), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(64) + ) hrdata_ff ( + .din(ahb_hrdata[63:0]), + .dout(ahb_hrdata_q[63:0]), + .clk(ahbm_data_clk), + .clken(ahbm_data_clken), + .rawclk(clk), + .* + ); - // Clock headers - // clock enables for ahbm addr/data - assign buf_clken = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override); - assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override); + // Clock headers + // clock enables for ahbm addr/data + assign buf_clken = bus_clk_en & (buf_wr_en | slvbuf_wr_en | clk_override); + assign ahbm_data_clken = bus_clk_en & ((buf_state != IDLE) | clk_override); - rvclkhdr bus_cgc (.en(bus_clk_en), .l1clk(bus_clk), .*); - rvclkhdr buf_cgc (.en(buf_clken), .l1clk(buf_clk), .*); - rvclkhdr ahbm_data_cgc (.en(ahbm_data_clken), 
.l1clk(ahbm_data_clk), .*); + rvclkhdr bus_cgc ( + .en(bus_clk_en), + .l1clk(bus_clk), + .* + ); + rvclkhdr buf_cgc ( + .en(buf_clken), + .l1clk(buf_clk), + .* + ); + rvclkhdr ahbm_data_cgc ( + .en(ahbm_data_clken), + .l1clk(ahbm_data_clk), + .* + ); -endmodule // axi4_to_ahb +endmodule // axi4_to_ahb diff --git a/Flow/design/lib/beh_lib.sv b/Flow/design/lib/beh_lib.sv index 711ec02..ccc51f4 100644 --- a/Flow/design/lib/beh_lib.sv +++ b/Flow/design/lib/beh_lib.sv @@ -16,202 +16,233 @@ // all flops call the rvdff flop -module rvdff #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, +module rvdff #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic rst_l, - output logic [WIDTH-1:0] dout - ); + output logic [WIDTH-1:0] dout +); -if (SHORT == 1) begin - assign dout = din; -end -else begin + if (SHORT == 1) begin + assign dout = din; + end else begin `ifdef RV_CLOCKGATE - always @(posedge tb_top.clk) begin - #0 $strobe("CG: %0t %m din %x dout %x clk %b width %d",$time,din,dout,clk,WIDTH); - end + always @(posedge tb_top.clk) begin + #0 $strobe("CG: %0t %m din %x dout %x clk %b width %d", $time, din, dout, clk, WIDTH); + end `endif - always_ff @(posedge clk or negedge rst_l) begin - if (rst_l == 0) - dout[WIDTH-1:0] <= 0; - else - dout[WIDTH-1:0] <= din[WIDTH-1:0]; - end + always_ff @(posedge clk or negedge rst_l) begin + if (rst_l == 0) dout[WIDTH-1:0] <= 0; + else dout[WIDTH-1:0] <= din[WIDTH-1:0]; + end -end + end endmodule // rvdff with 2:1 input mux to flop din iff sel==1 -module rvdffs #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic en, - input logic clk, - input logic rst_l, - output logic [WIDTH-1:0] dout - ); +module rvdffs #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clk, + input logic rst_l, + output logic [WIDTH-1:0] dout +); -if (SHORT == 1) begin : genblock 
- assign dout = din; -end -else begin : genblock - rvdff #(WIDTH) dffs (.din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), .*); -end + if (SHORT == 1) begin : genblock + assign dout = din; + end else begin : genblock + rvdff #(WIDTH) dffs ( + .din((en) ? din[WIDTH-1:0] : dout[WIDTH-1:0]), + .* + ); + end endmodule // rvdff with en and clear -module rvdffsc #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic en, - input logic clear, - input logic clk, - input logic rst_l, - output logic [WIDTH-1:0] dout - ); +module rvdffsc #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clear, + input logic clk, + input logic rst_l, + output logic [WIDTH-1:0] dout +); - logic [WIDTH-1:0] din_new; -if (SHORT == 1) begin - assign dout = din; -end -else begin - assign din_new = {WIDTH{~clear}} & (en ? din[WIDTH-1:0] : dout[WIDTH-1:0]); - rvdff #(WIDTH) dffsc (.din(din_new[WIDTH-1:0]), .*); -end + logic [WIDTH-1:0] din_new; + if (SHORT == 1) begin + assign dout = din; + end else begin + assign din_new = {WIDTH{~clear}} & (en ? 
din[WIDTH-1:0] : dout[WIDTH-1:0]); + rvdff #(WIDTH) dffsc ( + .din(din_new[WIDTH-1:0]), + .* + ); + end endmodule // _fpga versions -module rvdff_fpga #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic clk, - input logic clken, - input logic rawclk, - input logic rst_l, +module rvdff_fpga #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic clken, + input logic rawclk, + input logic rst_l, - output logic [WIDTH-1:0] dout - ); + output logic [WIDTH-1:0] dout +); -if (SHORT == 1) begin - assign dout = din; -end -else begin - rvdff #(WIDTH) dff (.*); -end + if (SHORT == 1) begin + assign dout = din; + end else begin + rvdff #(WIDTH) dff (.*); + end endmodule // rvdff with 2:1 input mux to flop din iff sel==1 -module rvdffs_fpga #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic en, - input logic clk, - input logic clken, - input logic rawclk, - input logic rst_l, +module rvdffs_fpga #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clk, + input logic clken, + input logic rawclk, + input logic rst_l, - output logic [WIDTH-1:0] dout - ); + output logic [WIDTH-1:0] dout +); -if (SHORT == 1) begin : genblock - assign dout = din; -end -else begin : genblock - rvdffs #(WIDTH) dffs (.*); -end + if (SHORT == 1) begin : genblock + assign dout = din; + end else begin : genblock + rvdffs #(WIDTH) dffs (.*); + end endmodule // rvdff with en and clear -module rvdffsc_fpga #( parameter WIDTH=1, SHORT=0 ) - ( - input logic [WIDTH-1:0] din, - input logic en, - input logic clear, - input logic clk, - input logic clken, - input logic rawclk, - input logic rst_l, +module rvdffsc_fpga #( + parameter WIDTH = 1, + SHORT = 0 +) ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clear, + input logic clk, + input logic clken, + input logic rawclk, + input logic rst_l, - output logic [WIDTH-1:0] dout - ); + 
output logic [WIDTH-1:0] dout +); - logic [WIDTH-1:0] din_new; -if (SHORT == 1) begin - assign dout = din; -end -else begin - rvdffsc #(WIDTH) dffsc (.*); -end + logic [WIDTH-1:0] din_new; + if (SHORT == 1) begin + assign dout = din; + end else begin + rvdffsc #(WIDTH) dffsc (.*); + end endmodule -module rvdffe #( parameter WIDTH=1, SHORT=0, OVERRIDE=0 ) - ( - input logic [WIDTH-1:0] din, - input logic en, - input logic clk, - input logic rst_l, - input logic scan_mode, - output logic [WIDTH-1:0] dout - ); +module rvdffe #( + parameter WIDTH = 1, + SHORT = 0, + OVERRIDE = 0 +) ( + input logic [WIDTH-1:0] din, + input logic en, + input logic clk, + input logic rst_l, + input logic scan_mode, + output logic [WIDTH-1:0] dout +); - logic l1clk; + logic l1clk; -if (SHORT == 1) begin : genblock - if (1) begin : genblock + if (SHORT == 1) begin : genblock + if (1) begin : genblock assign dout = din; - end -end -else begin : genblock - rvclkhdr clkhdr ( .* ); - rvdff #(WIDTH) dff (.*, .clk(l1clk)); -end // else: !if(SHORT == 1) + end + end else begin : genblock + rvclkhdr clkhdr (.*); + rvdff #(WIDTH) dff ( + .*, + .clk(l1clk) + ); + end // else: !if(SHORT == 1) -endmodule // rvdffe +endmodule // rvdffe -module rvdffpcie #( parameter WIDTH=31 ) - ( - input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, - input logic en, - input logic scan_mode, - output logic [WIDTH-1:0] dout - ); +module rvdffpcie #( + parameter WIDTH = 31 +) ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic rst_l, + input logic en, + input logic scan_mode, + output logic [WIDTH-1:0] dout +); - rvdfflie #(.WIDTH(WIDTH), .LEFT(19)) dff (.*); + rvdfflie #( + .WIDTH(WIDTH), + .LEFT (19) + ) dff ( + .* + ); endmodule // format: { LEFT, EXTRA } // LEFT # of bits will be done with rvdffie, all else EXTRA with rvdffe -module rvdfflie #( parameter WIDTH=16, LEFT=8 ) - ( - input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, - input logic en, - input logic scan_mode, 
- output logic [WIDTH-1:0] dout - ); +module rvdfflie #( + parameter WIDTH = 16, + LEFT = 8 +) ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic rst_l, + input logic en, + input logic scan_mode, + output logic [WIDTH-1:0] dout +); - localparam EXTRA = WIDTH-LEFT; + localparam EXTRA = WIDTH - LEFT; - localparam LMSB = WIDTH-1; - localparam LLSB = LMSB-LEFT+1; - localparam XMSB = LLSB-1; - localparam XLSB = LLSB-EXTRA; + localparam LMSB = WIDTH - 1; + localparam LLSB = LMSB - LEFT + 1; + localparam XMSB = LLSB - 1; + localparam XLSB = LLSB - EXTRA; - rvdffiee #(LEFT) dff_left (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB])); - rvdffe #(EXTRA) dff_extra (.*, .din(din[XMSB:XLSB]), .dout(dout[XMSB:XLSB])); + rvdffiee #(LEFT) dff_left ( + .*, + .din (din[LMSB:LLSB]), + .dout(dout[LMSB:LLSB]) + ); + rvdffe #(EXTRA) dff_extra ( + .*, + .din (din[XMSB:XLSB]), + .dout(dout[XMSB:XLSB]) + ); endmodule @@ -221,464 +252,549 @@ endmodule // special power flop for predict packet // format: { LEFT, RIGHT==31 } // LEFT # of bits will be done with rvdffe; RIGHT is enabled by LEFT[LSB] & en -module rvdffppe #( parameter WIDTH=32 ) - ( - input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, - input logic en, - input logic scan_mode, - output logic [WIDTH-1:0] dout - ); +module rvdffppe #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] din, + input logic clk, + input logic rst_l, + input logic en, + input logic scan_mode, + output logic [WIDTH-1:0] dout +); - localparam RIGHT = 31; - localparam LEFT = WIDTH - RIGHT; + localparam RIGHT = 31; + localparam LEFT = WIDTH - RIGHT; - localparam LMSB = WIDTH-1; - localparam LLSB = LMSB-LEFT+1; - localparam RMSB = LLSB-1; - localparam RLSB = LLSB-RIGHT; + localparam LMSB = WIDTH - 1; + localparam LLSB = LMSB - LEFT + 1; + localparam RMSB = LLSB - 1; + localparam RLSB = LLSB - RIGHT; - rvdffe #(LEFT) dff_left (.*, .din(din[LMSB:LLSB]), .dout(dout[LMSB:LLSB])); - rvdffe #(RIGHT) dff_right (.*, 
.din(din[RMSB:RLSB]), .dout(dout[RMSB:RLSB]), .en(en & din[LLSB])); // qualify with pret + rvdffe #(LEFT) dff_left ( + .*, + .din (din[LMSB:LLSB]), + .dout(dout[LMSB:LLSB]) + ); + rvdffe #(RIGHT) dff_right ( + .*, + .din (din[RMSB:RLSB]), + .dout(dout[RMSB:RLSB]), + .en (en & din[LLSB]) + ); // qualify with pret endmodule -module rvdffie #( parameter WIDTH=1, OVERRIDE=0 ) - ( - input logic [WIDTH-1:0] din, +module rvdffie #( + parameter WIDTH = 1, + OVERRIDE = 0 +) ( + input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, - input logic scan_mode, - output logic [WIDTH-1:0] dout - ); + input logic clk, + input logic rst_l, + input logic scan_mode, + output logic [WIDTH-1:0] dout +); - logic l1clk; - logic en; + logic l1clk; + logic en; - assign en = |(din ^ dout); + assign en = |(din ^ dout); - rvclkhdr clkhdr ( .* ); - rvdff #(WIDTH) dff (.*, .clk(l1clk)); + rvclkhdr clkhdr (.*); + rvdff #(WIDTH) dff ( + .*, + .clk(l1clk) + ); endmodule // ie flop but it has an .en input -module rvdffiee #( parameter WIDTH=1, OVERRIDE=0 ) - ( - input logic [WIDTH-1:0] din, +module rvdffiee #( + parameter WIDTH = 1, + OVERRIDE = 0 +) ( + input logic [WIDTH-1:0] din, - input logic clk, - input logic rst_l, - input logic scan_mode, - input logic en, - output logic [WIDTH-1:0] dout - ); + input logic clk, + input logic rst_l, + input logic scan_mode, + input logic en, + output logic [WIDTH-1:0] dout +); - logic l1clk; - logic final_en; + logic l1clk; + logic final_en; - assign final_en = (|(din ^ dout)) & en; + assign final_en = (|(din ^ dout)) & en; - rvdffe #(WIDTH) dff (.*, .en(final_en)); + rvdffe #(WIDTH) dff ( + .*, + .en(final_en) + ); endmodule -module rvsyncss #(parameter WIDTH = 251) - ( - input logic clk, - input logic rst_l, - input logic [WIDTH-1:0] din, - output logic [WIDTH-1:0] dout - ); +module rvsyncss #( + parameter WIDTH = 251 +) ( + input logic clk, + input logic rst_l, + input logic [WIDTH-1:0] din, + output logic [WIDTH-1:0] dout +); - logic 
[WIDTH-1:0] din_ff1; + logic [WIDTH-1:0] din_ff1; - rvdff #(WIDTH) sync_ff1 (.*, .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0])); - rvdff #(WIDTH) sync_ff2 (.*, .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0])); + rvdff #(WIDTH) sync_ff1 ( + .*, + .din (din[WIDTH-1:0]), + .dout(din_ff1[WIDTH-1:0]) + ); + rvdff #(WIDTH) sync_ff2 ( + .*, + .din (din_ff1[WIDTH-1:0]), + .dout(dout[WIDTH-1:0]) + ); -endmodule // rvsyncss +endmodule // rvsyncss -module rvsyncss_fpga #(parameter WIDTH = 251) - ( - input logic gw_clk, - input logic rawclk, - input logic clken, - input logic rst_l, - input logic [WIDTH-1:0] din, - output logic [WIDTH-1:0] dout - ); +module rvsyncss_fpga #( + parameter WIDTH = 251 +) ( + input logic gw_clk, + input logic rawclk, + input logic clken, + input logic rst_l, + input logic [WIDTH-1:0] din, + output logic [WIDTH-1:0] dout +); - logic [WIDTH-1:0] din_ff1; + logic [WIDTH-1:0] din_ff1; - rvdff_fpga #(WIDTH) sync_ff1 (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din[WIDTH-1:0]), .dout(din_ff1[WIDTH-1:0])); - rvdff_fpga #(WIDTH) sync_ff2 (.*, .clk(gw_clk), .rawclk(rawclk), .clken(clken), .din (din_ff1[WIDTH-1:0]), .dout(dout[WIDTH-1:0])); + rvdff_fpga #(WIDTH) sync_ff1 ( + .*, + .clk(gw_clk), + .rawclk(rawclk), + .clken(clken), + .din(din[WIDTH-1:0]), + .dout(din_ff1[WIDTH-1:0]) + ); + rvdff_fpga #(WIDTH) sync_ff2 ( + .*, + .clk(gw_clk), + .rawclk(rawclk), + .clken(clken), + .din(din_ff1[WIDTH-1:0]), + .dout(dout[WIDTH-1:0]) + ); -endmodule // rvsyncss +endmodule // rvsyncss -module rvlsadder - ( +module rvlsadder ( input logic [31:0] rs1, input logic [11:0] offset, output logic [31:0] dout - ); +); - logic cout; - logic sign; + logic cout; + logic sign; - logic [31:12] rs1_inc; - logic [31:12] rs1_dec; + logic [31:12] rs1_inc; + logic [31:12] rs1_dec; - assign {cout,dout[11:0]} = {1'b0,rs1[11:0]} + {1'b0,offset[11:0]}; + assign {cout, dout[11:0]} = {1'b0, rs1[11:0]} + {1'b0, offset[11:0]}; - assign rs1_inc[31:12] = rs1[31:12] + 1; + 
assign rs1_inc[31:12] = rs1[31:12] + 1; - assign rs1_dec[31:12] = rs1[31:12] - 1; + assign rs1_dec[31:12] = rs1[31:12] - 1; - assign sign = offset[11]; + assign sign = offset[11]; - assign dout[31:12] = ({20{ sign ^~ cout}} & rs1[31:12]) | + assign dout[31:12] = ({20{ sign ^~ cout}} & rs1[31:12]) | ({20{ ~sign & cout}} & rs1_inc[31:12]) | ({20{ sign & ~cout}} & rs1_dec[31:12]); -endmodule // rvlsadder +endmodule // rvlsadder // assume we only maintain pc[31:1] in the pipe -module rvbradder - ( +module rvbradder ( input [31:1] pc, input [12:1] offset, output [31:1] dout - ); +); - logic cout; - logic sign; + logic cout; + logic sign; - logic [31:13] pc_inc; - logic [31:13] pc_dec; + logic [31:13] pc_inc; + logic [31:13] pc_dec; - assign {cout,dout[12:1]} = {1'b0,pc[12:1]} + {1'b0,offset[12:1]}; + assign {cout, dout[12:1]} = {1'b0, pc[12:1]} + {1'b0, offset[12:1]}; - assign pc_inc[31:13] = pc[31:13] + 1; + assign pc_inc[31:13] = pc[31:13] + 1; - assign pc_dec[31:13] = pc[31:13] - 1; + assign pc_dec[31:13] = pc[31:13] - 1; - assign sign = offset[12]; + assign sign = offset[12]; - assign dout[31:13] = ({19{ sign ^~ cout}} & pc[31:13]) | + assign dout[31:13] = ({19{ sign ^~ cout}} & pc[31:13]) | ({19{ ~sign & cout}} & pc_inc[31:13]) | ({19{ sign & ~cout}} & pc_dec[31:13]); -endmodule // rvbradder +endmodule // rvbradder // 2s complement circuit -module rvtwoscomp #( parameter WIDTH=32 ) - ( - input logic [WIDTH-1:0] din, +module rvtwoscomp #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] din, - output logic [WIDTH-1:0] dout - ); + output logic [WIDTH-1:0] dout +); - logic [WIDTH-1:1] dout_temp; // holding for all other bits except for the lsb. LSB is always din + logic [WIDTH-1:1] dout_temp; // holding for all other bits except for the lsb. LSB is always din - genvar i; + genvar i; - for ( i = 1; i < WIDTH; i++ ) begin : flip_after_first_one - assign dout_temp[i] = (|din[i-1:0]) ? 
~din[i] : din[i]; - end : flip_after_first_one + for (i = 1; i < WIDTH; i++) begin : flip_after_first_one + assign dout_temp[i] = (|din[i-1:0]) ? ~din[i] : din[i]; + end : flip_after_first_one - assign dout[WIDTH-1:0] = { dout_temp[WIDTH-1:1], din[0] }; + assign dout[WIDTH-1:0] = {dout_temp[WIDTH-1:1], din[0]}; endmodule // 2'scomp // find first -module rvfindfirst1 #( parameter WIDTH=32, SHIFT=$clog2(WIDTH) ) - ( - input logic [WIDTH-1:0] din, +module rvfindfirst1 #( + parameter WIDTH = 32, + SHIFT = $clog2(WIDTH) +) ( + input logic [WIDTH-1:0] din, - output logic [SHIFT-1:0] dout - ); - logic done; + output logic [SHIFT-1:0] dout +); + logic done; - always_comb begin - dout[SHIFT-1:0] = {SHIFT{1'b0}}; - done = 1'b0; + always_comb begin + dout[SHIFT-1:0] = {SHIFT{1'b0}}; + done = 1'b0; - for ( int i = WIDTH-1; i > 0; i-- ) begin : find_first_one - done |= din[i]; - dout[SHIFT-1:0] += done ? 1'b0 : 1'b1; - end : find_first_one - end -endmodule // rvfindfirst1 + for (int i = WIDTH - 1; i > 0; i--) begin : find_first_one + done |= din[i]; + dout[SHIFT-1:0] += done ? 1'b0 : 1'b1; + end : find_first_one + end +endmodule // rvfindfirst1 -module rvfindfirst1hot #( parameter WIDTH=32 ) - ( - input logic [WIDTH-1:0] din, +module rvfindfirst1hot #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] din, - output logic [WIDTH-1:0] dout - ); - logic done; + output logic [WIDTH-1:0] dout +); + logic done; - always_comb begin - dout[WIDTH-1:0] = {WIDTH{1'b0}}; - done = 1'b0; - for ( int i = 0; i < WIDTH; i++ ) begin : find_first_one - dout[i] = ~done & din[i]; - done |= din[i]; - end : find_first_one - end -endmodule // rvfindfirst1hot + always_comb begin + dout[WIDTH-1:0] = {WIDTH{1'b0}}; + done = 1'b0; + for (int i = 0; i < WIDTH; i++) begin : find_first_one + dout[i] = ~done & din[i]; + done |= din[i]; + end : find_first_one + end +endmodule // rvfindfirst1hot // mask and match function matches bits after finding the first 0 position // find first starting from LSB. 
Skip that location and match the rest of the bits -module rvmaskandmatch #( parameter WIDTH=32 ) - ( - input logic [WIDTH-1:0] mask, // this will have the mask in the lower bit positions - input logic [WIDTH-1:0] data, // this is what needs to be matched on the upper bits with the mask's upper bits - input logic masken, // when 1 : do mask. 0 : full match - output logic match - ); +module rvmaskandmatch #( + parameter WIDTH = 32 +) ( + input logic [WIDTH-1:0] mask, // this will have the mask in the lower bit positions + input logic [WIDTH-1:0] data, // this is what needs to be matched on the upper bits with the mask's upper bits + input logic masken, // when 1 : do mask. 0 : full match + output logic match +); - logic [WIDTH-1:0] matchvec; - logic masken_or_fullmask; + logic [WIDTH-1:0] matchvec; + logic masken_or_fullmask; - assign masken_or_fullmask = masken & ~(&mask[WIDTH-1:0]); + assign masken_or_fullmask = masken & ~(&mask[WIDTH-1:0]); - assign matchvec[0] = masken_or_fullmask | (mask[0] == data[0]); - genvar i; + assign matchvec[0] = masken_or_fullmask | (mask[0] == data[0]); + genvar i; - for ( i = 1; i < WIDTH; i++ ) begin : match_after_first_zero - assign matchvec[i] = (&mask[i-1:0] & masken_or_fullmask) ? 1'b1 : (mask[i] == data[i]); - end : match_after_first_zero + for (i = 1; i < WIDTH; i++) begin : match_after_first_zero + assign matchvec[i] = (&mask[i-1:0] & masken_or_fullmask) ? 
1'b1 : (mask[i] == data[i]); + end : match_after_first_zero - assign match = &matchvec[WIDTH-1:0]; // all bits either matched or were masked off + assign match = &matchvec[WIDTH-1:0]; // all bits either matched or were masked off -endmodule // rvmaskandmatch +endmodule // rvmaskandmatch // Check if the S_ADDR <= addr < E_ADDR -module rvrangecheck #(CCM_SADR = 32'h0, - CCM_SIZE = 128) ( - input logic [31:0] addr, // Address to be checked for range - output logic in_range, // S_ADDR <= start_addr < E_ADDR - output logic in_region +module rvrangecheck #( + CCM_SADR = 32'h0, + CCM_SIZE = 128 +) ( + input logic [31:0] addr, // Address to be checked for range + output logic in_range, // S_ADDR <= start_addr < E_ADDR + output logic in_region ); - localparam REGION_BITS = 4; - localparam MASK_BITS = 10 + $clog2(CCM_SIZE); + localparam REGION_BITS = 4; + localparam MASK_BITS = 10 + $clog2(CCM_SIZE); - logic [31:0] start_addr; - logic [3:0] region; + logic [31:0] start_addr; + logic [ 3:0] region; - assign start_addr[31:0] = CCM_SADR; - assign region[REGION_BITS-1:0] = start_addr[31:(32-REGION_BITS)]; + assign start_addr[31:0] = CCM_SADR; + assign region[REGION_BITS-1:0] = start_addr[31:(32-REGION_BITS)]; - assign in_region = (addr[31:(32-REGION_BITS)] == region[REGION_BITS-1:0]); - if (CCM_SIZE == 48) + assign in_region = (addr[31:(32-REGION_BITS)] == region[REGION_BITS-1:0]); + if (CCM_SIZE == 48) assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]) & ~(&addr[MASK_BITS-1 : MASK_BITS-2]); - else - assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]); + else assign in_range = (addr[31:MASK_BITS] == start_addr[31:MASK_BITS]); endmodule // rvrangechecker // 16 bit even parity generator -module rveven_paritygen #(WIDTH = 16) ( - input logic [WIDTH-1:0] data_in, // Data - output logic parity_out // generated even parity - ); +module rveven_paritygen #( + WIDTH = 16 +) ( + input logic [WIDTH-1:0] data_in, // Data + output logic parity_out // generated 
even parity +); - assign parity_out = ^(data_in[WIDTH-1:0]) ; + assign parity_out = ^(data_in[WIDTH-1:0]); endmodule // rveven_paritygen -module rveven_paritycheck #(WIDTH = 16) ( - input logic [WIDTH-1:0] data_in, // Data - input logic parity_in, - output logic parity_err // Parity error - ); +module rveven_paritycheck #( + WIDTH = 16 +) ( + input logic [WIDTH-1:0] data_in, // Data + input logic parity_in, + output logic parity_err // Parity error +); - assign parity_err = ^(data_in[WIDTH-1:0]) ^ parity_in ; + assign parity_err = ^(data_in[WIDTH-1:0]) ^ parity_in; endmodule // rveven_paritycheck -module rvecc_encode ( - input [31:0] din, - output [6:0] ecc_out - ); -logic [5:0] ecc_out_temp; +module rvecc_encode ( + input [31:0] din, + output [ 6:0] ecc_out +); + logic [5:0] ecc_out_temp; - assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; - assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; - assign ecc_out_temp[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; - assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; - assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; - assign ecc_out_temp[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31]; + assign ecc_out_temp[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; + assign ecc_out_temp[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; + 
assign ecc_out_temp[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; + assign ecc_out_temp[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_out_temp[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_out_temp[5] = din[26] ^ din[27] ^ din[28] ^ din[29] ^ din[30] ^ din[31]; - assign ecc_out[6:0] = {(^din[31:0])^(^ecc_out_temp[5:0]),ecc_out_temp[5:0]}; + assign ecc_out[6:0] = {(^din[31:0]) ^ (^ecc_out_temp[5:0]), ecc_out_temp[5:0]}; -endmodule // rvecc_encode +endmodule // rvecc_encode -module rvecc_decode ( - input en, - input [31:0] din, - input [6:0] ecc_in, - input sed_ded, // only do detection and no correction. Used for the I$ - output [31:0] dout, - output [6:0] ecc_out, - output single_ecc_error, - output double_ecc_error +module rvecc_decode ( + input en, + input [31:0] din, + input [ 6:0] ecc_in, + input sed_ded, // only do detection and no correction. 
Used for the I$ + output [31:0] dout, + output [ 6:0] ecc_out, + output single_ecc_error, + output double_ecc_error - ); +); - logic [6:0] ecc_check; - logic [38:0] error_mask; - logic [38:0] din_plus_parity, dout_plus_parity; + logic [ 6:0] ecc_check; + logic [38:0] error_mask; + logic [38:0] din_plus_parity, dout_plus_parity; - // Generate the ecc bits - assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; - assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; - assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; - assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; - assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; - assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]; + // Generate the ecc bits + assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]; + assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]; + assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]; + assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_check[4] = 
ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]; + assign ecc_check[5] = ecc_in[5] ^ din[26] ^ din[27] ^ din[28] ^ din[29] ^ din[30] ^ din[31]; - // This is the parity bit - assign ecc_check[6] = ((^din[31:0])^(^ecc_in[6:0])) & ~sed_ded; + // This is the parity bit + assign ecc_check[6] = ((^din[31:0]) ^ (^ecc_in[6:0])) & ~sed_ded; - assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6]; // this will never be on for sed_ded - assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6]; // all errors in the sed_ded case will be recorded as DE + assign single_ecc_error = en & (ecc_check[6:0] != 0) & ecc_check[6]; // this will never be on for sed_ded + assign double_ecc_error = en & (ecc_check[6:0] != 0) & ~ecc_check[6]; // all errors in the sed_ded case will be recorded as DE - // Generate the mask for error correctiong - for (genvar i=1; i<40; i++) begin - assign error_mask[i-1] = (ecc_check[5:0] == i); - end + // Generate the mask for error correctiong + for (genvar i = 1; i < 40; i++) begin + assign error_mask[i-1] = (ecc_check[5:0] == i); + end - // Generate the corrected data - assign din_plus_parity[38:0] = {ecc_in[6], din[31:26], ecc_in[5], din[25:11], ecc_in[4], din[10:4], ecc_in[3], din[3:1], ecc_in[2], din[0], ecc_in[1:0]}; + // Generate the corrected data + assign din_plus_parity[38:0] = { + ecc_in[6], + din[31:26], + ecc_in[5], + din[25:11], + ecc_in[4], + din[10:4], + ecc_in[3], + din[3:1], + ecc_in[2], + din[0], + ecc_in[1:0] + }; - assign dout_plus_parity[38:0] = single_ecc_error ? 
(error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0]; - assign dout[31:0] = {dout_plus_parity[37:32], dout_plus_parity[30:16], dout_plus_parity[14:8], dout_plus_parity[6:4], dout_plus_parity[2]}; - assign ecc_out[6:0] = {(dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), dout_plus_parity[31], dout_plus_parity[15], dout_plus_parity[7], dout_plus_parity[3], dout_plus_parity[1:0]}; + assign dout_plus_parity[38:0] = single_ecc_error ? (error_mask[38:0] ^ din_plus_parity[38:0]) : din_plus_parity[38:0]; + assign dout[31:0] = { + dout_plus_parity[37:32], + dout_plus_parity[30:16], + dout_plus_parity[14:8], + dout_plus_parity[6:4], + dout_plus_parity[2] + }; + assign ecc_out[6:0] = { + (dout_plus_parity[38] ^ (ecc_check[6:0] == 7'b1000000)), + dout_plus_parity[31], + dout_plus_parity[15], + dout_plus_parity[7], + dout_plus_parity[3], + dout_plus_parity[1:0] + }; -endmodule // rvecc_decode +endmodule // rvecc_decode -module rvecc_encode_64 ( - input [63:0] din, - output [6:0] ecc_out - ); +module rvecc_encode_64 ( + input [63:0] din, + output [ 6:0] ecc_out +); assign ecc_out[0] = din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63]; - assign ecc_out[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63]; + assign ecc_out[1] = din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63]; - assign ecc_out[2] = 
din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63]; + assign ecc_out[2] = din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63]; - assign ecc_out[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_out[3] = din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_out[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_out[4] = din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_out[5] = din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_out[5] = 
din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_out[6] = din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63]; + assign ecc_out[6] = din[57] ^ din[58] ^ din[59] ^ din[60] ^ din[61] ^ din[62] ^ din[63]; -endmodule // rvecc_encode_64 +endmodule // rvecc_encode_64 -module rvecc_decode_64 ( - input en, - input [63:0] din, - input [6:0] ecc_in, - output ecc_error - ); +module rvecc_decode_64 ( + input en, + input [63:0] din, + input [ 6:0] ecc_in, + output ecc_error +); - logic [6:0] ecc_check; + logic [6:0] ecc_check; - // Generate the ecc bits - assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63]; + // Generate the ecc bits + assign ecc_check[0] = ecc_in[0]^din[0]^din[1]^din[3]^din[4]^din[6]^din[8]^din[10]^din[11]^din[13]^din[15]^din[17]^din[19]^din[21]^din[23]^din[25]^din[26]^din[28]^din[30]^din[32]^din[34]^din[36]^din[38]^din[40]^din[42]^din[44]^din[46]^din[48]^din[50]^din[52]^din[54]^din[56]^din[57]^din[59]^din[61]^din[63]; - assign ecc_check[1] = ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63]; + assign ecc_check[1] = 
ecc_in[1]^din[0]^din[2]^din[3]^din[5]^din[6]^din[9]^din[10]^din[12]^din[13]^din[16]^din[17]^din[20]^din[21]^din[24]^din[25]^din[27]^din[28]^din[31]^din[32]^din[35]^din[36]^din[39]^din[40]^din[43]^din[44]^din[47]^din[48]^din[51]^din[52]^din[55]^din[56]^din[58]^din[59]^din[62]^din[63]; - assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63]; + assign ecc_check[2] = ecc_in[2]^din[1]^din[2]^din[3]^din[7]^din[8]^din[9]^din[10]^din[14]^din[15]^din[16]^din[17]^din[22]^din[23]^din[24]^din[25]^din[29]^din[30]^din[31]^din[32]^din[37]^din[38]^din[39]^din[40]^din[45]^din[46]^din[47]^din[48]^din[53]^din[54]^din[55]^din[56]^din[60]^din[61]^din[62]^din[63]; - assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_check[3] = ecc_in[3]^din[4]^din[5]^din[6]^din[7]^din[8]^din[9]^din[10]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_check[4] = ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_check[4] = 
ecc_in[4]^din[11]^din[12]^din[13]^din[14]^din[15]^din[16]^din[17]^din[18]^din[19]^din[20]^din[21]^din[22]^din[23]^din[24]^din[25]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; + assign ecc_check[5] = ecc_in[5]^din[26]^din[27]^din[28]^din[29]^din[30]^din[31]^din[32]^din[33]^din[34]^din[35]^din[36]^din[37]^din[38]^din[39]^din[40]^din[41]^din[42]^din[43]^din[44]^din[45]^din[46]^din[47]^din[48]^din[49]^din[50]^din[51]^din[52]^din[53]^din[54]^din[55]^din[56]; - assign ecc_check[6] = ecc_in[6]^din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63]; + assign ecc_check[6] = ecc_in[6]^din[57]^din[58]^din[59]^din[60]^din[61]^din[62]^din[63]; - assign ecc_error = en & (ecc_check[6:0] != 0); // all errors in the sed_ded case will be recorded as DE + assign ecc_error = en & (ecc_check[6:0] != 0); // all errors in the sed_ded case will be recorded as DE - endmodule // rvecc_decode_64 +endmodule // rvecc_decode_64 -module clockhdr - ( - input logic SE, EN, CK, - output Q - ); +module clockhdr ( + input logic SE, + EN, + CK, + output Q +); - logic en_ff; - logic enable; + logic en_ff; + logic enable; - assign enable = EN | SE; + assign enable = EN | SE; `ifdef VERILATOR - always @(negedge CK) begin - en_ff <= enable; - end + always @(negedge CK) begin + en_ff <= enable; + end `else - always @(CK, enable) begin - if(!CK) - en_ff = enable; - end + always @(CK, enable) begin + if (!CK) en_ff = enable; + end `endif - assign Q = CK & en_ff; + assign Q = CK & en_ff; endmodule -module rvclkhdr - ( - input logic en, - input logic clk, - input logic scan_mode, - output logic l1clk - ); +module rvclkhdr ( + input logic en, + input 
logic clk, + input logic scan_mode, + output logic l1clk +); - logic SE; - assign SE = 0; + logic SE; + assign SE = 0; - clockhdr clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk)); + clockhdr clkhdr ( + .*, + .EN(en), + .CK(clk), + .Q (l1clk) + ); -endmodule // rvclkhdr +endmodule // rvclkhdr -module rvoclkhdr - ( - input logic en, - input logic clk, - input logic scan_mode, - output logic l1clk - ); +module rvoclkhdr ( + input logic en, + input logic clk, + input logic scan_mode, + output logic l1clk +); - logic SE; - assign SE = 0; + logic SE; + assign SE = 0; - clockhdr clkhdr ( .*, .EN(en), .CK(clk), .Q(l1clk)); + clockhdr clkhdr ( + .*, + .EN(en), + .CK(clk), + .Q (l1clk) + ); endmodule diff --git a/Flow/design/lib/el2_lib.sv b/Flow/design/lib/el2_lib.sv index 6f71a3c..49f33ab 100644 --- a/Flow/design/lib/el2_lib.sv +++ b/Flow/design/lib/el2_lib.sv @@ -1,64 +1,71 @@ module el2_btb_tag_hash #( -`include "el2_param.vh" - ) ( - input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc, - output logic [pt.BTB_BTAG_SIZE-1:0] hash - ); + `include "el2_param.vh" +) ( + input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc, + output logic [pt.BTB_BTAG_SIZE-1:0] hash +); - assign hash = {(pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+1] ^ + assign hash = { + (pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+1] ^ pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^ - pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])}; + pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]) + }; endmodule -module el2_btb_tag_hash_fold #( -`include "el2_param.vh" - )( - input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc, - output logic [pt.BTB_BTAG_SIZE-1:0] hash - ); +module el2_btb_tag_hash_fold #( 
+ `include "el2_param.vh" +) ( + input logic [pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1] pc, + output logic [pt.BTB_BTAG_SIZE-1:0] hash +); - assign hash = {( + assign hash = { + ( pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+1] ^ - pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1])}; + pc[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]) + }; endmodule -module el2_btb_addr_hash #( -`include "el2_param.vh" - )( - input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc, - output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash - ); +module el2_btb_addr_hash #( + `include "el2_param.vh" +) ( + input logic [pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO] pc, + output logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hash +); -if(pt.BTB_FOLD2_INDEX_HASH) begin : fold2 - assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^ + if (pt.BTB_FOLD2_INDEX_HASH) begin : fold2 + assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^ pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO]; -end - else begin - assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^ + end else begin + assign hash[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = pc[pt.BTB_INDEX1_HI:pt.BTB_INDEX1_LO] ^ pc[pt.BTB_INDEX2_HI:pt.BTB_INDEX2_LO] ^ pc[pt.BTB_INDEX3_HI:pt.BTB_INDEX3_LO]; -end + end endmodule -module el2_btb_ghr_hash #( -`include "el2_param.vh" - )( - input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin, - input logic [pt.BHT_GHR_SIZE-1:0] ghr, - output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash - ); +module el2_btb_ghr_hash #( + `include "el2_param.vh" +) ( + input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] hashin, + input logic [pt.BHT_GHR_SIZE-1:0] ghr, + output logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] hash +); - // The hash function is too complex to write in verilog for all cases. - // The config script generates the logic string based on the bp config. 
- if(pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1 - assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1], hashin[pt.BTB_INDEX1_HI:2]^ghr[pt.BTB_INDEX1_HI-2:0]}; - end - else begin : ghrhash_cfg2 - assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { hashin[pt.BHT_GHR_SIZE+1:2]^ghr[pt.BHT_GHR_SIZE-1:0]}; - end + // The hash function is too complex to write in verilog for all cases. + // The config script generates the logic string based on the bp config. + if (pt.BHT_GHR_HASH_1) begin : ghrhash_cfg1 + assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { + ghr[pt.BHT_GHR_SIZE-1:pt.BTB_INDEX1_HI-1], + hashin[pt.BTB_INDEX1_HI:2] ^ ghr[pt.BTB_INDEX1_HI-2:0] + }; + end else begin : ghrhash_cfg2 + assign hash[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = { + hashin[pt.BHT_GHR_SIZE+1:2] ^ ghr[pt.BHT_GHR_SIZE-1:0] + }; + end endmodule diff --git a/Flow/design/lib/mem_lib.sv b/Flow/design/lib/mem_lib.sv index 325f80e..34fc5c8 100644 --- a/Flow/design/lib/mem_lib.sv +++ b/Flow/design/lib/mem_lib.sv @@ -83,22 +83,28 @@ assign ROP = ME; \ endmodule // parameterizable RAM for verilator sims -module el2_ram #(depth=4096, width=39) ( -input logic [$clog2(depth)-1:0] ADR, -input logic [(width-1):0] D, -output logic [(width-1):0] Q, - `EL2_LOCAL_RAM_TEST_IO +module el2_ram #( + depth = 4096, + width = 39 +) ( + input logic [$clog2(depth)-1:0] ADR, + input logic [(width-1):0] D, + output logic [(width-1):0] Q, + `EL2_LOCAL_RAM_TEST_IO ); -reg [(width-1):0] ram_core [(depth-1):0]; + reg [(width-1):0] ram_core[(depth-1):0]; -always @(posedge CLK) begin + always @(posedge CLK) begin `ifdef GTLSIM - if (ME && WE) ram_core[ADR] <= D; + if (ME && WE) ram_core[ADR] <= D; `else - if (ME && WE) begin ram_core[ADR] <= D; Q <= 'x; end + if (ME && WE) begin + ram_core[ADR] <= D; + Q <= 'x; + end `endif - if (ME && ~WE) Q <= ram_core[ADR]; -end + if (ME && ~WE) Q <= ram_core[ADR]; + end endmodule 
//========================================================================================================================= @@ -111,7 +117,7 @@ endmodule `EL2_RAM(4096, 39) `EL2_RAM(3072, 39) `EL2_RAM(2048, 39) -`EL2_RAM(1536, 39) // need this for the 48KB DCCM option) +`EL2_RAM(1536, 39) // need this for the 48KB DCCM option) `EL2_RAM(1024, 39) `EL2_RAM(768, 39) `EL2_RAM(512, 39) diff --git a/Flow/design/lsu/el2_lsu.sv b/Flow/design/lsu/el2_lsu.sv index ecdba92..e1de432 100644 --- a/Flow/design/lsu/el2_lsu.sv +++ b/Flow/design/lsu/el2_lsu.sv @@ -26,311 +26,312 @@ //******************************************************************************** module el2_lsu -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) -( + `include "el2_param.vh" +) ( - input logic clk_override, // Override non-functional clock gating - input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. This is used to flush the old packets only - input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state - input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt + input logic clk_override, // Override non-functional clock gating + input logic dec_tlu_flush_lower_r, // I0/I1 writeback flush. 
This is used to flush the old packets only + input logic dec_tlu_i0_kill_writeb_r, // I0 is flushed, don't writeback any results to arch state + input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt - // chicken signals - input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals - input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce - input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus - input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc + // chicken signals + input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals + input logic dec_tlu_wb_coalescing_disable, // disable the write buffer coalesce + input logic dec_tlu_sideeffect_posted_disable, // disable the posted sideeffect load store to the bus + input logic dec_tlu_core_ecc_disable, // disable the generation of the ecc - input logic [31:0] exu_lsu_rs1_d, // address rs operand - input logic [31:0] exu_lsu_rs2_d, // store data - input logic [11:0] dec_lsu_offset_d, // address offset operand + input logic [31:0] exu_lsu_rs1_d, // address rs operand + input logic [31:0] exu_lsu_rs2_d, // store data + input logic [11:0] dec_lsu_offset_d, // address offset operand - input el2_lsu_pkt_t lsu_p, // lsu control packet - input logic dec_lsu_valid_raw_d, // Raw valid for address computation - input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + input el2_lsu_pkt_t lsu_p, // lsu control packet + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control - output logic [31:0] lsu_result_m, // lsu load data - output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF - output logic lsu_load_stall_any, // This is for blocking loads in the decode - output logic lsu_store_stall_any, // This is for blocking 
stores in the decode - output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage - output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA - output logic lsu_active, // Used to turn off top level clk + output logic [31:0] lsu_result_m, // lsu load data + output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF + output logic lsu_load_stall_any, // This is for blocking loads in the decode + output logic lsu_store_stall_any, // This is for blocking stores in the decode + output logic lsu_fastint_stall_any, // Stall the fastint in decode-1 stage + output logic lsu_idle_any, // lsu buffers are empty and no instruction in the pipeline. Doesn't include DMA + output logic lsu_active, // Used to turn off top level clk - output logic [31:1] lsu_fir_addr, // fast interrupt address - output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup + output logic [31:1] lsu_fir_addr, // fast interrupt address + output logic [ 1:0] lsu_fir_error, // Error during fast interrupt lookup - output logic lsu_single_ecc_error_incr, // Increment the ecc counter - output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet - output logic lsu_imprecise_error_load_any, // bus load imprecise error - output logic lsu_imprecise_error_store_any, // bus store imprecise error - output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address + output logic lsu_single_ecc_error_incr, // Increment the ecc counter + output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet + output logic lsu_imprecise_error_load_any, // bus load imprecise error + output logic lsu_imprecise_error_store_any, // bus store imprecise error + output logic [31:0] lsu_imprecise_error_addr_any, // bus store imprecise error address - // Non-blocking loads - output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam - output logic 
[pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load - output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated - output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam - output logic lsu_nonblock_load_data_error, // non block load has an error - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error - output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load - output logic lsu_pmu_load_external_m, // PMU : Bus loads - output logic lsu_pmu_store_external_m, // PMU : Bus loads - output logic lsu_pmu_misaligned_m, // PMU : misaligned - output logic lsu_pmu_bus_trxn, // PMU : bus transaction - output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus - output logic lsu_pmu_bus_error, // PMU : bus sending error back - output logic lsu_pmu_bus_busy, // PMU : bus is not ready + output 
logic lsu_pmu_load_external_m, // PMU : Bus loads + output logic lsu_pmu_store_external_m, // PMU : Bus loads + output logic lsu_pmu_misaligned_m, // PMU : misaligned + output logic lsu_pmu_bus_trxn, // PMU : bus transaction + output logic lsu_pmu_bus_misaligned, // PMU : misaligned access going to the bus + output logic lsu_pmu_bus_error, // PMU : bus sending error back + output logic lsu_pmu_bus_busy, // PMU : bus is not ready - // Trigger signals - input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode - output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger) + // Trigger signals + input el2_trigger_pkt_t [3:0] trigger_pkt_any, // Trigger info from the decode + output logic [3:0] lsu_trigger_match_m, // lsu trigger hit (one bit per trigger) - // DCCM ports - output logic dccm_wren, // DCCM write enable - output logic dccm_rden, // DCCM read enable - output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank - output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank - output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank - output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read) - output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo bank - output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank + // DCCM ports + output logic dccm_wren, // DCCM write enable + output logic dccm_rden, // DCCM read enable + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // DCCM write address low bank + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // DCCM write address hi bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // DCCM read address low bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // DCCM read address hi bank (hi and low same if aligned read) + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // DCCM write data for lo 
bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // DCCM write data for hi bank - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // DCCM read data low bank + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // DCCM read data hi bank - // PIC ports - output logic picm_wren, // PIC memory write enable - output logic picm_rden, // PIC memory read enable - output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward - output logic [31:0] picm_rdaddr, // address for pic read access - output logic [31:0] picm_wraddr, // address for pic write access - output logic [31:0] picm_wr_data, // PIC memory write data - input logic [31:0] picm_rd_data, // PIC memory read/mask data + // PIC ports + output logic picm_wren, // PIC memory write enable + output logic picm_rden, // PIC memory read enable + output logic picm_mken, // Need to read the mask for stores to determine which bits to write/forward + output logic [31:0] picm_rdaddr, // address for pic read access + output logic [31:0] picm_wraddr, // address for pic write access + output logic [31:0] picm_wr_data, // PIC memory write data + input logic [31:0] picm_rd_data, // PIC memory read/mask data - // AXI Write Channels - output logic lsu_axi_awvalid, - input logic lsu_axi_awready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, - output logic [31:0] lsu_axi_awaddr, - output logic [3:0] lsu_axi_awregion, - output logic [7:0] lsu_axi_awlen, - output logic [2:0] lsu_axi_awsize, - output logic [1:0] lsu_axi_awburst, - output logic lsu_axi_awlock, - output logic [3:0] lsu_axi_awcache, - output logic [2:0] lsu_axi_awprot, - output logic [3:0] lsu_axi_awqos, + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + 
output logic [ 31:0] lsu_axi_awaddr, + output logic [ 3:0] lsu_axi_awregion, + output logic [ 7:0] lsu_axi_awlen, + output logic [ 2:0] lsu_axi_awsize, + output logic [ 1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [ 3:0] lsu_axi_awcache, + output logic [ 2:0] lsu_axi_awprot, + output logic [ 3:0] lsu_axi_awqos, - output logic lsu_axi_wvalid, - input logic lsu_axi_wready, - output logic [63:0] lsu_axi_wdata, - output logic [7:0] lsu_axi_wstrb, - output logic lsu_axi_wlast, + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [ 7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, - input logic lsu_axi_bvalid, - output logic lsu_axi_bready, - input logic [1:0] lsu_axi_bresp, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [ 1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, - // AXI Read Channels - output logic lsu_axi_arvalid, - input logic lsu_axi_arready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, - output logic [31:0] lsu_axi_araddr, - output logic [3:0] lsu_axi_arregion, - output logic [7:0] lsu_axi_arlen, - output logic [2:0] lsu_axi_arsize, - output logic [1:0] lsu_axi_arburst, - output logic lsu_axi_arlock, - output logic [3:0] lsu_axi_arcache, - output logic [2:0] lsu_axi_arprot, - output logic [3:0] lsu_axi_arqos, + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [ 31:0] lsu_axi_araddr, + output logic [ 3:0] lsu_axi_arregion, + output logic [ 7:0] lsu_axi_arlen, + output logic [ 2:0] lsu_axi_arsize, + output logic [ 1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [ 3:0] lsu_axi_arcache, + output logic [ 2:0] lsu_axi_arprot, + output logic [ 3:0] lsu_axi_arqos, - input logic lsu_axi_rvalid, - output logic lsu_axi_rready, - input logic [pt.LSU_BUS_TAG-1:0] 
lsu_axi_rid, - input logic [63:0] lsu_axi_rdata, - input logic [1:0] lsu_axi_rresp, - input logic lsu_axi_rlast, + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [ 63:0] lsu_axi_rdata, + input logic [ 1:0] lsu_axi_rresp, + input logic lsu_axi_rlast, - input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio + input logic lsu_bus_clk_en, // external drives a clock_en to control bus ratio - // DMA slave - input logic dma_dccm_req, // DMA read/write to dccm - input logic [2:0] dma_mem_tag, // DMA request tag - input logic [31:0] dma_mem_addr, // DMA address - input logic [2:0] dma_mem_sz, // DMA access size - input logic dma_mem_write, // DMA access is a write - input logic [63:0] dma_mem_wdata, // DMA write data + // DMA slave + input logic dma_dccm_req, // DMA read/write to dccm + input logic [ 2:0] dma_mem_tag, // DMA request tag + input logic [31:0] dma_mem_addr, // DMA address + input logic [ 2:0] dma_mem_sz, // DMA access size + input logic dma_mem_write, // DMA access is a write + input logic [63:0] dma_mem_wdata, // DMA write data - output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read - output logic dccm_dma_ecc_error, // DMA load had ecc error - output logic [2:0] dccm_dma_rtag, // DMA request tag - output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read - output logic dccm_ready, // lsu ready for DMA access + output logic dccm_dma_rvalid, // lsu data valid for DMA dccm read + output logic dccm_dma_ecc_error, // DMA load had ecc error + output logic [ 2:0] dccm_dma_rtag, // DMA request tag + output logic [63:0] dccm_dma_rdata, // lsu data for DMA dccm read + output logic dccm_ready, // lsu ready for DMA access - input logic scan_mode, // scan mode - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. 
- input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic rst_l // reset, active low + input logic scan_mode, // scan mode + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. + input logic rst_l // reset, active low - ); +); - logic lsu_dccm_rden_m; - logic lsu_dccm_rden_r; - logic [31:0] store_data_m; - logic [31:0] store_data_r; - logic [31:0] store_data_hi_r, store_data_lo_r; - logic [31:0] store_datafn_hi_r, store_datafn_lo_r; - logic [31:0] sec_data_lo_m, sec_data_hi_m; - logic [31:0] sec_data_lo_r, sec_data_hi_r; + logic lsu_dccm_rden_m; + logic lsu_dccm_rden_r; + logic [31:0] store_data_m; + logic [31:0] store_data_r; + logic [31:0] store_data_hi_r, store_data_lo_r; + logic [31:0] store_datafn_hi_r, store_datafn_lo_r; + logic [31:0] sec_data_lo_m, sec_data_hi_m; + logic [31:0] sec_data_lo_r, sec_data_hi_r; - logic [31:0] lsu_ld_data_m; - logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m; - logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m; - logic lsu_single_ecc_error_m; - logic lsu_double_ecc_error_m; + logic [31:0] lsu_ld_data_m; + logic [31:0] dccm_rdata_hi_m, dccm_rdata_lo_m; + logic [6:0] dccm_data_ecc_hi_m, dccm_data_ecc_lo_m; + logic lsu_single_ecc_error_m; + logic lsu_double_ecc_error_m; - logic [31:0] lsu_ld_data_r; - logic [31:0] lsu_ld_data_corr_r; - logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r; - logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r; - logic single_ecc_error_hi_r, single_ecc_error_lo_r; - logic lsu_single_ecc_error_r; - logic lsu_double_ecc_error_r; - logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff; + logic [31:0] lsu_ld_data_r; + logic [31:0] lsu_ld_data_corr_r; + logic [31:0] dccm_rdata_hi_r, dccm_rdata_lo_r; + 
logic [6:0] dccm_data_ecc_hi_r, dccm_data_ecc_lo_r; + logic single_ecc_error_hi_r, single_ecc_error_lo_r; + logic lsu_single_ecc_error_r; + logic lsu_double_ecc_error_r; + logic ld_single_ecc_error_r, ld_single_ecc_error_r_ff; - logic [31:0] picm_mask_data_m; + logic [31:0] picm_mask_data_m; - logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r; - logic [31:0] end_addr_d, end_addr_m, end_addr_r; + logic [31:0] lsu_addr_d, lsu_addr_m, lsu_addr_r; + logic [31:0] end_addr_d, end_addr_m, end_addr_r; - el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r; - logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r; + el2_lsu_pkt_t lsu_pkt_d, lsu_pkt_m, lsu_pkt_r; + logic lsu_i0_valid_d, lsu_i0_valid_m, lsu_i0_valid_r; - // Store Buffer signals - logic store_stbuf_reqvld_r; - logic ldst_stbuf_reqvld_r; + // Store Buffer signals + logic store_stbuf_reqvld_r; + logic ldst_stbuf_reqvld_r; - logic lsu_commit_r; - logic lsu_exc_m; + logic lsu_commit_r; + logic lsu_exc_m; - logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r; - logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r; - logic ldst_dual_d, ldst_dual_m, ldst_dual_r; - logic addr_external_m; + logic addr_in_dccm_d, addr_in_dccm_m, addr_in_dccm_r; + logic addr_in_pic_d, addr_in_pic_m, addr_in_pic_r; + logic ldst_dual_d, ldst_dual_m, ldst_dual_r; + logic addr_external_m; - logic stbuf_reqvld_any; - logic stbuf_reqvld_flushed_any; - logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any; - logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any; - logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any; - logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff; - logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, sec_data_ecc_lo_r_ff; + logic stbuf_reqvld_any; + logic stbuf_reqvld_flushed_any; + logic [ pt.LSU_SB_BITS-1:0] stbuf_addr_any; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any; + logic [ pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any; + logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, sec_data_hi_r_ff; + logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, 
sec_data_ecc_lo_r_ff; - logic lsu_cmpen_m; - logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m; - logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m; - logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m; - logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m; + logic lsu_cmpen_m; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m; + logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m; + logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m; + logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m; - logic lsu_stbuf_commit_any; - logic lsu_stbuf_empty_any; // This is for blocking loads - logic lsu_stbuf_full_any; + logic lsu_stbuf_commit_any; + logic lsu_stbuf_empty_any; // This is for blocking loads + logic lsu_stbuf_full_any; - // Bus signals - logic lsu_busreq_r; - logic lsu_bus_buffer_pend_any; - logic lsu_bus_buffer_empty_any; - logic lsu_bus_buffer_full_any; - logic lsu_busreq_m; - logic [31:0] bus_read_data_m; + // Bus signals + logic lsu_busreq_r; + logic lsu_bus_buffer_pend_any; + logic lsu_bus_buffer_empty_any; + logic lsu_bus_buffer_full_any; + logic lsu_busreq_m; + logic [ 31:0] bus_read_data_m; - logic flush_m_up, flush_r; - logic is_sideeffects_m; - logic [2:0] dma_mem_tag_d, dma_mem_tag_m; - logic ldst_nodma_mtor; - logic dma_dccm_wen, dma_pic_wen; - logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi; - logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi; + logic flush_m_up, flush_r; + logic is_sideeffects_m; + logic [2:0] dma_mem_tag_d, dma_mem_tag_m; + logic ldst_nodma_mtor; + logic dma_dccm_wen, dma_pic_wen; + logic [31:0] dma_dccm_wdata_lo, dma_dccm_wdata_hi; + logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, dma_dccm_wdata_ecc_hi; - // Clocks - logic lsu_busm_clken; - logic lsu_bus_obuf_c1_clken; - logic lsu_c1_m_clk, lsu_c1_r_clk; - logic lsu_c2_m_clk, lsu_c2_r_clk; - logic lsu_store_c1_m_clk, lsu_store_c1_r_clk; + // Clocks + logic lsu_busm_clken; + logic lsu_bus_obuf_c1_clken; + logic lsu_c1_m_clk, lsu_c1_r_clk; + logic 
lsu_c2_m_clk, lsu_c2_r_clk; + logic lsu_store_c1_m_clk, lsu_store_c1_r_clk; - logic lsu_stbuf_c1_clk; - logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk; - logic lsu_busm_clk; - logic lsu_free_c2_clk; + logic lsu_stbuf_c1_clk; + logic lsu_bus_ibuf_c1_clk, lsu_bus_obuf_c1_clk, lsu_bus_buf_c1_clk; + logic lsu_busm_clk; + logic lsu_free_c2_clk; - logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m; - logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r; + logic lsu_raw_fwd_lo_m, lsu_raw_fwd_hi_m; + logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r; - assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]); - assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]); + assign lsu_raw_fwd_lo_m = (|stbuf_fwdbyteen_lo_m[pt.DCCM_BYTE_WIDTH-1:0]); + assign lsu_raw_fwd_hi_m = (|stbuf_fwdbyteen_hi_m[pt.DCCM_BYTE_WIDTH-1:0]); - el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*); + el2_lsu_lsc_ctl #(.pt(pt)) lsu_lsc_ctl (.*); - // block stores in decode - for either bus or stbuf reasons - assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; - assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; - assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage + // block stores in decode - for either bus or stbuf reasons + assign lsu_store_stall_any = lsu_stbuf_full_any | lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; + assign lsu_load_stall_any = lsu_bus_buffer_full_any | ld_single_ecc_error_r_ff; + assign lsu_fastint_stall_any = ld_single_ecc_error_r; // Stall the fastint in decode-1 stage - // Ready to accept dma trxns - // There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m - assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0]; - assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store); + // Ready to accept dma trxns + 
// There can't be any inpipe forwarding from non-dma packet to dma packet since they can be flushed so we can't have st in r when dma is in m + assign dma_mem_tag_d[2:0] = dma_mem_tag[2:0]; + assign ldst_nodma_mtor = (lsu_pkt_m.valid & ~lsu_pkt_m.dma & (addr_in_dccm_m | addr_in_pic_m) & lsu_pkt_m.store); - assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff); + assign dccm_ready = ~(dec_lsu_valid_raw_d | ldst_nodma_mtor | ld_single_ecc_error_r_ff); - assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword - assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d; - assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores + assign dma_dccm_wen = dma_dccm_req & dma_mem_write & addr_in_dccm_d & dma_mem_sz[1]; // Perform DMA writes only for word/dword + assign dma_pic_wen = dma_dccm_req & dma_mem_write & addr_in_pic_d; + assign {dma_dccm_wdata_hi[31:0], dma_dccm_wdata_lo[31:0]} = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores - // Generate per cycle flush signals - assign flush_m_up = dec_tlu_flush_lower_r; - assign flush_r = dec_tlu_i0_kill_writeb_r; + // Generate per cycle flush signals + assign flush_m_up = dec_tlu_flush_lower_r; + assign flush_r = dec_tlu_i0_kill_writeb_r; - // lsu idle - // lsu halt idle. This is used for entering the halt mode. Also, DMA accesses are allowed during fence. - // Indicates non-idle if there is a instruction valid in d-r or read/write buffers are non-empty since they can come with error - // Store buffer now have only non-dma dccm stores - // stbuf_empty not needed since it has only dccm stores - assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) | + // lsu idle + // lsu halt idle. 
This is used for entering the halt mode. Also, DMA accesses are allowed during fence. + // Indicates non-idle if there is a instruction valid in d-r or read/write buffers are non-empty since they can come with error + // Store buffer now have only non-dma dccm stores + // stbuf_empty not needed since it has only dccm stores + assign lsu_idle_any = ~((lsu_pkt_m.valid & ~lsu_pkt_m.dma) | (lsu_pkt_r.valid & ~lsu_pkt_r.dma)) & lsu_bus_buffer_empty_any; - assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock + assign lsu_active = (lsu_pkt_m.valid | lsu_pkt_r.valid | ld_single_ecc_error_r_ff) | ~lsu_bus_buffer_empty_any; // This includes DMA. Used for gating top clock - // Instantiate the store buffer - assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | ((lsu_pkt_r.by | lsu_pkt_r.half) & ~lsu_double_ecc_error_r)); + // Instantiate the store buffer + assign store_stbuf_reqvld_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~flush_r & (~lsu_pkt_r.dma | ((lsu_pkt_r.by | lsu_pkt_r.half) & ~lsu_double_ecc_error_r)); - // Disable Forwarding for now - assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m); + // Disable Forwarding for now + assign lsu_cmpen_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & (addr_in_dccm_m | addr_in_pic_m); - // Bus signals - assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int; + // Bus signals + assign lsu_busreq_m = lsu_pkt_m.valid & ((lsu_pkt_m.load | lsu_pkt_m.store) & addr_external_m) & ~flush_m_up & ~lsu_exc_m & ~lsu_pkt_m.fast_int; - // Dual signals - assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]); - assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]); - assign ldst_dual_r = (lsu_addr_r[2] != 
end_addr_r[2]); + // Dual signals + assign ldst_dual_d = (lsu_addr_d[2] != end_addr_d[2]); + assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]); + assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]); - // PMU signals - assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0]))); - assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m; - assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m; + // PMU signals + assign lsu_pmu_misaligned_m = lsu_pkt_m.valid & ((lsu_pkt_m.half & lsu_addr_m[0]) | (lsu_pkt_m.word & (|lsu_addr_m[1:0]))); + assign lsu_pmu_load_external_m = lsu_pkt_m.valid & lsu_pkt_m.load & addr_external_m; + assign lsu_pmu_store_external_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_external_m; - el2_lsu_dccm_ctl #(.pt(pt)) dccm_ctl ( + el2_lsu_dccm_ctl #( + .pt(pt) + ) dccm_ctl ( .lsu_addr_d(lsu_addr_d[31:0]), .end_addr_d(end_addr_d[pt.DCCM_BITS-1:0]), .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]), @@ -339,34 +340,42 @@ import el2_pkg::*; .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]), .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]), .* - ); + ); - el2_lsu_stbuf #(.pt(pt)) stbuf ( + el2_lsu_stbuf #( + .pt(pt) + ) stbuf ( .lsu_addr_d(lsu_addr_d[pt.LSU_SB_BITS-1:0]), .end_addr_d(end_addr_d[pt.LSU_SB_BITS-1:0]), .* - ); + ); - el2_lsu_ecc #(.pt(pt)) ecc ( + el2_lsu_ecc #( + .pt(pt) + ) ecc ( .lsu_addr_r(lsu_addr_r[pt.DCCM_BITS-1:0]), .end_addr_r(end_addr_r[pt.DCCM_BITS-1:0]), .lsu_addr_m(lsu_addr_m[pt.DCCM_BITS-1:0]), .end_addr_m(end_addr_m[pt.DCCM_BITS-1:0]), .* - ); + ); - el2_lsu_trigger #(.pt(pt)) trigger ( + el2_lsu_trigger #( + .pt(pt) + ) trigger ( .store_data_m(store_data_m[31:0]), .* - ); + ); - // Clk domain - el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*); + // Clk domain + el2_lsu_clkdomain #(.pt(pt)) clkdomain (.*); - // Bus interface - el2_lsu_bus_intf #(.pt(pt)) bus_intf ( + // Bus interface + el2_lsu_bus_intf #( + .pt(pt) + ) 
bus_intf ( .lsu_addr_m(lsu_addr_m[31:0] & {32{addr_external_m & lsu_pkt_m.valid}}), .lsu_addr_r(lsu_addr_r[31:0] & {32{lsu_busreq_r}}), @@ -375,10 +384,20 @@ import el2_pkg::*; .store_data_r(store_data_r[31:0] & {32{lsu_busreq_r}}), .* - ); + ); - //Flops - rvdff #(3) dma_mem_tag_mff (.*, .din(dma_mem_tag_d[2:0]), .dout(dma_mem_tag_m[2:0]), .clk(lsu_c1_m_clk)); - rvdff #(2) lsu_raw_fwd_r_ff (.*, .din({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), .clk(lsu_c2_r_clk)); + //Flops + rvdff #(3) dma_mem_tag_mff ( + .*, + .din (dma_mem_tag_d[2:0]), + .dout(dma_mem_tag_m[2:0]), + .clk (lsu_c1_m_clk) + ); + rvdff #(2) lsu_raw_fwd_r_ff ( + .*, + .din ({lsu_raw_fwd_hi_m, lsu_raw_fwd_lo_m}), + .dout({lsu_raw_fwd_hi_r, lsu_raw_fwd_lo_r}), + .clk (lsu_c2_r_clk) + ); -endmodule // el2_lsu +endmodule // el2_lsu diff --git a/Flow/design/lsu/el2_lsu_addrcheck.sv b/Flow/design/lsu/el2_lsu_addrcheck.sv index 2abe8e1..1b7fa50 100644 --- a/Flow/design/lsu/el2_lsu_addrcheck.sv +++ b/Flow/design/lsu/el2_lsu_addrcheck.sv @@ -23,111 +23,119 @@ // //******************************************************************************** module el2_lsu_addrcheck -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - )( - input logic lsu_c2_m_clk, // clock - input logic rst_l, // reset + `include "el2_param.vh" +) ( + input logic lsu_c2_m_clk, // clock + input logic rst_l, // reset - input logic [31:0] start_addr_d, // start address for lsu - input logic [31:0] end_addr_d, // end address for lsu - input el2_lsu_pkt_t lsu_pkt_d, // packet in d - input logic [31:0] dec_tlu_mrac_ff, // CSR read - input logic [3:0] rs1_region_d, // address rs operand [31:28] + input logic [31:0] start_addr_d, // start address for lsu + input logic [31:0] end_addr_d, // end address for lsu + input el2_lsu_pkt_t lsu_pkt_d, // packet in d + input logic [31:0] dec_tlu_mrac_ff, // CSR read + input logic [ 3:0] rs1_region_d, // address rs operand [31:28] - input logic [31:0] 
rs1_d, // address rs operand + input logic [31:0] rs1_d, // address rs operand - output logic is_sideeffects_m, // is sideffects space - output logic addr_in_dccm_d, // address in dccm - output logic addr_in_pic_d, // address in pic - output logic addr_external_d, // address in external + output logic is_sideeffects_m, // is sideffects space + output logic addr_in_dccm_d, // address in dccm + output logic addr_in_pic_d, // address in pic + output logic addr_external_d, // address in external - output logic access_fault_d, // access fault - output logic misaligned_fault_d, // misaligned - output logic [3:0] exc_mscause_d, // mscause for access/misaligned faults + output logic access_fault_d, // access fault + output logic misaligned_fault_d, // misaligned + output logic [3:0] exc_mscause_d, // mscause for access/misaligned faults - output logic fir_dccm_access_error_d, // Fast interrupt dccm access error - output logic fir_nondccm_access_error_d,// Fast interrupt dccm access error + output logic fir_dccm_access_error_d, // Fast interrupt dccm access error + output logic fir_nondccm_access_error_d, // Fast interrupt dccm access error - input logic scan_mode // Scan mode + input logic scan_mode // Scan mode ); - logic non_dccm_access_ok; - logic is_sideeffects_d, is_aligned_d; - logic start_addr_in_dccm_d, end_addr_in_dccm_d; - logic start_addr_in_dccm_region_d, end_addr_in_dccm_region_d; - logic start_addr_in_pic_d, end_addr_in_pic_d; - logic start_addr_in_pic_region_d, end_addr_in_pic_region_d; - logic [4:0] csr_idx; - logic addr_in_iccm; - logic start_addr_dccm_or_pic; - logic base_reg_dccm_or_pic; - logic unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d; - logic regcross_misaligned_fault_d, sideeffect_misaligned_fault_d; - logic [3:0] access_fault_mscause_d; - logic [3:0] misaligned_fault_mscause_d; + logic non_dccm_access_ok; + logic is_sideeffects_d, is_aligned_d; + logic start_addr_in_dccm_d, end_addr_in_dccm_d; + logic 
start_addr_in_dccm_region_d, end_addr_in_dccm_region_d; + logic start_addr_in_pic_d, end_addr_in_pic_d; + logic start_addr_in_pic_region_d, end_addr_in_pic_region_d; + logic [4:0] csr_idx; + logic addr_in_iccm; + logic start_addr_dccm_or_pic; + logic base_reg_dccm_or_pic; + logic unmapped_access_fault_d, mpu_access_fault_d, picm_access_fault_d, regpred_access_fault_d; + logic regcross_misaligned_fault_d, sideeffect_misaligned_fault_d; + logic [3:0] access_fault_mscause_d; + logic [3:0] misaligned_fault_mscause_d; - if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable - // Start address check - rvrangecheck #(.CCM_SADR(pt.DCCM_SADR), - .CCM_SIZE(pt.DCCM_SIZE)) start_addr_dccm_rangecheck ( - .addr(start_addr_d[31:0]), - .in_range(start_addr_in_dccm_d), - .in_region(start_addr_in_dccm_region_d) - ); + if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable + // Start address check + rvrangecheck #( + .CCM_SADR(pt.DCCM_SADR), + .CCM_SIZE(pt.DCCM_SIZE) + ) start_addr_dccm_rangecheck ( + .addr(start_addr_d[31:0]), + .in_range(start_addr_in_dccm_d), + .in_region(start_addr_in_dccm_region_d) + ); - // End address check - rvrangecheck #(.CCM_SADR(pt.DCCM_SADR), - .CCM_SIZE(pt.DCCM_SIZE)) end_addr_dccm_rangecheck ( - .addr(end_addr_d[31:0]), - .in_range(end_addr_in_dccm_d), - .in_region(end_addr_in_dccm_region_d) - ); - end else begin: Gen_dccm_disable // block: Gen_dccm_enable - assign start_addr_in_dccm_d = '0; - assign start_addr_in_dccm_region_d = '0; - assign end_addr_in_dccm_d = '0; - assign end_addr_in_dccm_region_d = '0; - end + // End address check + rvrangecheck #( + .CCM_SADR(pt.DCCM_SADR), + .CCM_SIZE(pt.DCCM_SIZE) + ) end_addr_dccm_rangecheck ( + .addr(end_addr_d[31:0]), + .in_range(end_addr_in_dccm_d), + .in_region(end_addr_in_dccm_region_d) + ); + end else begin : Gen_dccm_disable // block: Gen_dccm_enable + assign start_addr_in_dccm_d = '0; + assign start_addr_in_dccm_region_d = '0; + assign end_addr_in_dccm_d = '0; + assign end_addr_in_dccm_region_d = '0; + end - if 
(pt.ICCM_ENABLE == 1) begin : check_iccm - assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION); - end else begin - assign addr_in_iccm = 1'b0; - end + if (pt.ICCM_ENABLE == 1) begin : check_iccm + assign addr_in_iccm = (start_addr_d[31:28] == pt.ICCM_REGION); + end else begin + assign addr_in_iccm = 1'b0; + end - // PIC memory check - // Start address check - rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR), - .CCM_SIZE(pt.PIC_SIZE)) start_addr_pic_rangecheck ( + // PIC memory check + // Start address check + rvrangecheck #( + .CCM_SADR(pt.PIC_BASE_ADDR), + .CCM_SIZE(pt.PIC_SIZE) + ) start_addr_pic_rangecheck ( .addr(start_addr_d[31:0]), .in_range(start_addr_in_pic_d), .in_region(start_addr_in_pic_region_d) - ); + ); - // End address check - rvrangecheck #(.CCM_SADR(pt.PIC_BASE_ADDR), - .CCM_SIZE(pt.PIC_SIZE)) end_addr_pic_rangecheck ( + // End address check + rvrangecheck #( + .CCM_SADR(pt.PIC_BASE_ADDR), + .CCM_SIZE(pt.PIC_SIZE) + ) end_addr_pic_rangecheck ( .addr(end_addr_d[31:0]), .in_range(end_addr_in_pic_d), .in_region(end_addr_in_pic_region_d) - ); + ); - assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d; - assign base_reg_dccm_or_pic = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION); - assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d); - assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d); + assign start_addr_dccm_or_pic = start_addr_in_dccm_region_d | start_addr_in_pic_region_d; + assign base_reg_dccm_or_pic = ((rs1_region_d[3:0] == pt.DCCM_REGION) & pt.DCCM_ENABLE) | (rs1_region_d[3:0] == pt.PIC_REGION); + assign addr_in_dccm_d = (start_addr_in_dccm_d & end_addr_in_dccm_d); + assign addr_in_pic_d = (start_addr_in_pic_d & end_addr_in_pic_d); - assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d); - assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1}; - assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & 
~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & (lsu_pkt_d.store | lsu_pkt_d.load); //every region has the 2 LSB indicating ( 1: sideeffects/no_side effects, and 0: cacheable ). Ignored in internal regions - assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) | + assign addr_external_d = ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d); + assign csr_idx[4:0] = {start_addr_d[31:28], 1'b1}; + assign is_sideeffects_d = dec_tlu_mrac_ff[csr_idx] & ~(start_addr_in_dccm_region_d | start_addr_in_pic_region_d | addr_in_iccm) & lsu_pkt_d.valid & (lsu_pkt_d.store | lsu_pkt_d.load); //every region has the 2 LSB indicating ( 1: sideeffects/no_side effects, and 0: cacheable ). Ignored in internal regions + assign is_aligned_d = (lsu_pkt_d.word & (start_addr_d[1:0] == 2'b0)) | (lsu_pkt_d.half & (start_addr_d[0] == 1'b0)) | lsu_pkt_d.by; - assign non_dccm_access_ok = (~(|{pt.DATA_ACCESS_ENABLE0,pt.DATA_ACCESS_ENABLE1,pt.DATA_ACCESS_ENABLE2,pt.DATA_ACCESS_ENABLE3,pt.DATA_ACCESS_ENABLE4,pt.DATA_ACCESS_ENABLE5,pt.DATA_ACCESS_ENABLE6,pt.DATA_ACCESS_ENABLE7})) | + assign non_dccm_access_ok = (~(|{pt.DATA_ACCESS_ENABLE0,pt.DATA_ACCESS_ENABLE1,pt.DATA_ACCESS_ENABLE2,pt.DATA_ACCESS_ENABLE3,pt.DATA_ACCESS_ENABLE4,pt.DATA_ACCESS_ENABLE5,pt.DATA_ACCESS_ENABLE6,pt.DATA_ACCESS_ENABLE7})) | (((pt.DATA_ACCESS_ENABLE0 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK0)) == (pt.DATA_ACCESS_ADDR0 | pt.DATA_ACCESS_MASK0)) | (pt.DATA_ACCESS_ENABLE1 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK1)) == (pt.DATA_ACCESS_ADDR1 | pt.DATA_ACCESS_MASK1)) | (pt.DATA_ACCESS_ENABLE2 & ((start_addr_d[31:0] | pt.DATA_ACCESS_MASK2)) == (pt.DATA_ACCESS_ADDR2 | pt.DATA_ACCESS_MASK2)) | @@ -145,47 +153,54 @@ import el2_pkg::*; (pt.DATA_ACCESS_ENABLE6 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK6)) == (pt.DATA_ACCESS_ADDR6 | pt.DATA_ACCESS_MASK6)) | (pt.DATA_ACCESS_ENABLE7 & ((end_addr_d[31:0] | pt.DATA_ACCESS_MASK7)) == (pt.DATA_ACCESS_ADDR7 | 
pt.DATA_ACCESS_MASK7)))); - // Access fault logic - // 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region - // 1. Uncorrectable (double bit) ECC error - // 3. Address is not in a populated non-dccm region - // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa - // 6. Ld/St access to picm are not word aligned or word size - assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa - assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size + // Access fault logic + // 0. Unmapped local memory : Addr in dccm region but not in dccm offset OR Addr in picm region but not in picm offset OR DCCM -> PIC cross when DCCM/PIC in same region + // 1. Uncorrectable (double bit) ECC error + // 3. Address is not in a populated non-dccm region + // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa + // 6. Ld/St access to picm are not word aligned or word size + assign regpred_access_fault_d = (start_addr_dccm_or_pic ^ base_reg_dccm_or_pic); // 5. Region predication access fault: Base Address in DCCM/PIC and Final address in non-DCCM/non-PIC region or vice versa + assign picm_access_fault_d = (addr_in_pic_d & ((start_addr_d[1:0] != 2'b0) | ~lsu_pkt_d.word)); // 6. Ld/St access to picm are not word aligned or word size - if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin - assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. 
Addr in dccm/pic region but not in dccm/pic offset - (end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset - (start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region - (start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. DCCM -> PIC cross when DCCM/PIC in same region - assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region - end else begin - assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset - (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset - (start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset - (end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset - assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region - end + if (pt.DCCM_ENABLE & (pt.DCCM_REGION == pt.PIC_REGION)) begin + assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~(start_addr_in_dccm_d | start_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset + (end_addr_in_dccm_region_d & ~(end_addr_in_dccm_d | end_addr_in_pic_d)) | // 0. Addr in dccm/pic region but not in dccm/pic offset + (start_addr_in_dccm_d & end_addr_in_pic_d) | // 0. DCCM -> PIC cross when DCCM/PIC in same region + (start_addr_in_pic_d & end_addr_in_dccm_d)); // 0. DCCM -> PIC cross when DCCM/PIC in same region + assign mpu_access_fault_d = (~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region + end else begin + assign unmapped_access_fault_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | // 0. 
Addr in dccm region but not in dccm offset + (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d) | // 0. Addr in dccm region but not in dccm offset + (start_addr_in_pic_region_d & ~start_addr_in_pic_d) | // 0. Addr in picm region but not in picm offset + (end_addr_in_pic_region_d & ~end_addr_in_pic_d)); // 0. Addr in picm region but not in picm offset + assign mpu_access_fault_d = (~start_addr_in_pic_region_d & ~start_addr_in_dccm_region_d & ~non_dccm_access_ok); // 3. Address is not in a populated non-dccm region + end - assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; - assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0; + assign access_fault_d = (unmapped_access_fault_d | mpu_access_fault_d | picm_access_fault_d | regpred_access_fault_d) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; + assign access_fault_mscause_d[3:0] = unmapped_access_fault_d ? 4'h2 : mpu_access_fault_d ? 4'h3 : regpred_access_fault_d ? 4'h5 : picm_access_fault_d ? 4'h6 : 4'h0; - // Misaligned happens due to 2 reasons - // 0. Region cross - // 1. sideeffects access which are not aligned - assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]); - assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d); - assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; - assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0; + // Misaligned happens due to 2 reasons + // 0. Region cross + // 1. 
sideeffects access which are not aligned + assign regcross_misaligned_fault_d = (start_addr_d[31:28] != end_addr_d[31:28]); + assign sideeffect_misaligned_fault_d = (is_sideeffects_d & ~is_aligned_d); + assign misaligned_fault_d = (regcross_misaligned_fault_d | (sideeffect_misaligned_fault_d & addr_external_d)) & lsu_pkt_d.valid & ~lsu_pkt_d.dma; + assign misaligned_fault_mscause_d[3:0] = regcross_misaligned_fault_d ? 4'h2 : sideeffect_misaligned_fault_d ? 4'h1 : 4'h0; - assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0]; + assign exc_mscause_d[3:0] = misaligned_fault_d ? misaligned_fault_mscause_d[3:0] : access_fault_mscause_d[3:0]; - // Fast interrupt error logic - assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | + // Fast interrupt error logic + assign fir_dccm_access_error_d = ((start_addr_in_dccm_region_d & ~start_addr_in_dccm_d) | (end_addr_in_dccm_region_d & ~end_addr_in_dccm_d)) & lsu_pkt_d.valid & lsu_pkt_d.fast_int; - assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int; + assign fir_nondccm_access_error_d = ~(start_addr_in_dccm_region_d & end_addr_in_dccm_region_d) & lsu_pkt_d.valid & lsu_pkt_d.fast_int; - rvdff #(.WIDTH(1)) is_sideeffects_mff (.din(is_sideeffects_d), .dout(is_sideeffects_m), .clk(lsu_c2_m_clk), .*); + rvdff #( + .WIDTH(1) + ) is_sideeffects_mff ( + .din (is_sideeffects_d), + .dout(is_sideeffects_m), + .clk (lsu_c2_m_clk), + .* + ); -endmodule // el2_lsu_addrcheck +endmodule // el2_lsu_addrcheck diff --git a/Flow/design/lsu/el2_lsu_bus_buffer.sv b/Flow/design/lsu/el2_lsu_bus_buffer.sv index 450aaf5..b0171c5 100644 --- a/Flow/design/lsu/el2_lsu_bus_buffer.sv +++ b/Flow/design/lsu/el2_lsu_bus_buffer.sv @@ -24,897 +24,1477 @@ //******************************************************************************** module el2_lsu_bus_buffer -import el2_pkg::*; + import 
el2_pkg::*; #( -`include "el2_param.vh" - )( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic clk_override, // Override non-functional clock gating - input logic rst_l, // reset, active low - input logic scan_mode, // scan mode - input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals - input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing - input logic dec_tlu_sideeffect_posted_disable, // Don't block the sideeffect load store to the bus - input logic dec_tlu_force_halt, + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic clk_override, // Override non-functional clock gating + input logic rst_l, // reset, active low + input logic scan_mode, // scan mode + input logic dec_tlu_external_ldfwd_disable, // disable load to load forwarding for externals + input logic dec_tlu_wb_coalescing_disable, // disable write buffer coalescing + input logic dec_tlu_sideeffect_posted_disable, // Don't block the sideeffect load store to the bus + input logic dec_tlu_force_halt, - // various clocks needed for the bus reads and writes - input logic lsu_bus_obuf_c1_clken, - input logic lsu_busm_clken, - input logic lsu_c2_r_clk, - input logic lsu_bus_ibuf_c1_clk, - input logic lsu_bus_obuf_c1_clk, - input logic lsu_bus_buf_c1_clk, - input logic lsu_free_c2_clk, - input logic lsu_busm_clk, + // various clocks needed for the bus reads and writes + input logic lsu_bus_obuf_c1_clken, + input logic lsu_busm_clken, + input logic lsu_c2_r_clk, + input logic lsu_bus_ibuf_c1_clk, + input logic lsu_bus_obuf_c1_clk, + input logic lsu_bus_buf_c1_clk, + input logic lsu_free_c2_clk, + input logic lsu_busm_clk, - input logic dec_lsu_valid_raw_d, // Raw valid for address computation - input 
el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe - input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet flowing down the pipe + input el2_lsu_pkt_t lsu_pkt_r, // lsu packet flowing down the pipe - input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe - input logic [31:0] end_addr_m, // lsu address flowing down the pipe - input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe - input logic [31:0] end_addr_r, // lsu address flowing down the pipe - input logic [31:0] store_data_r, // store data flowing down the pipe + input logic [31:0] lsu_addr_m, // lsu address flowing down the pipe + input logic [31:0] end_addr_m, // lsu address flowing down the pipe + input logic [31:0] lsu_addr_r, // lsu address flowing down the pipe + input logic [31:0] end_addr_r, // lsu address flowing down the pipe + input logic [31:0] store_data_r, // store data flowing down the pipe - input logic no_word_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce - input logic no_dword_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce - input logic lsu_busreq_m, // bus request is in m - output logic lsu_busreq_r, // bus request is in r - input logic ld_full_hit_m, // load can get all its byte from a write buffer entry - input logic flush_m_up, // flush - input logic flush_r, // flush - input logic lsu_commit_r, // lsu instruction in r commits - input logic is_sideeffects_r, // lsu attribute is side_effects - input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary - input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary - input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary + input logic no_word_merge_r, // r store doesn't need to wait in ibuf since it will not coalesce + input logic no_dword_merge_r, // r store doesn't need to wait 
in ibuf since it will not coalesce + input logic lsu_busreq_m, // bus request is in m + output logic lsu_busreq_r, // bus request is in r + input logic ld_full_hit_m, // load can get all its byte from a write buffer entry + input logic flush_m_up, // flush + input logic flush_r, // flush + input logic lsu_commit_r, // lsu instruction in r commits + input logic is_sideeffects_r, // lsu attribute is side_effects + input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary + input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary - input logic [7:0] ldst_byteen_ext_m, // HI and LO signals + input logic [7:0] ldst_byteen_ext_m, // HI and LO signals - output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry - output logic lsu_bus_buffer_full_any, // bus buffer is full - output logic lsu_bus_buffer_empty_any, // bus buffer is empty + output logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + output logic lsu_bus_buffer_full_any, // bus buffer is full + output logic lsu_bus_buffer_empty_any, // bus buffer is empty - output logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi, // Byte enables for forwarding data - output logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi, // load forwarding data + output logic [ 3:0] ld_byte_hit_buf_lo, + ld_byte_hit_buf_hi, // Byte enables for forwarding data + output logic [31:0] ld_fwddata_buf_lo, + ld_fwddata_buf_hi, // load forwarding data - output logic lsu_imprecise_error_load_any, // imprecise load bus error - output logic lsu_imprecise_error_store_any, // imprecise store bus error - output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error + output logic lsu_imprecise_error_load_any, // imprecise load bus error + output logic lsu_imprecise_error_store_any, // imprecise store bus error + output logic [31:0] lsu_imprecise_error_addr_any, // address of the imprecise error - 
// Non-blocking loads - output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load - output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated - output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam - output logic lsu_nonblock_load_data_error, // non block load has an error - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error - output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid, // the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load - // PMU events - output logic lsu_pmu_bus_trxn, - output logic lsu_pmu_bus_misaligned, - output logic lsu_pmu_bus_error, - output logic lsu_pmu_bus_busy, + // PMU events + output logic lsu_pmu_bus_trxn, + output logic lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, - // AXI Write 
Channels - output logic lsu_axi_awvalid, - input logic lsu_axi_awready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, - output logic [31:0] lsu_axi_awaddr, - output logic [3:0] lsu_axi_awregion, - output logic [7:0] lsu_axi_awlen, - output logic [2:0] lsu_axi_awsize, - output logic [1:0] lsu_axi_awburst, - output logic lsu_axi_awlock, - output logic [3:0] lsu_axi_awcache, - output logic [2:0] lsu_axi_awprot, - output logic [3:0] lsu_axi_awqos, + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [ 31:0] lsu_axi_awaddr, + output logic [ 3:0] lsu_axi_awregion, + output logic [ 7:0] lsu_axi_awlen, + output logic [ 2:0] lsu_axi_awsize, + output logic [ 1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [ 3:0] lsu_axi_awcache, + output logic [ 2:0] lsu_axi_awprot, + output logic [ 3:0] lsu_axi_awqos, - output logic lsu_axi_wvalid, - input logic lsu_axi_wready, - output logic [63:0] lsu_axi_wdata, - output logic [7:0] lsu_axi_wstrb, - output logic lsu_axi_wlast, + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [ 7:0] lsu_axi_wstrb, + output logic lsu_axi_wlast, - input logic lsu_axi_bvalid, - output logic lsu_axi_bready, - input logic [1:0] lsu_axi_bresp, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [ 1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, - // AXI Read Channels - output logic lsu_axi_arvalid, - input logic lsu_axi_arready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, - output logic [31:0] lsu_axi_araddr, - output logic [3:0] lsu_axi_arregion, - output logic [7:0] lsu_axi_arlen, - output logic [2:0] lsu_axi_arsize, - output logic [1:0] lsu_axi_arburst, - output logic lsu_axi_arlock, - output logic [3:0] lsu_axi_arcache, - output logic [2:0] lsu_axi_arprot, - output logic [3:0] 
lsu_axi_arqos, + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [ 31:0] lsu_axi_araddr, + output logic [ 3:0] lsu_axi_arregion, + output logic [ 7:0] lsu_axi_arlen, + output logic [ 2:0] lsu_axi_arsize, + output logic [ 1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [ 3:0] lsu_axi_arcache, + output logic [ 2:0] lsu_axi_arprot, + output logic [ 3:0] lsu_axi_arqos, - input logic lsu_axi_rvalid, - output logic lsu_axi_rready, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, - input logic [63:0] lsu_axi_rdata, - input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [ 63:0] lsu_axi_rdata, + input logic [ 1:0] lsu_axi_rresp, - input logic lsu_bus_clk_en, - input logic lsu_bus_clk_en_q + input logic lsu_bus_clk_en, + input logic lsu_bus_clk_en_q ); - // For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE_PARTIAL(?) -> DONE_WAIT(?) -> DONE -> IDLE - // For St: IDLE -> WAIT -> CMD -> RESP(?) -> IDLE - typedef enum logic [2:0] {IDLE=3'b000, WAIT=3'b001, CMD=3'b010, RESP=3'b011, DONE_PARTIAL=3'b100, DONE_WAIT=3'b101, DONE=3'b110} state_t; + // For Ld: IDLE -> WAIT -> CMD -> RESP -> DONE_PARTIAL(?) -> DONE_WAIT(?) -> DONE -> IDLE + // For St: IDLE -> WAIT -> CMD -> RESP(?) -> IDLE + typedef enum logic [2:0] { + IDLE = 3'b000, + WAIT = 3'b001, + CMD = 3'b010, + RESP = 3'b011, + DONE_PARTIAL = 3'b100, + DONE_WAIT = 3'b101, + DONE = 3'b110 + } state_t; - localparam DEPTH = pt.LSU_NUM_NBLOAD; - localparam DEPTH_LOG2 = pt.LSU_NUM_NBLOAD_WIDTH; - localparam TIMER = 8; // This can be only power of 2 - localparam TIMER_MAX = TIMER - 1; // Maximum value of timer - localparam TIMER_LOG2 = (TIMER < 2) ? 
1 : $clog2(TIMER); + localparam DEPTH = pt.LSU_NUM_NBLOAD; + localparam DEPTH_LOG2 = pt.LSU_NUM_NBLOAD_WIDTH; + localparam TIMER = 8; // This can be only power of 2 + localparam TIMER_MAX = TIMER - 1; // Maximum value of timer + localparam TIMER_LOG2 = (TIMER < 2) ? 1 : $clog2(TIMER); - logic [3:0] ldst_byteen_hi_m, ldst_byteen_lo_m; - logic [DEPTH-1:0] ld_addr_hitvec_lo, ld_addr_hitvec_hi; - logic [3:0][DEPTH-1:0] ld_byte_hitvec_lo, ld_byte_hitvec_hi; - logic [3:0][DEPTH-1:0] ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi; + logic [3:0] ldst_byteen_hi_m, ldst_byteen_lo_m; + logic [DEPTH-1:0] ld_addr_hitvec_lo, ld_addr_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvec_lo, ld_byte_hitvec_hi; + logic [3:0][DEPTH-1:0] ld_byte_hitvecfn_lo, ld_byte_hitvecfn_hi; - logic ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi; - logic [3:0] ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi; + logic ld_addr_ibuf_hit_lo, ld_addr_ibuf_hit_hi; + logic [3:0] ld_byte_ibuf_hit_lo, ld_byte_ibuf_hit_hi; - logic [3:0] ldst_byteen_r; - logic [3:0] ldst_byteen_hi_r, ldst_byteen_lo_r; - logic [31:0] store_data_hi_r, store_data_lo_r; - logic is_aligned_r; // Aligned load/store - logic ldst_samedw_r; + logic [3:0] ldst_byteen_r; + logic [3:0] ldst_byteen_hi_r, ldst_byteen_lo_r; + logic [31:0] store_data_hi_r, store_data_lo_r; + logic is_aligned_r; // Aligned load/store + logic ldst_samedw_r; - logic lsu_nonblock_load_valid_r; - logic [31:0] lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn; - logic [1:0] lsu_nonblock_addr_offset; - logic [1:0] lsu_nonblock_sz; - logic lsu_nonblock_unsign; - logic lsu_nonblock_load_data_ready; + logic lsu_nonblock_load_valid_r; + logic [31:0] lsu_nonblock_load_data_hi, lsu_nonblock_load_data_lo, lsu_nonblock_data_unalgn; + logic [1:0] lsu_nonblock_addr_offset; + logic [1:0] lsu_nonblock_sz; + logic lsu_nonblock_unsign; + logic lsu_nonblock_load_data_ready; - logic [DEPTH-1:0] CmdPtr0Dec, CmdPtr1Dec; - logic [DEPTH-1:0] RspPtrDec; - logic [DEPTH_LOG2-1:0] 
CmdPtr0, CmdPtr1; - logic [DEPTH_LOG2-1:0] RspPtr; - logic [DEPTH_LOG2-1:0] WrPtr0_m, WrPtr0_r; - logic [DEPTH_LOG2-1:0] WrPtr1_m, WrPtr1_r; - logic found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1; - logic [3:0] buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_cmd_any, buf_numvld_pend_any; - logic any_done_wait_state; - logic bus_sideeffect_pend; - logic bus_coalescing_disable; + logic [DEPTH-1:0] CmdPtr0Dec, CmdPtr1Dec; + logic [DEPTH-1:0] RspPtrDec; + logic [DEPTH_LOG2-1:0] CmdPtr0, CmdPtr1; + logic [DEPTH_LOG2-1:0] RspPtr; + logic [DEPTH_LOG2-1:0] WrPtr0_m, WrPtr0_r; + logic [DEPTH_LOG2-1:0] WrPtr1_m, WrPtr1_r; + logic found_wrptr0, found_wrptr1, found_cmdptr0, found_cmdptr1; + logic [3:0] buf_numvld_any, buf_numvld_wrcmd_any, buf_numvld_cmd_any, buf_numvld_pend_any; + logic any_done_wait_state; + logic bus_sideeffect_pend; + logic bus_coalescing_disable; - logic bus_addr_match_pending; - logic bus_cmd_sent, bus_cmd_ready; - logic bus_wcmd_sent, bus_wdata_sent; - logic bus_rsp_read, bus_rsp_write; - logic [pt.LSU_BUS_TAG-1:0] bus_rsp_read_tag, bus_rsp_write_tag; - logic bus_rsp_read_error, bus_rsp_write_error; - logic [63:0] bus_rsp_rdata; + logic bus_addr_match_pending; + logic bus_cmd_sent, bus_cmd_ready; + logic bus_wcmd_sent, bus_wdata_sent; + logic bus_rsp_read, bus_rsp_write; + logic [pt.LSU_BUS_TAG-1:0] bus_rsp_read_tag, bus_rsp_write_tag; + logic bus_rsp_read_error, bus_rsp_write_error; + logic [ 63:0] bus_rsp_rdata; - // Bus buffer signals - state_t [DEPTH-1:0] buf_state; - logic [DEPTH-1:0][1:0] buf_sz; - logic [DEPTH-1:0][31:0] buf_addr; - logic [DEPTH-1:0][3:0] buf_byteen; - logic [DEPTH-1:0] buf_sideeffect; - logic [DEPTH-1:0] buf_write; - logic [DEPTH-1:0] buf_unsign; - logic [DEPTH-1:0] buf_dual; - logic [DEPTH-1:0] buf_samedw; - logic [DEPTH-1:0] buf_nomerge; - logic [DEPTH-1:0] buf_dualhi; - logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag; - logic [DEPTH-1:0] buf_ldfwd; - logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag; - logic [DEPTH-1:0] 
buf_error; - logic [DEPTH-1:0][31:0] buf_data; - logic [DEPTH-1:0][DEPTH-1:0] buf_age, buf_age_younger; - logic [DEPTH-1:0][DEPTH-1:0] buf_rspage, buf_rsp_pickage; + // Bus buffer signals + state_t [ DEPTH-1:0 ] buf_state; + logic [DEPTH-1:0] [ 1:0] buf_sz; + logic [DEPTH-1:0] [ 31:0] buf_addr; + logic [DEPTH-1:0] [ 3:0] buf_byteen; + logic [DEPTH-1:0] buf_sideeffect; + logic [DEPTH-1:0] buf_write; + logic [DEPTH-1:0] buf_unsign; + logic [DEPTH-1:0] buf_dual; + logic [DEPTH-1:0] buf_samedw; + logic [DEPTH-1:0] buf_nomerge; + logic [DEPTH-1:0] buf_dualhi; + logic [DEPTH-1:0] [DEPTH_LOG2-1:0] buf_dualtag; + logic [DEPTH-1:0] buf_ldfwd; + logic [DEPTH-1:0] [DEPTH_LOG2-1:0] buf_ldfwdtag; + logic [DEPTH-1:0] buf_error; + logic [DEPTH-1:0] [ 31:0] buf_data; + logic [DEPTH-1:0][DEPTH-1:0] buf_age, buf_age_younger; + logic [DEPTH-1:0][DEPTH-1:0] buf_rspage, buf_rsp_pickage; - state_t [DEPTH-1:0] buf_nxtstate; - logic [DEPTH-1:0] buf_rst; - logic [DEPTH-1:0] buf_state_en; - logic [DEPTH-1:0] buf_cmd_state_bus_en; - logic [DEPTH-1:0] buf_resp_state_bus_en; - logic [DEPTH-1:0] buf_state_bus_en; - logic [DEPTH-1:0] buf_dual_in; - logic [DEPTH-1:0] buf_samedw_in; - logic [DEPTH-1:0] buf_nomerge_in; - logic [DEPTH-1:0] buf_sideeffect_in; - logic [DEPTH-1:0] buf_unsign_in; - logic [DEPTH-1:0][1:0] buf_sz_in; - logic [DEPTH-1:0] buf_write_in; - logic [DEPTH-1:0] buf_wr_en; - logic [DEPTH-1:0] buf_dualhi_in; - logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_dualtag_in; - logic [DEPTH-1:0] buf_ldfwd_en; - logic [DEPTH-1:0] buf_ldfwd_in; - logic [DEPTH-1:0][DEPTH_LOG2-1:0] buf_ldfwdtag_in; - logic [DEPTH-1:0][3:0] buf_byteen_in; - logic [DEPTH-1:0][31:0] buf_addr_in; - logic [DEPTH-1:0][31:0] buf_data_in; - logic [DEPTH-1:0] buf_error_en; - logic [DEPTH-1:0] buf_data_en; - logic [DEPTH-1:0][DEPTH-1:0] buf_age_in; - logic [DEPTH-1:0][DEPTH-1:0] buf_ageQ; - logic [DEPTH-1:0][DEPTH-1:0] buf_rspage_set; - logic [DEPTH-1:0][DEPTH-1:0] buf_rspage_in; - logic [DEPTH-1:0][DEPTH-1:0] buf_rspageQ; + 
state_t [ DEPTH-1:0 ] buf_nxtstate; + logic [ DEPTH-1:0] buf_rst; + logic [ DEPTH-1:0] buf_state_en; + logic [ DEPTH-1:0] buf_cmd_state_bus_en; + logic [ DEPTH-1:0] buf_resp_state_bus_en; + logic [ DEPTH-1:0] buf_state_bus_en; + logic [ DEPTH-1:0] buf_dual_in; + logic [ DEPTH-1:0] buf_samedw_in; + logic [ DEPTH-1:0] buf_nomerge_in; + logic [ DEPTH-1:0] buf_sideeffect_in; + logic [ DEPTH-1:0] buf_unsign_in; + logic [ DEPTH-1:0] [ 1:0] buf_sz_in; + logic [ DEPTH-1:0] buf_write_in; + logic [ DEPTH-1:0] buf_wr_en; + logic [ DEPTH-1:0] buf_dualhi_in; + logic [ DEPTH-1:0] [DEPTH_LOG2-1:0] buf_dualtag_in; + logic [ DEPTH-1:0] buf_ldfwd_en; + logic [ DEPTH-1:0] buf_ldfwd_in; + logic [ DEPTH-1:0] [DEPTH_LOG2-1:0] buf_ldfwdtag_in; + logic [ DEPTH-1:0] [ 3:0] buf_byteen_in; + logic [ DEPTH-1:0] [ 31:0] buf_addr_in; + logic [ DEPTH-1:0] [ 31:0] buf_data_in; + logic [ DEPTH-1:0] buf_error_en; + logic [ DEPTH-1:0] buf_data_en; + logic [ DEPTH-1:0] [ DEPTH-1:0] buf_age_in; + logic [ DEPTH-1:0] [ DEPTH-1:0] buf_ageQ; + logic [ DEPTH-1:0] [ DEPTH-1:0] buf_rspage_set; + logic [ DEPTH-1:0] [ DEPTH-1:0] buf_rspage_in; + logic [ DEPTH-1:0] [ DEPTH-1:0] buf_rspageQ; - // Input buffer signals - logic ibuf_valid; - logic ibuf_dual; - logic ibuf_samedw; - logic ibuf_nomerge; - logic [DEPTH_LOG2-1:0] ibuf_tag; - logic [DEPTH_LOG2-1:0] ibuf_dualtag; - logic ibuf_sideeffect; - logic ibuf_unsign; - logic ibuf_write; - logic [1:0] ibuf_sz; - logic [3:0] ibuf_byteen; - logic [31:0] ibuf_addr; - logic [31:0] ibuf_data; - logic [TIMER_LOG2-1:0] ibuf_timer; + // Input buffer signals + logic ibuf_valid; + logic ibuf_dual; + logic ibuf_samedw; + logic ibuf_nomerge; + logic [DEPTH_LOG2-1:0] ibuf_tag; + logic [DEPTH_LOG2-1:0] ibuf_dualtag; + logic ibuf_sideeffect; + logic ibuf_unsign; + logic ibuf_write; + logic [ 1:0] ibuf_sz; + logic [ 3:0] ibuf_byteen; + logic [ 31:0] ibuf_addr; + logic [ 31:0] ibuf_data; + logic [TIMER_LOG2-1:0] ibuf_timer; - logic ibuf_byp; - logic ibuf_wr_en; - logic ibuf_rst; - 
logic ibuf_force_drain; - logic ibuf_drain_vld; - logic [DEPTH-1:0] ibuf_drainvec_vld; - logic [DEPTH_LOG2-1:0] ibuf_tag_in; - logic [DEPTH_LOG2-1:0] ibuf_dualtag_in; - logic [1:0] ibuf_sz_in; - logic [31:0] ibuf_addr_in; - logic [3:0] ibuf_byteen_in; - logic [31:0] ibuf_data_in; - logic [TIMER_LOG2-1:0] ibuf_timer_in; - logic [3:0] ibuf_byteen_out; - logic [31:0] ibuf_data_out; - logic ibuf_merge_en, ibuf_merge_in; + logic ibuf_byp; + logic ibuf_wr_en; + logic ibuf_rst; + logic ibuf_force_drain; + logic ibuf_drain_vld; + logic [ DEPTH-1:0] ibuf_drainvec_vld; + logic [DEPTH_LOG2-1:0] ibuf_tag_in; + logic [DEPTH_LOG2-1:0] ibuf_dualtag_in; + logic [ 1:0] ibuf_sz_in; + logic [ 31:0] ibuf_addr_in; + logic [ 3:0] ibuf_byteen_in; + logic [ 31:0] ibuf_data_in; + logic [TIMER_LOG2-1:0] ibuf_timer_in; + logic [ 3:0] ibuf_byteen_out; + logic [ 31:0] ibuf_data_out; + logic ibuf_merge_en, ibuf_merge_in; - // Output buffer signals - logic obuf_valid; - logic obuf_write; - logic obuf_nosend; - logic obuf_rdrsp_pend; - logic obuf_sideeffect; - logic [31:0] obuf_addr; - logic [63:0] obuf_data; - logic [1:0] obuf_sz; - logic [7:0] obuf_byteen; - logic obuf_merge; - logic obuf_cmd_done, obuf_data_done; - logic [pt.LSU_BUS_TAG-1:0] obuf_tag0; - logic [pt.LSU_BUS_TAG-1:0] obuf_tag1; - logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag; + // Output buffer signals + logic obuf_valid; + logic obuf_write; + logic obuf_nosend; + logic obuf_rdrsp_pend; + logic obuf_sideeffect; + logic [31:0] obuf_addr; + logic [63:0] obuf_data; + logic [ 1:0] obuf_sz; + logic [ 7:0] obuf_byteen; + logic obuf_merge; + logic obuf_cmd_done, obuf_data_done; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag0; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag1; + logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag; - logic ibuf_buf_byp; - logic obuf_force_wr_en; - logic obuf_wr_wait; - logic obuf_wr_en, obuf_wr_enQ; - logic obuf_rst; - logic obuf_write_in; - logic obuf_nosend_in; - logic obuf_rdrsp_pend_en; - logic obuf_rdrsp_pend_in; - logic 
obuf_sideeffect_in; - logic obuf_aligned_in; - logic [31:0] obuf_addr_in; - logic [63:0] obuf_data_in; - logic [1:0] obuf_sz_in; - logic [7:0] obuf_byteen_in; - logic obuf_merge_in; - logic obuf_cmd_done_in, obuf_data_done_in; - logic [pt.LSU_BUS_TAG-1:0] obuf_tag0_in; - logic [pt.LSU_BUS_TAG-1:0] obuf_tag1_in; - logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag_in; + logic ibuf_buf_byp; + logic obuf_force_wr_en; + logic obuf_wr_wait; + logic obuf_wr_en, obuf_wr_enQ; + logic obuf_rst; + logic obuf_write_in; + logic obuf_nosend_in; + logic obuf_rdrsp_pend_en; + logic obuf_rdrsp_pend_in; + logic obuf_sideeffect_in; + logic obuf_aligned_in; + logic [31:0] obuf_addr_in; + logic [63:0] obuf_data_in; + logic [ 1:0] obuf_sz_in; + logic [ 7:0] obuf_byteen_in; + logic obuf_merge_in; + logic obuf_cmd_done_in, obuf_data_done_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag0_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_tag1_in; + logic [pt.LSU_BUS_TAG-1:0] obuf_rdrsp_tag_in; - logic obuf_merge_en; - logic [TIMER_LOG2-1:0] obuf_wr_timer, obuf_wr_timer_in; - logic [7:0] obuf_byteen0_in, obuf_byteen1_in; - logic [63:0] obuf_data0_in, obuf_data1_in; + logic obuf_merge_en; + logic [TIMER_LOG2-1:0] obuf_wr_timer, obuf_wr_timer_in; + logic [7:0] obuf_byteen0_in, obuf_byteen1_in; + logic [63:0] obuf_data0_in, obuf_data1_in; - logic lsu_axi_awvalid_q, lsu_axi_awready_q; - logic lsu_axi_wvalid_q, lsu_axi_wready_q; - logic lsu_axi_arvalid_q, lsu_axi_arready_q; - logic lsu_axi_bvalid_q, lsu_axi_bready_q; - logic lsu_axi_rvalid_q, lsu_axi_rready_q; - logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid_q, lsu_axi_rid_q; - logic [1:0] lsu_axi_bresp_q, lsu_axi_rresp_q; - logic [pt.LSU_BUS_TAG-1:0] lsu_imprecise_error_store_tag; - logic [63:0] lsu_axi_rdata_q; + logic lsu_axi_awvalid_q, lsu_axi_awready_q; + logic lsu_axi_wvalid_q, lsu_axi_wready_q; + logic lsu_axi_arvalid_q, lsu_axi_arready_q; + logic lsu_axi_bvalid_q, lsu_axi_bready_q; + logic lsu_axi_rvalid_q, lsu_axi_rready_q; + logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid_q, 
lsu_axi_rid_q; + logic [1:0] lsu_axi_bresp_q, lsu_axi_rresp_q; + logic [pt.LSU_BUS_TAG-1:0] lsu_imprecise_error_store_tag; + logic [ 63:0] lsu_axi_rdata_q; - //------------------------------------------------------------------------------ - // Load forwarding logic start - //------------------------------------------------------------------------------ + //------------------------------------------------------------------------------ + // Load forwarding logic start + //------------------------------------------------------------------------------ - // Function to do 8 to 3 bit encoding - function automatic logic [2:0] f_Enc8to3; - input logic [7:0] Dec_value; + // Function to do 8 to 3 bit encoding + function automatic logic [2:0] f_Enc8to3; + input logic [7:0] Dec_value; - logic [2:0] Enc_value; - Enc_value[0] = Dec_value[1] | Dec_value[3] | Dec_value[5] | Dec_value[7]; - Enc_value[1] = Dec_value[2] | Dec_value[3] | Dec_value[6] | Dec_value[7]; - Enc_value[2] = Dec_value[4] | Dec_value[5] | Dec_value[6] | Dec_value[7]; + logic [2:0] Enc_value; + Enc_value[0] = Dec_value[1] | Dec_value[3] | Dec_value[5] | Dec_value[7]; + Enc_value[1] = Dec_value[2] | Dec_value[3] | Dec_value[6] | Dec_value[7]; + Enc_value[2] = Dec_value[4] | Dec_value[5] | Dec_value[6] | Dec_value[7]; - return Enc_value[2:0]; - endfunction // f_Enc8to3 + return Enc_value[2:0]; + endfunction // f_Enc8to3 - // Buffer hit logic for bus load forwarding - assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4]; - assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0]; - for (genvar i=0; i 4'b0) & (obuf_wr_timer < TIMER_MAX)) ? 
(obuf_wr_timer + 1'b1) : obuf_wr_timer); - assign obuf_force_wr_en = lsu_busreq_m & ~lsu_busreq_r & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_m[31:2] != buf_addr[CmdPtr0][31:2]); // Entry in m can't merge with entry going to obuf and there is no entry in between - assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & (~lsu_pkt_r.store | no_dword_merge_r); + assign obuf_wr_timer_in = obuf_wr_en ? 3'b0: (((buf_numvld_cmd_any > 4'b0) & (obuf_wr_timer < TIMER_MAX)) ? (obuf_wr_timer + 1'b1) : obuf_wr_timer); + assign obuf_force_wr_en = lsu_busreq_m & ~lsu_busreq_r & ~ibuf_valid & (buf_numvld_cmd_any[3:0] == 4'b1) & (lsu_addr_m[31:2] != buf_addr[CmdPtr0][31:2]); // Entry in m can't merge with entry going to obuf and there is no entry in between + assign ibuf_buf_byp = ibuf_byp & (buf_numvld_pend_any[3:0] == 4'b0) & (~lsu_pkt_r.store | no_dword_merge_r); - assign obuf_wr_en = ((ibuf_buf_byp & lsu_commit_r & ~(is_sideeffects_r & bus_sideeffect_pend)) | + assign obuf_wr_en = ((ibuf_buf_byp & lsu_commit_r & ~(is_sideeffects_r & bus_sideeffect_pend)) | ((buf_state[CmdPtr0] == CMD) & found_cmdptr0 & ~buf_cmd_state_bus_en[CmdPtr0] & ~(buf_sideeffect[CmdPtr0] & bus_sideeffect_pend) & (~(buf_dual[CmdPtr0] & buf_samedw[CmdPtr0] & ~buf_write[CmdPtr0]) | found_cmdptr1 | buf_nomerge[CmdPtr0] | obuf_force_wr_en))) & (bus_cmd_ready | ~obuf_valid | obuf_nosend) & ~obuf_wr_wait & ~bus_addr_match_pending & lsu_bus_clk_en; - assign obuf_rst = ((bus_cmd_sent | (obuf_valid & obuf_nosend)) & ~obuf_wr_en & lsu_bus_clk_en) | dec_tlu_force_halt; + assign obuf_rst = ((bus_cmd_sent | (obuf_valid & obuf_nosend)) & ~obuf_wr_en & lsu_bus_clk_en) | dec_tlu_force_halt; - assign obuf_write_in = ibuf_buf_byp ? lsu_pkt_r.store : buf_write[CmdPtr0]; - assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_r : buf_sideeffect[CmdPtr0]; - assign obuf_addr_in[31:0] = ibuf_buf_byp ? lsu_addr_r[31:0] : buf_addr[CmdPtr0]; - assign obuf_sz_in[1:0] = ibuf_buf_byp ? 
{lsu_pkt_r.word, lsu_pkt_r.half} : buf_sz[CmdPtr0]; - assign obuf_merge_in = obuf_merge_en; - assign obuf_tag0_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr0_r) : (pt.LSU_BUS_TAG)'(CmdPtr0); - assign obuf_tag1_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr1_r) : (pt.LSU_BUS_TAG)'(CmdPtr1); + assign obuf_write_in = ibuf_buf_byp ? lsu_pkt_r.store : buf_write[CmdPtr0]; + assign obuf_sideeffect_in = ibuf_buf_byp ? is_sideeffects_r : buf_sideeffect[CmdPtr0]; + assign obuf_addr_in[31:0] = ibuf_buf_byp ? lsu_addr_r[31:0] : buf_addr[CmdPtr0]; + assign obuf_sz_in[1:0] = ibuf_buf_byp ? {lsu_pkt_r.word, lsu_pkt_r.half} : buf_sz[CmdPtr0]; + assign obuf_merge_in = obuf_merge_en; + assign obuf_tag0_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr0_r) : (pt.LSU_BUS_TAG)'(CmdPtr0); + assign obuf_tag1_in[pt.LSU_BUS_TAG-1:0] = ibuf_buf_byp ? (pt.LSU_BUS_TAG)'(WrPtr1_r) : (pt.LSU_BUS_TAG)'(CmdPtr1); - assign obuf_cmd_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent); - assign obuf_data_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent); + assign obuf_cmd_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_cmd_done | bus_wcmd_sent); + assign obuf_data_done_in = ~(obuf_wr_en | obuf_rst) & (obuf_data_done | bus_wdata_sent); - assign obuf_aligned_in = ibuf_buf_byp ? is_aligned_r : ((obuf_sz_in[1:0] == 2'b0) | + assign obuf_aligned_in = ibuf_buf_byp ? is_aligned_r : ((obuf_sz_in[1:0] == 2'b0) | (obuf_sz_in[0] & ~obuf_addr_in[0]) | (obuf_sz_in[1] & ~(|obuf_addr_in[1:0]))); - assign obuf_rdrsp_pend_in = ((~(obuf_wr_en & ~obuf_nosend_in) & obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))) | (bus_cmd_sent & ~obuf_write)) & ~dec_tlu_force_halt; - assign obuf_rdrsp_pend_en = lsu_bus_clk_en | dec_tlu_force_halt; - assign obuf_rdrsp_tag_in[pt.LSU_BUS_TAG-1:0] = (bus_cmd_sent & ~obuf_write) ? 
obuf_tag0[pt.LSU_BUS_TAG-1:0] : obuf_rdrsp_tag[pt.LSU_BUS_TAG-1:0]; - // No ld to ld fwd for aligned - assign obuf_nosend_in = (obuf_addr_in[31:3] == obuf_addr[31:3]) & obuf_aligned_in & ~obuf_sideeffect & ~obuf_write & ~obuf_write_in & ~dec_tlu_external_ldfwd_disable & + assign obuf_rdrsp_pend_in = ((~(obuf_wr_en & ~obuf_nosend_in) & obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag))) | (bus_cmd_sent & ~obuf_write)) & ~dec_tlu_force_halt; + assign obuf_rdrsp_pend_en = lsu_bus_clk_en | dec_tlu_force_halt; + assign obuf_rdrsp_tag_in[pt.LSU_BUS_TAG-1:0] = (bus_cmd_sent & ~obuf_write) ? obuf_tag0[pt.LSU_BUS_TAG-1:0] : obuf_rdrsp_tag[pt.LSU_BUS_TAG-1:0]; + // No ld to ld fwd for aligned + assign obuf_nosend_in = (obuf_addr_in[31:3] == obuf_addr[31:3]) & obuf_aligned_in & ~obuf_sideeffect & ~obuf_write & ~obuf_write_in & ~dec_tlu_external_ldfwd_disable & ((obuf_valid & ~obuf_nosend) | (obuf_rdrsp_pend & ~(bus_rsp_read & (bus_rsp_read_tag == obuf_rdrsp_tag)))); - assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {ldst_byteen_lo_r[3:0],4'b0} : {4'b0,ldst_byteen_lo_r[3:0]}) : + assign obuf_byteen0_in[7:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {ldst_byteen_lo_r[3:0],4'b0} : {4'b0,ldst_byteen_lo_r[3:0]}) : (buf_addr[CmdPtr0][2] ? {buf_byteen[CmdPtr0],4'b0} : {4'b0,buf_byteen[CmdPtr0]}); - assign obuf_byteen1_in[7:0] = ibuf_buf_byp ? (end_addr_r[2] ? {ldst_byteen_hi_r[3:0],4'b0} : {4'b0,ldst_byteen_hi_r[3:0]}) : + assign obuf_byteen1_in[7:0] = ibuf_buf_byp ? (end_addr_r[2] ? {ldst_byteen_hi_r[3:0],4'b0} : {4'b0,ldst_byteen_hi_r[3:0]}) : (buf_addr[CmdPtr1][2] ? {buf_byteen[CmdPtr1],4'b0} : {4'b0,buf_byteen[CmdPtr1]}); - assign obuf_data0_in[63:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {store_data_lo_r[31:0],32'b0} : {32'b0,store_data_lo_r[31:0]}) : + assign obuf_data0_in[63:0] = ibuf_buf_byp ? (lsu_addr_r[2] ? {store_data_lo_r[31:0],32'b0} : {32'b0,store_data_lo_r[31:0]}) : (buf_addr[CmdPtr0][2] ? 
{buf_data[CmdPtr0],32'b0} : {32'b0,buf_data[CmdPtr0]}); - assign obuf_data1_in[63:0] = ibuf_buf_byp ? (end_addr_r[2] ? {store_data_hi_r[31:0],32'b0} :{32'b0,store_data_hi_r[31:0]}) : + assign obuf_data1_in[63:0] = ibuf_buf_byp ? (end_addr_r[2] ? {store_data_hi_r[31:0],32'b0} :{32'b0,store_data_hi_r[31:0]}) : (buf_addr[CmdPtr1][2] ? {buf_data[CmdPtr1],32'b0} : {32'b0,buf_data[CmdPtr1]}); - for (genvar i=0 ;i<8; i++) begin - assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]); - assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)]; - end + for (genvar i = 0; i < 8; i++) begin + assign obuf_byteen_in[i] = obuf_byteen0_in[i] | (obuf_merge_en & obuf_byteen1_in[i]); + assign obuf_data_in[(8*i)+7:(8*i)] = (obuf_merge_en & obuf_byteen1_in[i]) ? obuf_data1_in[(8*i)+7:(8*i)] : obuf_data0_in[(8*i)+7:(8*i)]; + end - // No store obuf merging for AXI since all stores are sent non-posted. Can't track the second id right now - assign obuf_merge_en = ((CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & + // No store obuf merging for AXI since all stores are sent non-posted. 
Can't track the second id right now + assign obuf_merge_en = ((CmdPtr0 != CmdPtr1) & found_cmdptr0 & found_cmdptr1 & (buf_state[CmdPtr0] == CMD) & (buf_state[CmdPtr1] == CMD) & ~buf_cmd_state_bus_en[CmdPtr0] & ~buf_sideeffect[CmdPtr0] & (~buf_write[CmdPtr0] & buf_dual[CmdPtr0] & ~buf_dualhi[CmdPtr0] & buf_samedw[CmdPtr0])) | // CmdPtr0/CmdPtr1 are for same load which is within a DW (ibuf_buf_byp & ldst_samedw_r & ldst_dual_r); - rvdff_fpga #(.WIDTH(1)) obuf_wren_ff (.din(obuf_wr_en), .dout(obuf_wr_enQ), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*); - rvdffsc #(.WIDTH(1)) obuf_valid_ff (.din(1'b1), .dout(obuf_valid), .en(obuf_wr_en), .clear(obuf_rst), .clk(lsu_free_c2_clk), .*); - rvdffs #(.WIDTH(1)) obuf_nosend_ff (.din(obuf_nosend_in), .dout(obuf_nosend), .en(obuf_wr_en), .clk(lsu_free_c2_clk), .*); - rvdffs #(.WIDTH(1)) obuf_rdrsp_pend_ff(.din(obuf_rdrsp_pend_in), .dout(obuf_rdrsp_pend), .en(obuf_rdrsp_pend_en), .clk(lsu_free_c2_clk), .*); - rvdff_fpga #(.WIDTH(1)) obuf_cmd_done_ff (.din(obuf_cmd_done_in), .dout(obuf_cmd_done), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(1)) obuf_data_done_ff (.din(obuf_data_done_in), .dout(obuf_data_done), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*); - rvdff_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_rdrsp_tagff (.din(obuf_rdrsp_tag_in), .dout(obuf_rdrsp_tag), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag0ff (.din(obuf_tag0_in), .dout(obuf_tag0), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(pt.LSU_BUS_TAG)) obuf_tag1ff (.din(obuf_tag1_in), .dout(obuf_tag1), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) obuf_mergeff (.din(obuf_merge_in), .dout(obuf_merge), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - 
rvdffs_fpga #(.WIDTH(1)) obuf_writeff (.din(obuf_write_in), .dout(obuf_write), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(1)) obuf_sideeffectff (.din(obuf_sideeffect_in), .dout(obuf_sideeffect), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(2)) obuf_szff (.din(obuf_sz_in[1:0]), .dout(obuf_sz), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffs_fpga #(.WIDTH(8)) obuf_byteenff (.din(obuf_byteen_in[7:0]), .dout(obuf_byteen), .en(obuf_wr_en), .clk(lsu_bus_obuf_c1_clk), .clken(lsu_bus_obuf_c1_clken), .rawclk(clk), .*); - rvdffe #(.WIDTH(32)) obuf_addrff (.din(obuf_addr_in[31:0]), .dout(obuf_addr), .en(obuf_wr_en), .*); - rvdffe #(.WIDTH(64)) obuf_dataff (.din(obuf_data_in[63:0]), .dout(obuf_data), .en(obuf_wr_en), .*); - rvdff_fpga #(.WIDTH(TIMER_LOG2)) obuf_timerff (.din(obuf_wr_timer_in), .dout(obuf_wr_timer), .clk(lsu_busm_clk), .clken(lsu_busm_clken), .rawclk(clk), .*); + rvdff_fpga #( + .WIDTH(1) + ) obuf_wren_ff ( + .din(obuf_wr_en), + .dout(obuf_wr_enQ), + .clk(lsu_busm_clk), + .clken(lsu_busm_clken), + .rawclk(clk), + .* + ); + rvdffsc #( + .WIDTH(1) + ) obuf_valid_ff ( + .din(1'b1), + .dout(obuf_valid), + .en(obuf_wr_en), + .clear(obuf_rst), + .clk(lsu_free_c2_clk), + .* + ); + rvdffs #( + .WIDTH(1) + ) obuf_nosend_ff ( + .din (obuf_nosend_in), + .dout(obuf_nosend), + .en (obuf_wr_en), + .clk (lsu_free_c2_clk), + .* + ); + rvdffs #( + .WIDTH(1) + ) obuf_rdrsp_pend_ff ( + .din (obuf_rdrsp_pend_in), + .dout(obuf_rdrsp_pend), + .en (obuf_rdrsp_pend_en), + .clk (lsu_free_c2_clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) obuf_cmd_done_ff ( + .din(obuf_cmd_done_in), + .dout(obuf_cmd_done), + .clk(lsu_busm_clk), + .clken(lsu_busm_clken), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(1) + ) obuf_data_done_ff ( + .din(obuf_data_done_in), + .dout(obuf_data_done), + 
.clk(lsu_busm_clk), + .clken(lsu_busm_clken), + .rawclk(clk), + .* + ); + rvdff_fpga #( + .WIDTH(pt.LSU_BUS_TAG) + ) obuf_rdrsp_tagff ( + .din(obuf_rdrsp_tag_in), + .dout(obuf_rdrsp_tag), + .clk(lsu_busm_clk), + .clken(lsu_busm_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(pt.LSU_BUS_TAG) + ) obuf_tag0ff ( + .din(obuf_tag0_in), + .dout(obuf_tag0), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(pt.LSU_BUS_TAG) + ) obuf_tag1ff ( + .din(obuf_tag1_in), + .dout(obuf_tag1), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) obuf_mergeff ( + .din(obuf_merge_in), + .dout(obuf_merge), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) obuf_writeff ( + .din(obuf_write_in), + .dout(obuf_write), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(1) + ) obuf_sideeffectff ( + .din(obuf_sideeffect_in), + .dout(obuf_sideeffect), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(2) + ) obuf_szff ( + .din(obuf_sz_in[1:0]), + .dout(obuf_sz), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffs_fpga #( + .WIDTH(8) + ) obuf_byteenff ( + .din(obuf_byteen_in[7:0]), + .dout(obuf_byteen), + .en(obuf_wr_en), + .clk(lsu_bus_obuf_c1_clk), + .clken(lsu_bus_obuf_c1_clken), + .rawclk(clk), + .* + ); + rvdffe #( + .WIDTH(32) + ) obuf_addrff ( + .din (obuf_addr_in[31:0]), + .dout(obuf_addr), + .en (obuf_wr_en), + .* + ); + rvdffe #( + .WIDTH(64) + ) obuf_dataff ( + .din (obuf_data_in[63:0]), + .dout(obuf_data), + .en (obuf_wr_en), + .* + ); + rvdff_fpga #( + .WIDTH(TIMER_LOG2) + ) obuf_timerff ( + 
.din(obuf_wr_timer_in), + .dout(obuf_wr_timer), + .clk(lsu_busm_clk), + .clken(lsu_busm_clken), + .rawclk(clk), + .* + ); - //------------------------------------------------------------------------------ - // Output buffer logic ends here - //------------------------------------------------------------------------------ + //------------------------------------------------------------------------------ + // Output buffer logic ends here + //------------------------------------------------------------------------------ - // Find the entry to allocate and entry to send - always_comb begin - WrPtr0_m[DEPTH_LOG2-1:0] = '0; - WrPtr1_m[DEPTH_LOG2-1:0] = '0; - found_wrptr0 = '0; - found_wrptr1 = '0; + // Find the entry to allocate and entry to send + always_comb begin + WrPtr0_m[DEPTH_LOG2-1:0] = '0; + WrPtr1_m[DEPTH_LOG2-1:0] = '0; + found_wrptr0 = '0; + found_wrptr1 = '0; - // Find first write pointer - for (int i=0; i= (DEPTH-1)) : (buf_numvld_any[3:0] == DEPTH); - assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid; + assign lsu_bus_buffer_pend_any = (buf_numvld_pend_any != 0); + assign lsu_bus_buffer_full_any = (ldst_dual_d & dec_lsu_valid_raw_d) ? 
(buf_numvld_any[3:0] >= (DEPTH-1)) : (buf_numvld_any[3:0] == DEPTH); + assign lsu_bus_buffer_empty_any = ~(|buf_state[DEPTH-1:0]) & ~ibuf_valid & ~obuf_valid; - // Non blocking ports - assign lsu_nonblock_load_valid_m = lsu_busreq_m & lsu_pkt_m.valid & lsu_pkt_m.load & ~flush_m_up & ~ld_full_hit_m; - assign lsu_nonblock_load_tag_m[DEPTH_LOG2-1:0] = WrPtr0_m[DEPTH_LOG2-1:0]; - assign lsu_nonblock_load_inv_r = lsu_nonblock_load_valid_r & ~lsu_commit_r; - assign lsu_nonblock_load_inv_tag_r[DEPTH_LOG2-1:0] = WrPtr0_r[DEPTH_LOG2-1:0]; // r tag needs to be accurate even if there is no invalidate + // Non blocking ports + assign lsu_nonblock_load_valid_m = lsu_busreq_m & lsu_pkt_m.valid & lsu_pkt_m.load & ~flush_m_up & ~ld_full_hit_m; + assign lsu_nonblock_load_tag_m[DEPTH_LOG2-1:0] = WrPtr0_m[DEPTH_LOG2-1:0]; + assign lsu_nonblock_load_inv_r = lsu_nonblock_load_valid_r & ~lsu_commit_r; + assign lsu_nonblock_load_inv_tag_r[DEPTH_LOG2-1:0] = WrPtr0_r[DEPTH_LOG2-1:0]; // r tag needs to be accurate even if there is no invalidate - always_comb begin - lsu_nonblock_load_data_ready = '0; - lsu_nonblock_load_data_error = '0; - lsu_nonblock_load_data_tag[DEPTH_LOG2-1:0] = '0; - lsu_nonblock_load_data_lo[31:0] = '0; - lsu_nonblock_load_data_hi[31:0] = '0; - for (int i=0; i> 8*lsu_nonblock_addr_offset[1:0]); + assign lsu_nonblock_addr_offset[1:0] = buf_addr[lsu_nonblock_load_data_tag][1:0]; + assign lsu_nonblock_sz[1:0] = buf_sz[lsu_nonblock_load_data_tag][1:0]; + assign lsu_nonblock_unsign = buf_unsign[lsu_nonblock_load_data_tag]; + assign lsu_nonblock_data_unalgn[31:0] = 32'({lsu_nonblock_load_data_hi[31:0], lsu_nonblock_load_data_lo[31:0]} >> 8*lsu_nonblock_addr_offset[1:0]); - assign lsu_nonblock_load_data_valid = lsu_nonblock_load_data_ready & ~lsu_nonblock_load_data_error; - assign lsu_nonblock_load_data[31:0] = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) | + assign lsu_nonblock_load_data_valid = 
lsu_nonblock_load_data_ready & ~lsu_nonblock_load_data_error; + assign lsu_nonblock_load_data[31:0] = ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {24'b0,lsu_nonblock_data_unalgn[7:0]}) | ({32{ lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {16'b0,lsu_nonblock_data_unalgn[15:0]}) | ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b00)}} & {{24{lsu_nonblock_data_unalgn[7]}}, lsu_nonblock_data_unalgn[7:0]}) | ({32{~lsu_nonblock_unsign & (lsu_nonblock_sz[1:0] == 2'b01)}} & {{16{lsu_nonblock_data_unalgn[15]}},lsu_nonblock_data_unalgn[15:0]}) | ({32{(lsu_nonblock_sz[1:0] == 2'b10)}} & lsu_nonblock_data_unalgn[31:0]); - // Determine if there is a pending return to sideeffect load/store - always_comb begin - bus_sideeffect_pend = obuf_valid & obuf_sideeffect & dec_tlu_sideeffect_posted_disable; - for (int i=0; i put in the cam - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load - output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of the enrty which needs to be invalidated - output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam - output logic lsu_nonblock_load_data_error,// non block load has an error - output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error - output logic [31:0] lsu_nonblock_load_data, // Data of the non block load + // Non-blocking loads + output logic lsu_nonblock_load_valid_m, // there is an external load -> put in the cam + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_tag_m, // the tag of the external non block load + output logic lsu_nonblock_load_inv_r, // invalidate signal for the cam entry for non block loads + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_inv_tag_r, // tag of 
the enrty which needs to be invalidated + output logic lsu_nonblock_load_data_valid,// the non block is valid - sending information back to the cam + output logic lsu_nonblock_load_data_error, // non block load has an error + output logic [pt.LSU_NUM_NBLOAD_WIDTH-1:0] lsu_nonblock_load_data_tag, // the tag of the non block load sending the data/error + output logic [31:0] lsu_nonblock_load_data, // Data of the non block load - // PMU events - output logic lsu_pmu_bus_trxn, - output logic lsu_pmu_bus_misaligned, - output logic lsu_pmu_bus_error, - output logic lsu_pmu_bus_busy, + // PMU events + output logic lsu_pmu_bus_trxn, + output logic lsu_pmu_bus_misaligned, + output logic lsu_pmu_bus_error, + output logic lsu_pmu_bus_busy, - // AXI Write Channels - output logic lsu_axi_awvalid, - input logic lsu_axi_awready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, - output logic [31:0] lsu_axi_awaddr, - output logic [3:0] lsu_axi_awregion, - output logic [7:0] lsu_axi_awlen, - output logic [2:0] lsu_axi_awsize, - output logic [1:0] lsu_axi_awburst, - output logic lsu_axi_awlock, - output logic [3:0] lsu_axi_awcache, - output logic [2:0] lsu_axi_awprot, - output logic [3:0] lsu_axi_awqos, + // AXI Write Channels + output logic lsu_axi_awvalid, + input logic lsu_axi_awready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_awid, + output logic [ 31:0] lsu_axi_awaddr, + output logic [ 3:0] lsu_axi_awregion, + output logic [ 7:0] lsu_axi_awlen, + output logic [ 2:0] lsu_axi_awsize, + output logic [ 1:0] lsu_axi_awburst, + output logic lsu_axi_awlock, + output logic [ 3:0] lsu_axi_awcache, + output logic [ 2:0] lsu_axi_awprot, + output logic [ 3:0] lsu_axi_awqos, - output logic lsu_axi_wvalid, - input logic lsu_axi_wready, - output logic [63:0] lsu_axi_wdata, - output logic [7:0] lsu_axi_wstrb, - output logic lsu_axi_wlast, + output logic lsu_axi_wvalid, + input logic lsu_axi_wready, + output logic [63:0] lsu_axi_wdata, + output logic [ 7:0] lsu_axi_wstrb, + output logic 
lsu_axi_wlast, - input logic lsu_axi_bvalid, - output logic lsu_axi_bready, - input logic [1:0] lsu_axi_bresp, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, + input logic lsu_axi_bvalid, + output logic lsu_axi_bready, + input logic [ 1:0] lsu_axi_bresp, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_bid, - // AXI Read Channels - output logic lsu_axi_arvalid, - input logic lsu_axi_arready, - output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, - output logic [31:0] lsu_axi_araddr, - output logic [3:0] lsu_axi_arregion, - output logic [7:0] lsu_axi_arlen, - output logic [2:0] lsu_axi_arsize, - output logic [1:0] lsu_axi_arburst, - output logic lsu_axi_arlock, - output logic [3:0] lsu_axi_arcache, - output logic [2:0] lsu_axi_arprot, - output logic [3:0] lsu_axi_arqos, + // AXI Read Channels + output logic lsu_axi_arvalid, + input logic lsu_axi_arready, + output logic [pt.LSU_BUS_TAG-1:0] lsu_axi_arid, + output logic [ 31:0] lsu_axi_araddr, + output logic [ 3:0] lsu_axi_arregion, + output logic [ 7:0] lsu_axi_arlen, + output logic [ 2:0] lsu_axi_arsize, + output logic [ 1:0] lsu_axi_arburst, + output logic lsu_axi_arlock, + output logic [ 3:0] lsu_axi_arcache, + output logic [ 2:0] lsu_axi_arprot, + output logic [ 3:0] lsu_axi_arqos, - input logic lsu_axi_rvalid, - output logic lsu_axi_rready, - input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, - input logic [63:0] lsu_axi_rdata, - input logic [1:0] lsu_axi_rresp, + input logic lsu_axi_rvalid, + output logic lsu_axi_rready, + input logic [pt.LSU_BUS_TAG-1:0] lsu_axi_rid, + input logic [ 63:0] lsu_axi_rdata, + input logic [ 1:0] lsu_axi_rresp, - input logic lsu_bus_clk_en + input logic lsu_bus_clk_en ); - logic lsu_bus_clk_en_q; + logic lsu_bus_clk_en_q; - logic [3:0] ldst_byteen_m, ldst_byteen_r; - logic [7:0] ldst_byteen_ext_m, ldst_byteen_ext_r; - logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r; - logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r; - logic is_sideeffects_r; + logic [3:0] ldst_byteen_m, ldst_byteen_r; + logic [7:0] 
ldst_byteen_ext_m, ldst_byteen_ext_r; + logic [3:0] ldst_byteen_hi_m, ldst_byteen_hi_r; + logic [3:0] ldst_byteen_lo_m, ldst_byteen_lo_r; + logic is_sideeffects_r; - logic [63:0] store_data_ext_r; - logic [31:0] store_data_hi_r; - logic [31:0] store_data_lo_r; + logic [63:0] store_data_ext_r; + logic [31:0] store_data_hi_r; + logic [31:0] store_data_lo_r; - logic addr_match_dw_lo_r_m; - logic addr_match_word_lo_r_m; - logic no_word_merge_r, no_dword_merge_r; + logic addr_match_dw_lo_r_m; + logic addr_match_word_lo_r_m; + logic no_word_merge_r, no_dword_merge_r; - logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi; - logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi; + logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi; + logic [3:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi; - logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo; - logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi; + logic [3:0] ld_byte_hit_lo, ld_byte_rhit_lo; + logic [3:0] ld_byte_hit_hi, ld_byte_rhit_hi; - logic [31:0] ld_fwddata_rpipe_lo; - logic [31:0] ld_fwddata_rpipe_hi; + logic [31:0] ld_fwddata_rpipe_lo; + logic [31:0] ld_fwddata_rpipe_hi; - logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi; - logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi; + logic [3:0] ld_byte_hit_buf_lo, ld_byte_hit_buf_hi; + logic [31:0] ld_fwddata_buf_lo, ld_fwddata_buf_hi; - logic [63:0] ld_fwddata_lo, ld_fwddata_hi; - logic [63:0] ld_fwddata_m; + logic [63:0] ld_fwddata_lo, ld_fwddata_hi; + logic [63:0] ld_fwddata_m; - logic ld_full_hit_hi_m, ld_full_hit_lo_m; - logic ld_full_hit_m; + logic ld_full_hit_hi_m, ld_full_hit_lo_m; + logic ld_full_hit_m; - assign ldst_byteen_m[3:0] = ({4{lsu_pkt_m.by}} & 4'b0001) | + assign ldst_byteen_m[3:0] = ({4{lsu_pkt_m.by}} & 4'b0001) | ({4{lsu_pkt_m.half}} & 4'b0011) | ({4{lsu_pkt_m.word}} & 4'b1111); - // Read/Write Buffer - el2_lsu_bus_buffer 
#(.pt(pt)) bus_buffer ( - .* - ); + // Read/Write Buffer + el2_lsu_bus_buffer #(.pt(pt)) bus_buffer (.*); - // Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if cannot colaesced - assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]); - assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2]^lsu_addr_m[2]); + // Logic to determine if dc5 store can be coalesced or not with younger stores. Bypass ibuf if cannot colaesced + assign addr_match_dw_lo_r_m = (lsu_addr_r[31:3] == lsu_addr_m[31:3]); + assign addr_match_word_lo_r_m = addr_match_dw_lo_r_m & ~(lsu_addr_r[2] ^ lsu_addr_m[2]); - assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m); - assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m); + assign no_word_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_word_lo_r_m); + assign no_dword_merge_r = lsu_busreq_r & ~ldst_dual_r & lsu_busreq_m & (lsu_pkt_m.load | ~addr_match_dw_lo_r_m); - // Create Hi/Lo signals - assign ldst_byteen_ext_m[7:0] = {4'b0,ldst_byteen_m[3:0]} << lsu_addr_m[1:0]; - assign ldst_byteen_ext_r[7:0] = {4'b0,ldst_byteen_r[3:0]} << lsu_addr_r[1:0]; + // Create Hi/Lo signals + assign ldst_byteen_ext_m[7:0] = {4'b0, ldst_byteen_m[3:0]} << lsu_addr_m[1:0]; + assign ldst_byteen_ext_r[7:0] = {4'b0, ldst_byteen_r[3:0]} << lsu_addr_r[1:0]; - assign store_data_ext_r[63:0] = {32'b0,store_data_r[31:0]} << {lsu_addr_r[1:0],3'b0}; + assign store_data_ext_r[63:0] = {32'b0, store_data_r[31:0]} << {lsu_addr_r[1:0], 3'b0}; - assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4]; - assign ldst_byteen_lo_m[3:0] = ldst_byteen_ext_m[3:0]; - assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4]; - assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0]; + assign ldst_byteen_hi_m[3:0] = ldst_byteen_ext_m[7:4]; + assign ldst_byteen_lo_m[3:0] = 
ldst_byteen_ext_m[3:0]; + assign ldst_byteen_hi_r[3:0] = ldst_byteen_ext_r[7:4]; + assign ldst_byteen_lo_r[3:0] = ldst_byteen_ext_r[3:0]; - assign store_data_hi_r[31:0] = store_data_ext_r[63:32]; - assign store_data_lo_r[31:0] = store_data_ext_r[31:0]; + assign store_data_hi_r[31:0] = store_data_ext_r[63:32]; + assign store_data_lo_r[31:0] = store_data_ext_r[31:0]; - assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; - assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; - assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; - assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; + assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; + assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; + assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; + assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & lsu_busreq_m & lsu_busreq_r; - for (genvar i=0; i<4; i++) begin: GenBusBufFwd - assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i]; - assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i]; - assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i]; - assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i]; + for (genvar i = 0; i < 4; i++) begin : GenBusBufFwd + assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo 
& ldst_byteen_lo_r[i] & ldst_byteen_lo_m[i]; + assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i] & ldst_byteen_hi_m[i]; + assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i] & ldst_byteen_lo_m[i]; + assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i] & ldst_byteen_hi_m[i]; - assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] | + assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i] | ld_byte_hit_buf_lo[i]; - assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] | + assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i] | ld_byte_hit_buf_hi[i]; - assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i]; - assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i]; + assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i]; + assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i]; - assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | + assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]); - assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | + assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) | ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]); - // Final muxing between m/r - assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)]; + // Final muxing between m/r + assign ld_fwddata_lo[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i] ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : ld_fwddata_buf_lo[(8*i)+7:(8*i)]; - assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? 
ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)]; + assign ld_fwddata_hi[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i] ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : ld_fwddata_buf_hi[(8*i)+7:(8*i)]; - end + end - always_comb begin - ld_full_hit_lo_m = 1'b1; - ld_full_hit_hi_m = 1'b1; - for (int i=0; i<4; i++) begin - ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]); - ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]); - end - end + always_comb begin + ld_full_hit_lo_m = 1'b1; + ld_full_hit_hi_m = 1'b1; + for (int i = 0; i < 4; i++) begin + ld_full_hit_lo_m &= (ld_byte_hit_lo[i] | ~ldst_byteen_lo_m[i]); + ld_full_hit_hi_m &= (ld_byte_hit_hi[i] | ~ldst_byteen_hi_m[i]); + end + end - // This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf) - assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m; + // This will be high if all the bytes of load hit the stores in pipe/write buffer (m/r/wrbuf) + assign ld_full_hit_m = ld_full_hit_lo_m & ld_full_hit_hi_m & lsu_busreq_m & lsu_pkt_m.load & ~is_sideeffects_m; - assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8*lsu_addr_m[1:0]); - assign bus_read_data_m[31:0] = ld_fwddata_m[31:0]; + assign ld_fwddata_m[63:0] = {ld_fwddata_hi[31:0], ld_fwddata_lo[31:0]} >> (8 * lsu_addr_m[1:0]); + assign bus_read_data_m[31:0] = ld_fwddata_m[31:0]; - // Fifo flops + // Fifo flops - rvdff #(.WIDTH(1)) clken_ff (.din(lsu_bus_clk_en), .dout(lsu_bus_clk_en_q), .clk(active_clk), .*); + rvdff #( + .WIDTH(1) + ) clken_ff ( + .din (lsu_bus_clk_en), + .dout(lsu_bus_clk_en_q), + .clk (active_clk), + .* + ); - rvdff #(.WIDTH(1)) is_sideeffects_rff (.din(is_sideeffects_m), .dout(is_sideeffects_r), .clk(lsu_c1_r_clk), .*); + rvdff #( + .WIDTH(1) + ) is_sideeffects_rff ( + .din (is_sideeffects_m), + .dout(is_sideeffects_r), + .clk (lsu_c1_r_clk), + .* + ); - rvdff #(4) lsu_byten_rff (.*, 
.din(ldst_byteen_m[3:0]), .dout(ldst_byteen_r[3:0]), .clk(lsu_c1_r_clk)); + rvdff #(4) lsu_byten_rff ( + .*, + .din (ldst_byteen_m[3:0]), + .dout(ldst_byteen_r[3:0]), + .clk (lsu_c1_r_clk) + ); -endmodule // el2_lsu_bus_intf +endmodule // el2_lsu_bus_intf diff --git a/Flow/design/lsu/el2_lsu_clkdomain.sv b/Flow/design/lsu/el2_lsu_clkdomain.sv index e1ab444..ec7874a 100644 --- a/Flow/design/lsu/el2_lsu_clkdomain.sv +++ b/Flow/design/lsu/el2_lsu_clkdomain.sv @@ -24,117 +24,180 @@ module el2_lsu_clkdomain -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" -)( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. - input logic rst_l, // reset, active low - input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + input logic active_clk, // Clock only while core active. Through two clock headers. For flops without second clock header built in. 
+ input logic rst_l, // reset, active low + input logic dec_tlu_force_halt, // This will be high till TLU goes to debug halt - // Inputs - input logic clk_override, // chciken bit to turn off clock gating - input logic dma_dccm_req, // dma is active - input logic ldst_stbuf_reqvld_r, // allocating in to the store queue + // Inputs + input logic clk_override, // chciken bit to turn off clock gating + input logic dma_dccm_req, // dma is active + input logic ldst_stbuf_reqvld_r, // allocating in to the store queue - input logic stbuf_reqvld_any, // stbuf is draining - input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed - input logic lsu_busreq_r, // busreq in r - input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry - input logic lsu_bus_buffer_empty_any, // external bus buffer is empty - input logic lsu_stbuf_empty_any, // stbuf is empty + input logic stbuf_reqvld_any, // stbuf is draining + input logic stbuf_reqvld_flushed_any, // instruction going to stbuf is flushed + input logic lsu_busreq_r, // busreq in r + input logic lsu_bus_buffer_pend_any, // bus buffer has a pending bus entry + input logic lsu_bus_buffer_empty_any, // external bus buffer is empty + input logic lsu_stbuf_empty_any, // stbuf is empty - input logic lsu_bus_clk_en, // bus clock enable + input logic lsu_bus_clk_en, // bus clock enable - input el2_lsu_pkt_t lsu_p, // lsu packet in decode - input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d - input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m - input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r + input el2_lsu_pkt_t lsu_p, // lsu packet in decode + input el2_lsu_pkt_t lsu_pkt_d, // lsu packet in d + input el2_lsu_pkt_t lsu_pkt_m, // lsu packet in m + input el2_lsu_pkt_t lsu_pkt_r, // lsu packet in r - // Outputs - output logic lsu_bus_obuf_c1_clken, // obuf clock enable - output logic lsu_busm_clken, // bus clock enable + // Outputs + output logic lsu_bus_obuf_c1_clken, // obuf clock enable + output 
logic lsu_busm_clken, // bus clock enable - output logic lsu_c1_m_clk, // m pipe single pulse clock - output logic lsu_c1_r_clk, // r pipe single pulse clock + output logic lsu_c1_m_clk, // m pipe single pulse clock + output logic lsu_c1_r_clk, // r pipe single pulse clock - output logic lsu_c2_m_clk, // m pipe double pulse clock - output logic lsu_c2_r_clk, // r pipe double pulse clock + output logic lsu_c2_m_clk, // m pipe double pulse clock + output logic lsu_c2_r_clk, // r pipe double pulse clock - output logic lsu_store_c1_m_clk, // store in m - output logic lsu_store_c1_r_clk, // store in r + output logic lsu_store_c1_m_clk, // store in m + output logic lsu_store_c1_r_clk, // store in r - output logic lsu_stbuf_c1_clk, - output logic lsu_bus_obuf_c1_clk, // ibuf clock - output logic lsu_bus_ibuf_c1_clk, // ibuf clock - output logic lsu_bus_buf_c1_clk, // ibuf clock - output logic lsu_busm_clk, // bus clock + output logic lsu_stbuf_c1_clk, + output logic lsu_bus_obuf_c1_clk, // ibuf clock + output logic lsu_bus_ibuf_c1_clk, // ibuf clock + output logic lsu_bus_buf_c1_clk, // ibuf clock + output logic lsu_busm_clk, // bus clock - output logic lsu_free_c2_clk, // free double pulse clock + output logic lsu_free_c2_clk, // free double pulse clock - input logic scan_mode // Scan mode + input logic scan_mode // Scan mode ); - logic lsu_c1_m_clken, lsu_c1_r_clken; - logic lsu_c2_m_clken, lsu_c2_r_clken; - logic lsu_c1_m_clken_q, lsu_c1_r_clken_q; - logic lsu_store_c1_m_clken, lsu_store_c1_r_clken; + logic lsu_c1_m_clken, lsu_c1_r_clken; + logic lsu_c2_m_clken, lsu_c2_r_clken; + logic lsu_c1_m_clken_q, lsu_c1_r_clken_q; + logic lsu_store_c1_m_clken, lsu_store_c1_r_clken; - logic lsu_stbuf_c1_clken; - logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken; + logic lsu_stbuf_c1_clken; + logic lsu_bus_ibuf_c1_clken, lsu_bus_buf_c1_clken; - logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken; + logic lsu_free_c1_clken, lsu_free_c1_clken_q, lsu_free_c2_clken; - 
//------------------------------------------------------------------------------------------- - // Clock Enable logic - //------------------------------------------------------------------------------------------- + //------------------------------------------------------------------------------------------- + // Clock Enable logic + //------------------------------------------------------------------------------------------- - assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override; - assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override; + assign lsu_c1_m_clken = lsu_p.valid | dma_dccm_req | clk_override; + assign lsu_c1_r_clken = lsu_pkt_m.valid | lsu_c1_m_clken_q | clk_override; - assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override; - assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override; + assign lsu_c2_m_clken = lsu_c1_m_clken | lsu_c1_m_clken_q | clk_override; + assign lsu_c2_r_clken = lsu_c1_r_clken | lsu_c1_r_clken_q | clk_override; - assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & lsu_pkt_d.store) | clk_override) ; - assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & lsu_pkt_m.store) | clk_override) ; + assign lsu_store_c1_m_clken = ((lsu_c1_m_clken & lsu_pkt_d.store) | clk_override); + assign lsu_store_c1_r_clken = ((lsu_c1_r_clken & lsu_pkt_m.store) | clk_override); - assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override; - assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override; - assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en; - assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override; + assign lsu_stbuf_c1_clken = ldst_stbuf_reqvld_r | stbuf_reqvld_any | stbuf_reqvld_flushed_any | clk_override; + assign lsu_bus_ibuf_c1_clken = lsu_busreq_r | clk_override; + assign lsu_bus_obuf_c1_clken = (lsu_bus_buffer_pend_any | 
lsu_busreq_r | clk_override) & lsu_bus_clk_en; + assign lsu_bus_buf_c1_clken = ~lsu_bus_buffer_empty_any | lsu_busreq_r | dec_tlu_force_halt | clk_override; - assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) | + assign lsu_free_c1_clken = (lsu_p.valid | lsu_pkt_d.valid | lsu_pkt_m.valid | lsu_pkt_r.valid) | ~lsu_bus_buffer_empty_any | ~lsu_stbuf_empty_any | clk_override; - assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override; + assign lsu_free_c2_clken = lsu_free_c1_clken | lsu_free_c1_clken_q | clk_override; - // Flops - rvdff #(1) lsu_free_c1_clkenff (.din(lsu_free_c1_clken), .dout(lsu_free_c1_clken_q), .clk(active_clk), .*); + // Flops + rvdff #(1) lsu_free_c1_clkenff ( + .din (lsu_free_c1_clken), + .dout(lsu_free_c1_clken_q), + .clk (active_clk), + .* + ); - rvdff #(1) lsu_c1_m_clkenff (.din(lsu_c1_m_clken), .dout(lsu_c1_m_clken_q), .clk(lsu_free_c2_clk), .*); - rvdff #(1) lsu_c1_r_clkenff (.din(lsu_c1_r_clken), .dout(lsu_c1_r_clken_q), .clk(lsu_free_c2_clk), .*); + rvdff #(1) lsu_c1_m_clkenff ( + .din (lsu_c1_m_clken), + .dout(lsu_c1_m_clken_q), + .clk (lsu_free_c2_clk), + .* + ); + rvdff #(1) lsu_c1_r_clkenff ( + .din (lsu_c1_r_clken), + .dout(lsu_c1_r_clken_q), + .clk (lsu_free_c2_clk), + .* + ); - // Clock Headers - rvoclkhdr lsu_c1m_cgc ( .en(lsu_c1_m_clken), .l1clk(lsu_c1_m_clk), .* ); - rvoclkhdr lsu_c1r_cgc ( .en(lsu_c1_r_clken), .l1clk(lsu_c1_r_clk), .* ); + // Clock Headers + rvoclkhdr lsu_c1m_cgc ( + .en(lsu_c1_m_clken), + .l1clk(lsu_c1_m_clk), + .* + ); + rvoclkhdr lsu_c1r_cgc ( + .en(lsu_c1_r_clken), + .l1clk(lsu_c1_r_clk), + .* + ); - rvoclkhdr lsu_c2m_cgc ( .en(lsu_c2_m_clken), .l1clk(lsu_c2_m_clk), .* ); - rvoclkhdr lsu_c2r_cgc ( .en(lsu_c2_r_clken), .l1clk(lsu_c2_r_clk), .* ); + rvoclkhdr lsu_c2m_cgc ( + .en(lsu_c2_m_clken), + .l1clk(lsu_c2_m_clk), + .* + ); + rvoclkhdr lsu_c2r_cgc ( + .en(lsu_c2_r_clken), + .l1clk(lsu_c2_r_clk), + .* + ); - rvoclkhdr lsu_store_c1m_cgc 
(.en(lsu_store_c1_m_clken), .l1clk(lsu_store_c1_m_clk), .*); - rvoclkhdr lsu_store_c1r_cgc (.en(lsu_store_c1_r_clken), .l1clk(lsu_store_c1_r_clk), .*); + rvoclkhdr lsu_store_c1m_cgc ( + .en(lsu_store_c1_m_clken), + .l1clk(lsu_store_c1_m_clk), + .* + ); + rvoclkhdr lsu_store_c1r_cgc ( + .en(lsu_store_c1_r_clken), + .l1clk(lsu_store_c1_r_clk), + .* + ); - rvoclkhdr lsu_stbuf_c1_cgc ( .en(lsu_stbuf_c1_clken), .l1clk(lsu_stbuf_c1_clk), .* ); - rvoclkhdr lsu_bus_ibuf_c1_cgc ( .en(lsu_bus_ibuf_c1_clken), .l1clk(lsu_bus_ibuf_c1_clk), .* ); - rvoclkhdr lsu_bus_buf_c1_cgc ( .en(lsu_bus_buf_c1_clken), .l1clk(lsu_bus_buf_c1_clk), .* ); + rvoclkhdr lsu_stbuf_c1_cgc ( + .en(lsu_stbuf_c1_clken), + .l1clk(lsu_stbuf_c1_clk), + .* + ); + rvoclkhdr lsu_bus_ibuf_c1_cgc ( + .en(lsu_bus_ibuf_c1_clken), + .l1clk(lsu_bus_ibuf_c1_clk), + .* + ); + rvoclkhdr lsu_bus_buf_c1_cgc ( + .en(lsu_bus_buf_c1_clken), + .l1clk(lsu_bus_buf_c1_clk), + .* + ); - assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en; + assign lsu_busm_clken = (~lsu_bus_buffer_empty_any | lsu_busreq_r | clk_override) & lsu_bus_clk_en; - rvclkhdr lsu_bus_obuf_c1_cgc ( .en(lsu_bus_obuf_c1_clken), .l1clk(lsu_bus_obuf_c1_clk), .* ); - rvclkhdr lsu_busm_cgc (.en(lsu_busm_clken), .l1clk(lsu_busm_clk), .*); + rvclkhdr lsu_bus_obuf_c1_cgc ( + .en(lsu_bus_obuf_c1_clken), + .l1clk(lsu_bus_obuf_c1_clk), + .* + ); + rvclkhdr lsu_busm_cgc ( + .en(lsu_busm_clken), + .l1clk(lsu_busm_clk), + .* + ); - rvoclkhdr lsu_free_cgc (.en(lsu_free_c2_clken), .l1clk(lsu_free_c2_clk), .*); + rvoclkhdr lsu_free_cgc ( + .en(lsu_free_c2_clken), + .l1clk(lsu_free_c2_clk), + .* + ); endmodule diff --git a/Flow/design/lsu/el2_lsu_dccm_ctl.sv b/Flow/design/lsu/el2_lsu_dccm_ctl.sv index 8278cd1..afeac4a 100644 --- a/Flow/design/lsu/el2_lsu_dccm_ctl.sv +++ b/Flow/design/lsu/el2_lsu_dccm_ctl.sv @@ -27,387 +27,506 @@ // //******************************************************************************** module 
el2_lsu_dccm_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) - ( - input logic lsu_c2_m_clk, // clocks - input logic lsu_c2_r_clk, // clocks - input logic lsu_c1_r_clk, // clocks - input logic lsu_store_c1_r_clk, // clocks - input logic lsu_free_c2_clk, // clocks - input logic clk_override, // Override non-functional clock gating - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + `include "el2_param.vh" +) ( + input logic lsu_c2_m_clk, // clocks + input logic lsu_c2_r_clk, // clocks + input logic lsu_c1_r_clk, // clocks + input logic lsu_store_c1_r_clk, // clocks + input logic lsu_free_c2_clk, // clocks + input logic clk_override, // Override non-functional clock gating + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic rst_l, // reset, active low + input logic rst_l, // reset, active low - input el2_lsu_pkt_t lsu_pkt_r,// lsu packets - input el2_lsu_pkt_t lsu_pkt_m,// lsu packets - input el2_lsu_pkt_t lsu_pkt_d,// lsu packets - input logic addr_in_dccm_d, // address maps to dccm - input logic addr_in_pic_d, // address maps to pic - input logic addr_in_pic_m, // address maps to pic - input logic addr_in_dccm_m, addr_in_dccm_r, // address in dccm per pipe stage - input logic addr_in_pic_r, // address in pic per pipe stage - input logic lsu_raw_fwd_lo_r, lsu_raw_fwd_hi_r, - input logic lsu_commit_r, // lsu instruction in r commits - input logic ldst_dual_m, ldst_dual_r,// load/store is unaligned at 32 bit boundary per pipe stage + input el2_lsu_pkt_t lsu_pkt_r, // lsu packets + input el2_lsu_pkt_t lsu_pkt_m, // lsu packets + input el2_lsu_pkt_t lsu_pkt_d, // lsu packets + input logic addr_in_dccm_d, // address maps to dccm + input logic addr_in_pic_d, // address maps to pic + input logic addr_in_pic_m, // address maps to pic + 
input logic addr_in_dccm_m, + addr_in_dccm_r, // address in dccm per pipe stage + input logic addr_in_pic_r, // address in pic per pipe stage + input logic lsu_raw_fwd_lo_r, + lsu_raw_fwd_hi_r, + input logic lsu_commit_r, // lsu instruction in r commits + input logic ldst_dual_m, + ldst_dual_r, // load/store is unaligned at 32 bit boundary per pipe stage - // lsu address down the pipe - input logic [31:0] lsu_addr_d, - input logic [pt.DCCM_BITS-1:0] lsu_addr_m, - input logic [31:0] lsu_addr_r, + // lsu address down the pipe + input logic [ 31:0] lsu_addr_d, + input logic [pt.DCCM_BITS-1:0] lsu_addr_m, + input logic [ 31:0] lsu_addr_r, - // lsu address down the pipe - needed to check unaligned - input logic [pt.DCCM_BITS-1:0] end_addr_d, - input logic [pt.DCCM_BITS-1:0] end_addr_m, - input logic [pt.DCCM_BITS-1:0] end_addr_r, + // lsu address down the pipe - needed to check unaligned + input logic [pt.DCCM_BITS-1:0] end_addr_d, + input logic [pt.DCCM_BITS-1:0] end_addr_m, + input logic [pt.DCCM_BITS-1:0] end_addr_r, - input logic stbuf_reqvld_any, // write enable - input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned) + input logic stbuf_reqvld_any, // write enable + input logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // stbuf address (aligned) - input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf - input logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits - input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load - input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load - input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load - input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // the read out from stbuf + input logic [ pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // the encoded data with ECC bits + input logic 
[pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf fowarding to load + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf fowarding to load + input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf fowarding to load + input logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf fowarding to load - output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm - output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm - output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc - output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, - output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0 - output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0 + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_r, // right justified, ie load byte will have data at 7:0 + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_corr_r, // right justified & ECC corrected, ie load byte will have data at 7:0 - input logic lsu_double_ecc_error_r, // lsu has a DED - input logic single_ecc_error_hi_r, // sec detected on hi dccm bank - input logic single_ecc_error_lo_r, // sec detected on lower dccm bank - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data - input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // 
the encoded data with ECC bits - input logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits + input logic lsu_double_ecc_error_r, // lsu has a DED + input logic single_ecc_error_hi_r, // sec detected on hi dccm bank + input logic single_ecc_error_lo_r, // sec detected on lower dccm bank + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data + input logic [ pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // the encoded data with ECC bits + input logic [ pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // the encoded data with ECC bits - output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm - output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm - output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc - output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, - output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0 + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // data from the dccm + output logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // data from the dccm + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // data from the dccm + ecc + output logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, + output logic [pt.DCCM_DATA_WIDTH-1:0] lsu_ld_data_m, // right justified, ie load byte will have data at 7:0 - input logic lsu_double_ecc_error_m, // lsu has a DED - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data - input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data + input logic lsu_double_ecc_error_m, // lsu has a DED + input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data + 
input logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data - input logic [31:0] store_data_m, // Store data M-stage - input logic dma_dccm_wen, // Perform DMA writes only for word/dword - input logic dma_pic_wen, // Perform PIC writes - input logic [2:0] dma_mem_tag_m, // DMA Buffer entry number M-stage - input logic [31:0] dma_mem_addr, // DMA request address - input logic [63:0] dma_mem_wdata, // DMA write data - input logic [31:0] dma_dccm_wdata_lo, // Shift the dma data to lower bits to make it consistent to lsu stores - input logic [31:0] dma_dccm_wdata_hi, // Shift the dma data to lower bits to make it consistent to lsu stores - input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata - input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata + input logic [31:0] store_data_m, // Store data M-stage + input logic dma_dccm_wen, // Perform DMA writes only for word/dword + input logic dma_pic_wen, // Perform PIC writes + input logic [2:0] dma_mem_tag_m, // DMA Buffer entry number M-stage + input logic [31:0] dma_mem_addr, // DMA request address + input logic [63:0] dma_mem_wdata, // DMA write data + input logic [31:0] dma_dccm_wdata_lo, // Shift the dma data to lower bits to make it consistent to lsu stores + input logic [31:0] dma_dccm_wdata_hi, // Shift the dma data to lower bits to make it consistent to lsu stores + input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata + input logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata - output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, - output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, - output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm - output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm - output logic [31:0] store_data_r, // raw store data to be sent to bus - output logic ld_single_ecc_error_r, - output 
logic ld_single_ecc_error_r_ff, + output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, + output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // data from the dccm + output logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // data from the dccm + output logic [31:0] store_data_r, // raw store data to be sent to bus + output logic ld_single_ecc_error_r, + output logic ld_single_ecc_error_r_ff, - output logic [31:0] picm_mask_data_m, // pic data to stbuf - output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic - output logic lsu_dccm_rden_m, // dccm read - output logic lsu_dccm_rden_r, // dccm read + output logic [31:0] picm_mask_data_m, // pic data to stbuf + output logic lsu_stbuf_commit_any, // stbuf wins the dccm port or is to pic + output logic lsu_dccm_rden_m, // dccm read + output logic lsu_dccm_rden_r, // dccm read - output logic dccm_dma_rvalid, // dccm serviving the dma load - output logic dccm_dma_ecc_error, // DMA load had ecc error - output logic [2:0] dccm_dma_rtag, // DMA return tag - output logic [63:0] dccm_dma_rdata, // dccm data to dma request + output logic dccm_dma_rvalid, // dccm serviving the dma load + output logic dccm_dma_ecc_error, // DMA load had ecc error + output logic [ 2:0] dccm_dma_rtag, // DMA return tag + output logic [63:0] dccm_dma_rdata, // dccm data to dma request - // DCCM ports - output logic dccm_wren, // dccm interface -- write - output logic dccm_rden, // dccm interface -- write - output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr addr for lo bank - output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank - output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank - output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank - output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank - 
output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank + // DCCM ports + output logic dccm_wren, // dccm interface -- write + output logic dccm_rden, // dccm interface -- write + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_lo, // dccm interface -- wr addr for lo bank + output logic [pt.DCCM_BITS-1:0] dccm_wr_addr_hi, // dccm interface -- wr addr for hi bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_lo, // dccm interface -- read address for lo bank + output logic [pt.DCCM_BITS-1:0] dccm_rd_addr_hi, // dccm interface -- read address for hi bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_lo, // dccm write data for lo bank + output logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_wr_data_hi, // dccm write data for hi bank - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm - input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_lo, // dccm read data back from the dccm + input logic [pt.DCCM_FDATA_WIDTH-1:0] dccm_rd_data_hi, // dccm read data back from the dccm - // PIC ports - output logic picm_wren, // write to pic - output logic picm_rden, // read to pick - output logic picm_mken, // write to pic need a mask - output logic [31:0] picm_rdaddr, // address for pic read access - output logic [31:0] picm_wraddr, // address for pic write access - output logic [31:0] picm_wr_data, // write data - input logic [31:0] picm_rd_data, // read data + // PIC ports + output logic picm_wren, // write to pic + output logic picm_rden, // read to pick + output logic picm_mken, // write to pic need a mask + output logic [31:0] picm_rdaddr, // address for pic read access + output logic [31:0] picm_wraddr, // address for pic write access + output logic [31:0] picm_wr_data, // write data + input logic [31:0] picm_rd_data, // read data - input logic scan_mode // scan mode + input logic scan_mode // scan mode ); - localparam 
DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH); + localparam DCCM_WIDTH_BITS = $clog2(pt.DCCM_BYTE_WIDTH); - logic lsu_dccm_rden_d, lsu_dccm_wren_d; - logic ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r; - logic ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns; - logic ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff; - logic lsu_double_ecc_error_r_ff; - logic [pt.DCCM_BITS-1:0] ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff; - logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r_in, store_data_hi_r_in ; - logic [63:0] picm_rd_data_m; + logic lsu_dccm_rden_d, lsu_dccm_wren_d; + logic ld_single_ecc_error_lo_r, ld_single_ecc_error_hi_r; + logic ld_single_ecc_error_lo_r_ns, ld_single_ecc_error_hi_r_ns; + logic ld_single_ecc_error_lo_r_ff, ld_single_ecc_error_hi_r_ff; + logic lsu_double_ecc_error_r_ff; + logic [pt.DCCM_BITS-1:0] ld_sec_addr_lo_r_ff, ld_sec_addr_hi_r_ff; + logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r_in, store_data_hi_r_in; + logic [63:0] picm_rd_data_m; - logic dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi; - logic dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo; - logic kill_ecc_corr_lo_r, kill_ecc_corr_hi_r; + logic dccm_wr_bypass_d_m_hi, dccm_wr_bypass_d_r_hi; + logic dccm_wr_bypass_d_m_lo, dccm_wr_bypass_d_r_lo; + logic kill_ecc_corr_lo_r, kill_ecc_corr_hi_r; - // byte_en flowing down - logic [3:0] store_byteen_m ,store_byteen_r; - logic [7:0] store_byteen_ext_m, store_byteen_ext_r; + // byte_en flowing down + logic [3:0] store_byteen_m, store_byteen_r; + logic [7:0] store_byteen_ext_m, store_byteen_ext_r; - if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1 - logic [63:0] lsu_rdata_r, lsu_rdata_corr_r; - logic [63:0] dccm_rdata_r, dccm_rdata_corr_r; - logic [63:0] stbuf_fwddata_r; - logic [7:0] stbuf_fwdbyteen_r; - logic [31:0] stbuf_fwddata_lo_r, stbuf_fwddata_hi_r; - logic [3:0] stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r; - logic [31:0] lsu_rdata_lo_r, lsu_rdata_hi_r; - logic [63:0] picm_rd_data_r; - logic [63:32] lsu_ld_data_r_nc, 
lsu_ld_data_corr_r_nc; - logic [2:0] dma_mem_tag_r; - logic stbuf_fwddata_en; + if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1 + logic [63:0] lsu_rdata_r, lsu_rdata_corr_r; + logic [63:0] dccm_rdata_r, dccm_rdata_corr_r; + logic [63:0] stbuf_fwddata_r; + logic [ 7:0] stbuf_fwdbyteen_r; + logic [31:0] stbuf_fwddata_lo_r, stbuf_fwddata_hi_r; + logic [3:0] stbuf_fwdbyteen_lo_r, stbuf_fwdbyteen_hi_r; + logic [31:0] lsu_rdata_lo_r, lsu_rdata_hi_r; + logic [63:0] picm_rd_data_r; + logic [63:32] lsu_ld_data_r_nc, lsu_ld_data_corr_r_nc; + logic [2:0] dma_mem_tag_r; + logic stbuf_fwddata_en; - assign dccm_dma_rvalid = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma; - assign dccm_dma_ecc_error = lsu_double_ecc_error_r; - assign dccm_dma_rtag[2:0] = dma_mem_tag_r[2:0]; - assign dccm_dma_rdata[63:0] = ldst_dual_r ? lsu_rdata_corr_r[63:0] : {2{lsu_rdata_corr_r[31:0]}}; - assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]} = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0]; - assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0]; + assign dccm_dma_rvalid = lsu_pkt_r.valid & lsu_pkt_r.load & lsu_pkt_r.dma; + assign dccm_dma_ecc_error = lsu_double_ecc_error_r; + assign dccm_dma_rtag[2:0] = dma_mem_tag_r[2:0]; + assign dccm_dma_rdata[63:0] = ldst_dual_r ? 
lsu_rdata_corr_r[63:0] : {2{lsu_rdata_corr_r[31:0]}}; + assign {lsu_ld_data_r_nc[63:32], lsu_ld_data_r[31:0]} = lsu_rdata_r[63:0] >> 8*lsu_addr_r[1:0]; + assign {lsu_ld_data_corr_r_nc[63:32], lsu_ld_data_corr_r[31:0]} = lsu_rdata_corr_r[63:0] >> 8*lsu_addr_r[1:0]; - assign picm_rd_data_r[63:32] = picm_rd_data_r[31:0]; - assign dccm_rdata_r[63:0] = {dccm_rdata_hi_r[31:0],dccm_rdata_lo_r[31:0]}; - assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0],sec_data_lo_r[31:0]}; - assign stbuf_fwddata_r[63:0] = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}; - assign stbuf_fwdbyteen_r[7:0] = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}; - assign stbuf_fwddata_en = (|stbuf_fwdbyteen_hi_m[3:0]) | (|stbuf_fwdbyteen_lo_m[3:0]) | clk_override; + assign picm_rd_data_r[63:32] = picm_rd_data_r[31:0]; + assign dccm_rdata_r[63:0] = {dccm_rdata_hi_r[31:0], dccm_rdata_lo_r[31:0]}; + assign dccm_rdata_corr_r[63:0] = {sec_data_hi_r[31:0], sec_data_lo_r[31:0]}; + assign stbuf_fwddata_r[63:0] = {stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}; + assign stbuf_fwdbyteen_r[7:0] = {stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}; + assign stbuf_fwddata_en = (|stbuf_fwdbyteen_hi_m[3:0]) | (|stbuf_fwdbyteen_lo_m[3:0]) | clk_override; - for (genvar i=0; i<8; i++) begin: GenDMAData - assign lsu_rdata_corr_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : + for (genvar i = 0; i < 8; i++) begin : GenDMAData + assign lsu_rdata_corr_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : (addr_in_pic_r ? picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_corr_r[(8*i)+7:8*i])); - assign lsu_rdata_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : + assign lsu_rdata_r[(8*i)+7:8*i] = stbuf_fwdbyteen_r[i] ? stbuf_fwddata_r[(8*i)+7:8*i] : (addr_in_pic_r ? 
picm_rd_data_r[(8*i)+7:8*i] : ({8{addr_in_dccm_r}} & dccm_rdata_r[(8*i)+7:8*i])); - end - rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff (.*, .din(dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((lsu_dccm_rden_m & ldst_dual_m) | clk_override)); - rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff (.*, .din(dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_dccm_rden_m | clk_override)); - rvdffe #(2*pt.DCCM_ECC_WIDTH) dccm_data_ecc_r_ff (.*, .din({dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]}), - .dout({dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]}), .en(lsu_dccm_rden_m | clk_override)); - rvdff #(8) stbuf_fwdbyteen_ff (.*, .din({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}), .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}), .clk(lsu_c2_r_clk)); - rvdffe #(64) stbuf_fwddata_ff (.*, .din({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}), .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}), .en(stbuf_fwddata_en)); - rvdffe #(32) picm_rddata_rff (.*, .din(picm_rd_data_m[31:0]), .dout(picm_rd_data_r[31:0]), .en(addr_in_pic_m | clk_override)); - rvdff #(3) dma_mem_tag_rff (.*, .din(dma_mem_tag_m[2:0]), .dout(dma_mem_tag_r[2:0]), .clk(lsu_c1_r_clk)); + end + rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_hi_r_ff ( + .*, + .din (dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]), + .dout(dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]), + .en ((lsu_dccm_rden_m & ldst_dual_m) | clk_override) + ); + rvdffe #(pt.DCCM_DATA_WIDTH) dccm_rdata_lo_r_ff ( + .*, + .din (dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]), + .dout(dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]), + .en (lsu_dccm_rden_m | clk_override) + ); + rvdffe #(2 * pt.DCCM_ECC_WIDTH) dccm_data_ecc_r_ff ( + .*, + .din({ + dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] + }), + .dout({ + 
dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0], dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0] + }), + .en(lsu_dccm_rden_m | clk_override) + ); + rvdff #(8) stbuf_fwdbyteen_ff ( + .*, + .din ({stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}), + .dout({stbuf_fwdbyteen_hi_r[3:0], stbuf_fwdbyteen_lo_r[3:0]}), + .clk (lsu_c2_r_clk) + ); + rvdffe #(64) stbuf_fwddata_ff ( + .*, + .din ({stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}), + .dout({stbuf_fwddata_hi_r[31:0], stbuf_fwddata_lo_r[31:0]}), + .en (stbuf_fwddata_en) + ); + rvdffe #(32) picm_rddata_rff ( + .*, + .din (picm_rd_data_m[31:0]), + .dout(picm_rd_data_r[31:0]), + .en (addr_in_pic_m | clk_override) + ); + rvdff #(3) dma_mem_tag_rff ( + .*, + .din (dma_mem_tag_m[2:0]), + .dout(dma_mem_tag_r[2:0]), + .clk (lsu_c1_r_clk) + ); - end else begin: L2U_Plus1_0 + end else begin : L2U_Plus1_0 - logic [63:0] lsu_rdata_m, lsu_rdata_corr_m; - logic [63:0] dccm_rdata_m, dccm_rdata_corr_m; - logic [63:0] stbuf_fwddata_m; - logic [7:0] stbuf_fwdbyteen_m; - logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc; - logic [31:0] lsu_ld_data_corr_m; + logic [63:0] lsu_rdata_m, lsu_rdata_corr_m; + logic [63:0] dccm_rdata_m, dccm_rdata_corr_m; + logic [63:0] stbuf_fwddata_m; + logic [ 7:0] stbuf_fwdbyteen_m; + logic [63:32] lsu_ld_data_m_nc, lsu_ld_data_corr_m_nc; + logic [31:0] lsu_ld_data_corr_m; - assign dccm_dma_rvalid = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma; - assign dccm_dma_ecc_error = lsu_double_ecc_error_m; - assign dccm_dma_rtag[2:0] = dma_mem_tag_m[2:0]; - assign dccm_dma_rdata[63:0] = ldst_dual_m ? 
lsu_rdata_corr_m[63:0] : {2{lsu_rdata_corr_m[31:0]}}; - assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0]; - assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0]; + assign dccm_dma_rvalid = lsu_pkt_m.valid & lsu_pkt_m.load & lsu_pkt_m.dma; + assign dccm_dma_ecc_error = lsu_double_ecc_error_m; + assign dccm_dma_rtag[2:0] = dma_mem_tag_m[2:0]; + assign dccm_dma_rdata[63:0] = ldst_dual_m ? lsu_rdata_corr_m[63:0] : {2{lsu_rdata_corr_m[31:0]}}; + assign {lsu_ld_data_m_nc[63:32], lsu_ld_data_m[31:0]} = lsu_rdata_m[63:0] >> 8*lsu_addr_m[1:0]; + assign {lsu_ld_data_corr_m_nc[63:32], lsu_ld_data_corr_m[31:0]} = lsu_rdata_corr_m[63:0] >> 8*lsu_addr_m[1:0]; - assign dccm_rdata_m[63:0] = {dccm_rdata_hi_m[31:0],dccm_rdata_lo_m[31:0]}; - assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0],sec_data_lo_m[31:0]}; - assign stbuf_fwddata_m[63:0] = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}; - assign stbuf_fwdbyteen_m[7:0] = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}; + assign dccm_rdata_m[63:0] = {dccm_rdata_hi_m[31:0], dccm_rdata_lo_m[31:0]}; + assign dccm_rdata_corr_m[63:0] = {sec_data_hi_m[31:0], sec_data_lo_m[31:0]}; + assign stbuf_fwddata_m[63:0] = {stbuf_fwddata_hi_m[31:0], stbuf_fwddata_lo_m[31:0]}; + assign stbuf_fwdbyteen_m[7:0] = {stbuf_fwdbyteen_hi_m[3:0], stbuf_fwdbyteen_lo_m[3:0]}; - for (genvar i=0; i<8; i++) begin: GenLoop - assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] : + for (genvar i = 0; i < 8; i++) begin : GenLoop + assign lsu_rdata_corr_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] : (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_corr_m[(8*i)+7:8*i])); - assign lsu_rdata_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? stbuf_fwddata_m[(8*i)+7:8*i] : + assign lsu_rdata_m[(8*i)+7:8*i] = stbuf_fwdbyteen_m[i] ? 
stbuf_fwddata_m[(8*i)+7:8*i] : (addr_in_pic_m ? picm_rd_data_m[(8*i)+7:8*i] : ({8{addr_in_dccm_m}} & dccm_rdata_m[(8*i)+7:8*i])); - end + end - rvdffe #(32) lsu_ld_data_corr_rff(.*, .din(lsu_ld_data_corr_m[31:0]), .dout(lsu_ld_data_corr_r[31:0]), .en((lsu_pkt_m.valid & lsu_pkt_m.load & (addr_in_pic_m | addr_in_dccm_m)) | clk_override)); - end + rvdffe #(32) lsu_ld_data_corr_rff ( + .*, + .din (lsu_ld_data_corr_m[31:0]), + .dout(lsu_ld_data_corr_r[31:0]), + .en ((lsu_pkt_m.valid & lsu_pkt_m.load & (addr_in_pic_m | addr_in_dccm_m)) | clk_override) + ); + end - assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | + assign kill_ecc_corr_lo_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | (((lsu_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m); - assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | + assign kill_ecc_corr_hi_r = (((lsu_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_d[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_d.valid & lsu_pkt_d.store & lsu_pkt_d.dma & addr_in_dccm_d) | (((lsu_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) | (end_addr_m[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2])) & lsu_pkt_m.valid & lsu_pkt_m.store & lsu_pkt_m.dma & addr_in_dccm_m); - assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r; - 
assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r; - assign ld_single_ecc_error_r = (ld_single_ecc_error_lo_r | ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r; + assign ld_single_ecc_error_lo_r = lsu_pkt_r.load & single_ecc_error_lo_r & ~lsu_raw_fwd_lo_r; + assign ld_single_ecc_error_hi_r = lsu_pkt_r.load & single_ecc_error_hi_r & ~lsu_raw_fwd_hi_r; + assign ld_single_ecc_error_r = (ld_single_ecc_error_lo_r | ld_single_ecc_error_hi_r) & ~lsu_double_ecc_error_r; - assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r; - assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r; - assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff; + assign ld_single_ecc_error_lo_r_ns = ld_single_ecc_error_lo_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_lo_r; + assign ld_single_ecc_error_hi_r_ns = ld_single_ecc_error_hi_r & (lsu_commit_r | lsu_pkt_r.dma) & ~kill_ecc_corr_hi_r; + assign ld_single_ecc_error_r_ff = (ld_single_ecc_error_lo_r_ff | ld_single_ecc_error_hi_r_ff) & ~lsu_double_ecc_error_r_ff; - assign lsu_stbuf_commit_any = stbuf_reqvld_any & + assign lsu_stbuf_commit_any = stbuf_reqvld_any & (~(lsu_dccm_rden_d | lsu_dccm_wren_d | ld_single_ecc_error_r_ff) | (lsu_dccm_rden_d & ~((stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == lsu_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS]) | (stbuf_addr_any[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS] == end_addr_d[pt.DCCM_WIDTH_BITS+:pt.DCCM_BANK_BITS])))); - // No need to read for aligned word/dword stores since ECC will come by new data completely - assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d; + // No need to read for aligned word/dword stores since ECC will come by new 
data completely + assign lsu_dccm_rden_d = lsu_pkt_d.valid & (lsu_pkt_d.load | (lsu_pkt_d.store & (~(lsu_pkt_d.word | lsu_pkt_d.dword) | (lsu_addr_d[1:0] != 2'b0)))) & addr_in_dccm_d; - // DMA will read/write in decode stage - assign lsu_dccm_wren_d = dma_dccm_wen; + // DMA will read/write in decode stage + assign lsu_dccm_wren_d = dma_dccm_wen; - // DCCM inputs - assign dccm_wren = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff; - assign dccm_rden = lsu_dccm_rden_d & addr_in_dccm_d; - assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) : + // DCCM inputs + assign dccm_wren = lsu_dccm_wren_d | lsu_stbuf_commit_any | ld_single_ecc_error_r_ff; + assign dccm_rden = lsu_dccm_rden_d & addr_in_dccm_d; + assign dccm_wr_addr_lo[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]) : lsu_dccm_wren_d ? lsu_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0]; - assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) : + assign dccm_wr_addr_hi[pt.DCCM_BITS-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] : ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]) : lsu_dccm_wren_d ? end_addr_d[pt.DCCM_BITS-1:0] : stbuf_addr_any[pt.DCCM_BITS-1:0]; - assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0] = lsu_addr_d[pt.DCCM_BITS-1:0]; - assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0] = end_addr_d[pt.DCCM_BITS-1:0]; - assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? 
{sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : + assign dccm_rd_addr_lo[pt.DCCM_BITS-1:0] = lsu_addr_d[pt.DCCM_BITS-1:0]; + assign dccm_rd_addr_hi[pt.DCCM_BITS-1:0] = end_addr_d[pt.DCCM_BITS-1:0]; + assign dccm_wr_data_lo[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_lo_r_ff ? {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) : (dma_dccm_wen ? {dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0]} : {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]}); - assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : + assign dccm_wr_data_hi[pt.DCCM_FDATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? (ld_single_ecc_error_hi_r_ff ? {sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]} : {sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0],sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]}) : (dma_dccm_wen ? 
{dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0],dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0]} : {stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0],stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]}); - // DCCM outputs - assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} & + // DCCM outputs + assign store_byteen_m[3:0] = {4{lsu_pkt_m.store}} & (({4{lsu_pkt_m.by}} & 4'b0001) | ({4{lsu_pkt_m.half}} & 4'b0011) | ({4{lsu_pkt_m.word}} & 4'b1111)); - assign store_byteen_r[3:0] = {4{lsu_pkt_r.store}} & + assign store_byteen_r[3:0] = {4{lsu_pkt_r.store}} & (({4{lsu_pkt_r.by}} & 4'b0001) | ({4{lsu_pkt_r.half}} & 4'b0011) | ({4{lsu_pkt_r.word}} & 4'b1111)); - assign store_byteen_ext_m[7:0] = {4'b0,store_byteen_m[3:0]} << lsu_addr_m[1:0]; // The packet in m - assign store_byteen_ext_r[7:0] = {4'b0,store_byteen_r[3:0]} << lsu_addr_r[1:0]; + assign store_byteen_ext_m[7:0] = {4'b0,store_byteen_m[3:0]} << lsu_addr_m[1:0]; // The packet in m + assign store_byteen_ext_r[7:0] = {4'b0, store_byteen_r[3:0]} << lsu_addr_r[1:0]; - assign dccm_wr_bypass_d_m_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; - assign dccm_wr_bypass_d_m_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; + assign dccm_wr_bypass_d_m_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; + assign dccm_wr_bypass_d_m_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_m[pt.DCCM_BITS-1:2]) & addr_in_dccm_m; - assign dccm_wr_bypass_d_r_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; - assign dccm_wr_bypass_d_r_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; + assign dccm_wr_bypass_d_r_lo = (stbuf_addr_any[pt.DCCM_BITS-1:2] == lsu_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; + assign dccm_wr_bypass_d_r_hi = (stbuf_addr_any[pt.DCCM_BITS-1:2] == end_addr_r[pt.DCCM_BITS-1:2]) & addr_in_dccm_r; - if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1 - logic 
dccm_wren_Q; - logic [31:0] dccm_wr_data_Q; - logic dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q; - logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r; + if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U1_Plus1_1 + logic dccm_wren_Q; + logic [31:0] dccm_wr_data_Q; + logic dccm_wr_bypass_d_m_lo_Q, dccm_wr_bypass_d_m_hi_Q; + logic [31:0] store_data_pre_hi_r, store_data_pre_lo_r; - assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0]; + assign {store_data_pre_hi_r[31:0], store_data_pre_lo_r[31:0]} = {32'b0,store_data_r[31:0]} << 8*lsu_addr_r[1:0]; - for (genvar i=0; i<4; i++) begin - assign store_data_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]); - assign store_data_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]); + for (genvar i = 0; i < 4; i++) begin + assign store_data_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)]); + assign store_data_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)]); - assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : + assign store_datafn_lo_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i] ? store_data_pre_lo_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_lo_Q) ? 
dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_lo_r[(8*i)+7:(8*i)])); - assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : + assign store_datafn_hi_r[(8*i)+7:(8*i)] = store_byteen_ext_r[i+4] ? store_data_pre_hi_r[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : ((dccm_wren_Q & dccm_wr_bypass_d_m_hi_Q) ? dccm_wr_data_Q[(8*i)+7:(8*i)] : sec_data_hi_r[(8*i)+7:(8*i)])); - end + end - rvdff #(1) dccm_wren_ff (.*, .din(lsu_stbuf_commit_any), .dout(dccm_wren_Q), .clk(lsu_free_c2_clk)); // ECC load errors writing to dccm shouldn't fwd to stores in pipe - rvdffe #(32) dccm_wrdata_ff (.*, .din(stbuf_data_any[31:0]), .dout(dccm_wr_data_Q[31:0]), .en(lsu_stbuf_commit_any | clk_override), .clk(clk)); - rvdff #(1) dccm_wrbyp_dm_loff (.*, .din(dccm_wr_bypass_d_m_lo), .dout(dccm_wr_bypass_d_m_lo_Q), .clk(lsu_free_c2_clk)); - rvdff #(1) dccm_wrbyp_dm_hiff (.*, .din(dccm_wr_bypass_d_m_hi), .dout(dccm_wr_bypass_d_m_hi_Q), .clk(lsu_free_c2_clk)); - rvdff #(32) store_data_rff (.*, .din(store_data_m[31:0]), .dout(store_data_r[31:0]), .clk(lsu_store_c1_r_clk)); + rvdff #(1) dccm_wren_ff ( + .*, + .din (lsu_stbuf_commit_any), + .dout(dccm_wren_Q), + .clk (lsu_free_c2_clk) + ); // ECC load errors writing to dccm shouldn't fwd to stores in pipe + rvdffe #(32) dccm_wrdata_ff ( + .*, + .din (stbuf_data_any[31:0]), + .dout(dccm_wr_data_Q[31:0]), + .en (lsu_stbuf_commit_any | clk_override), + .clk (clk) + ); + rvdff #(1) dccm_wrbyp_dm_loff ( + .*, + .din (dccm_wr_bypass_d_m_lo), + .dout(dccm_wr_bypass_d_m_lo_Q), + .clk (lsu_free_c2_clk) + ); + rvdff #(1) dccm_wrbyp_dm_hiff ( + .*, + .din (dccm_wr_bypass_d_m_hi), + .dout(dccm_wr_bypass_d_m_hi_Q), + .clk (lsu_free_c2_clk) + ); + rvdff #(32) store_data_rff ( + .*, + .din (store_data_m[31:0]), + .dout(store_data_r[31:0]), + .clk (lsu_store_c1_r_clk) + ); - end else begin: 
L2U1_Plus1_0 + end else begin : L2U1_Plus1_0 - logic [31:0] store_data_hi_m, store_data_lo_m; - logic [63:0] store_data_mask; - assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0]; + logic [31:0] store_data_hi_m, store_data_lo_m; + logic [63:0] store_data_mask; + assign {store_data_hi_m[31:0] , store_data_lo_m[31:0]} = {32'b0,store_data_m[31:0]} << 8*lsu_addr_m[1:0]; - for (genvar i=0; i<4; i++) begin - assign store_data_hi_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] : + for (genvar i = 0; i < 4; i++) begin + assign store_data_hi_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i+4] ? store_data_hi_m[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_hi) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_hi_m[(8*i)+7:(8*i)]); - assign store_data_lo_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i] ? store_data_lo_m[(8*i)+7:(8*i)] : + assign store_data_lo_r_in[(8*i)+7:(8*i)] = store_byteen_ext_m[i] ? store_data_lo_m[(8*i)+7:(8*i)] : ((lsu_stbuf_commit_any & dccm_wr_bypass_d_m_lo) ? stbuf_data_any[(8*i)+7:(8*i)] : sec_data_lo_m[(8*i)+7:(8*i)]); - assign store_datafn_lo_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_lo & ~store_byteen_ext_r[i]) ? stbuf_data_any[(8*i)+7:(8*i)] : store_data_lo_r[(8*i)+7:(8*i)]; - assign store_datafn_hi_r[(8*i)+7:(8*i)] = (lsu_stbuf_commit_any & dccm_wr_bypass_d_r_hi & ~store_byteen_ext_r[i+4]) ? 
stbuf_data_any[(8*i)+7:(8*i)] : store_data_hi_r[(8*i)+7:(8*i)]; - end // for (genvar i=0; i> 8*lsu_addr_r[1:0]) & store_data_mask[31:0]; + for (genvar i = 0; i < 4; i++) begin + assign store_data_mask[(8*i)+7:(8*i)] = {8{store_byteen_r[i]}}; + end + assign store_data_r[31:0] = 32'({store_data_hi_r[31:0],store_data_lo_r[31:0]} >> 8*lsu_addr_r[1:0]) & store_data_mask[31:0]; - rvdffe #(pt.DCCM_DATA_WIDTH) store_data_hi_rff (.*, .din(store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en((ldst_dual_m & lsu_pkt_m.valid & lsu_pkt_m.store) | clk_override), .clk(clk)); - rvdff #(pt.DCCM_DATA_WIDTH) store_data_lo_rff (.*, .din(store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]), .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .clk(lsu_store_c1_r_clk)); + rvdffe #(pt.DCCM_DATA_WIDTH) store_data_hi_rff ( + .*, + .din (store_data_hi_r_in[pt.DCCM_DATA_WIDTH-1:0]), + .dout(store_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), + .en ((ldst_dual_m & lsu_pkt_m.valid & lsu_pkt_m.store) | clk_override), + .clk (clk) + ); + rvdff #(pt.DCCM_DATA_WIDTH) store_data_lo_rff ( + .*, + .din (store_data_lo_r_in[pt.DCCM_DATA_WIDTH-1:0]), + .dout(store_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), + .clk (lsu_store_c1_r_clk) + ); - end + end - assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0]; // for ld choose dccm_out - assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0]; // for ld this is used for ecc + assign dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_DATA_WIDTH-1:0]; // for ld choose dccm_out + assign dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_DATA_WIDTH-1:0]; // for ld this is used for ecc - assign dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; - assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; + assign 
dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_lo[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; + assign dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0] = dccm_rd_data_hi[pt.DCCM_FDATA_WIDTH-1:pt.DCCM_DATA_WIDTH]; - // PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits - assign picm_wren = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen; - assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d; - assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d; // Get the mask for stores - assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]}; + // PIC signals. PIC ignores the lower 2 bits of address since PIC memory registers are 32-bits + assign picm_wren = (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_pic_r & lsu_commit_r) | dma_pic_wen; + assign picm_rden = lsu_pkt_d.valid & lsu_pkt_d.load & addr_in_pic_d; + assign picm_mken = lsu_pkt_d.valid & lsu_pkt_d.store & addr_in_pic_d; // Get the mask for stores + assign picm_rdaddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},lsu_addr_d[pt.PIC_BITS-1:0]}; - assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])}; + assign picm_wraddr[31:0] = pt.PIC_BASE_ADDR | {{32-pt.PIC_BITS{1'b0}},(dma_pic_wen ? dma_mem_addr[pt.PIC_BITS-1:0] : lsu_addr_r[pt.PIC_BITS-1:0])}; - assign picm_wr_data[31:0] = dma_pic_wen ? dma_mem_wdata[31:0] : store_datafn_lo_r[31:0]; + assign picm_wr_data[31:0] = dma_pic_wen ? 
dma_mem_wdata[31:0] : store_datafn_lo_r[31:0]; - assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0]; - assign picm_rd_data_m[63:0] = {picm_rd_data[31:0],picm_rd_data[31:0]}; + assign picm_mask_data_m[31:0] = picm_rd_data_m[31:0]; + assign picm_rd_data_m[63:0] = {picm_rd_data[31:0], picm_rd_data[31:0]}; - if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable - rvdff #(1) dccm_rden_mff (.*, .din(lsu_dccm_rden_d), .dout(lsu_dccm_rden_m), .clk(lsu_c2_m_clk)); - rvdff #(1) dccm_rden_rff (.*, .din(lsu_dccm_rden_m), .dout(lsu_dccm_rden_r), .clk(lsu_c2_r_clk)); + if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable + rvdff #(1) dccm_rden_mff ( + .*, + .din (lsu_dccm_rden_d), + .dout(lsu_dccm_rden_m), + .clk (lsu_c2_m_clk) + ); + rvdff #(1) dccm_rden_rff ( + .*, + .din (lsu_dccm_rden_m), + .dout(lsu_dccm_rden_r), + .clk (lsu_c2_r_clk) + ); - // ECC correction flops since dccm write happens next cycle - // We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M. 
- // In that case these (_ff) flops are needed only in plus1 configuration - rvdff #(1) ld_double_ecc_error_rff (.*, .din(lsu_double_ecc_error_r), .dout(lsu_double_ecc_error_r_ff), .clk(lsu_free_c2_clk)); - rvdff #(1) ld_single_ecc_error_hi_rff (.*, .din(ld_single_ecc_error_hi_r_ns), .dout(ld_single_ecc_error_hi_r_ff), .clk(lsu_free_c2_clk)); - rvdff #(1) ld_single_ecc_error_lo_rff (.*, .din(ld_single_ecc_error_lo_r_ns), .dout(ld_single_ecc_error_lo_r_ff), .clk(lsu_free_c2_clk)); - rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff (.*, .din(end_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk)); - rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff (.*, .din(lsu_addr_r[pt.DCCM_BITS-1:0]), .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk)); + // ECC correction flops since dccm write happens next cycle + // We are writing to dccm in r+1 for ecc correction since fast_int needs to be blocked in decode - 1. We can probably write in r for plus0 configuration since we know ecc error in M. 
+ // In that case these (_ff) flops are needed only in plus1 configuration + rvdff #(1) ld_double_ecc_error_rff ( + .*, + .din (lsu_double_ecc_error_r), + .dout(lsu_double_ecc_error_r_ff), + .clk (lsu_free_c2_clk) + ); + rvdff #(1) ld_single_ecc_error_hi_rff ( + .*, + .din (ld_single_ecc_error_hi_r_ns), + .dout(ld_single_ecc_error_hi_r_ff), + .clk (lsu_free_c2_clk) + ); + rvdff #(1) ld_single_ecc_error_lo_rff ( + .*, + .din (ld_single_ecc_error_lo_r_ns), + .dout(ld_single_ecc_error_lo_r_ff), + .clk (lsu_free_c2_clk) + ); + rvdffe #(pt.DCCM_BITS) ld_sec_addr_hi_rff ( + .*, + .din (end_addr_r[pt.DCCM_BITS-1:0]), + .dout(ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0]), + .en (ld_single_ecc_error_r | clk_override), + .clk (clk) + ); + rvdffe #(pt.DCCM_BITS) ld_sec_addr_lo_rff ( + .*, + .din (lsu_addr_r[pt.DCCM_BITS-1:0]), + .dout(ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0]), + .en (ld_single_ecc_error_r | clk_override), + .clk (clk) + ); - end else begin: Gen_dccm_disable - assign lsu_dccm_rden_m = '0; - assign lsu_dccm_rden_r = '0; + end else begin : Gen_dccm_disable + assign lsu_dccm_rden_m = '0; + assign lsu_dccm_rden_r = '0; - assign lsu_double_ecc_error_r_ff = 1'b0; - assign ld_single_ecc_error_hi_r_ff = 1'b0; - assign ld_single_ecc_error_lo_r_ff = 1'b0; - assign ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] = '0; - assign ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] = '0; - end + assign lsu_double_ecc_error_r_ff = 1'b0; + assign ld_single_ecc_error_hi_r_ff = 1'b0; + assign ld_single_ecc_error_lo_r_ff = 1'b0; + assign ld_sec_addr_hi_r_ff[pt.DCCM_BITS-1:0] = '0; + assign ld_sec_addr_lo_r_ff[pt.DCCM_BITS-1:0] = '0; + end endmodule diff --git a/Flow/design/lsu/el2_lsu_dccm_mem.sv b/Flow/design/lsu/el2_lsu_dccm_mem.sv index 5892dba..ed426f3 100644 --- a/Flow/design/lsu/el2_lsu_dccm_mem.sv +++ b/Flow/design/lsu/el2_lsu_dccm_mem.sv @@ -27,17 +27,14 @@ // //******************************************************************************** -`define EL2_LOCAL_DCCM_RAM_TEST_PORTS 
.TEST1(dccm_ext_in_pkt[i].TEST1), \ - .RME(dccm_ext_in_pkt[i].RME), \ - .RM(dccm_ext_in_pkt[i].RM), \ - .LS(dccm_ext_in_pkt[i].LS), \ - .DS(dccm_ext_in_pkt[i].DS), \ - .SD(dccm_ext_in_pkt[i].SD), \ - .TEST_RNM(dccm_ext_in_pkt[i].TEST_RNM), \ - .BC1(dccm_ext_in_pkt[i].BC1), \ - .BC2(dccm_ext_in_pkt[i].BC2), \ - - +`define EL2_LOCAL_DCCM_RAM_TEST_PORTS .TEST1(dccm_ext_in_pkt[i].TEST1),\ + .RME(dccm_ext_in_pkt[i].RME),\ + .RM(dccm_ext_in_pkt[i].RM),\ + .LS(dccm_ext_in_pkt[i].LS),\ + .DS(dccm_ext_in_pkt[i].DS),\ + .SD(dccm_ext_in_pkt[i].SD),\ + .TEST_RNM(dccm_ext_in_pkt[i].TEST_RNM),\ + .BC1(dccm_ext_in_pkt[i].BC1),.BC2(dccm_ext_in_pkt[i].BC2), module el2_lsu_dccm_mem import el2_pkg::*; diff --git a/Flow/design/lsu/el2_lsu_ecc.sv b/Flow/design/lsu/el2_lsu_ecc.sv index 99484e0..5c1f081 100644 --- a/Flow/design/lsu/el2_lsu_ecc.sv +++ b/Flow/design/lsu/el2_lsu_ecc.sv @@ -26,216 +26,269 @@ // //******************************************************************************** module el2_lsu_ecc -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) -( - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - input logic lsu_c2_r_clk, // clock - input logic clk_override, // Override non-functional clock gating - input logic rst_l, // reset, active low - input logic scan_mode, // scan mode + `include "el2_param.vh" +) ( + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. 
+ input logic lsu_c2_r_clk, // clock + input logic clk_override, // Override non-functional clock gating + input logic rst_l, // reset, active low + input logic scan_mode, // scan mode - input el2_lsu_pkt_t lsu_pkt_m, // packet in m - input el2_lsu_pkt_t lsu_pkt_r, // packet in r - input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, + input el2_lsu_pkt_t lsu_pkt_m, // packet in m + input el2_lsu_pkt_t lsu_pkt_r, // packet in r + input logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, - input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging + input logic dec_tlu_core_ecc_disable, // disables the ecc computation and error flagging - input logic lsu_dccm_rden_r, // dccm rden - input logic addr_in_dccm_r, // address in dccm - input logic [pt.DCCM_BITS-1:0] lsu_addr_r, // start address - input logic [pt.DCCM_BITS-1:0] end_addr_r, // end address - input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm - input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm - input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, // data from the dccm + ecc - input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, // data from the dccm + ecc - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data R-stage - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data R-stage - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data R+1 stage - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data R+1 stage + input logic lsu_dccm_rden_r, // dccm rden + input logic addr_in_dccm_r, // address in dccm + input logic [ pt.DCCM_BITS-1:0] lsu_addr_r, // start address + input logic [ pt.DCCM_BITS-1:0] end_addr_r, // end address + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_r, // data from the dccm + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_r, // data from the dccm + input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_r, 
// data from the dccm + ecc + input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_r, // data from the dccm + ecc + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r, // corrected dccm data R-stage + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r, // corrected dccm data R-stage + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_r_ff, // corrected dccm data R+1 stage + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_r_ff, // corrected dccm data R+1 stage - input logic ld_single_ecc_error_r, // ld has a single ecc error - input logic ld_single_ecc_error_r_ff, // ld has a single ecc error - input logic lsu_dccm_rden_m, // dccm rden - input logic addr_in_dccm_m, // address in dccm - input logic [pt.DCCM_BITS-1:0] lsu_addr_m, // start address - input logic [pt.DCCM_BITS-1:0] end_addr_m, // end address - input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem - input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem - input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem - input logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data M-stage - output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data M-stage + input logic ld_single_ecc_error_r, // ld has a single ecc error + input logic ld_single_ecc_error_r_ff, // ld has a single ecc error + input logic lsu_dccm_rden_m, // dccm rden + input logic addr_in_dccm_m, // address in dccm + input logic [ pt.DCCM_BITS-1:0] lsu_addr_m, // start address + input logic [ pt.DCCM_BITS-1:0] end_addr_m, // end address + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_m, // raw data from mem + input logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_lo_m, // raw data from mem + input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_m, // ecc read out from mem + input logic [ pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_lo_m, // ecc read out from mem + output logic 
[pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_m, // corrected dccm data M-stage + output logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_lo_m, // corrected dccm data M-stage - input logic dma_dccm_wen, // Perform DMA writes only for word/dword - input logic [31:0] dma_dccm_wdata_lo, // Shifted dma data to lower bits to make it consistent to lsu stores - input logic [31:0] dma_dccm_wdata_hi, // Shifted dma data to lower bits to make it consistent to lsu stores - output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata - output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata + input logic dma_dccm_wen, // Perform DMA writes only for word/dword + input logic [31:0] dma_dccm_wdata_lo, // Shifted dma data to lower bits to make it consistent to lsu stores + input logic [31:0] dma_dccm_wdata_hi, // Shifted dma data to lower bits to make it consistent to lsu stores + output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_hi, // ECC bits for the DMA wdata + output logic [pt.DCCM_ECC_WIDTH-1:0] dma_dccm_wdata_ecc_lo, // ECC bits for the DMA wdata - output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // Encoded data with ECC bits - output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // Encoded data with ECC bits - output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // Encoded data with ECC bits + output logic [pt.DCCM_ECC_WIDTH-1:0] stbuf_ecc_any, // Encoded data with ECC bits + output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_hi_r_ff, // Encoded data with ECC bits + output logic [pt.DCCM_ECC_WIDTH-1:0] sec_data_ecc_lo_r_ff, // Encoded data with ECC bits - output logic single_ecc_error_hi_r, // sec detected - output logic single_ecc_error_lo_r, // sec detected on lower dccm bank - output logic lsu_single_ecc_error_r, // or of the 2 - output logic lsu_double_ecc_error_r, // double error detected + output logic single_ecc_error_hi_r, // sec detected + output logic single_ecc_error_lo_r, // sec detected on lower 
dccm bank + output logic lsu_single_ecc_error_r, // or of the 2 + output logic lsu_double_ecc_error_r, // double error detected - output logic lsu_single_ecc_error_m, // or of the 2 - output logic lsu_double_ecc_error_m // double error detected + output logic lsu_single_ecc_error_m, // or of the 2 + output logic lsu_double_ecc_error_m // double error detected - ); +); - logic is_ldst_r; - logic is_ldst_hi_any, is_ldst_lo_any; - logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any; - logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any; - logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any; - logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any; - logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any; - logic single_ecc_error_hi_any, single_ecc_error_lo_any; - logic double_ecc_error_hi_any, double_ecc_error_lo_any; + logic is_ldst_r; + logic is_ldst_hi_any, is_ldst_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] dccm_wdata_hi_any, dccm_wdata_lo_any; + logic [pt.DCCM_ECC_WIDTH-1:0] dccm_wdata_ecc_hi_any, dccm_wdata_ecc_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] dccm_rdata_hi_any, dccm_rdata_lo_any; + logic [pt.DCCM_ECC_WIDTH-1:0] dccm_data_ecc_hi_any, dccm_data_ecc_lo_any; + logic [pt.DCCM_DATA_WIDTH-1:0] sec_data_hi_any, sec_data_lo_any; + logic single_ecc_error_hi_any, single_ecc_error_lo_any; + logic double_ecc_error_hi_any, double_ecc_error_lo_any; - logic double_ecc_error_hi_m, double_ecc_error_lo_m; - logic double_ecc_error_hi_r, double_ecc_error_lo_r; + logic double_ecc_error_hi_m, double_ecc_error_lo_m; + logic double_ecc_error_hi_r, double_ecc_error_lo_r; - logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc; + logic [6:0] ecc_out_hi_nc, ecc_out_lo_nc; - if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1 - logic ldst_dual_m, ldst_dual_r; - logic is_ldst_m; - logic is_ldst_hi_r, is_ldst_lo_r; + if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1 + logic ldst_dual_m, ldst_dual_r; + logic is_ldst_m; + 
logic is_ldst_hi_r, is_ldst_lo_r; - assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]); - assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | lsu_pkt_r.store) & addr_in_dccm_r & lsu_dccm_rden_r; - assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable; - assign is_ldst_hi_r = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA + assign ldst_dual_r = (lsu_addr_r[2] != end_addr_r[2]); + assign is_ldst_r = lsu_pkt_r.valid & (lsu_pkt_r.load | lsu_pkt_r.store) & addr_in_dccm_r & lsu_dccm_rden_r; + assign is_ldst_lo_r = is_ldst_r & ~dec_tlu_core_ecc_disable; + assign is_ldst_hi_r = is_ldst_r & ldst_dual_r & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA - assign is_ldst_hi_any = is_ldst_hi_r; - assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]; - assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0]; - assign is_ldst_lo_any = is_ldst_lo_r; - assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]; - assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_hi_any = is_ldst_hi_r; + assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_r[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_r[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_lo_any = is_ldst_lo_r; + assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_r[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_r[pt.DCCM_ECC_WIDTH-1:0]; - assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; - assign single_ecc_error_hi_r = single_ecc_error_hi_any; - assign double_ecc_error_hi_r = double_ecc_error_hi_any; - assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = 
sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; - assign single_ecc_error_lo_r = single_ecc_error_lo_any; - assign double_ecc_error_lo_r = double_ecc_error_lo_any; + assign sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; + assign single_ecc_error_hi_r = single_ecc_error_hi_any; + assign double_ecc_error_hi_r = double_ecc_error_hi_any; + assign sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; + assign single_ecc_error_lo_r = single_ecc_error_lo_any; + assign double_ecc_error_lo_r = double_ecc_error_lo_any; - assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r; - assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r; + assign lsu_single_ecc_error_r = single_ecc_error_hi_r | single_ecc_error_lo_r; + assign lsu_double_ecc_error_r = double_ecc_error_hi_r | double_ecc_error_lo_r; - end else begin: L2U_Plus1_0 + end else begin : L2U_Plus1_0 - logic ldst_dual_m; - logic is_ldst_m; - logic is_ldst_hi_m, is_ldst_lo_m; + logic ldst_dual_m; + logic is_ldst_m; + logic is_ldst_hi_m, is_ldst_lo_m; - assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]); - assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & addr_in_dccm_m & lsu_dccm_rden_m; - assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable; - assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA + assign ldst_dual_m = (lsu_addr_m[2] != end_addr_m[2]); + assign is_ldst_m = lsu_pkt_m.valid & (lsu_pkt_m.load | lsu_pkt_m.store) & addr_in_dccm_m & lsu_dccm_rden_m; + assign is_ldst_lo_m = is_ldst_m & ~dec_tlu_core_ecc_disable; + assign is_ldst_hi_m = is_ldst_m & (ldst_dual_m | lsu_pkt_m.dma) & ~dec_tlu_core_ecc_disable; // Always check the ECC Hi/Lo for DMA since we don't align for DMA - assign is_ldst_hi_any = is_ldst_hi_m; - assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = 
dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]; - assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0]; - assign is_ldst_lo_any = is_ldst_lo_m; - assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]; - assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_hi_any = is_ldst_hi_m; + assign dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_hi_m[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_hi_m[pt.DCCM_ECC_WIDTH-1:0]; + assign is_ldst_lo_any = is_ldst_lo_m; + assign dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = dccm_rdata_lo_m[pt.DCCM_DATA_WIDTH-1:0]; + assign dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_data_ecc_lo_m[pt.DCCM_ECC_WIDTH-1:0]; - assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; - assign double_ecc_error_hi_m = double_ecc_error_hi_any; - assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; - assign double_ecc_error_lo_m = double_ecc_error_lo_any; + assign sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]; + assign double_ecc_error_hi_m = double_ecc_error_hi_any; + assign sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0] = sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]; + assign double_ecc_error_lo_m = double_ecc_error_lo_any; - assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any; - assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m; + assign lsu_single_ecc_error_m = single_ecc_error_hi_any | single_ecc_error_lo_any; + assign lsu_double_ecc_error_m = double_ecc_error_hi_m | double_ecc_error_lo_m; - // Flops - rvdff #(1) lsu_single_ecc_err_r (.din(lsu_single_ecc_error_m), .dout(lsu_single_ecc_error_r), .clk(lsu_c2_r_clk), .*); - rvdff #(1) lsu_double_ecc_err_r (.din(lsu_double_ecc_error_m), .dout(lsu_double_ecc_error_r), 
.clk(lsu_c2_r_clk), .*); - rvdff #(.WIDTH(1)) ldst_sec_lo_rff (.din(single_ecc_error_lo_any), .dout(single_ecc_error_lo_r), .clk(lsu_c2_r_clk), .*); - rvdff #(.WIDTH(1)) ldst_sec_hi_rff (.din(single_ecc_error_hi_any), .dout(single_ecc_error_hi_r), .clk(lsu_c2_r_clk), .*); - rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rff (.din(sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*); - rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rff (.din(sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .en(lsu_single_ecc_error_m | clk_override), .*); + // Flops + rvdff #(1) lsu_single_ecc_err_r ( + .din (lsu_single_ecc_error_m), + .dout(lsu_single_ecc_error_r), + .clk (lsu_c2_r_clk), + .* + ); + rvdff #(1) lsu_double_ecc_err_r ( + .din (lsu_double_ecc_error_m), + .dout(lsu_double_ecc_error_r), + .clk (lsu_c2_r_clk), + .* + ); + rvdff #( + .WIDTH(1) + ) ldst_sec_lo_rff ( + .din (single_ecc_error_lo_any), + .dout(single_ecc_error_lo_r), + .clk (lsu_c2_r_clk), + .* + ); + rvdff #( + .WIDTH(1) + ) ldst_sec_hi_rff ( + .din (single_ecc_error_hi_any), + .dout(single_ecc_error_hi_r), + .clk (lsu_c2_r_clk), + .* + ); + rvdffe #( + .WIDTH(pt.DCCM_DATA_WIDTH) + ) sec_data_hi_rff ( + .din (sec_data_hi_m[pt.DCCM_DATA_WIDTH-1:0]), + .dout(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), + .en (lsu_single_ecc_error_m | clk_override), + .* + ); + rvdffe #( + .WIDTH(pt.DCCM_DATA_WIDTH) + ) sec_data_lo_rff ( + .din (sec_data_lo_m[pt.DCCM_DATA_WIDTH-1:0]), + .dout(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), + .en (lsu_single_ecc_error_m | clk_override), + .* + ); - end + end - // Logic for ECC generation during write - assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? 
dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]); - assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0); + // Logic for ECC generation during write + assign dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_lo[pt.DCCM_DATA_WIDTH-1:0] : stbuf_data_any[pt.DCCM_DATA_WIDTH-1:0]); + assign dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0] = ld_single_ecc_error_r_ff ? sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0] : (dma_dccm_wen ? dma_dccm_wdata_hi[pt.DCCM_DATA_WIDTH-1:0] : 32'h0); - assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; - assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; - assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; - assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; - assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + assign sec_data_ecc_hi_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; + assign sec_data_ecc_lo_r_ff[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + assign stbuf_ecc_any[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; + assign dma_dccm_wdata_ecc_hi[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]; + assign dma_dccm_wdata_ecc_lo[pt.DCCM_ECC_WIDTH-1:0] = dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]; - // Instantiate ECC blocks - if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable + // Instantiate ECC blocks + if (pt.DCCM_ENABLE == 1) begin : Gen_dccm_enable - //Detect/Repair for Hi - rvecc_decode lsu_ecc_decode_hi ( - // Inputs - .en(is_ldst_hi_any), - .sed_ded 
(1'b0), // 1 : means only detection - .din(dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), - .ecc_in(dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), - // Outputs - .dout(sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]), - .ecc_out (ecc_out_hi_nc[6:0]), - .single_ecc_error(single_ecc_error_hi_any), - .double_ecc_error(double_ecc_error_hi_any), - .* - ); + //Detect/Repair for Hi + rvecc_decode lsu_ecc_decode_hi ( + // Inputs + .en(is_ldst_hi_any), + .sed_ded(1'b0), // 1 : means only detection + .din(dccm_rdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_in(dccm_data_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), + // Outputs + .dout(sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_out(ecc_out_hi_nc[6:0]), + .single_ecc_error(single_ecc_error_hi_any), + .double_ecc_error(double_ecc_error_hi_any), + .* + ); - //Detect/Repair for Lo - rvecc_decode lsu_ecc_decode_lo ( - // Inputs - .en(is_ldst_lo_any), - .sed_ded (1'b0), // 1 : means only detection - .din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0] ), - .ecc_in(dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), - // Outputs - .dout(sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]), - .ecc_out (ecc_out_lo_nc[6:0]), - .single_ecc_error(single_ecc_error_lo_any), - .double_ecc_error(double_ecc_error_lo_any), - .* - ); + //Detect/Repair for Lo + rvecc_decode lsu_ecc_decode_lo ( + // Inputs + .en(is_ldst_lo_any), + .sed_ded(1'b0), // 1 : means only detection + .din(dccm_rdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_in(dccm_data_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), + // Outputs + .dout(sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0]), + .ecc_out(ecc_out_lo_nc[6:0]), + .single_ecc_error(single_ecc_error_lo_any), + .double_ecc_error(double_ecc_error_lo_any), + .* + ); - rvecc_encode lsu_ecc_encode_hi ( - //Inputs - .din(dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), - //Outputs - .ecc_out(dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), - .* - ); - rvecc_encode lsu_ecc_encode_lo ( - //Inputs - .din(dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]), - //Outputs - 
.ecc_out(dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), - .* - ); - end else begin: Gen_dccm_disable // block: Gen_dccm_enable - assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0; - assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0; - assign single_ecc_error_hi_any = '0; - assign double_ecc_error_hi_any = '0; - assign single_ecc_error_lo_any = '0; - assign double_ecc_error_lo_any = '0; - end + rvecc_encode lsu_ecc_encode_hi ( + //Inputs + .din(dccm_wdata_hi_any[pt.DCCM_DATA_WIDTH-1:0]), + //Outputs + .ecc_out(dccm_wdata_ecc_hi_any[pt.DCCM_ECC_WIDTH-1:0]), + .* + ); + rvecc_encode lsu_ecc_encode_lo ( + //Inputs + .din(dccm_wdata_lo_any[pt.DCCM_DATA_WIDTH-1:0]), + //Outputs + .ecc_out(dccm_wdata_ecc_lo_any[pt.DCCM_ECC_WIDTH-1:0]), + .* + ); + end else begin : Gen_dccm_disable // block: Gen_dccm_enable + assign sec_data_hi_any[pt.DCCM_DATA_WIDTH-1:0] = '0; + assign sec_data_lo_any[pt.DCCM_DATA_WIDTH-1:0] = '0; + assign single_ecc_error_hi_any = '0; + assign double_ecc_error_hi_any = '0; + assign single_ecc_error_lo_any = '0; + assign double_ecc_error_lo_any = '0; + end - rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_hi_rplus1ff (.din(sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*); - rvdffe #(.WIDTH(pt.DCCM_DATA_WIDTH)) sec_data_lo_rplus1ff (.din(sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), .en(ld_single_ecc_error_r | clk_override), .clk(clk), .*); + rvdffe #( + .WIDTH(pt.DCCM_DATA_WIDTH) + ) sec_data_hi_rplus1ff ( + .din (sec_data_hi_r[pt.DCCM_DATA_WIDTH-1:0]), + .dout(sec_data_hi_r_ff[pt.DCCM_DATA_WIDTH-1:0]), + .en (ld_single_ecc_error_r | clk_override), + .clk (clk), + .* + ); + rvdffe #( + .WIDTH(pt.DCCM_DATA_WIDTH) + ) sec_data_lo_rplus1ff ( + .din (sec_data_lo_r[pt.DCCM_DATA_WIDTH-1:0]), + .dout(sec_data_lo_r_ff[pt.DCCM_DATA_WIDTH-1:0]), + .en (ld_single_ecc_error_r | clk_override), + .clk (clk), + .* + ); -endmodule 
// el2_lsu_ecc +endmodule // el2_lsu_ecc diff --git a/Flow/design/lsu/el2_lsu_lsc_ctl.sv b/Flow/design/lsu/el2_lsu_lsc_ctl.sv index d9aeb1f..98a3ecf 100644 --- a/Flow/design/lsu/el2_lsu_lsc_ctl.sv +++ b/Flow/design/lsu/el2_lsu_lsc_ctl.sv @@ -26,316 +26,483 @@ // //******************************************************************************** module el2_lsu_lsc_ctl -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - )( - input logic rst_l, // reset, active low - input logic clk_override, // Override non-functional clock gating - input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. + `include "el2_param.vh" +) ( + input logic rst_l, // reset, active low + input logic clk_override, // Override non-functional clock gating + input logic clk, // Clock only while core active. Through one clock header. For flops with second clock header built in. Connected to ACTIVE_L2CLK. - // clocks per pipe - input logic lsu_c1_m_clk, - input logic lsu_c1_r_clk, - input logic lsu_c2_m_clk, - input logic lsu_c2_r_clk, - input logic lsu_store_c1_m_clk, + // clocks per pipe + input logic lsu_c1_m_clk, + input logic lsu_c1_r_clk, + input logic lsu_c2_m_clk, + input logic lsu_c2_r_clk, + input logic lsu_store_c1_m_clk, - input logic [31:0] lsu_ld_data_r, // Load data R-stage - input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage - input logic lsu_single_ecc_error_r, // ECC single bit error R-stage - input logic lsu_double_ecc_error_r, // ECC double bit error R-stage + input logic [31:0] lsu_ld_data_r, // Load data R-stage + input logic [31:0] lsu_ld_data_corr_r, // ECC corrected data R-stage + input logic lsu_single_ecc_error_r, // ECC single bit error R-stage + input logic lsu_double_ecc_error_r, // ECC double bit error R-stage - input logic [31:0] lsu_ld_data_m, // Load data M-stage - input logic lsu_single_ecc_error_m, // ECC single bit error M-stage - input 
logic lsu_double_ecc_error_m, // ECC double bit error M-stage + input logic [31:0] lsu_ld_data_m, // Load data M-stage + input logic lsu_single_ecc_error_m, // ECC single bit error M-stage + input logic lsu_double_ecc_error_m, // ECC double bit error M-stage - input logic flush_m_up, // Flush M and D stage - input logic flush_r, // Flush R-stage - input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage - input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage - input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage + input logic flush_m_up, // Flush M and D stage + input logic flush_r, // Flush R-stage + input logic ldst_dual_d, // load/store is unaligned at 32 bit boundary D-stage + input logic ldst_dual_m, // load/store is unaligned at 32 bit boundary M-stage + input logic ldst_dual_r, // load/store is unaligned at 32 bit boundary R-stage - input logic [31:0] exu_lsu_rs1_d, // address - input logic [31:0] exu_lsu_rs2_d, // store data + input logic [31:0] exu_lsu_rs1_d, // address + input logic [31:0] exu_lsu_rs2_d, // store data - input el2_lsu_pkt_t lsu_p, // lsu control packet - input logic dec_lsu_valid_raw_d, // Raw valid for address computation - input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses + input el2_lsu_pkt_t lsu_p, // lsu control packet + input logic dec_lsu_valid_raw_d, // Raw valid for address computation + input logic [11:0] dec_lsu_offset_d, // 12b offset for load/store addresses - input logic [31:0] picm_mask_data_m, // PIC data M-stage - input logic [31:0] bus_read_data_m, // the bus return data - output logic [31:0] lsu_result_m, // lsu load data - output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF - // lsu address down the pipe - output logic [31:0] lsu_addr_d, - output logic [31:0] lsu_addr_m, - output logic [31:0] lsu_addr_r, - // lsu address down the pipe - needed to check unaligned - output logic [31:0] 
end_addr_d, - output logic [31:0] end_addr_m, - output logic [31:0] end_addr_r, - // store data down the pipe - output logic [31:0] store_data_m, + input logic [31:0] picm_mask_data_m, // PIC data M-stage + input logic [31:0] bus_read_data_m, // the bus return data + output logic [31:0] lsu_result_m, // lsu load data + output logic [31:0] lsu_result_corr_r, // This is the ECC corrected data going to RF + // lsu address down the pipe + output logic [31:0] lsu_addr_d, + output logic [31:0] lsu_addr_m, + output logic [31:0] lsu_addr_r, + // lsu address down the pipe - needed to check unaligned + output logic [31:0] end_addr_d, + output logic [31:0] end_addr_m, + output logic [31:0] end_addr_r, + // store data down the pipe + output logic [31:0] store_data_m, - input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control - output logic lsu_exc_m, // Access or misaligned fault - output logic is_sideeffects_m, // is sideffects space - output logic lsu_commit_r, // lsu instruction in r commits - output logic lsu_single_ecc_error_incr,// LSU inc SB error counter - output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet + input logic [31:0] dec_tlu_mrac_ff, // CSR for memory region control + output logic lsu_exc_m, // Access or misaligned fault + output logic is_sideeffects_m, // is sideffects space + output logic lsu_commit_r, // lsu instruction in r commits + output logic lsu_single_ecc_error_incr, // LSU inc SB error counter + output el2_lsu_error_pkt_t lsu_error_pkt_r, // lsu exception packet - output logic [31:1] lsu_fir_addr, // fast interrupt address - output logic [1:0] lsu_fir_error, // Error during fast interrupt lookup + output logic [31:1] lsu_fir_addr, // fast interrupt address + output logic [ 1:0] lsu_fir_error, // Error during fast interrupt lookup - // address in dccm/pic/external per pipe stage - output logic addr_in_dccm_d, - output logic addr_in_dccm_m, - output logic addr_in_dccm_r, + // address in dccm/pic/external per pipe stage + 
output logic addr_in_dccm_d, + output logic addr_in_dccm_m, + output logic addr_in_dccm_r, - output logic addr_in_pic_d, - output logic addr_in_pic_m, - output logic addr_in_pic_r, + output logic addr_in_pic_d, + output logic addr_in_pic_m, + output logic addr_in_pic_r, - output logic addr_external_m, + output logic addr_external_m, - // DMA slave - input logic dma_dccm_req, - input logic [31:0] dma_mem_addr, - input logic [2:0] dma_mem_sz, - input logic dma_mem_write, - input logic [63:0] dma_mem_wdata, + // DMA slave + input logic dma_dccm_req, + input logic [31:0] dma_mem_addr, + input logic [ 2:0] dma_mem_sz, + input logic dma_mem_write, + input logic [63:0] dma_mem_wdata, - // Store buffer related signals - output el2_lsu_pkt_t lsu_pkt_d, - output el2_lsu_pkt_t lsu_pkt_m, - output el2_lsu_pkt_t lsu_pkt_r, + // Store buffer related signals + output el2_lsu_pkt_t lsu_pkt_d, + output el2_lsu_pkt_t lsu_pkt_m, + output el2_lsu_pkt_t lsu_pkt_r, - input logic scan_mode // Scan mode + input logic scan_mode // Scan mode - ); +); - logic [31:3] end_addr_pre_m, end_addr_pre_r; - logic [31:0] full_addr_d; - logic [31:0] full_end_addr_d; - logic [31:0] lsu_rs1_d; - logic [11:0] lsu_offset_d; - logic [31:0] rs1_d; - logic [11:0] offset_d; - logic [12:0] end_addr_offset_d; - logic [2:0] addr_offset_d; + logic [31:3] end_addr_pre_m, end_addr_pre_r; + logic [31:0] full_addr_d; + logic [31:0] full_end_addr_d; + logic [31:0] lsu_rs1_d; + logic [11:0] lsu_offset_d; + logic [31:0] rs1_d; + logic [11:0] offset_d; + logic [12:0] end_addr_offset_d; + logic [ 2:0] addr_offset_d; - logic [63:0] dma_mem_wdata_shifted; - logic addr_external_d; - logic addr_external_r; - logic access_fault_d, misaligned_fault_d; - logic access_fault_m, misaligned_fault_m; + logic [63:0] dma_mem_wdata_shifted; + logic addr_external_d; + logic addr_external_r; + logic access_fault_d, misaligned_fault_d; + logic access_fault_m, misaligned_fault_m; - logic fir_dccm_access_error_d, fir_nondccm_access_error_d; 
- logic fir_dccm_access_error_m, fir_nondccm_access_error_m; + logic fir_dccm_access_error_d, fir_nondccm_access_error_d; + logic fir_dccm_access_error_m, fir_nondccm_access_error_m; - logic [3:0] exc_mscause_d, exc_mscause_m; - logic [31:0] rs1_d_raw; - logic [31:0] store_data_d, store_data_pre_m, store_data_m_in; - logic [31:0] bus_read_data_r; + logic [3:0] exc_mscause_d, exc_mscause_m; + logic [31:0] rs1_d_raw; + logic [31:0] store_data_d, store_data_pre_m, store_data_m_in; + logic [31:0] bus_read_data_r; - el2_lsu_pkt_t dma_pkt_d; - el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in; - el2_lsu_error_pkt_t lsu_error_pkt_m; + el2_lsu_pkt_t dma_pkt_d; + el2_lsu_pkt_t lsu_pkt_m_in, lsu_pkt_r_in; + el2_lsu_error_pkt_t lsu_error_pkt_m; - // Premux the rs1/offset for dma - assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0]; - assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}}; - assign rs1_d_raw[31:0] = lsu_rs1_d[31:0]; - assign offset_d[11:0] = lsu_offset_d[11:0]; + // Premux the rs1/offset for dma + assign lsu_rs1_d[31:0] = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0]; + assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}}; + assign rs1_d_raw[31:0] = lsu_rs1_d[31:0]; + assign offset_d[11:0] = lsu_offset_d[11:0]; - assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0]; + assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? 
lsu_result_m[31:0] : rs1_d_raw[31:0]; - // generate the ls address - rvlsadder lsadder (.rs1(rs1_d[31:0]), - .offset(offset_d[11:0]), - .dout(full_addr_d[31:0]) - ); - - // Module to generate the memory map of the address - el2_lsu_addrcheck addrcheck ( - .start_addr_d(full_addr_d[31:0]), - .end_addr_d(full_end_addr_d[31:0]), - .rs1_region_d(rs1_d[31:28]), - .* + // generate the ls address + rvlsadder lsadder ( + .rs1(rs1_d[31:0]), + .offset(offset_d[11:0]), + .dout(full_addr_d[31:0]) ); - // Calculate start/end address for load/store - assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111); - assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]}; - assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]}; - assign end_addr_d[31:0] = full_end_addr_d[31:0]; - assign lsu_exc_m = access_fault_m | misaligned_fault_m; + // Module to generate the memory map of the address + el2_lsu_addrcheck addrcheck ( + .start_addr_d(full_addr_d[31:0]), + .end_addr_d (full_end_addr_d[31:0]), + .rs1_region_d(rs1_d[31:28]), + .* + ); - // Goes to TLU to increment the ECC error counter - assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid; + // Calculate start/end address for load/store + assign addr_offset_d[2:0] = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111); + assign end_addr_offset_d[12:0] = {offset_d[11], offset_d[11:0]} + {9'b0, addr_offset_d[2:0]}; + assign full_end_addr_d[31:0] = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]}; + assign end_addr_d[31:0] = full_end_addr_d[31:0]; + assign lsu_exc_m = access_fault_m | misaligned_fault_m; - if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1 - logic access_fault_r, misaligned_fault_r; - logic [3:0] exc_mscause_r; - logic 
fir_dccm_access_error_r, fir_nondccm_access_error_r; + // Goes to TLU to increment the ECC error counter + assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid; - // Generate exception packet - assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int; - assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma; - assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store; - assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r; - assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0]; - assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0]; + if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U_Plus1_1 + logic access_fault_r, misaligned_fault_r; + logic [3:0] exc_mscause_r; + logic fir_dccm_access_error_r, fir_nondccm_access_error_r; - assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00)); + // Generate exception packet + assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int; + assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma; + assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store; + assign lsu_error_pkt_r.exc_type = ~misaligned_fault_r; + assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 
4'h1 : exc_mscause_r[3:0]; + assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0]; - rvdff #(1) access_fault_rff (.din(access_fault_m), .dout(access_fault_r), .clk(lsu_c1_r_clk), .*); - rvdff #(1) misaligned_fault_rff (.din(misaligned_fault_m), .dout(misaligned_fault_r), .clk(lsu_c1_r_clk), .*); - rvdff #(4) exc_mscause_rff (.din(exc_mscause_m[3:0]), .dout(exc_mscause_r[3:0]), .clk(lsu_c1_r_clk), .*); - rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_m), .dout(fir_dccm_access_error_r), .clk(lsu_c1_r_clk), .*); - rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*); + assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00)); - end else begin: L2U_Plus1_0 - logic [1:0] lsu_fir_error_m; + rvdff #(1) access_fault_rff ( + .din (access_fault_m), + .dout(access_fault_r), + .clk (lsu_c1_r_clk), + .* + ); + rvdff #(1) misaligned_fault_rff ( + .din (misaligned_fault_m), + .dout(misaligned_fault_r), + .clk (lsu_c1_r_clk), + .* + ); + rvdff #(4) exc_mscause_rff ( + .din (exc_mscause_m[3:0]), + .dout(exc_mscause_r[3:0]), + .clk (lsu_c1_r_clk), + .* + ); + rvdff #(1) fir_dccm_access_error_mff ( + .din (fir_dccm_access_error_m), + .dout(fir_dccm_access_error_r), + .clk (lsu_c1_r_clk), + .* + ); + rvdff #(1) fir_nondccm_access_error_mff ( + .din (fir_nondccm_access_error_m), + .dout(fir_nondccm_access_error_r), + .clk (lsu_c1_r_clk), + .* + ); - // Generate exception packet - assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up; - assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma; - assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store; - assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m; - assign 
lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0]; - assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0]; + end else begin : L2U_Plus1_0 + logic [1:0] lsu_fir_error_m; - assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00)); + // Generate exception packet + assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up; + assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma; + assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store; + assign lsu_error_pkt_m.exc_type = ~misaligned_fault_m; + assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0]; + assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0]; - rvdff #(1) lsu_exc_valid_rff (.*, .din(lsu_error_pkt_m.exc_valid), .dout(lsu_error_pkt_r.exc_valid), .clk(lsu_c2_r_clk)); - rvdff #(1) lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error), .dout(lsu_error_pkt_r.single_ecc_error), .clk(lsu_c2_r_clk)); - rvdffe #($bits(el2_lsu_error_pkt_t)-2) lsu_error_pkt_rff (.*, .din(lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override)); - rvdff #(2) lsu_fir_error_rff (.*, .din(lsu_fir_error_m[1:0]), .dout(lsu_fir_error[1:0]), .clk(lsu_c2_r_clk)); - end + assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 
2'b01 : 2'b00)); - //Create DMA packet - always_comb begin - dma_pkt_d = '0; - dma_pkt_d.valid = dma_dccm_req; - dma_pkt_d.dma = 1'b1; - dma_pkt_d.store = dma_mem_write; - dma_pkt_d.load = ~dma_mem_write; - dma_pkt_d.by = (dma_mem_sz[2:0] == 3'b0); - dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1); - dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10); - dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11); - end + rvdff #(1) lsu_exc_valid_rff ( + .*, + .din (lsu_error_pkt_m.exc_valid), + .dout(lsu_error_pkt_r.exc_valid), + .clk (lsu_c2_r_clk) + ); + rvdff #(1) lsu_single_ecc_error_rff ( + .*, + .din (lsu_error_pkt_m.single_ecc_error), + .dout(lsu_error_pkt_r.single_ecc_error), + .clk (lsu_c2_r_clk) + ); + rvdffe #($bits( + el2_lsu_error_pkt_t + ) - 2) lsu_error_pkt_rff ( + .*, + .din (lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), + .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), + .en (lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override) + ); + rvdff #(2) lsu_fir_error_rff ( + .*, + .din (lsu_fir_error_m[1:0]), + .dout(lsu_fir_error[1:0]), + .clk (lsu_c2_r_clk) + ); + end - always_comb begin - lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d; - lsu_pkt_m_in = lsu_pkt_d; - lsu_pkt_r_in = lsu_pkt_m; + //Create DMA packet + always_comb begin + dma_pkt_d = '0; + dma_pkt_d.valid = dma_dccm_req; + dma_pkt_d.dma = 1'b1; + dma_pkt_d.store = dma_mem_write; + dma_pkt_d.load = ~dma_mem_write; + dma_pkt_d.by = (dma_mem_sz[2:0] == 3'b0); + dma_pkt_d.half = (dma_mem_sz[2:0] == 3'b1); + dma_pkt_d.word = (dma_mem_sz[2:0] == 3'b10); + dma_pkt_d.dword = (dma_mem_sz[2:0] == 3'b11); + end - lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req; - lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma); - lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ; - end + always_comb begin + lsu_pkt_d = dec_lsu_valid_raw_d ? 
lsu_p : dma_pkt_d; + lsu_pkt_m_in = lsu_pkt_d; + lsu_pkt_r_in = lsu_pkt_m; - // C2 clock for valid and C1 for other bits of packet - rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk)); - rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk)); + lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req; + lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma); + lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma); + end - rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk)); - rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk)); + // C2 clock for valid and C1 for other bits of packet + rvdff #(1) lsu_pkt_vldmff ( + .*, + .din (lsu_pkt_m_in.valid), + .dout(lsu_pkt_m.valid), + .clk (lsu_c2_m_clk) + ); + rvdff #(1) lsu_pkt_vldrff ( + .*, + .din (lsu_pkt_r_in.valid), + .dout(lsu_pkt_r.valid), + .clk (lsu_c2_r_clk) + ); + + rvdff #($bits( + el2_lsu_pkt_t + ) - 1) lsu_pkt_mff ( + .*, + .din (lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), + .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), + .clk (lsu_c1_m_clk) + ); + rvdff #($bits( + el2_lsu_pkt_t + ) - 1) lsu_pkt_rff ( + .*, + .din (lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), + .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), + .clk (lsu_c1_r_clk) + ); - if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1 - logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r; + if (pt.LOAD_TO_USE_PLUS1 == 1) begin : L2U1_Plus1_1 + logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r; - assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0]; - assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? 
bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0]; + assign lsu_ld_datafn_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0]; + assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0]; - // this is really R stage signal - assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_r[7:0]}) | + // this is really R stage signal + assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_r[7:0]}) | ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) | ({32{lsu_pkt_r.word}} & lsu_ld_datafn_r[31:0]); - // this signal is used for gpr update - assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) | + // this signal is used for gpr update + assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) | ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) | ({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]); - end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1 - logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r; + end else begin : L2U1_Plus1_0 // block: L2U1_Plus1_1 + logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r; - assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0]; - assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0]; + assign lsu_ld_datafn_m[31:0] = addr_external_m ? 
bus_read_data_m[31:0] : lsu_ld_data_m[31:0]; + assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0]; - // this result must look at prior stores and merge them in - assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by }} & {24'b0,lsu_ld_datafn_m[7:0]}) | + // this result must look at prior stores and merge them in + assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by }} & {24'b0,lsu_ld_datafn_m[7:0]}) | ({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) | ({32{~lsu_pkt_m.unsign & lsu_pkt_m.by }} & {{24{ lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) | ({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{ lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) | ({32{lsu_pkt_m.word}} & lsu_ld_datafn_m[31:0]); - // this signal is used for gpr update - assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) | + // this signal is used for gpr update + assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) | ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by }} & {{24{ lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) | ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{ lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) | ({32{lsu_pkt_r.word}} & lsu_ld_datafn_corr_r[31:0]); - end + end - // Fast interrupt address - assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1]; + // Fast interrupt address + assign lsu_fir_addr[31:1] = lsu_ld_data_corr_r[31:1]; - // absence load/store all 0's - assign lsu_addr_d[31:0] = full_addr_d[31:0]; + // absence load/store all 0's + assign lsu_addr_d[31:0] = full_addr_d[31:0]; - // Interrupt as a flush source allows the WB to occur - assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma; + // Interrupt as a flush source 
allows the WB to occur + assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma; - assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores - assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage + assign dma_mem_wdata_shifted[63:0] = dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000}; // Shift the dma data to lower bits to make it consistent to lsu stores + assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0]; // Write to PIC still happens in r stage - assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0]; + assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0]; - assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]); + assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]); - rvdff #(32) sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]), .clk(lsu_store_c1_m_clk)); + rvdff #(32) sdmff ( + .*, + .din (store_data_m_in[31:0]), + .dout(store_data_pre_m[31:0]), + .clk (lsu_store_c1_m_clk) + ); - rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk)); - rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk)); + rvdff #(32) samff ( + .*, + .din (lsu_addr_d[31:0]), + .dout(lsu_addr_m[31:0]), + .clk (lsu_c1_m_clk) + ); + rvdff #(32) sarff ( + .*, + .din (lsu_addr_m[31:0]), + .dout(lsu_addr_r[31:0]), + .clk (lsu_c1_r_clk) + ); - assign end_addr_m[31:3] = ldst_dual_m ? 
end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving - assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving + assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3]; // This is for power saving + assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3]; // This is for power saving - rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override)); - rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override)); + rvdffe #(29) end_addr_hi_mff ( + .*, + .din (end_addr_d[31:3]), + .dout(end_addr_pre_m[31:3]), + .en ((lsu_pkt_d.valid & ldst_dual_d) | clk_override) + ); + rvdffe #(29) end_addr_hi_rff ( + .*, + .din (end_addr_m[31:3]), + .dout(end_addr_pre_r[31:3]), + .en ((lsu_pkt_m.valid & ldst_dual_m) | clk_override) + ); - rvdff #(3) end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk)); - rvdff #(3) end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk)); + rvdff #(3) end_addr_lo_mff ( + .*, + .din (end_addr_d[2:0]), + .dout(end_addr_m[2:0]), + .clk (lsu_c1_m_clk) + ); + rvdff #(3) end_addr_lo_rff ( + .*, + .din (end_addr_m[2:0]), + .dout(end_addr_r[2:0]), + .clk (lsu_c1_r_clk) + ); - rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*); - rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*); + rvdff #(1) addr_in_dccm_mff ( + .din (addr_in_dccm_d), + .dout(addr_in_dccm_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(1) addr_in_dccm_rff ( + .din (addr_in_dccm_m), + .dout(addr_in_dccm_r), + .clk (lsu_c1_r_clk), + .* + ); - rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*); - rvdff #(1) 
addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*); + rvdff #(1) addr_in_pic_mff ( + .din (addr_in_pic_d), + .dout(addr_in_pic_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(1) addr_in_pic_rff ( + .din (addr_in_pic_m), + .dout(addr_in_pic_r), + .clk (lsu_c1_r_clk), + .* + ); - rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*); - rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*); + rvdff #(1) addr_external_mff ( + .din (addr_external_d), + .dout(addr_external_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(1) addr_external_rff ( + .din (addr_external_m), + .dout(addr_external_r), + .clk (lsu_c1_r_clk), + .* + ); - rvdff #(1) access_fault_mff (.din(access_fault_d), .dout(access_fault_m), .clk(lsu_c1_m_clk), .*); - rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*); - rvdff #(4) exc_mscause_mff (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*); + rvdff #(1) access_fault_mff ( + .din (access_fault_d), + .dout(access_fault_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(1) misaligned_fault_mff ( + .din (misaligned_fault_d), + .dout(misaligned_fault_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(4) exc_mscause_mff ( + .din (exc_mscause_d[3:0]), + .dout(exc_mscause_m[3:0]), + .clk (lsu_c1_m_clk), + .* + ); - rvdff #(1) fir_dccm_access_error_mff (.din(fir_dccm_access_error_d), .dout(fir_dccm_access_error_m), .clk(lsu_c1_m_clk), .*); - rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*); + rvdff #(1) fir_dccm_access_error_mff ( + .din (fir_dccm_access_error_d), + .dout(fir_dccm_access_error_m), + .clk (lsu_c1_m_clk), + .* + ); + rvdff #(1) fir_nondccm_access_error_mff ( + .din (fir_nondccm_access_error_d), + .dout(fir_nondccm_access_error_m), + .clk (lsu_c1_m_clk), + .* + ); - 
rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override)); + rvdffe #(32) bus_read_data_r_ff ( + .*, + .din (bus_read_data_m[31:0]), + .dout(bus_read_data_r[31:0]), + .en (addr_external_m | clk_override) + ); endmodule diff --git a/Flow/design/lsu/el2_lsu_stbuf.sv b/Flow/design/lsu/el2_lsu_stbuf.sv index db62520..69bbd14 100644 --- a/Flow/design/lsu/el2_lsu_stbuf.sv +++ b/Flow/design/lsu/el2_lsu_stbuf.sv @@ -28,313 +28,392 @@ module el2_lsu_stbuf -import el2_pkg::*; + import el2_pkg::*; #( -`include "el2_param.vh" - ) -( - input logic clk, // core clock - input logic rst_l, // reset + `include "el2_param.vh" +) ( + input logic clk, // core clock + input logic rst_l, // reset - input logic lsu_stbuf_c1_clk, // stbuf clock - input logic lsu_free_c2_clk, // free clk + input logic lsu_stbuf_c1_clk, // stbuf clock + input logic lsu_free_c2_clk, // free clk - // Store Buffer input - input logic store_stbuf_reqvld_r, // core instruction goes to stbuf - input logic lsu_commit_r, // lsu commits - input logic dec_lsu_valid_raw_d, // Speculative decode valid - input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, // merged data from the dccm for stores. This is used for fwding - input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, // merged data from the dccm for stores. This is used for fwding - input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // merged data from the dccm for stores - input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // merged data from the dccm for stores + // Store Buffer input + input logic store_stbuf_reqvld_r, // core instruction goes to stbuf + input logic lsu_commit_r, // lsu commits + input logic dec_lsu_valid_raw_d, // Speculative decode valid + input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_hi_r, // merged data from the dccm for stores. This is used for fwding + input logic [pt.DCCM_DATA_WIDTH-1:0] store_data_lo_r, // merged data from the dccm for stores. 
This is used for fwding + input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_hi_r, // merged data from the dccm for stores + input logic [pt.DCCM_DATA_WIDTH-1:0] store_datafn_lo_r, // merged data from the dccm for stores - // Store Buffer output - output logic stbuf_reqvld_any, // stbuf is draining - output logic stbuf_reqvld_flushed_any, // Top entry is flushed - output logic [pt.LSU_SB_BITS-1:0] stbuf_addr_any, // address - output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data + // Store Buffer output + output logic stbuf_reqvld_any, // stbuf is draining + output logic stbuf_reqvld_flushed_any, // Top entry is flushed + output logic [ pt.LSU_SB_BITS-1:0] stbuf_addr_any, // address + output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any, // stbuf data - input logic lsu_stbuf_commit_any, // pop the stbuf as it commite - output logic lsu_stbuf_full_any, // stbuf is full - output logic lsu_stbuf_empty_any, // stbuf is empty - output logic ldst_stbuf_reqvld_r, // needed for clocking + input logic lsu_stbuf_commit_any, // pop the stbuf as it commite + output logic lsu_stbuf_full_any, // stbuf is full + output logic lsu_stbuf_empty_any, // stbuf is empty + output logic ldst_stbuf_reqvld_r, // needed for clocking - input logic [pt.LSU_SB_BITS-1:0] lsu_addr_d, // lsu address D-stage - input logic [31:0] lsu_addr_m, // lsu address M-stage - input logic [31:0] lsu_addr_r, // lsu address R-stage + input logic [pt.LSU_SB_BITS-1:0] lsu_addr_d, // lsu address D-stage + input logic [ 31:0] lsu_addr_m, // lsu address M-stage + input logic [ 31:0] lsu_addr_r, // lsu address R-stage - input logic [pt.LSU_SB_BITS-1:0] end_addr_d, // lsu end address D-stage - needed to check unaligned - input logic [31:0] end_addr_m, // lsu end address M-stage - needed to check unaligned - input logic [31:0] end_addr_r, // lsu end address R-stage - needed to check unaligned + input logic [pt.LSU_SB_BITS-1:0] end_addr_d, // lsu end address D-stage - needed to check unaligned + input logic 
[31:0] end_addr_m, // lsu end address M-stage - needed to check unaligned + input logic [31:0] end_addr_r, // lsu end address R-stage - needed to check unaligned - input logic ldst_dual_d, ldst_dual_m, ldst_dual_r, - input logic addr_in_dccm_m, // address is in dccm - input logic addr_in_dccm_r, // address is in dccm + input logic ldst_dual_d, + ldst_dual_m, + ldst_dual_r, + input logic addr_in_dccm_m, // address is in dccm + input logic addr_in_dccm_r, // address is in dccm - // Forwarding signals - input logic lsu_cmpen_m, // needed for forwarding stbuf - load - input el2_lsu_pkt_t lsu_pkt_m, // LSU packet M-stage - input el2_lsu_pkt_t lsu_pkt_r, // LSU packet R-stage + // Forwarding signals + input logic lsu_cmpen_m, // needed for forwarding stbuf - load + input el2_lsu_pkt_t lsu_pkt_m, // LSU packet M-stage + input el2_lsu_pkt_t lsu_pkt_r, // LSU packet R-stage - output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf data - output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf data - output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf data - output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf data + output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m, // stbuf data + output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m, // stbuf data + output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m, // stbuf data + output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m, // stbuf data - input logic scan_mode // Scan mode + input logic scan_mode // Scan mode ); - localparam DEPTH = pt.LSU_STBUF_DEPTH; - localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH; - localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH; - localparam DEPTH_LOG2 = $clog2(DEPTH); + localparam DEPTH = pt.LSU_STBUF_DEPTH; + localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH; + localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH; + localparam DEPTH_LOG2 = $clog2(DEPTH); - // These are the fields in the store queue - logic [DEPTH-1:0] stbuf_vld; - logic [DEPTH-1:0] 
stbuf_dma_kill; - logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr; - logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteen; - logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_data; + // These are the fields in the store queue + logic [ DEPTH-1:0] stbuf_vld; + logic [ DEPTH-1:0] stbuf_dma_kill; + logic [ DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr; + logic [ DEPTH-1:0][ BYTE_WIDTH-1:0] stbuf_byteen; + logic [ DEPTH-1:0][ DATA_WIDTH-1:0] stbuf_data; - logic [DEPTH-1:0] sel_lo; - logic [DEPTH-1:0] stbuf_wr_en; - logic [DEPTH-1:0] stbuf_dma_kill_en; - logic [DEPTH-1:0] stbuf_reset; - logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin; - logic [DEPTH-1:0][DATA_WIDTH-1:0] stbuf_datain; - logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_byteenin; + logic [ DEPTH-1:0] sel_lo; + logic [ DEPTH-1:0] stbuf_wr_en; + logic [ DEPTH-1:0] stbuf_dma_kill_en; + logic [ DEPTH-1:0] stbuf_reset; + logic [ DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin; + logic [ DEPTH-1:0][ DATA_WIDTH-1:0] stbuf_datain; + logic [ DEPTH-1:0][ BYTE_WIDTH-1:0] stbuf_byteenin; - logic [7:0] store_byteen_ext_r; - logic [BYTE_WIDTH-1:0] store_byteen_hi_r; - logic [BYTE_WIDTH-1:0] store_byteen_lo_r; + logic [ 7:0] store_byteen_ext_r; + logic [BYTE_WIDTH-1:0] store_byteen_hi_r; + logic [BYTE_WIDTH-1:0] store_byteen_lo_r; - logic WrPtrEn, RdPtrEn; - logic [DEPTH_LOG2-1:0] WrPtr, RdPtr; - logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr; - logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus2, RdPtrPlus1; + logic WrPtrEn, RdPtrEn; + logic [DEPTH_LOG2-1:0] WrPtr, RdPtr; + logic [DEPTH_LOG2-1:0] NxtWrPtr, NxtRdPtr; + logic [DEPTH_LOG2-1:0] WrPtrPlus1, WrPtrPlus2, RdPtrPlus1; - logic dual_stbuf_write_r; + logic dual_stbuf_write_r; - logic isdccmst_m, isdccmst_r; - logic [3:0] stbuf_numvld_any, stbuf_specvld_any; - logic [1:0] stbuf_specvld_m, stbuf_specvld_r; + logic isdccmst_m, isdccmst_r; + logic [3:0] stbuf_numvld_any, stbuf_specvld_any; + logic [1:0] stbuf_specvld_m, stbuf_specvld_r; - logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, 
cmpaddr_lo_m; + logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m; - // variables to detect matching from the store queue - logic [DEPTH-1:0] stbuf_match_hi, stbuf_match_lo; - logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo; - logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m; - logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m; + // variables to detect matching from the store queue + logic [DEPTH-1:0] stbuf_match_hi, stbuf_match_lo; + logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo; + logic [DATA_WIDTH-1:0] stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m; + logic [BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m; - // logic to detect matching from the pipe - needed for store - load forwarding - logic [BYTE_WIDTH-1:0] ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi; - logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi; + // logic to detect matching from the pipe - needed for store - load forwarding + logic [BYTE_WIDTH-1:0] + ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi; + logic ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi; - logic [BYTE_WIDTH-1:0] ld_byte_hit_lo, ld_byte_rhit_lo; - logic [BYTE_WIDTH-1:0] ld_byte_hit_hi, ld_byte_rhit_hi; + logic [BYTE_WIDTH-1:0] ld_byte_hit_lo, ld_byte_rhit_lo; + logic [BYTE_WIDTH-1:0] ld_byte_hit_hi, ld_byte_rhit_hi; - logic [BYTE_WIDTH-1:0] ldst_byteen_hi_r; - logic [BYTE_WIDTH-1:0] ldst_byteen_lo_r; - // byte_en flowing down - logic [7:0] ldst_byteen_r; - logic [7:0] ldst_byteen_ext_r; - // fwd data through the pipe - logic [31:0] ld_fwddata_rpipe_lo; - logic [31:0] ld_fwddata_rpipe_hi; + logic [BYTE_WIDTH-1:0] ldst_byteen_hi_r; + logic [BYTE_WIDTH-1:0] ldst_byteen_lo_r; + // byte_en flowing down + logic [ 7:0] ldst_byteen_r; + logic [ 7:0] 
ldst_byteen_ext_r; + // fwd data through the pipe + logic [ 31:0] ld_fwddata_rpipe_lo; + logic [ 31:0] ld_fwddata_rpipe_hi; - // coalescing signals - logic [DEPTH-1:0] store_matchvec_lo_r, store_matchvec_hi_r; - logic store_coalesce_lo_r, store_coalesce_hi_r; + // coalescing signals + logic [DEPTH-1:0] store_matchvec_lo_r, store_matchvec_hi_r; + logic store_coalesce_lo_r, store_coalesce_hi_r; - //---------------------------------------- - // Logic starts here - //---------------------------------------- - // Create high/low byte enables - assign store_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0]; - assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}}; - assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}}; + //---------------------------------------- + // Logic starts here + //---------------------------------------- + // Create high/low byte enables + assign store_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0]; + assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}}; + assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}}; - assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1; - assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1; - assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10; + assign RdPtrPlus1[DEPTH_LOG2-1:0] = RdPtr[DEPTH_LOG2-1:0] + 1'b1; + assign WrPtrPlus1[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 1'b1; + assign WrPtrPlus2[DEPTH_LOG2-1:0] = WrPtr[DEPTH_LOG2-1:0] + 2'b10; - // ecc error on both hi/lo - assign dual_stbuf_write_r = ldst_dual_r & store_stbuf_reqvld_r; - assign ldst_stbuf_reqvld_r = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r); + // ecc error on both hi/lo + assign dual_stbuf_write_r = ldst_dual_r & store_stbuf_reqvld_r; + assign ldst_stbuf_reqvld_r = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r); // Store 
Buffer coalescing - for (genvar i=0; i= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1)); + assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0); + + // Load forwarding logic from the store queue + assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2( + BYTE_WIDTH + )] = end_addr_m[pt.LSU_SB_BITS-1:$clog2( + BYTE_WIDTH + )]; + + assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2( + BYTE_WIDTH + )] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2( + BYTE_WIDTH + )]; + + always_comb begin : GenLdFwd + stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0] = '0; + stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0] = '0; + + for (int i = 0; i < DEPTH; i++) begin + stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == + cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & + stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m; + stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == + cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & + stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m; + + // Kill the store buffer entry if there is a dma store since it already updated the dccm + stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store; + + for (int j = 0; j < BYTE_WIDTH; j++) begin + stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i]; + stbuf_fwdbyteen_hi_pre_m[j] |= stbuf_fwdbyteenvec_hi[i][j]; + + stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i]; + stbuf_fwdbyteen_lo_pre_m[j] |= stbuf_fwdbyteenvec_lo[i][j]; end - end + end + end // block: GenLdFwd - // These go to store buffer to detect full - assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma; - assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma; + always_comb begin : GenLdData + stbuf_fwddata_hi_pre_m[31:0] = '0; + stbuf_fwddata_lo_pre_m[31:0] = '0; - assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & 
ldst_dual_m); - assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r); - assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] + {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]}; + for (int i = 0; i < DEPTH; i++) begin + stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0]; + stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0]; - assign lsu_stbuf_full_any = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1)); - assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0); + end - // Load forwarding logic from the store queue - assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]; + end // block: GenLdData - assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]; - - always_comb begin: GenLdFwd - stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0] = '0; - stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0] = '0; - - for (int i=0; i